decode.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579
  1. package decode
  2. import (
  3. "errors"
  4. "os"
  5. "path/filepath"
  6. "regexp"
  7. "strconv"
  8. "strings"
  9. "time"
  10. "github.com/allanpk716/ChineseSubFinder/pkg/types"
  11. "github.com/allanpk716/ChineseSubFinder/pkg/types/common"
  12. "github.com/beevik/etree"
  13. PTN "github.com/middelink/go-parse-torrent-name"
  14. )
  15. func getVideoNfoInfoFromMovieXml(movieFilePath string) (types.VideoNfoInfo, error) {
  16. videoInfo := types.VideoNfoInfo{}
  17. doc := etree.NewDocument()
  18. doc.ReadSettings.Permissive = true
  19. if err := doc.ReadFromFile(movieFilePath); err != nil {
  20. return videoInfo, err
  21. }
  22. // --------------------------------------------------
  23. // IMDB
  24. for _, t := range doc.FindElements("//imdb") {
  25. videoInfo.ImdbId = t.Text()
  26. break
  27. }
  28. for _, t := range doc.FindElements("//IMDB") {
  29. videoInfo.ImdbId = t.Text()
  30. break
  31. }
  32. for _, t := range doc.FindElements("//Imdb") {
  33. videoInfo.ImdbId = t.Text()
  34. break
  35. }
  36. // --------------------------------------------------
  37. // TMDB
  38. for _, t := range doc.FindElements("//tmdb") {
  39. videoInfo.TmdbId = t.Text()
  40. break
  41. }
  42. for _, t := range doc.FindElements("//TMDB") {
  43. videoInfo.TmdbId = t.Text()
  44. break
  45. }
  46. for _, t := range doc.FindElements("//Tmdb") {
  47. videoInfo.TmdbId = t.Text()
  48. break
  49. }
  50. // --------------------------------------------------
  51. for _, t := range doc.FindElements("//ProductionYear") {
  52. videoInfo.Year = t.Text()
  53. break
  54. }
  55. //if videoInfo.ImdbId != "" {
  56. // return videoInfo, nil
  57. //}
  58. return videoInfo, nil
  59. }
  60. func getVideoNfoInfo(nfoFilePath string, rootKey string) (types.VideoNfoInfo, error) {
  61. imdbInfo := types.VideoNfoInfo{}
  62. doc := etree.NewDocument()
  63. doc.ReadSettings.Permissive = true
  64. // 这里会遇到一个梗,下面的关键词,可能是小写、大写、首字母大写
  65. // 读取文件转换为全部的小写,然后在解析 xml ? etree 在转换为小写后,某些类型的文件的内容会崩溃···
  66. // 所以这里很傻的方式解决
  67. err := doc.ReadFromFile(nfoFilePath)
  68. if err != nil {
  69. return imdbInfo, err
  70. }
  71. for _, t := range doc.FindElements("./" + rootKey + "/title") {
  72. imdbInfo.Title = t.Text()
  73. break
  74. }
  75. //---------------------------------------------------------------------
  76. // IMDB
  77. for _, t := range doc.FindElements("./" + rootKey + "/imdbid") {
  78. imdbInfo.ImdbId = t.Text()
  79. break
  80. }
  81. for _, t := range doc.FindElements("./" + rootKey + "/imdb_id") {
  82. imdbInfo.ImdbId = t.Text()
  83. break
  84. }
  85. for _, t := range doc.FindElements("//uniqueid[@type='imdb']") {
  86. imdbInfo.ImdbId = t.Text()
  87. break
  88. }
  89. for _, t := range doc.FindElements("//uniqueid[@type='Imdb']") {
  90. imdbInfo.ImdbId = t.Text()
  91. break
  92. }
  93. for _, t := range doc.FindElements("//uniqueid[@type='IMDB']") {
  94. imdbInfo.ImdbId = t.Text()
  95. break
  96. }
  97. //---------------------------------------------------------------------
  98. // TMDB
  99. for _, t := range doc.FindElements("./" + rootKey + "/tmdbid") {
  100. imdbInfo.TmdbId = t.Text()
  101. break
  102. }
  103. for _, t := range doc.FindElements("./" + rootKey + "/tmdb_id") {
  104. imdbInfo.TmdbId = t.Text()
  105. break
  106. }
  107. for _, t := range doc.FindElements("//uniqueid[@type='tmdb']") {
  108. imdbInfo.TmdbId = t.Text()
  109. break
  110. }
  111. for _, t := range doc.FindElements("//uniqueid[@type='Tmdb']") {
  112. imdbInfo.TmdbId = t.Text()
  113. break
  114. }
  115. for _, t := range doc.FindElements("//uniqueid[@type='TMDB']") {
  116. imdbInfo.TmdbId = t.Text()
  117. break
  118. }
  119. //---------------------------------------------------------------------
  120. // TVDB
  121. for _, t := range doc.FindElements("./" + rootKey + "/tvdbid") {
  122. imdbInfo.TVdbId = t.Text()
  123. break
  124. }
  125. for _, t := range doc.FindElements("./" + rootKey + "/tvdb_id") {
  126. imdbInfo.TVdbId = t.Text()
  127. break
  128. }
  129. for _, t := range doc.FindElements("//uniqueid[@type='tvdb']") {
  130. imdbInfo.TVdbId = t.Text()
  131. break
  132. }
  133. for _, t := range doc.FindElements("//uniqueid[@type='Tvdb']") {
  134. imdbInfo.TVdbId = t.Text()
  135. break
  136. }
  137. for _, t := range doc.FindElements("//uniqueid[@type='TVDB']") {
  138. imdbInfo.TVdbId = t.Text()
  139. break
  140. }
  141. //---------------------------------------------------------------------
  142. //Season int
  143. //Episode int
  144. for _, t := range doc.FindElements("./" + rootKey + "/Season") {
  145. season, err := strconv.Atoi(t.Text())
  146. if err != nil {
  147. continue
  148. }
  149. imdbInfo.Season = season
  150. break
  151. }
  152. for _, t := range doc.FindElements("./" + rootKey + "/season") {
  153. season, err := strconv.Atoi(t.Text())
  154. if err != nil {
  155. continue
  156. }
  157. imdbInfo.Season = season
  158. break
  159. }
  160. for _, t := range doc.FindElements("./" + rootKey + "/SEASON") {
  161. season, err := strconv.Atoi(t.Text())
  162. if err != nil {
  163. continue
  164. }
  165. imdbInfo.Season = season
  166. break
  167. }
  168. for _, t := range doc.FindElements("./" + rootKey + "/Episode") {
  169. episode, err := strconv.Atoi(t.Text())
  170. if err != nil {
  171. continue
  172. }
  173. imdbInfo.Episode = episode
  174. break
  175. }
  176. for _, t := range doc.FindElements("./" + rootKey + "/episode") {
  177. episode, err := strconv.Atoi(t.Text())
  178. if err != nil {
  179. continue
  180. }
  181. imdbInfo.Episode = episode
  182. break
  183. }
  184. for _, t := range doc.FindElements("./" + rootKey + "/EPISODE") {
  185. episode, err := strconv.Atoi(t.Text())
  186. if err != nil {
  187. continue
  188. }
  189. imdbInfo.Episode = episode
  190. break
  191. }
  192. //---------------------------------------------------------------------
  193. for _, t := range doc.FindElements("./" + rootKey + "/year") {
  194. imdbInfo.Year = t.Text()
  195. break
  196. }
  197. for _, t := range doc.FindElements("./" + rootKey + "/Year") {
  198. imdbInfo.Year = t.Text()
  199. break
  200. }
  201. for _, t := range doc.FindElements("./" + rootKey + "/YEAR") {
  202. imdbInfo.Year = t.Text()
  203. break
  204. }
  205. //---------------------------------------------------------------------
  206. for _, t := range doc.FindElements("./" + rootKey + "/releasedate") {
  207. imdbInfo.ReleaseDate = t.Text()
  208. break
  209. }
  210. for _, t := range doc.FindElements("./" + rootKey + "/aired") {
  211. imdbInfo.ReleaseDate = t.Text()
  212. break
  213. }
  214. //---------------------------------------------------------------------
  215. for _, t := range doc.FindElements("./" + rootKey + "/premiered") {
  216. imdbInfo.ReleaseDate = t.Text()
  217. break
  218. }
  219. //if imdbInfo.ImdbId != "" {
  220. // return imdbInfo, nil
  221. //}
  222. return imdbInfo, nil
  223. }
  224. // GetVideoNfoInfo4Movie 从电影视频文件获取 IMDB info,只能确定拿到 IMDB ID 是靠谱的
  225. func GetVideoNfoInfo4Movie(movieFileFullPath string) (types.VideoNfoInfo, error) {
  226. videoNfoInfo := types.VideoNfoInfo{}
  227. // movie 当前的目录
  228. dirPth := filepath.Dir(movieFileFullPath)
  229. // 与 movie 文件名一致的 nfo 文件名称
  230. movieNfoFileName := filepath.Base(movieFileFullPath)
  231. movieNfoFileName = strings.ReplaceAll(movieNfoFileName, filepath.Ext(movieFileFullPath), suffixNameNfo)
  232. // movie.xml
  233. movieXmlFPath := ""
  234. // movieName.nfo 文件
  235. movieNameNfoFPath := ""
  236. // 通用的 *.nfo
  237. nfoFilePath := ""
  238. dir, err := os.ReadDir(dirPth)
  239. if err != nil {
  240. return videoNfoInfo, err
  241. }
  242. for _, fi := range dir {
  243. if fi.IsDir() == true {
  244. continue
  245. }
  246. upperName := strings.ToLower(fi.Name())
  247. if upperName == MetadataMovieXml {
  248. // 找 movie.xml
  249. movieXmlFPath = filepath.Join(dirPth, fi.Name())
  250. } else if upperName == movieNfoFileName {
  251. // movieName.nfo 文件
  252. movieNameNfoFPath = filepath.Join(dirPth, fi.Name())
  253. } else {
  254. // 找 *.nfo,很可能是 movie.nfo
  255. ok := strings.HasSuffix(fi.Name(), suffixNameNfo)
  256. if ok {
  257. nfoFilePath = filepath.Join(dirPth, fi.Name())
  258. }
  259. }
  260. }
  261. // 根据找到的开始解析
  262. if movieNameNfoFPath == "" && movieXmlFPath == "" && nfoFilePath == "" {
  263. return videoNfoInfo, common.NoMetadataFile
  264. }
  265. // 优先分析 movieName.nfo 文件
  266. if movieNameNfoFPath != "" {
  267. videoNfoInfo, err = getVideoNfoInfo(movieNameNfoFPath, "movie")
  268. if err != nil {
  269. return videoNfoInfo, err
  270. }
  271. return videoNfoInfo, nil
  272. }
  273. if nfoFilePath != "" {
  274. videoNfoInfo, err = getVideoNfoInfo(nfoFilePath, "movie")
  275. if err != nil {
  276. return videoNfoInfo, err
  277. } else {
  278. return videoNfoInfo, nil
  279. }
  280. }
  281. if movieXmlFPath != "" {
  282. videoNfoInfo, err = getVideoNfoInfoFromMovieXml(movieXmlFPath)
  283. if err != nil {
  284. } else {
  285. return videoNfoInfo, nil
  286. }
  287. }
  288. return videoNfoInfo, common.NoMetadataFile
  289. }
  290. // GetVideoNfoInfo4SeriesDir 从一个连续剧的根目录获取 IMDB info
  291. func GetVideoNfoInfo4SeriesDir(seriesDir string) (types.VideoNfoInfo, error) {
  292. imdbInfo := types.VideoNfoInfo{}
  293. dir, err := os.ReadDir(seriesDir)
  294. if err != nil {
  295. return imdbInfo, err
  296. }
  297. nfoFilePath := ""
  298. for _, fi := range dir {
  299. if fi.IsDir() == true {
  300. continue
  301. }
  302. upperName := strings.ToUpper(fi.Name())
  303. if upperName == strings.ToUpper(MetadateTVNfo) {
  304. // 连续剧的 nfo 文件
  305. nfoFilePath = filepath.Join(seriesDir, fi.Name())
  306. break
  307. } else {
  308. // 找 *.nfo
  309. ok := strings.HasSuffix(fi.Name(), suffixNameNfo)
  310. if ok {
  311. nfoFilePath = filepath.Join(seriesDir, fi.Name())
  312. }
  313. }
  314. }
  315. // 根据找到的开始解析
  316. if nfoFilePath == "" {
  317. return imdbInfo, common.NoMetadataFile
  318. }
  319. return getVideoNfoInfo(nfoFilePath, "tvshow")
  320. }
  321. // GetSeriesSeasonVideoNfoInfoFromEpisode 从一集获取这个 Series 的 IMDB info
  322. func GetSeriesSeasonVideoNfoInfoFromEpisode(oneEpFPath string) (types.VideoNfoInfo, error) {
  323. // 当前季的路径
  324. EPdir := filepath.Dir(oneEpFPath)
  325. // 先判断是否存在 tvshow.nfo
  326. nfoFilePath := ""
  327. dir, err := os.ReadDir(EPdir)
  328. if err != nil {
  329. return types.VideoNfoInfo{}, err
  330. }
  331. for _, fi := range dir {
  332. if fi.IsDir() == true {
  333. continue
  334. }
  335. upperName := strings.ToUpper(fi.Name())
  336. if upperName == strings.ToUpper(MetadateTVNfo) {
  337. // 连续剧的 nfo 文件
  338. nfoFilePath = filepath.Join(EPdir, fi.Name())
  339. break
  340. }
  341. }
  342. if nfoFilePath == "" {
  343. // 没有找到,那么就向上一级再次找
  344. seasonDir := filepath.Base(EPdir)
  345. seriesDir := EPdir[:len(EPdir)-len(seasonDir)]
  346. return GetVideoNfoInfo4SeriesDir(seriesDir)
  347. } else {
  348. return getVideoNfoInfo(nfoFilePath, "tvshow")
  349. }
  350. }
  351. // GetVideoNfoInfo4OneSeriesEpisode 获取这一集的 IMDB info,可能会因为没有获取到 IMDB ID 而返回 common.CanNotFindIMDBID 错误,但是 imdbInfo 其他信息是可用的
  352. func GetVideoNfoInfo4OneSeriesEpisode(oneEpFPath string) (types.VideoNfoInfo, error) {
  353. // 从这一集的视频文件全路径去推算对应的 nfo 文件是否存在
  354. EPdir := filepath.Dir(oneEpFPath)
  355. // 与 EP 文件名一致的 nfo 文件名称
  356. EpNfoFileName := filepath.Base(oneEpFPath)
  357. EpNfoFileName = strings.ReplaceAll(EpNfoFileName, filepath.Ext(oneEpFPath), suffixNameNfo)
  358. // 全路径
  359. EpNfoFPath := filepath.Join(EPdir, EpNfoFileName)
  360. return getVideoNfoInfo(EpNfoFPath, "episodedetails")
  361. }
  362. // GetSeriesDirRootFPath 从一集的绝对路径推断这个连续剧的根目录绝对路径
  363. func GetSeriesDirRootFPath(oneEpFPath string) string {
  364. oneSeasonDirFPath := filepath.Dir(oneEpFPath)
  365. oneSeriesDirFPath := filepath.Dir(oneSeasonDirFPath)
  366. if IsFile(filepath.Join(oneSeriesDirFPath, MetadateTVNfo)) == true {
  367. return oneSeriesDirFPath
  368. } else {
  369. return ""
  370. }
  371. }
  372. // GetVideoInfoFromFileName 从文件名推断文件信息,这个应该是次要方案,优先还是从 nfo 文件获取这些信息
  373. func GetVideoInfoFromFileName(fileName string) (*PTN.TorrentInfo, error) {
  374. parse, err := PTN.Parse(fileName)
  375. if err != nil {
  376. return nil, err
  377. }
  378. compile, err := regexp.Compile(regFixTitle2)
  379. if err != nil {
  380. return nil, err
  381. }
  382. match := compile.ReplaceAllString(parse.Title, "")
  383. match = strings.TrimRight(match, "")
  384. parse.Title = match
  385. return parse, nil
  386. }
  387. //GetVideoInfoFromFileFullPath 从全文件路径推断文件信息,这个应该是次要方案,优先还是从 nfo 文件获取这些信息
  388. func GetVideoInfoFromFileFullPath(videoFileFullPath string, isMovie bool) (types.VideoNfoInfo, time.Time, error) {
  389. var err error
  390. var videoNfoInfo types.VideoNfoInfo
  391. if isMovie == true {
  392. videoNfoInfo, err = GetVideoNfoInfo4Movie(videoFileFullPath)
  393. if err != nil {
  394. return types.VideoNfoInfo{}, time.Time{}, err
  395. }
  396. } else {
  397. videoNfoInfo, err = GetVideoNfoInfo4OneSeriesEpisode(videoFileFullPath)
  398. if err != nil {
  399. return types.VideoNfoInfo{}, time.Time{}, err
  400. }
  401. }
  402. /*
  403. 这里有个特殊情况,如果是某一种蓝光的文件结构,不是一个单一的视频文件
  404. * 失控玩家 (2021)
  405. * BDMV
  406. * CERTIFICATE
  407. * id.bdmv
  408. 大致是这样的目录结构,两个文件夹,下面按个文件夹中一定有这个文件 id.bdmv
  409. 那么,在前期的扫描视频的阶段,会把这样的蓝光视频给伪造一个假的不存在的视频传入进来
  410. 失控玩家 (2021).mp4 比如这个
  411. 然后需要 check 这个文件是否存在:
  412. 1. 如果 check 这个文件存在,那么就是之前的逻辑
  413. 2. 如果是这个情况肯定是不存在的,那么就要判断是否有这文件结构是否符合这种蓝光结构
  414. */
  415. if IsFile(videoFileFullPath) == true {
  416. // 常见的视频情况
  417. fInfo, err := os.Stat(videoFileFullPath)
  418. if err != nil {
  419. return types.VideoNfoInfo{}, time.Time{}, err
  420. }
  421. return videoNfoInfo, fInfo.ModTime(), nil
  422. } else {
  423. // 再次判断是否是蓝光结构
  424. // 因为在前面扫描视频的时候,发现特殊的蓝光结构会伪造一个不存在的 xx.mp4 的视频文件过来,这里就需要额外检测一次
  425. bok, idBDMVFPath, _ := IsFakeBDMVWorked(videoFileFullPath)
  426. if bok == false {
  427. return types.VideoNfoInfo{}, time.Time{}, errors.New("GetVideoInfoFromFileFullPath.IsFakeBDMVWorked == false")
  428. }
  429. // 获取这个蓝光 ID BDMV 文件的时间
  430. fInfo, err := os.Stat(idBDMVFPath)
  431. if err != nil {
  432. return types.VideoNfoInfo{}, time.Time{}, err
  433. }
  434. return videoNfoInfo, fInfo.ModTime(), nil
  435. }
  436. }
  437. // GetSeasonAndEpisodeFromSubFileName 从文件名推断 季 和 集 的信息 Season Episode,这个应该是次要方案,优先还是从 nfo 文件获取这些信息
  438. func GetSeasonAndEpisodeFromSubFileName(videoFileName string) (bool, int, int, error) {
  439. upperName := strings.ToUpper(videoFileName)
  440. // 先进行单个 Episode 的匹配
  441. // Killing.Eve.S02E01.Do.You.Know.How
  442. var re = regexp.MustCompile(`(?m)[\.\s]S(\d+).*?E(\d+)[\.\s]`)
  443. matched := re.FindAllStringSubmatch(upperName, -1)
  444. if matched == nil || len(matched) < 1 {
  445. // Killing.Eve.S02.Do.You.Know.How
  446. // 看看是不是季度字幕打包
  447. re = regexp.MustCompile(`(?m)[\.\s]S(\d+)[\.\s]`)
  448. matched = re.FindAllStringSubmatch(upperName, -1)
  449. if matched == nil || len(matched) < 1 {
  450. return false, 0, 0, nil
  451. }
  452. season, err := GetNumber2int(matched[0][1])
  453. if err != nil {
  454. return false, 0, 0, err
  455. }
  456. return true, season, 0, nil
  457. } else {
  458. // 一集的字幕
  459. season, err := GetNumber2int(matched[0][1])
  460. if err != nil {
  461. return false, 0, 0, err
  462. }
  463. episode, err := GetNumber2int(matched[0][2])
  464. if err != nil {
  465. return false, 0, 0, err
  466. }
  467. return false, season, episode, nil
  468. }
  469. }
  470. func GetNumber2Float(input string) (float32, error) {
  471. compile := regexp.MustCompile(regGetNumber)
  472. params := compile.FindStringSubmatch(input)
  473. if params == nil || len(params) == 0 {
  474. return 0, errors.New("get number not match")
  475. }
  476. fNum, err := strconv.ParseFloat(params[0], 32)
  477. if err != nil {
  478. return 0, errors.New("get number ParseFloat error")
  479. }
  480. return float32(fNum), nil
  481. }
  482. func GetNumber2int(input string) (int, error) {
  483. compile := regexp.MustCompile(regGetNumber)
  484. params := compile.FindStringSubmatch(input)
  485. if params == nil || len(params) == 0 {
  486. return 0, errors.New("get number not match")
  487. }
  488. fNum, err := strconv.Atoi(params[0])
  489. if err != nil {
  490. return 0, errors.New("get number ParseFloat error")
  491. }
  492. return fNum, nil
  493. }
  494. // IsFile 存在且是文件
  495. func IsFile(filePath string) bool {
  496. s, err := os.Stat(filePath)
  497. if err != nil {
  498. return false
  499. }
  500. return !s.IsDir()
  501. }
  502. // IsDir 存在且是文件夹
  503. func IsDir(path string) bool {
  504. s, err := os.Stat(path)
  505. if err != nil {
  506. return false
  507. }
  508. return s.IsDir()
  509. }
  510. // IsFakeBDMVWorked 传入的是伪造的不存在的蓝光结构的视频全路径,如果是就返回 true 和 id.bdmv 的绝对路径 和 STREAM 绝对路径
  511. func IsFakeBDMVWorked(fakseVideFPath string) (bool, string, string) {
  512. rootDir := filepath.Dir(fakseVideFPath)
  513. CERDir := filepath.Join(rootDir, "CERTIFICATE")
  514. BDMVDir := filepath.Join(rootDir, "BDMV")
  515. STREAMDir := filepath.Join(BDMVDir, "STREAM")
  516. idBDMVFPath := filepath.Join(CERDir, common.FileBDMV)
  517. if IsDir(CERDir) == true && IsDir(BDMVDir) == true && IsFile(idBDMVFPath) == true {
  518. return true, idBDMVFPath, STREAMDir
  519. }
  520. return false, "", ""
  521. }
// File names, file suffixes and regular-expression sources used when locating
// and parsing video metadata.
const (
	// MetadataMovieXml is the lower-case name of the movie.xml metadata file;
	// GetVideoNfoInfo4Movie compares lower-cased directory entries against it.
	MetadataMovieXml = "movie.xml"
	// suffixNameXml is the ".xml" file suffix.
	// NOTE(review): not referenced in the visible part of this file.
	suffixNameXml = ".xml"
	// suffixNameNfo is the suffix of nfo metadata files.
	suffixNameNfo = ".nfo"
	// MetadateTVNfo is the name of the series level nfo file.
	MetadateTVNfo = "tvshow.nfo"
	// regFixTitle matches every character that is neither in the range
	// U+4E00..U+9FA5 (Chinese characters per the original comment), nor an
	// ASCII letter or digit, nor whitespace.
	// NOTE(review): not referenced in the visible part of this file.
	regFixTitle = "[^\u4e00-\u9fa5a-zA-Z0-9\\s]"
	// regFixTitle2 matches an explicit list of special characters;
	// GetVideoInfoFromFileName removes every match from the parsed title.
	regFixTitle2 = "[~!@#$%^&*:()\\+\\-=|{}';'\\[\\].<>/?~!@#¥%……&*()——+|{}【】';”“’。、?]"
	// regGetNumber matches a number: an optional leading minus sign, one or
	// more digits and an optional fractional part.
	regGetNumber = "(?:\\-)?\\d{1,}(?:\\.\\d{1,})?"
)