decode.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. package decode
  2. import (
  3. "errors"
  4. "github.com/allanpk716/ChineseSubFinder/internal/types"
  5. common2 "github.com/allanpk716/ChineseSubFinder/internal/types/common"
  6. "github.com/beevik/etree"
  7. PTN "github.com/middelink/go-parse-torrent-name"
  8. "os"
  9. "path/filepath"
  10. "regexp"
  11. "strconv"
  12. "strings"
  13. "time"
  14. )
  15. func getImdbAndYearMovieXml(movieFilePath string) (types.VideoIMDBInfo, error) {
  16. videoInfo := types.VideoIMDBInfo{}
  17. doc := etree.NewDocument()
  18. doc.ReadSettings.Permissive = true
  19. if err := doc.ReadFromFile(movieFilePath); err != nil {
  20. return videoInfo, err
  21. }
  22. for _, t := range doc.FindElements("//IMDB") {
  23. videoInfo.ImdbId = t.Text()
  24. break
  25. }
  26. for _, t := range doc.FindElements("//ProductionYear") {
  27. videoInfo.Year = t.Text()
  28. break
  29. }
  30. if videoInfo.ImdbId != "" {
  31. return videoInfo, nil
  32. }
  33. return videoInfo, common2.CanNotFindIMDBID
  34. }
  35. func getImdbAndYearNfo(nfoFilePath string, rootKey string) (types.VideoIMDBInfo, error) {
  36. imdbInfo := types.VideoIMDBInfo{}
  37. doc := etree.NewDocument()
  38. doc.ReadSettings.Permissive = true
  39. // 这里会遇到一个梗,下面的关键词,可能是小写、大写、首字母大写
  40. // 读取文件转换为全部的小写,然后在解析 xml ? etree 在转换为小写后,某些类型的文件的内容会崩溃···
  41. // 所以这里很傻的方式解决
  42. err := doc.ReadFromFile(nfoFilePath)
  43. if err != nil {
  44. return imdbInfo, err
  45. }
  46. for _, t := range doc.FindElements("./" + rootKey + "/title") {
  47. imdbInfo.Title = t.Text()
  48. break
  49. }
  50. //---------------------------------------------------------------------
  51. for _, t := range doc.FindElements("./" + rootKey + "/imdbid") {
  52. imdbInfo.ImdbId = t.Text()
  53. break
  54. }
  55. for _, t := range doc.FindElements("./" + rootKey + "/imdb_id") {
  56. imdbInfo.ImdbId = t.Text()
  57. break
  58. }
  59. for _, t := range doc.FindElements("//uniqueid[@type='imdb']") {
  60. imdbInfo.ImdbId = t.Text()
  61. break
  62. }
  63. for _, t := range doc.FindElements("//uniqueid[@type='Imdb']") {
  64. imdbInfo.ImdbId = t.Text()
  65. break
  66. }
  67. for _, t := range doc.FindElements("//uniqueid[@type='IMDB']") {
  68. imdbInfo.ImdbId = t.Text()
  69. break
  70. }
  71. //---------------------------------------------------------------------
  72. for _, t := range doc.FindElements("./" + rootKey + "/year") {
  73. imdbInfo.Year = t.Text()
  74. break
  75. }
  76. //---------------------------------------------------------------------
  77. for _, t := range doc.FindElements("./" + rootKey + "/releasedate") {
  78. imdbInfo.ReleaseDate = t.Text()
  79. break
  80. }
  81. //---------------------------------------------------------------------
  82. for _, t := range doc.FindElements("./" + rootKey + "/premiered") {
  83. imdbInfo.ReleaseDate = t.Text()
  84. break
  85. }
  86. if imdbInfo.ImdbId != "" {
  87. return imdbInfo, nil
  88. }
  89. return imdbInfo, common2.CanNotFindIMDBID
  90. }
  91. // GetImdbInfo4Movie 从电影视频文件获取 IMDB info
  92. func GetImdbInfo4Movie(movieFileFullPath string) (types.VideoIMDBInfo, error) {
  93. imdbInfo := types.VideoIMDBInfo{}
  94. // movie 当前的目录
  95. dirPth := filepath.Dir(movieFileFullPath)
  96. // 与 movie 文件名一致的 nfo 文件名称
  97. movieNfoFileName := filepath.Base(movieFileFullPath)
  98. movieNfoFileName = strings.ReplaceAll(movieNfoFileName, filepath.Ext(movieFileFullPath), suffixNameNfo)
  99. // movie.xml
  100. movieXmlFPath := ""
  101. // movieName.nfo 文件
  102. movieNameNfoFPath := ""
  103. // 通用的 *.nfo
  104. nfoFilePath := ""
  105. dir, err := os.ReadDir(dirPth)
  106. if err != nil {
  107. return imdbInfo, err
  108. }
  109. for _, fi := range dir {
  110. if fi.IsDir() == true {
  111. continue
  112. }
  113. upperName := strings.ToLower(fi.Name())
  114. if upperName == MetadataMovieXml {
  115. // 找 movie.xml
  116. movieXmlFPath = filepath.Join(dirPth, fi.Name())
  117. break
  118. } else if upperName == movieNfoFileName {
  119. // movieName.nfo 文件
  120. movieNameNfoFPath = filepath.Join(dirPth, fi.Name())
  121. break
  122. } else {
  123. // 找 *.nfo,很可能是 movie.nfo
  124. ok := strings.HasSuffix(fi.Name(), suffixNameNfo)
  125. if ok {
  126. nfoFilePath = filepath.Join(dirPth, fi.Name())
  127. }
  128. }
  129. }
  130. // 根据找到的开始解析
  131. if movieNameNfoFPath == "" && movieXmlFPath == "" && nfoFilePath == "" {
  132. return imdbInfo, common2.NoMetadataFile
  133. }
  134. // 优先分析 movieName.nfo 文件
  135. if movieNameNfoFPath != "" {
  136. imdbInfo, err = getImdbAndYearNfo(movieNameNfoFPath, "movie")
  137. if err != nil {
  138. return types.VideoIMDBInfo{}, err
  139. }
  140. return imdbInfo, nil
  141. }
  142. if movieXmlFPath != "" {
  143. imdbInfo, err = getImdbAndYearMovieXml(movieXmlFPath)
  144. if err != nil {
  145. } else {
  146. return imdbInfo, nil
  147. }
  148. }
  149. if nfoFilePath != "" {
  150. imdbInfo, err = getImdbAndYearNfo(nfoFilePath, "movie")
  151. if err != nil {
  152. return imdbInfo, err
  153. } else {
  154. return imdbInfo, nil
  155. }
  156. }
  157. return imdbInfo, common2.CanNotFindIMDBID
  158. }
  159. // GetImdbInfo4SeriesDir 从一个连续剧的根目录获取 IMDB info
  160. func GetImdbInfo4SeriesDir(seriesDir string) (types.VideoIMDBInfo, error) {
  161. imdbInfo := types.VideoIMDBInfo{}
  162. dir, err := os.ReadDir(seriesDir)
  163. if err != nil {
  164. return imdbInfo, err
  165. }
  166. nfoFilePath := ""
  167. for _, fi := range dir {
  168. if fi.IsDir() == true {
  169. continue
  170. }
  171. upperName := strings.ToUpper(fi.Name())
  172. if upperName == strings.ToUpper(MetadateTVNfo) {
  173. // 连续剧的 nfo 文件
  174. nfoFilePath = filepath.Join(seriesDir, fi.Name())
  175. break
  176. } else {
  177. // 找 *.nfo
  178. ok := strings.HasSuffix(fi.Name(), suffixNameNfo)
  179. if ok {
  180. nfoFilePath = filepath.Join(seriesDir, fi.Name())
  181. }
  182. }
  183. }
  184. // 根据找到的开始解析
  185. if nfoFilePath == "" {
  186. return imdbInfo, common2.NoMetadataFile
  187. }
  188. imdbInfo, err = getImdbAndYearNfo(nfoFilePath, "tvshow")
  189. if err != nil {
  190. return types.VideoIMDBInfo{}, err
  191. }
  192. return imdbInfo, nil
  193. }
  194. // GetSeriesSeasonImdbInfoFromEpisode 从一集获取这个 Series 的 IMDB info
  195. func GetSeriesSeasonImdbInfoFromEpisode(oneEpFPath string) (types.VideoIMDBInfo, error) {
  196. var err error
  197. // 当前季的路径
  198. EPdir := filepath.Dir(oneEpFPath)
  199. // 先判断是否存在 tvshow.nfo
  200. nfoFilePath := ""
  201. dir, err := os.ReadDir(EPdir)
  202. for _, fi := range dir {
  203. if fi.IsDir() == true {
  204. continue
  205. }
  206. upperName := strings.ToUpper(fi.Name())
  207. if upperName == strings.ToUpper(MetadateTVNfo) {
  208. // 连续剧的 nfo 文件
  209. nfoFilePath = filepath.Join(EPdir, fi.Name())
  210. break
  211. }
  212. }
  213. if nfoFilePath == "" {
  214. // 没有找到,那么就向上一级再次找
  215. seasonDir := filepath.Base(EPdir)
  216. seriesDir := EPdir[:len(EPdir)-len(seasonDir)]
  217. return GetImdbInfo4SeriesDir(seriesDir)
  218. } else {
  219. var imdbInfo types.VideoIMDBInfo
  220. imdbInfo, err = getImdbAndYearNfo(nfoFilePath, "tvshow")
  221. if err != nil {
  222. return types.VideoIMDBInfo{}, err
  223. }
  224. return imdbInfo, nil
  225. }
  226. }
  227. // GetImdbInfo4OneSeriesEpisode 获取这一集的 IMDB info
  228. func GetImdbInfo4OneSeriesEpisode(oneEpFPath string) (types.VideoIMDBInfo, error) {
  229. // 从这一集的视频文件全路径去推算对应的 nfo 文件是否存在
  230. EPdir := filepath.Dir(oneEpFPath)
  231. // 与 EP 文件名一致的 nfo 文件名称
  232. EpNfoFileName := filepath.Base(oneEpFPath)
  233. EpNfoFileName = strings.ReplaceAll(EpNfoFileName, filepath.Ext(oneEpFPath), suffixNameNfo)
  234. // 全路径
  235. EpNfoFPath := filepath.Join(EPdir, EpNfoFileName)
  236. //
  237. imdbInfo := types.VideoIMDBInfo{}
  238. doc := etree.NewDocument()
  239. doc.ReadSettings.Permissive = true
  240. // 这里会遇到一个梗,下面的关键词,可能是小写、大写、首字母大写
  241. // 读取文件转换为全部的小写,然后在解析 xml ? etree 在转换为小写后,某些类型的文件的内容会崩溃···
  242. // 所以这里很傻的方式解决
  243. err := doc.ReadFromFile(EpNfoFPath)
  244. if err != nil {
  245. return imdbInfo, err
  246. }
  247. for _, t := range doc.FindElements("./episodedetails/aired") {
  248. imdbInfo.ReleaseDate = t.Text()
  249. break
  250. }
  251. for _, t := range doc.FindElements("./episodedetails/premiered") {
  252. imdbInfo.ReleaseDate = t.Text()
  253. break
  254. }
  255. if imdbInfo.ReleaseDate != "" {
  256. return imdbInfo, nil
  257. }
  258. return imdbInfo, common2.CanNotFindEpAiredTime
  259. }
  260. // GetVideoInfoFromFileName 从文件名推断文件信息
  261. func GetVideoInfoFromFileName(fileName string) (*PTN.TorrentInfo, error) {
  262. parse, err := PTN.Parse(fileName)
  263. if err != nil {
  264. return nil, err
  265. }
  266. compile, err := regexp.Compile(regFixTitle2)
  267. if err != nil {
  268. return nil, err
  269. }
  270. match := compile.ReplaceAllString(parse.Title, "")
  271. match = strings.TrimRight(match, "")
  272. parse.Title = match
  273. return parse, nil
  274. }
  275. //GetVideoInfoFromFileFullPath 从全文件路径推断文件信息
  276. func GetVideoInfoFromFileFullPath(videoFileFullPath string) (*PTN.TorrentInfo, time.Time, error) {
  277. parse, err := PTN.Parse(filepath.Base(videoFileFullPath))
  278. if err != nil {
  279. return nil, time.Time{}, err
  280. }
  281. compile, err := regexp.Compile(regFixTitle2)
  282. if err != nil {
  283. return nil, time.Time{}, err
  284. }
  285. match := compile.ReplaceAllString(parse.Title, "")
  286. match = strings.TrimRight(match, "")
  287. parse.Title = match
  288. /*
  289. 这里有个特殊情况,如果是某一种蓝光的文件结构,不是一个单一的视频文件
  290. * 失控玩家 (2021)
  291. * BDMV
  292. * CERTIFICATE
  293. * id.bdmv
  294. 大致是这样的目录结构,两个文件夹,下面按个文件夹中一定有这个文件 id.bdmv
  295. 那么,在前期的扫描视频的阶段,会把这样的蓝光视频给伪造一个假的不存在的视频传入进来
  296. 失控玩家 (2021).mp4 比如这个
  297. 然后需要 check 这个文件是否存在:
  298. 1. 如果 check 这个文件存在,那么就是之前的逻辑
  299. 2. 如果是这个情况肯定是不存在的,那么就要判断是否有这文件结构是否符合这种蓝光结构
  300. */
  301. if IsFile(videoFileFullPath) == true {
  302. // 常见的视频情况
  303. fInfo, err := os.Stat(videoFileFullPath)
  304. if err != nil {
  305. return nil, time.Time{}, err
  306. }
  307. return parse, fInfo.ModTime(), nil
  308. } else {
  309. // 再次判断是否是蓝光结构
  310. // 因为在前面扫描视频的时候,发现特殊的蓝光结构会伪造一个不存在的 xx.mp4 的视频文件过来,这里就需要额外检测一次
  311. bok, idBDMVFPath, _ := IsFakeBDMVWorked(videoFileFullPath)
  312. if bok == false {
  313. return nil, time.Time{}, err
  314. }
  315. // 获取这个蓝光 ID BDMV 文件的时间
  316. fInfo, err := os.Stat(idBDMVFPath)
  317. if err != nil {
  318. return nil, time.Time{}, err
  319. }
  320. return parse, fInfo.ModTime(), nil
  321. }
  322. }
  323. // GetSeasonAndEpisodeFromSubFileName 从文件名推断 季 和 集 的信息 Season Episode
  324. func GetSeasonAndEpisodeFromSubFileName(videoFileName string) (bool, int, int, error) {
  325. upperName := strings.ToUpper(videoFileName)
  326. // 先进行单个 Episode 的匹配
  327. // Killing.Eve.S02E01.Do.You.Know.How
  328. var re = regexp.MustCompile(`(?m)\.S(\d+)E(\d+)\.`)
  329. matched := re.FindAllStringSubmatch(upperName, -1)
  330. if matched == nil || len(matched) < 1 {
  331. // Killing.Eve.S02.Do.You.Know.How
  332. // 看看是不是季度字幕打包
  333. re = regexp.MustCompile(`(?m)\.S(\d+)\.`)
  334. matched = re.FindAllStringSubmatch(upperName, -1)
  335. if matched == nil || len(matched) < 1 {
  336. return false, 0, 0, nil
  337. }
  338. season, err := GetNumber2int(matched[0][1])
  339. if err != nil {
  340. return false, 0, 0, err
  341. }
  342. return true, season, 0, nil
  343. } else {
  344. // 一集的字幕
  345. season, err := GetNumber2int(matched[0][1])
  346. if err != nil {
  347. return false, 0, 0, err
  348. }
  349. episode, err := GetNumber2int(matched[0][2])
  350. if err != nil {
  351. return false, 0, 0, err
  352. }
  353. return false, season, episode, nil
  354. }
  355. }
  356. func GetNumber2Float(input string) (float32, error) {
  357. compile := regexp.MustCompile(regGetNumber)
  358. params := compile.FindStringSubmatch(input)
  359. if params == nil || len(params) == 0 {
  360. return 0, errors.New("get number not match")
  361. }
  362. fNum, err := strconv.ParseFloat(params[0], 32)
  363. if err != nil {
  364. return 0, errors.New("get number ParseFloat error")
  365. }
  366. return float32(fNum), nil
  367. }
  368. func GetNumber2int(input string) (int, error) {
  369. compile := regexp.MustCompile(regGetNumber)
  370. params := compile.FindStringSubmatch(input)
  371. if params == nil || len(params) == 0 {
  372. return 0, errors.New("get number not match")
  373. }
  374. fNum, err := strconv.Atoi(params[0])
  375. if err != nil {
  376. return 0, errors.New("get number ParseFloat error")
  377. }
  378. return fNum, nil
  379. }
  // IsFile reports whether filePath exists and is not a directory.
  // Any os.Stat failure, including "does not exist", yields false.
  func IsFile(filePath string) bool {
  	if info, statErr := os.Stat(filePath); statErr == nil {
  		return !info.IsDir()
  	}
  	return false
  }
  // IsDir reports whether path exists and is a directory.
  // Any os.Stat failure, including "does not exist", yields false.
  func IsDir(path string) bool {
  	if info, statErr := os.Stat(path); statErr == nil {
  		return info.IsDir()
  	}
  	return false
  }
  396. // IsFakeBDMVWorked 传入的是伪造的不存在的蓝光结构的视频全路径,如果是就返回 true 和 id.bdmv 的绝对路径 和 STREAM 绝对路径
  397. func IsFakeBDMVWorked(fakseVideFPath string) (bool, string, string) {
  398. rootDir := filepath.Dir(fakseVideFPath)
  399. CERDir := filepath.Join(rootDir, "CERTIFICATE")
  400. BDMVDir := filepath.Join(rootDir, "BDMV")
  401. STREAMDir := filepath.Join(BDMVDir, "STREAM")
  402. idBDMVFPath := filepath.Join(CERDir, common2.FileBDMV)
  403. if IsDir(CERDir) == true && IsDir(BDMVDir) == true && IsFile(idBDMVFPath) == true {
  404. return true, idBDMVFPath, STREAMDir
  405. }
  406. return false, "", ""
  407. }
  // Metadata file names and file extensions used by the lookups in this file.
  const (
  	// MetadataMovieXml is the name of the movie.xml metadata file.
  	MetadataMovieXml = "movie.xml"
  	// MetadateTVNfo is the name of the series-level .nfo file.
  	MetadateTVNfo = "tvshow.nfo"

  	suffixNameXml = ".xml"
  	suffixNameNfo = ".nfo"
  )

  // Regular-expression sources.
  const (
  	// regFixTitle matches every character that is not in the range
  	// U+4E00..U+9FA5, an ASCII letter, a digit or whitespace.
  	regFixTitle = "[^\u4e00-\u9fa5a-zA-Z0-9\\s]"
  	// regFixTitle2 matches one character out of an explicit list of
  	// punctuation and symbol characters to strip from titles.
  	regFixTitle2 = "[~!@#$%^&*()\\+\\-=|{}';'\\[\\].<>/?~!@#¥%……&*()——+|{}【】';”“’。、?]"
  	// regGetNumber matches a number: optional '-', digits, optional fraction.
  	regGetNumber = "(?:\\-)?\\d{1,}(?:\\.\\d{1,})?"
  )