normal.go 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. package normal
  2. import (
  3. "github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/ass"
  4. "github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/srt"
  5. "github.com/allanpk716/ChineseSubFinder/internal/pkg/language"
  6. "github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
  7. "github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_formatter/common"
  8. "github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub"
  9. languageConst "github.com/allanpk716/ChineseSubFinder/internal/types/language"
  10. "github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
  11. "github.com/sirupsen/logrus"
  12. "path/filepath"
  13. "regexp"
  14. "strings"
  15. )
  16. type Formatter struct {
  17. log *logrus.Logger
  18. subParser *sub_parser_hub.SubParserHub
  19. }
  20. func NewFormatter(log *logrus.Logger) *Formatter {
  21. return &Formatter{log: log, subParser: sub_parser_hub.NewSubParserHub(log, ass.NewParser(log), srt.NewParser(log))}
  22. }
  23. // GetFormatterName 当前的 Formatter 是那个
  24. func (f Formatter) GetFormatterName() string {
  25. return common.FormatterNameString_Normal
  26. }
  27. func (f Formatter) GetFormatterFormatterName() int {
  28. return int(common.Normal)
  29. }
  30. // IsMatchThisFormat 是否满足当前实现接口的字幕命名格式 - 是否符合规则、fileNameWithOutExt string, subExt string, subLang types.MyLanguage, extraSubPreName string
  31. func (f Formatter) IsMatchThisFormat(subName string) (bool, string, string, languageConst.MyLanguage, string) {
  32. /*
  33. Emby 的命名规则比较特殊,而且本程序就是做中文字幕下载的,所以,下面的正则表达式比较特殊
  34. 见本程序内 internal/types/language/ISOLanguage.go 这里的支持 ISO 规范和中文编码变种
  35. 见文档、讨论:
  36. https://emby.media/community/index.php?/topic/94504-current-chinese-subtitle-filter-not-so-accurate-and-hope-improve-like-this/
  37. https://en.wikipedia.org/wiki/Chinese_Wikipedia#Automatic_conversion_between_traditional_and_simplified_Chinese_characters
  38. https://stackoverflow.com/questions/18902072/what-standard-do-language-codes-of-the-form-zh-hans-belong-to
  39. */
  40. //subName = strings.ToLower(subName)
  41. // get basename to avoid relative path like "../../.." cause issue for regexp
  42. // CANT just get Base, as autoDetectAndChange expect a full path
  43. subNameBase := filepath.Base(subName)
  44. subNameDir := filepath.Dir(subName)
  45. var re = regexp.MustCompile(language.ISOSupportRegexRule())
  46. matched := re.FindAllStringSubmatch(subNameBase, -1)
  47. /*
  48. 详细看测试用例
  49. The Boss Baby Family Business (2021) WEBDL-1080p.zh.ass
  50. The Boss Baby Family Business (2021) WEBDL-1080p.zh.default.ass
  51. The Boss Baby Family Business (2021) WEBDL-1080p.zh.forced.ass
  52. The Boss Baby Family Business (2021) WEBDL-1080p.chi.ass
  53. The Boss Baby Family Business (2021) WEBDL-1080p.chi.default.ass
  54. The Boss Baby Family Business (2021) WEBDL-1080p.chi.forced.ass
  55. The Boss Baby Family Business (2021) WEBDL-1080p.zho.ass
  56. The Boss Baby Family Business (2021) WEBDL-1080p.zho.default.ass
  57. The Boss Baby Family Business (2021) WEBDL-1080p.zho.forced.ass
  58. [0][0] .zh.ass
  59. [0][1] zh
  60. [0][2] .ass
  61. */
  62. if matched == nil || len(matched) < 1 || len(matched[0]) < 3 {
  63. return false, "", "", languageConst.Unknown, ""
  64. }
  65. var subLang languageConst.MyLanguage
  66. var extraSubPreName string
  67. // replace only applys to basename
  68. fileNameWithOutExt := strings.ReplaceAll(subNameBase, matched[0][0], "")
  69. subExt := matched[0][2]
  70. var subLangStr = matched[0][1]
  71. extraSubPreName = ""
  72. // 这里有一个点,是直接从 zh zho ch 去转换成中文语言就行了,还是要做字幕的语言识别
  73. // 目前倾向于这里用后面的逻辑
  74. subLang = language.ISOString2SupportLang(subLangStr)
  75. // 这里可能是拿到的是文件的全路径,那么就可以读取文件内容去判断文件的语言
  76. if my_util.IsFile(subName) == true {
  77. bok, fileInfo, err := f.subParser.DetermineFileTypeFromFile(subName)
  78. if err != nil || bok == false {
  79. // add original Dir to fileNameWithOutExt to ensure file can be reached
  80. return true, filepath.Join(subNameDir, fileNameWithOutExt), subExt, subLang, extraSubPreName
  81. }
  82. subLang = fileInfo.Lang
  83. }
  84. // add original Dir to fileNameWithOutExt to ensure file can be reached
  85. return true, filepath.Join(subNameDir, fileNameWithOutExt), subExt, subLang, extraSubPreName
  86. }
  87. // GenerateMixSubName 通过视频和字幕信息,生成当前实现接口的字幕命名格式。extraSubPreName 一般是填写字幕网站,不填写则留空 - 新名称、新名称带有 default 标记,新名称带有 forced 标记
  88. func (f Formatter) GenerateMixSubName(videoFileName, subExt string, subLang languageConst.MyLanguage, extraSubPreName string) (string, string, string) {
  89. /*
  90. 这里会生成类似的文件名 xxxx.zh
  91. */
  92. videoFileNameWithOutExt := strings.ReplaceAll(filepath.Base(videoFileName),
  93. filepath.Ext(videoFileName), "")
  94. return f.GenerateMixSubNameBase(videoFileNameWithOutExt, subExt, subLang, extraSubPreName)
  95. }
  96. func (f Formatter) GenerateMixSubNameBase(fileNameWithOutExt, subExt string, subLang languageConst.MyLanguage, extraSubPreName string) (string, string, string) {
  97. // 这里传入字幕后缀名的时候,可能会带有 default 或者 forced 字段,需要剔除
  98. nowSubExt := strings.ReplaceAll(subExt, subparser.Sub_Ext_Mark_Default, "")
  99. nowSubExt = strings.ReplaceAll(nowSubExt, subparser.Sub_Ext_Mark_Forced, "")
  100. subNewName := fileNameWithOutExt + "." + languageConst.ISO_639_1_Chinese + nowSubExt
  101. subNewNameWithDefault := fileNameWithOutExt + "." + languageConst.ISO_639_1_Chinese + subparser.Sub_Ext_Mark_Default + nowSubExt
  102. subNewNameWithForced := fileNameWithOutExt + "." + languageConst.ISO_639_1_Chinese + subparser.Sub_Ext_Mark_Forced + nowSubExt
  103. return subNewName, subNewNameWithDefault, subNewNameWithForced
  104. }