subParserHub.go 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. package sub_parser_hub
  2. import (
  3. "github.com/allanpk716/ChineseSubFinder/internal/ifaces"
  4. "github.com/allanpk716/ChineseSubFinder/internal/pkg/language"
  5. "github.com/allanpk716/ChineseSubFinder/internal/types/common"
  6. languageConst "github.com/allanpk716/ChineseSubFinder/internal/types/language"
  7. "github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
  8. "github.com/sirupsen/logrus"
  9. "os"
  10. "path/filepath"
  11. "regexp"
  12. "strings"
  13. )
  14. type SubParserHub struct {
  15. log *logrus.Logger
  16. Parser []ifaces.ISubParser
  17. }
  18. // NewSubParserHub 处理的字幕文件需要符合 [siteName]_ 的前缀描述,是本程序专用的
  19. func NewSubParserHub(log *logrus.Logger, parser ifaces.ISubParser, _parser ...ifaces.ISubParser) *SubParserHub {
  20. s := SubParserHub{}
  21. s.log = log
  22. s.Parser = make([]ifaces.ISubParser, 0)
  23. s.Parser = append(s.Parser, parser)
  24. if len(_parser) > 0 {
  25. for _, one := range _parser {
  26. s.Parser = append(s.Parser, one)
  27. }
  28. }
  29. return &s
  30. }
  31. // DetermineFileTypeFromFile 确定字幕文件的类型,是双语字幕或者某一种语言等等信息,如果返回 nil ,那么就说明都没有字幕的格式匹配上
  32. func (p SubParserHub) DetermineFileTypeFromFile(filePath string) (bool, *subparser.FileInfo, error) {
  33. for _, parser := range p.Parser {
  34. bFind, subFileInfo, err := parser.DetermineFileTypeFromFile(filePath)
  35. if err != nil {
  36. return false, nil, err
  37. }
  38. if bFind == false {
  39. continue
  40. }
  41. // 正常至少应该匹配一个吧,不然就是最外层继续返回 nil 出去了
  42. // 简体和繁体字幕的判断,通过文件名来做到的,基本就算个补判而已
  43. //newLang := IsChineseSimpleOrTraditional(filePath, subFileInfo.Lang)
  44. subFileInfo.Name = filepath.Base(filePath)
  45. //subFileInfo.Lang = newLang
  46. subFileInfo.FileFullPath = filePath
  47. subFileInfo.FromWhereSite = p.getFromWhereSite(filePath)
  48. return true, subFileInfo, nil
  49. }
  50. // 如果返回 nil ,那么就说明都没有字幕的格式匹配上
  51. return false, nil, nil
  52. }
  53. // DetermineFileTypeFromBytes 确定字幕文件的类型,是双语字幕或者某一种语言等等信息,如果返回 nil ,那么就说明都没有字幕的格式匹配上
  54. // 如果要做字幕的时间轴匹配,很可能需要一个功能 sub_helper.MergeMultiDialogue4EngSubtitle,但是仅仅是合并了 English 字幕时间轴
  55. func (p SubParserHub) DetermineFileTypeFromBytes(inBytes []byte, nowExt string) (bool, *subparser.FileInfo, error) {
  56. for _, parser := range p.Parser {
  57. bFind, subFileInfo, err := parser.DetermineFileTypeFromBytes(inBytes, nowExt)
  58. if err != nil {
  59. return false, nil, err
  60. }
  61. if bFind == false {
  62. continue
  63. }
  64. return true, subFileInfo, nil
  65. }
  66. // 如果返回 nil ,那么就说明都没有字幕的格式匹配上
  67. return false, nil, nil
  68. }
  69. // IsSubHasChinese 字幕文件是否包含中文
  70. func (p SubParserHub) IsSubHasChinese(fileFPath string) bool {
  71. // 增加判断已存在的字幕是否有中文
  72. bFind, file, err := p.DetermineFileTypeFromFile(fileFPath)
  73. if err != nil {
  74. p.log.Errorln("IsSubHasChinese.DetermineFileTypeFromFile", fileFPath, err)
  75. return false
  76. }
  77. if bFind == false {
  78. p.log.Warnln("IsSubHasChinese.DetermineFileTypeFromFile", fileFPath, "not support SubType")
  79. return false
  80. }
  81. if language.HasChineseLang(file.Lang) == false {
  82. p.log.Warnln("IsSubHasChinese.HasChineseLang", fileFPath, "not chinese sub, is ", file.Lang.String())
  83. return false
  84. }
  85. return true
  86. }
  87. // getFromWhereSite 从文件名找出是从那个网站下载的。这里的文件名的前缀是下载时候标记好的,比较特殊
  88. func (p SubParserHub) getFromWhereSite(filePath string) string {
  89. fileName := filepath.Base(filePath)
  90. var re = regexp.MustCompile(`^\[(\w+)\]_`)
  91. matched := re.FindStringSubmatch(fileName)
  92. if matched == nil || len(matched) < 1 {
  93. return ""
  94. }
  95. return matched[1]
  96. }
  97. // IsSubTypeWanted 这里匹配的字幕的格式,不包含 Ext 的 . 小数点,注意,仅仅是包含关系
  98. func IsSubTypeWanted(subName string) bool {
  99. nowLowerName := strings.ToLower(subName)
  100. if strings.Contains(nowLowerName, common.SubTypeASS) ||
  101. strings.Contains(nowLowerName, common.SubTypeSSA) ||
  102. strings.Contains(nowLowerName, common.SubTypeSRT) {
  103. return true
  104. }
  105. return false
  106. }
  107. // IsSubExtWanted 输入的字幕文件名,判断后缀名是否符合期望的字幕后缀名列表
  108. func IsSubExtWanted(subName string) bool {
  109. inExt := filepath.Ext(subName)
  110. switch strings.ToLower(inExt) {
  111. case common.SubExtSSA, common.SubExtASS, common.SubExtSRT:
  112. return true
  113. default:
  114. return false
  115. }
  116. }
  117. // IsEmbySubCodecWanted 从 Emby api 拿到字幕的 sub 类型 string (Codec) 是否是符合本程序要求的
  118. func IsEmbySubCodecWanted(inSubCodec string) bool {
  119. tmpString := strings.ToLower(inSubCodec)
  120. if tmpString == common.SubTypeSRT ||
  121. tmpString == common.SubTypeASS ||
  122. tmpString == common.SubTypeSSA {
  123. return true
  124. }
  125. return false
  126. }
  127. // IsEmbySubChineseLangStringWanted 是否是 Emby 自己解析出来的中文语言类型
  128. func IsEmbySubChineseLangStringWanted(inLangString string) bool {
  129. isWanted := false
  130. tmpString := strings.ToLower(inLangString)
  131. nextString := tmpString
  132. spStrings := strings.Split(tmpString, "[")
  133. if len(spStrings) > 1 {
  134. // 去除 chi[xunlie] 类似的标记
  135. nextString = spStrings[0]
  136. } else {
  137. // 去除 chinese(简英,zimuku)
  138. spStrings = strings.Split(tmpString, "(")
  139. if len(spStrings) > 1 {
  140. nextString = spStrings[0]
  141. }
  142. }
  143. // 先判断 ISO 标准的和变种的支持列表,仅仅是中文的
  144. if language.IsSupportISOChineseString(nextString) {
  145. // fmt.Println("###: ERROR")
  146. isWanted = true
  147. }
  148. // 再判断之前支持的列表
  149. switch nextString {
  150. case languageConst.Emby_chinese_chs,
  151. languageConst.Emby_chinese_cht,
  152. languageConst.Emby_chinese_chi:
  153. // chi chs cht
  154. isWanted = true
  155. case replaceLangString(languageConst.Emby_chinese):
  156. // chinese,这个比较特殊,是本程序定义的 chinese 的字段,再 Emby API 下特殊的字幕命名字段
  157. isWanted = true
  158. }
  159. return isWanted
  160. }
  161. // SearchMatchedSubFile 搜索符合后缀名的字幕文件
  162. func SearchMatchedSubFile(log *logrus.Logger, dir string) ([]string, error) {
  163. var fileFullPathList = make([]string, 0)
  164. pathSep := string(os.PathSeparator)
  165. files, err := os.ReadDir(dir)
  166. if err != nil {
  167. return nil, err
  168. }
  169. for _, curFile := range files {
  170. fullPath := dir + pathSep + curFile.Name()
  171. if curFile.IsDir() {
  172. // 内层的错误就无视了
  173. oneList, _ := SearchMatchedSubFile(log, fullPath)
  174. if oneList != nil {
  175. fileFullPathList = append(fileFullPathList, oneList...)
  176. }
  177. } else {
  178. // 这里就是文件了
  179. if IsSubExtWanted(curFile.Name()) == false {
  180. continue
  181. } else {
  182. // 跳过不符合的文件,比如 MAC OS 下可能有缓存文件,见 #138
  183. fi, err := curFile.Info()
  184. if err != nil {
  185. log.Debugln("SearchMatchedSubFile, file.Info:", fullPath, err)
  186. continue
  187. }
  188. if fi.Size() == 4096 && strings.HasPrefix(curFile.Name(), "._") == true {
  189. log.Debugln("SearchMatchedSubFile file.Size() == 4096 && Prefix Name == ._*", fullPath)
  190. continue
  191. }
  192. fileFullPathList = append(fileFullPathList, fullPath)
  193. }
  194. }
  195. }
  196. return fileFullPathList, nil
  197. }
  198. func replaceLangString(inString string) string {
  199. tmpString := strings.ToLower(inString)
  200. one := strings.ReplaceAll(tmpString, ".", "")
  201. two := strings.ReplaceAll(one, "_", "")
  202. return two
  203. }