lang.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. package common
  2. import (
  3. "github.com/abadojack/whatlanggo"
  4. "strings"
  5. )
  6. // LangConverter 语言转换器
  7. func LangConverter(subLang string) Language {
  8. /*
  9. xunlei:未知语言、简体&英语、繁体&英语、简体、繁体、英语
  10. */
  11. if strings.Contains(subLang, MatchLangChs) {
  12. // 优先简体
  13. if strings.Contains(subLang, MatchLangEn) {
  14. // 简英
  15. return ChineseSimpleEnglish
  16. } else if strings.Contains(subLang, MatchLangJp) {
  17. // 简日
  18. return ChineseSimpleJapanese
  19. } else if strings.Contains(subLang, MatchLangKr) {
  20. // 简韩
  21. return ChineseSimpleKorean
  22. }
  23. // 默认简体中文
  24. return ChineseSimple
  25. } else if strings.Contains(subLang, MatchLangCht) {
  26. // 然后是繁体
  27. if strings.Contains(subLang, MatchLangEn) {
  28. // 繁英
  29. return ChineseTraditionalEnglish
  30. } else if strings.Contains(subLang, MatchLangJp) {
  31. // 繁日
  32. return ChineseTraditionalJapanese
  33. } else if strings.Contains(subLang, MatchLangKr) {
  34. // 繁韩
  35. return ChineseTraditionalKorean
  36. }
  37. // 默认繁体中文
  38. return ChineseTraditional
  39. } else if strings.Contains(subLang, MatchLangEn) {
  40. // 英文
  41. return English
  42. } else if strings.Contains(subLang, MatchLangJp) {
  43. // 日文
  44. return Japanese
  45. } else if strings.Contains(subLang, MatchLangKr) {
  46. // 韩文
  47. return Korean
  48. } else {
  49. // 都没有,则标记未知
  50. return Unknow
  51. }
  52. }
  53. // HasChineseLang 是否包含中文
  54. func HasChineseLang(lan Language) bool {
  55. switch lan {
  56. case ChineseSimple,
  57. ChineseTraditional,
  58. ChineseSimpleEnglish,
  59. ChineseTraditionalEnglish,
  60. ChineseSimpleJapanese,
  61. ChineseTraditionalJapanese,
  62. ChineseSimpleKorean,
  63. ChineseTraditionalKorean:
  64. return true
  65. default:
  66. return false
  67. }
  68. }
  69. // GetLangOptions 语言识别的 Options Whitelist
  70. func GetLangOptions() whatlanggo.Options {
  71. return whatlanggo.Options{
  72. Whitelist: map[whatlanggo.Lang]bool{
  73. whatlanggo.Cmn: true, // 中文 11
  74. whatlanggo.Eng: true, // 英文 15
  75. whatlanggo.Jpn: true, // 日文 32
  76. whatlanggo.Kor: true, // 韩文 37
  77. },
  78. }
  79. }
  80. // IsWhiteListLang 是否是白名单语言
  81. func IsWhiteListLang(lang whatlanggo.Lang) bool {
  82. switch lang {
  83. // 中文 英文 日文 韩文
  84. case whatlanggo.Cmn, whatlanggo.Eng,whatlanggo.Jpn,whatlanggo.Kor:
  85. return true
  86. default:
  87. return false
  88. }
  89. }
  90. // DetectSubLangAndStatistics 检测语言然后统计
  91. func DetectSubLangAndStatistics(lines []string, langDict map[int]int) {
  92. for _, line := range lines {
  93. info := whatlanggo.DetectWithOptions(line, GetLangOptions())
  94. tmpLang := -1
  95. if IsWhiteListLang(info.Lang) == true {
  96. tmpLang = (int)(info.Lang)
  97. }
  98. // 这一种语言的 key 是否存在,不存在则新建,存在再数值 +1
  99. value, ok := langDict[tmpLang]
  100. if ok == true {
  101. // 累加
  102. value++
  103. langDict[tmpLang] = value
  104. } else {
  105. langDict[tmpLang] = 1
  106. }
  107. }
  108. }
  109. // SubLangStatistics2SubLangType 由分析的信息转换为具体是什么字幕的语言类型
  110. func SubLangStatistics2SubLangType(isDouble bool, langDict map[int]int) Language {
  111. // TODO 现在是没有很好的办法去识别是简体还是繁体中文的,所以···
  112. // 中文
  113. _, hasChinese := langDict[int(whatlanggo.Cmn)]
  114. // 英文
  115. _, hasEnglish := langDict[int(whatlanggo.Eng)]
  116. // 日文
  117. _, hasJapanese := langDict[int(whatlanggo.Jpn)]
  118. // 韩文
  119. _, hasKorean := langDict[int(whatlanggo.Kor)]
  120. // 优先判断双语
  121. if hasChinese && hasEnglish {
  122. // 简体 英文
  123. return ChineseSimpleEnglish
  124. } else if hasChinese && hasJapanese {
  125. // 简体 日文
  126. return ChineseSimpleJapanese
  127. } else if hasChinese && hasKorean {
  128. // 简体 韩文
  129. return ChineseSimpleKorean
  130. } else if hasChinese {
  131. return ChineseSimple
  132. } else if hasEnglish {
  133. return English
  134. } else if hasJapanese {
  135. return Japanese
  136. } else if hasKorean {
  137. return Korean
  138. } else {
  139. return Unknow
  140. }
  141. }
// Language is the subtitle language type. Note that the search still targets
// Chinese subtitles by default; a download may simply come bundled with other
// languages as well.
type Language int

// Language values. They are assigned consecutively by iota starting at 0, so
// the declaration order below determines each numeric value.
const (
	// Unknow is an unknown language.
	Unknow Language = iota
	// ChineseSimple is Simplified Chinese.
	ChineseSimple
	// ChineseTraditional is Traditional Chinese.
	ChineseTraditional
	// ChineseSimpleEnglish is a Simplified Chinese + English bilingual subtitle.
	ChineseSimpleEnglish
	// ChineseTraditionalEnglish is a Traditional Chinese + English bilingual subtitle.
	ChineseTraditionalEnglish
	// English is English.
	English
	// Japanese is Japanese.
	Japanese
	// ChineseSimpleJapanese is a Simplified Chinese + Japanese bilingual subtitle.
	ChineseSimpleJapanese
	// ChineseTraditionalJapanese is a Traditional Chinese + Japanese bilingual subtitle.
	ChineseTraditionalJapanese
	// Korean is Korean.
	Korean
	// ChineseSimpleKorean is a Simplified Chinese + Korean bilingual subtitle.
	ChineseSimpleKorean
	// ChineseTraditionalKorean is a Traditional Chinese + Korean bilingual subtitle.
	ChineseTraditionalKorean
)
// Chinese-language labels for the Language values. The single-character
// markers (MatchLangChs, MatchLangCht, MatchLangEn, MatchLangJp, MatchLangKr)
// are the substrings LangConverter searches for; all of them are also the
// strings returned by Language.String.
const (
	// MathLangChnUnknow is the label for an unknown language ("unknown language").
	MathLangChnUnknow = "未知语言"
	// MatchLangChs marks Simplified Chinese.
	MatchLangChs = "简"
	// MatchLangCht marks Traditional Chinese.
	MatchLangCht = "繁"
	// MatchLangChsEn is the label for Simplified Chinese + English.
	MatchLangChsEn = "简英"
	// MatchLangChtEn is the label for Traditional Chinese + English.
	MatchLangChtEn = "繁英"
	// MatchLangEn marks English.
	MatchLangEn = "英"
	// MatchLangJp marks Japanese.
	MatchLangJp = "日"
	// MatchLangChsJp is the label for Simplified Chinese + Japanese.
	MatchLangChsJp = "简日"
	// MatchLangChtJp is the label for Traditional Chinese + Japanese.
	MatchLangChtJp = "繁日"
	// MatchLangKr marks Korean.
	MatchLangKr = "韩"
	// MatchLangChsKr is the label for Simplified Chinese + Korean.
	MatchLangChsKr = "简韩"
	// MatchLangChtKr is the label for Traditional Chinese + Korean.
	MatchLangChtKr = "繁韩"
)
  172. func (l Language) String() string {
  173. switch l {
  174. case ChineseSimple:
  175. return MatchLangChs
  176. case ChineseTraditional:
  177. return MatchLangCht
  178. case ChineseSimpleEnglish:
  179. return MatchLangChsEn
  180. case ChineseTraditionalEnglish:
  181. return MatchLangChtEn
  182. case English:
  183. return MatchLangEn
  184. case Japanese:
  185. return MatchLangJp
  186. case ChineseSimpleJapanese:
  187. return MatchLangChsJp
  188. case ChineseTraditionalJapanese:
  189. return MatchLangChtJp
  190. case Korean:
  191. return MatchLangKr
  192. case ChineseSimpleKorean:
  193. return MatchLangChsKr
  194. case ChineseTraditionalKorean:
  195. return MatchLangChtKr
  196. default:
  197. return MathLangChnUnknow
  198. }
  199. }