| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213 |
- package common
- import (
- "github.com/abadojack/whatlanggo"
- "strings"
- )
- // LangConverter 语言转换器
- func LangConverter(subLang string) Language {
- /*
- xunlei:未知语言、简体&英语、繁体&英语、简体、繁体、英语
- */
- if strings.Contains(subLang, MatchLangChs) {
- // 优先简体
- if strings.Contains(subLang, MatchLangEn) {
- // 简英
- return ChineseSimpleEnglish
- } else if strings.Contains(subLang, MatchLangJp) {
- // 简日
- return ChineseSimpleJapanese
- } else if strings.Contains(subLang, MatchLangKr) {
- // 简韩
- return ChineseSimpleKorean
- }
- // 默认简体中文
- return ChineseSimple
- } else if strings.Contains(subLang, MatchLangCht) {
- // 然后是繁体
- if strings.Contains(subLang, MatchLangEn) {
- // 繁英
- return ChineseTraditionalEnglish
- } else if strings.Contains(subLang, MatchLangJp) {
- // 繁日
- return ChineseTraditionalJapanese
- } else if strings.Contains(subLang, MatchLangKr) {
- // 繁韩
- return ChineseTraditionalKorean
- }
- // 默认繁体中文
- return ChineseTraditional
- } else if strings.Contains(subLang, MatchLangEn) {
- // 英文
- return English
- } else if strings.Contains(subLang, MatchLangJp) {
- // 日文
- return Japanese
- } else if strings.Contains(subLang, MatchLangKr) {
- // 韩文
- return Korean
- } else {
- // 都没有,则标记未知
- return Unknow
- }
- }
- // HasChineseLang 是否包含中文
- func HasChineseLang(lan Language) bool {
- switch lan {
- case ChineseSimple,
- ChineseTraditional,
- ChineseSimpleEnglish,
- ChineseTraditionalEnglish,
- ChineseSimpleJapanese,
- ChineseTraditionalJapanese,
- ChineseSimpleKorean,
- ChineseTraditionalKorean:
- return true
- default:
- return false
- }
- }
- // GetLangOptions 语言识别的 Options Whitelist
- func GetLangOptions() whatlanggo.Options {
- return whatlanggo.Options{
- Whitelist: map[whatlanggo.Lang]bool{
- whatlanggo.Cmn: true, // 中文 11
- whatlanggo.Eng: true, // 英文 15
- whatlanggo.Jpn: true, // 日文 32
- whatlanggo.Kor: true, // 韩文 37
- },
- }
- }
- // IsWhiteListLang 是否是白名单语言
- func IsWhiteListLang(lang whatlanggo.Lang) bool {
- switch lang {
- // 中文 英文 日文 韩文
- case whatlanggo.Cmn, whatlanggo.Eng,whatlanggo.Jpn,whatlanggo.Kor:
- return true
- default:
- return false
- }
- }
- // DetectSubLangAndStatistics 检测语言然后统计
- func DetectSubLangAndStatistics(lines []string, langDict map[int]int) {
- for _, line := range lines {
- info := whatlanggo.DetectWithOptions(line, GetLangOptions())
- tmpLang := -1
- if IsWhiteListLang(info.Lang) == true {
- tmpLang = (int)(info.Lang)
- }
- // 这一种语言的 key 是否存在,不存在则新建,存在再数值 +1
- value, ok := langDict[tmpLang]
- if ok == true {
- // 累加
- value++
- langDict[tmpLang] = value
- } else {
- langDict[tmpLang] = 1
- }
- }
- }
- // SubLangStatistics2SubLangType 由分析的信息转换为具体是什么字幕的语言类型
- func SubLangStatistics2SubLangType(isDouble bool, langDict map[int]int) Language {
- // TODO 现在是没有很好的办法去识别是简体还是繁体中文的,所以···
- // 中文
- _, hasChinese := langDict[int(whatlanggo.Cmn)]
- // 英文
- _, hasEnglish := langDict[int(whatlanggo.Eng)]
- // 日文
- _, hasJapanese := langDict[int(whatlanggo.Jpn)]
- // 韩文
- _, hasKorean := langDict[int(whatlanggo.Kor)]
- // 优先判断双语
- if hasChinese && hasEnglish {
- // 简体 英文
- return ChineseSimpleEnglish
- } else if hasChinese && hasJapanese {
- // 简体 日文
- return ChineseSimpleJapanese
- } else if hasChinese && hasKorean {
- // 简体 韩文
- return ChineseSimpleKorean
- } else if hasChinese {
- return ChineseSimple
- } else if hasEnglish {
- return English
- } else if hasJapanese {
- return Japanese
- } else if hasKorean {
- return Korean
- } else {
- return Unknow
- }
- }
// Language identifies the language content of a subtitle. The search still
// targets Chinese subtitles by default; other languages may simply come along
// with a download.
type Language int

// The numeric values follow declaration order (iota), so the order below must
// not change.
const (
	// Unknow is an unrecognised language.
	Unknow Language = iota
	// ChineseSimple is Simplified Chinese.
	ChineseSimple
	// ChineseTraditional is Traditional Chinese.
	ChineseTraditional
	// ChineseSimpleEnglish is a Simplified Chinese + English bilingual subtitle.
	ChineseSimpleEnglish
	// ChineseTraditionalEnglish is a Traditional Chinese + English bilingual subtitle.
	ChineseTraditionalEnglish
	// English is English.
	English
	// Japanese is Japanese.
	Japanese
	// ChineseSimpleJapanese is a Simplified Chinese + Japanese bilingual subtitle.
	ChineseSimpleJapanese
	// ChineseTraditionalJapanese is a Traditional Chinese + Japanese bilingual subtitle.
	ChineseTraditionalJapanese
	// Korean is Korean.
	Korean
	// ChineseSimpleKorean is a Simplified Chinese + Korean bilingual subtitle.
	ChineseSimpleKorean
	// ChineseTraditionalKorean is a Traditional Chinese + Korean bilingual subtitle.
	ChineseTraditionalKorean
)
// Marker strings used both to recognise a language inside a subtitle label
// and as the textual form of a Language value.
const (
	// Label for an unknown language. The identifier is spelled "Math" rather
	// than "Match"; it is kept as-is because it is an exported name.
	MathLangChnUnknow = "未知语言"

	// Single-language markers.
	MatchLangChs = "简" // Simplified Chinese
	MatchLangCht = "繁" // Traditional Chinese
	MatchLangEn  = "英" // English
	MatchLangJp  = "日" // Japanese
	MatchLangKr  = "韩" // Korean

	// Bilingual markers: a Chinese variant followed by the second language.
	MatchLangChsEn = "简英" // Simplified Chinese + English
	MatchLangChtEn = "繁英" // Traditional Chinese + English
	MatchLangChsJp = "简日" // Simplified Chinese + Japanese
	MatchLangChtJp = "繁日" // Traditional Chinese + Japanese
	MatchLangChsKr = "简韩" // Simplified Chinese + Korean
	MatchLangChtKr = "繁韩" // Traditional Chinese + Korean
)
- func (l Language) String() string {
- switch l {
- case ChineseSimple:
- return MatchLangChs
- case ChineseTraditional:
- return MatchLangCht
- case ChineseSimpleEnglish:
- return MatchLangChsEn
- case ChineseTraditionalEnglish:
- return MatchLangChtEn
- case English:
- return MatchLangEn
- case Japanese:
- return MatchLangJp
- case ChineseSimpleJapanese:
- return MatchLangChsJp
- case ChineseTraditionalJapanese:
- return MatchLangChtJp
- case Korean:
- return MatchLangKr
- case ChineseSimpleKorean:
- return MatchLangChsKr
- case ChineseTraditionalKorean:
- return MatchLangChtKr
- default:
- return MathLangChnUnknow
- }
- }
|