Browse Source

差一点完成抉择下载字幕保存功能

Signed-off-by: allan716 <[email protected]>
allan716 4 years ago
parent
commit
0ebed4236f

+ 1 - 0
.gitignore

@@ -3,3 +3,4 @@
 /common/result.png
 /result.png
 /Logs
+/debugThings

+ 2 - 5
README.md

@@ -25,15 +25,12 @@
 
 * 完成初版自动下载
   * ~~多个字幕网站的下载支持~~
-  
   * ~~解析下载到的字幕是什么语言的(直接分析字幕文件)~~
-  
   * ~~搜索视频文件~~
-  
   * ~~日志支持~~
-  
+  * 压缩文件的解压(看了一圈,没有能使用 io.Reader 传参的,都是路径,所以就没法做到 in memory 完全操作了,得写缓存)
   * 识别字幕文件是简体还是繁体中文的(貌似只能从文件名判断,内容粗略搜索没有发现可操作)
-
+  
   
   * 配置文件支持
 * 字幕的风评(有些字幕太差了,需要进行过滤,考虑排除,字幕组,关键词,机翻,以及评分等条件)

+ 2 - 1
common/common.go

@@ -6,4 +6,5 @@ const HTMLTimeOut = 60 * time.Second	// HttpClient 超时时间
 
 const DownloadSubsPerSite = 1 // 默认,每个网站下载一个字幕,允许额外传参调整
 
-const DebugFolder = "debugThings"
+const DebugFolder = "debugThings"
+const TmpFolder = "tmpThings"

+ 72 - 23
common/lang.go

@@ -115,40 +115,89 @@ func DetectSubLangAndStatistics(lines []string, langDict map[int]int) {
 	}
 }
 
-
 // SubLangStatistics2SubLangType 由分析的信息转换为具体是什么字幕的语言类型
-func SubLangStatistics2SubLangType(isDouble bool, langDict map[int]int) Language {
+func SubLangStatistics2SubLangType(countLineFeed, AllLines float32, langDict map[int]int) Language {
+	const basePer = 0.8
+	// 是否是双语?
+	isDouble := false
+	perLines := countLineFeed / AllLines
+	// 第二行字幕出现的概率大于 80% 应该稳了吧,不然还能三语?
+	if perLines > basePer {
+		isDouble = true
+	}
 	// TODO 现在是没有很好的办法去识别是简体还是繁体中文的,所以···
 	// 中文
-	_, hasChinese := langDict[int(whatlanggo.Cmn)]
+	countChinese, hasChinese := langDict[int(whatlanggo.Cmn)]
 	// 英文
-	_, hasEnglish := langDict[int(whatlanggo.Eng)]
+	countEnglish, hasEnglish := langDict[int(whatlanggo.Eng)]
 	// 日文
-	_, hasJapanese := langDict[int(whatlanggo.Jpn)]
+	countJapanese, hasJapanese := langDict[int(whatlanggo.Jpn)]
 	// 韩文
-	_, hasKorean := langDict[int(whatlanggo.Kor)]
+	countKorean, hasKorean := langDict[int(whatlanggo.Kor)]
 
 	// 优先判断双语
-	if hasChinese && hasEnglish {
-		// 简体	英文
-		return ChineseSimpleEnglish
-	} else if hasChinese && hasJapanese {
-		// 简体 日文
-		return ChineseSimpleJapanese
-	} else if hasChinese && hasKorean {
-		// 简体 韩文
-		return ChineseSimpleKorean
-	} else if hasChinese {
-		return ChineseSimple
-	} else if hasEnglish {
-		return English
-	} else if hasJapanese {
-		return Japanese
-	} else if hasKorean {
-		return Korean
+	if isDouble == true {
+		// 首先得在外面统计就知道是双语
+		if hasChinese && hasEnglish {
+			// 简体	英文
+			return ChineseSimpleEnglish
+		} else if hasChinese && hasJapanese {
+			// 简体 日文
+			return ChineseSimpleJapanese
+		} else if hasChinese && hasKorean {
+			// 简体 韩文
+			return ChineseSimpleKorean
+		} else if hasChinese {
+			return ChineseSimple
+		} else if hasEnglish {
+			return English
+		} else if hasJapanese {
+			return Japanese
+		} else if hasKorean {
+			return Korean
+		} else {
+			return Unknow
+		}
 	} else {
+		// 如果比例达不到,那么就是单语言,所以最多的那个就是当前的语言
+		// 这里的字典是有可能出现多种语言的,所以逐个语言判断占比
+		if hasChinese {
+			// 那么起码要占比 80% 对吧
+			perLines = float32(countChinese) / AllLines
+			if perLines > basePer {
+				return ChineseSimple
+			}
+		}
+		if hasEnglish {
+			// 那么起码要占比 80% 对吧
+			perLines = float32(countEnglish) / AllLines
+			if perLines > basePer {
+				return English
+			}
+		}
+		if hasJapanese {
+			// 那么起码要占比 80% 对吧
+			perLines = float32(countJapanese) / AllLines
+			if perLines > basePer {
+				return Japanese
+			}
+		}
+		if hasKorean {
+			// 那么起码要占比 80% 对吧
+			perLines = float32(countKorean) / AllLines
+			if perLines > basePer {
+				return Korean
+			}
+		}
+
 		return Unknow
 	}
+
+}
+
+// IsChineseSimpleOrTraditional 从字幕的名称中尝试确认是简体还是繁体
+func IsChineseSimpleOrTraditional(inputFileName string) bool {
+	return true
 }
 
 // Language 语言类型,注意,这里默认还是查找的是中文字幕,只不过下载的时候可能附带了其他的

+ 5 - 4
common/subType.go

@@ -7,9 +7,10 @@ import (
 
 // IsSubTypeWanted 这里匹配的字幕的格式,不包含 Ext 的 . 小数点,注意,仅仅是包含关系
 func IsSubTypeWanted(subName string) bool {
-	if strings.Contains(strings.ToLower(subName), SubTypeASS) ||
-		strings.Contains(strings.ToLower(subName), SubTypeSSA) ||
-		strings.Contains(strings.ToLower(subName), SubTypeSRT) {
+	nowLowerName := strings.ToLower(subName)
+	if strings.Contains(nowLowerName, SubTypeASS) ||
+		strings.Contains(nowLowerName, SubTypeSSA) ||
+		strings.Contains(nowLowerName, SubTypeSRT) {
 		return true
 	}
 
@@ -19,7 +20,7 @@ func IsSubTypeWanted(subName string) bool {
 // IsSubExtWanted 输入的字幕文件名,判断后缀名是否符合期望的字幕后缀名列表
 func IsSubExtWanted(subName string) bool {
 	inExt := filepath.Ext(subName)
-	switch inExt {
+	switch strings.ToLower(inExt) {
 	case SubExtSSA,SubExtASS,SubExtSRT:
 		return true
 	default:

+ 59 - 5
common/util.go

@@ -3,6 +3,7 @@ package common
 import (
 	"fmt"
 	"github.com/go-resty/resty/v2"
+	"io/ioutil"
 	"net/http"
 	"os"
 	"path"
@@ -89,15 +90,68 @@ func AddBaseUrl(baseUrl, url string) string {
 }
 
 func GetDebugFolder() (string, error) {
-	nowProcessRoot, _ := os.Getwd()
-	nowProcessRoot = path.Join(nowProcessRoot, DebugFolder)
-	err := os.MkdirAll(nowProcessRoot, os.ModePerm)
+	if defDebugFolder == "" {
+		nowProcessRoot, _ := os.Getwd()
+		nowProcessRoot = path.Join(nowProcessRoot, DebugFolder)
+		err := os.MkdirAll(nowProcessRoot, os.ModePerm)
+		if err != nil {
+			return "", err
+		}
+		defDebugFolder = nowProcessRoot
+		return nowProcessRoot, err
+	}
+	return defDebugFolder, nil
+}
+
+func GetTmpFolder() (string, error) {
+	if defTmpFolder == "" {
+		nowProcessRoot, _ := os.Getwd()
+		nowProcessRoot = path.Join(nowProcessRoot, TmpFolder)
+		err := os.MkdirAll(nowProcessRoot, os.ModePerm)
+		if err != nil {
+			return "", err
+		}
+		defTmpFolder = nowProcessRoot
+		return nowProcessRoot, err
+	}
+	return defTmpFolder, nil
+}
+
+func ClearTmpFolder() error {
+	nowTmpFolder, err := GetTmpFolder()
 	if err != nil {
-		return "", err
+		return err
 	}
-	return nowProcessRoot, err
+
+	pathSep := string(os.PathSeparator)
+	files, err := ioutil.ReadDir(nowTmpFolder)
+	if err != nil {
+		return err
+	}
+	for _, curFile := range files {
+		fullPath := nowTmpFolder + pathSep + curFile.Name()
+		if curFile.IsDir() {
+			err = os.RemoveAll(fullPath)
+			if err != nil {
+				return err
+			}
+		} else {
+			// 这里就是文件了
+			err = os.Remove(fullPath)
+			if err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
 }
 
+var (
+	defDebugFolder = ""
+	defTmpFolder = ""
+)
+
 // ReqParam 可选择传入的参数
 type ReqParam struct {
 	UserExtList []string	// 用户确认的视频后缀名支持列表

+ 126 - 24
downloader.go

@@ -8,6 +8,7 @@ import (
 	"github.com/allanpk716/ChineseSubFinder/sub_supplier/xunlei"
 	"github.com/allanpk716/ChineseSubFinder/sub_supplier/zimuku"
 	"github.com/go-rod/rod/lib/utils"
+	"github.com/mholt/archiver/v3"
 	"github.com/sirupsen/logrus"
 	"io/ioutil"
 	"os"
@@ -15,7 +16,6 @@ import (
 	"path/filepath"
 	"strconv"
 	"strings"
-	"sync"
 )
 
 type Downloader struct {
@@ -67,7 +67,7 @@ func (d Downloader) GetDefSupportExtList() []string {
 }
 
 func (d Downloader) DownloadSub(dir string) error {
-	nowVideoList, err := d.searchFile(dir)
+	nowVideoList, err := d.searchMatchedVideoFile(dir)
 	if err != nil {
 		return err
 	}
@@ -80,48 +80,116 @@ func (d Downloader) DownloadSub(dir string) error {
 	// TODO 后续再改为每个视频以上的流程都是一个 channel 来做,并且需要控制在一个并发量之下(很可能没必要,毕竟要在弱鸡机器上挂机用的)
 	// 一个视频文件同时多个站点查询,阻塞完毕后,在进行下一个
 	for i, oneVideoFullPath := range nowVideoList {
-		d.downloadSub4OneVideo(oneVideoFullPath, suppliers, i)
+		nowSubInfos := d.downloadSub4OneVideo(oneVideoFullPath, suppliers, i)
 		// 字幕都下载缓存好了,需要抉择存哪一个,优先选择中文双语的,然后到中文
-		d.chooseAndSaveSubFile(oneVideoFullPath, suppliers)
+		err = d.chooseAndSaveSubFile(oneVideoFullPath, nowSubInfos)
+		if err != nil {
+			d.log.Error(oneVideoFullPath, "Download Sub Error",err)
+			continue
+		}
 	}
 	return nil
 }
 
-func (d Downloader) chooseAndSaveSubFile(oneVideoFullPath string, suppliers []sub_supplier.ISupplier) {
-	// 判断下载的文件是什么,可能需要解压
-	for i, info := range suppliers {
-		
+// chooseAndSaveSubFile 需要从汇总来的各网站字幕中,找到合适的
+func (d Downloader) chooseAndSaveSubFile(oneVideoFullPath string, subInfos []sub_supplier.SubInfo) error {
+
+	// 得到目标视频文件的根目录
+	videoRootPath := filepath.Dir(oneVideoFullPath)
+	tmpFolderFullPath, err := common.GetTmpFolder()
+	if err != nil {
+		return err
 	}
+
+	var siteSubInfoDict = make([]string, 0)
+	// 第三方的解压函数,首先不支持 io.Reader 的操作,也就是得缓存到本地硬盘再读取解压
+	// 且使用 walk 会无法解压 rar,得指定具体的实例,太麻烦了,直接用通用的接口得了,就是得都缓存下来再判断
+	for _, subInfo := range subInfos {
+		// TODO 这里先处理 Top1 的字幕,后续再考虑怎么决定 Top N 选择哪一个,很可能选择每个网站 Top 1就行了,具体的过滤逻辑在其内部实现
+		// 先存下来,保存的时候需要前缀,前缀就是从哪个网站下载来的
+		nowFileSaveFullPath := path.Join(tmpFolderFullPath, d.getFrontNameAndOrgName(subInfo))
+		err = utils.OutputFile(nowFileSaveFullPath, subInfo.Data)
+		if err != nil {
+			d.log.Error(subInfo.FromWhere, subInfo.Name, subInfo.TopN, err)
+			continue
+		}
+
+		nowExt := strings.ToLower(subInfo.Ext)
+		if nowExt != ".zip" && nowExt != ".tar" && nowExt != ".rar" && nowExt != ".7z" {
+			// 是否是受支持的字幕类型
+			if common.IsSubExtWanted(nowExt) == false {
+				continue
+			}
+			// 加入缓存列表
+			siteSubInfoDict = append(siteSubInfoDict, nowFileSaveFullPath)
+		} else {
+			// 那么就是需要解压的文件了
+			// 解压,给一个单独的文件夹
+			unzipTmpFolder := path.Join(tmpFolderFullPath, subInfo.FromWhere)
+			err = archiver.Unarchive(nowFileSaveFullPath, unzipTmpFolder)
+			// 解压完成后,遍历受支持的字幕列表,加入缓存列表
+			if err != nil {
+				d.log.Error(subInfo.FromWhere, subInfo.Name, subInfo.TopN, err)
+				continue
+			}
+			// 搜索这个目录下的所有符合字幕格式的文件
+			subFileFullPaths, err := d.searchMatchedSubFile(unzipTmpFolder)
+			if err != nil {
+				d.log.Error(subInfo.FromWhere, subInfo.Name, subInfo.TopN, err)
+				continue
+			}
+			for _, fileFullPath := range subFileFullPaths {
+				// 加入缓存列表
+				siteSubInfoDict = append(siteSubInfoDict, fileFullPath)
+			}
+		}
+	}
+	// 拿到现有的字幕列表,开始抉择
+	// 还需要考虑,判断这个字幕是简体还是繁体
+	
+	println(videoRootPath)
+
+	// 抉择完毕,需要清理缓存目录
+	err = common.ClearTmpFolder()
+	if err != nil {
+		return err
+	}
+	return nil
 }
 
-// downloadSub4OneVideo 为这个视频下载字幕
-func (d Downloader) downloadSub4OneVideo(oneVideoFullPath string, suppliers []sub_supplier.ISupplier, i int) {
+// downloadSub4OneVideo 为这个视频下载字幕,所有网站找到的字幕都会汇总输出
+func (d Downloader) downloadSub4OneVideo(oneVideoFullPath string, suppliers []sub_supplier.ISupplier, i int) []sub_supplier.SubInfo {
 	ontVideoRootPath := filepath.Dir(oneVideoFullPath)
+	var outSUbInfos = make([]sub_supplier.SubInfo, 0)
 	// 同时进行查询
-	wg := sync.WaitGroup{}
-	wg.Add(len(suppliers))
+	subInfosChannel := make(chan []sub_supplier.SubInfo)
 	d.log.Infoln("DlSub Start", oneVideoFullPath)
 	for _, supplier := range suppliers {
 		supplier := supplier
 		go func() {
-			err := d.downloadSub4OneSite(oneVideoFullPath, i, supplier, &wg, ontVideoRootPath)
+			subInfos, err := d.downloadSub4OneSite(oneVideoFullPath, i, supplier, ontVideoRootPath)
 			if err != nil {
 				d.log.Error(err)
-				return
 			}
+			subInfosChannel <- subInfos
 		}()
 	}
-	wg.Wait()
+	for i := 0; i < len(suppliers); i++ {
+		v, ok := <-subInfosChannel
+		if ok == true {
+			outSUbInfos = append(outSUbInfos, v...)
+		}
+	}
 	d.log.Infoln(i, "DlSub End", oneVideoFullPath)
+	return outSUbInfos
 }
 
 // downloadSub4OneSite 在一个站点下载这个视频的字幕
-func (d Downloader) downloadSub4OneSite(oneVideoFullPath string, i int, supplier sub_supplier.ISupplier, wg *sync.WaitGroup, ontVideoRootPath string) error {
-	defer wg.Done()
+func (d Downloader) downloadSub4OneSite(oneVideoFullPath string, i int, supplier sub_supplier.ISupplier, ontVideoRootPath string) ([]sub_supplier.SubInfo, error) {
 	d.log.Infoln(i, supplier.GetSupplierName(), "Start...")
 	subInfos, err := supplier.GetSubListFromFile(oneVideoFullPath)
 	if err != nil {
-		return err
+		return nil, err
 	}
 	// 把后缀名给改好
 	for x, info := range subInfos {
@@ -136,22 +204,24 @@ func (d Downloader) downloadSub4OneSite(oneVideoFullPath string, i int, supplier
 		desFolderFullPath := path.Join(ontVideoRootPath, SubTmpFolderName)
 		err = os.MkdirAll(desFolderFullPath, os.ModePerm)
 		if err != nil {
-			return err
+			d.log.Error(err)
+			return subInfos, nil
 		}
 		for x, info := range subInfos {
 			desSubFileFullPath := path.Join(desFolderFullPath, supplier.GetSupplierName() + "_" + strconv.Itoa(x)+"_"+info.Name)
 			err = utils.OutputFile(desSubFileFullPath, info.Data)
 			if err != nil {
-				return err
+				d.log.Error(err)
+				break
 			}
 		}
 	}
 	d.log.Infoln(i, supplier.GetSupplierName(), "End...")
-	return nil
+	return subInfos, nil
 }
 
-// searchFile 搜索符合后缀名的视频文件
-func (d Downloader)searchFile(dir string) ([]string, error) {
+// searchMatchedVideoFile 搜索符合后缀名的视频文件
+func (d Downloader) searchMatchedVideoFile(dir string) ([]string, error) {
 
 	var fileFullPathList = make([]string, 0)
 	pathSep := string(os.PathSeparator)
@@ -163,7 +233,7 @@ func (d Downloader)searchFile(dir string) ([]string, error) {
 		fullPath := dir + pathSep + curFile.Name()
 		if curFile.IsDir() {
 			// 内层的错误就无视了
-			oneList, _ := d.searchFile(fullPath)
+			oneList, _ := d.searchMatchedVideoFile(fullPath)
 			if oneList != nil {
 				fileFullPathList = append(fileFullPathList, oneList...)
 			}
@@ -177,6 +247,33 @@ func (d Downloader)searchFile(dir string) ([]string, error) {
 	return fileFullPathList, nil
 }
 
+// searchMatchedSubFile 搜索符合后缀名的字幕文件
+func (d Downloader) searchMatchedSubFile(dir string) ([]string, error) {
+
+	var fileFullPathList = make([]string, 0)
+	pathSep := string(os.PathSeparator)
+	files, err := ioutil.ReadDir(dir)
+	if err != nil {
+		return nil, err
+	}
+	for _, curFile := range files {
+		fullPath := dir + pathSep + curFile.Name()
+		if curFile.IsDir() {
+			// 内层的错误就无视了
+			oneList, _ := d.searchMatchedSubFile(fullPath)
+			if oneList != nil {
+				fileFullPathList = append(fileFullPathList, oneList...)
+			}
+		} else {
+			// 这里就是文件了
+			if common.IsSubExtWanted(filepath.Ext(curFile.Name())) == true {
+				fileFullPathList = append(fileFullPathList, fullPath)
+			}
+		}
+	}
+	return fileFullPathList, nil
+}
+
 // isWantedVideoExtDef 后缀名是否符合规则
 func (d Downloader) isWantedVideoExtDef(fileName string) bool {
 	fileName = strings.ToLower(filepath.Ext(fileName))
@@ -188,6 +285,11 @@ func (d Downloader) isWantedVideoExtDef(fileName string) bool {
 	return false
 }
 
+// getFrontNameAndOrgName 返回的名称包含:从哪个网站下载的、在这个网站中排名第几、文件名
+func (d Downloader) getFrontNameAndOrgName(info sub_supplier.SubInfo) string {
+	return "[" + info.FromWhere + "]" + strconv.FormatInt(info.TopN,10) +info.Name
+}
+
 const (
 	VideoExtMp4 = ".mp4"
 	VideoExtMkv = ".mkv"

+ 3 - 2
downloader_test.go

@@ -12,7 +12,7 @@ func TestDownloader_searchFile(t *testing.T) {
 	dirRoot := "X:\\电影\\Spiral From the Book of Saw (2021)"
 
 	dl := NewDownloader()
-	files, err := dl.searchFile(dirRoot)
+	files, err := dl.searchMatchedVideoFile(dirRoot)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -27,10 +27,11 @@ func TestDownloader_searchFile(t *testing.T) {
 }
 
 func TestDownloader_DownloadSub(t *testing.T) {
+	var err error
 	dirRoot := "X:\\电影\\Spiral From the Book of Saw (2021)"
 
 	dl := NewDownloader(common.ReqParam{DebugMode: true})
-	err := dl.DownloadSub(dirRoot)
+	err = dl.DownloadSub(dirRoot)
 	if err != nil {
 		t.Fatal(err)
 	}

+ 1 - 2
go.mod

@@ -6,12 +6,11 @@ require (
 	github.com/PuerkitoBio/goquery v1.6.1
 	github.com/abadojack/whatlanggo v1.0.1
 	github.com/beevik/etree v1.1.0
-	github.com/gen2brain/go-unarr v0.1.1 // indirect
 	github.com/go-resty/resty/v2 v2.6.0
 	github.com/go-rod/rod v0.97.2
-	github.com/jonboulle/clockwork v0.2.2 // indirect
 	github.com/lestrrat-go/file-rotatelogs v2.4.0+incompatible
 	github.com/lestrrat-go/strftime v1.0.4 // indirect
+	github.com/mholt/archiver/v3 v3.5.0 // indirect
 	github.com/middelink/go-parse-torrent-name v0.0.0-20190301154245-3ff4efacd4c4
 	github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646
 	github.com/pkg/errors v0.9.1 // indirect

+ 24 - 0
go.sum

@@ -2,6 +2,8 @@ github.com/PuerkitoBio/goquery v1.6.1 h1:FgjbQZKl5HTmcn4sKBgvx8vv63nhyhIpv7lJpFG
 github.com/PuerkitoBio/goquery v1.6.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
 github.com/abadojack/whatlanggo v1.0.1 h1:19N6YogDnf71CTHm3Mp2qhYfkRdyvbgwWdd2EPxJRG4=
 github.com/abadojack/whatlanggo v1.0.1/go.mod h1:66WiQbSbJBIlOZMsvbKe5m6pzQovxCH9B/K8tQB2uoc=
+github.com/andybalholm/brotli v1.0.0 h1:7UCwP93aiSfvWpapti8g88vVVGp2qqtGyePsSuDafo4=
+github.com/andybalholm/brotli v1.0.0/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
 github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
 github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
 github.com/beevik/etree v1.1.0 h1:T0xke/WvNtMoCqgzPhkX2r4rjY3GDZFi+FjpRZY2Jbs=
@@ -9,14 +11,25 @@ github.com/beevik/etree v1.1.0/go.mod h1:r8Aw8JqVegEf0w2fDnATrX9VpkMcyFeM0FhwO62
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
+github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
+github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
 github.com/gen2brain/go-unarr v0.1.1 h1:wZl53oYzEN1PEIA/dPa/FjBq9rRqPmS/Gzul8BdKYK4=
 github.com/gen2brain/go-unarr v0.1.1/go.mod h1:P05CsEe8jVEXhxqXqp9mFKUKFV0BKpFmtgNWf8Mcoos=
 github.com/go-resty/resty/v2 v2.6.0 h1:joIR5PNLM2EFqqESUjCMGXrWmXNHEU9CEiK813oKYS4=
 github.com/go-resty/resty/v2 v2.6.0/go.mod h1:PwvJS6hvaPkjtjNg9ph+VrSD92bi5Zq73w/BIH7cC3Q=
 github.com/go-rod/rod v0.97.2 h1:4AWtucf0fXKbdaEjNNhjIcdYXQyc4+yx8YYI73jyP5A=
 github.com/go-rod/rod v0.97.2/go.mod h1:DgPYd1ql/oCzGxrM5aiCcVM+kA4MFCJ+Mht7ZVBSiG0=
+github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
+github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/jonboulle/clockwork v0.2.2 h1:UOGuzwb1PwsrDAObMuhUnj0p5ULPj8V/xJ7Kx9qUBdQ=
 github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8=
+github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/compress v1.10.10 h1:a/y8CglcM7gLGYmlbP/stPE5sR3hbhFRUjCBfd/0B3I=
+github.com/klauspost/compress v1.10.10/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
+github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
+github.com/klauspost/pgzip v1.2.4 h1:TQ7CNpYKovDOmqzRHKxJh0BeaBI7UdQZYc6p7pMQh1A=
+github.com/klauspost/pgzip v1.2.4/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8=
 github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is=
@@ -24,10 +37,16 @@ github.com/lestrrat-go/file-rotatelogs v2.4.0+incompatible h1:Y6sqxHMyB1D2YSzWkL
 github.com/lestrrat-go/file-rotatelogs v2.4.0+incompatible/go.mod h1:ZQnN8lSECaebrkQytbHj4xNgtg8CR7RYXnPok8e0EHA=
 github.com/lestrrat-go/strftime v1.0.4 h1:T1Rb9EPkAhgxKqbcMIPguPq8glqXTA1koF8n9BHElA8=
 github.com/lestrrat-go/strftime v1.0.4/go.mod h1:E1nN3pCbtMSu1yjSVeyuRFVm/U0xoR76fd03sz+Qz4g=
+github.com/mholt/archiver/v3 v3.5.0 h1:nE8gZIrw66cu4osS/U7UW7YDuGMHssxKutU8IfWxwWE=
+github.com/mholt/archiver/v3 v3.5.0/go.mod h1:qqTTPUK/HZPFgFQ/TJ3BzvTpF/dPtFVJXdQbCmeMxwc=
 github.com/middelink/go-parse-torrent-name v0.0.0-20190301154245-3ff4efacd4c4 h1:C/VViMMbR/4Ti2aXrWpKy34S05cRaVd6EvV9BFR3qJ8=
 github.com/middelink/go-parse-torrent-name v0.0.0-20190301154245-3ff4efacd4c4/go.mod h1:H66QhXPJpUSdWschhL6u//v3ge96/qMnQ9mWp3efbxA=
 github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ=
 github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8=
+github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
+github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
+github.com/pierrec/lz4/v4 v4.0.3 h1:vNQKSVZNYUEAvRY9FaUXAF1XPbSOHJtDTiP41kzDz2E=
+github.com/pierrec/lz4/v4 v4.0.3/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
 github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@@ -43,6 +62,11 @@ github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/t-tomalak/logrus-easy-formatter v0.0.0-20190827215021-c074f06c5816 h1:J6v8awz+me+xeb/cUTotKgceAYouhIB3pjzgRd6IlGk=
 github.com/t-tomalak/logrus-easy-formatter v0.0.0-20190827215021-c074f06c5816/go.mod h1:tzym/CEb5jnFI+Q0k4Qq3+LvRF4gO3E2pxS8fHP8jcA=
+github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
+github.com/ulikunitz/xz v0.5.7 h1:YvTNdFzX6+W5m9msiYg/zpkSURPPtOlzbqYjrFn7Yt4=
+github.com/ulikunitz/xz v0.5.7/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
+github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
+github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
 github.com/ysmood/goob v0.3.0 h1:XZ51cZJ4W3WCoCiUktixzMIQF86W7G5VFL4QQ/Q2uS0=
 github.com/ysmood/goob v0.3.0/go.mod h1:S3lq113Y91y1UBf1wj1pFOxeahvfKkCk6mTWTWbDdWs=
 github.com/ysmood/got v0.9.3 h1:qx51X49jL/WAiqZzPTkPZ0zp5pTmrWJa4zYFTYo0gHI=

+ 1 - 7
sub_parser/ass/ass.go

@@ -82,12 +82,6 @@ func (p Parser) DetermineFileTypeFromBytes(inBytes []byte, nowExt string) (*sub_
 		subFileInfo.Dialogues = append(subFileInfo.Dialogues, odl)
 	}
 	// 再分析
-	// 是不是双语字幕,定义,超过 80% 就一定是了(不可能三语吧···)
-	isDouble := false
-	perLines := float32(countLineFeed) / float32(len(matched))
-	if perLines > 0.8 {
-		isDouble = true
-	}
 	// 需要判断每一个 Line 是啥语言,[语言的code]次数
 	var langDict map[int]int
 	langDict = make(map[int]int)
@@ -95,7 +89,7 @@ func (p Parser) DetermineFileTypeFromBytes(inBytes []byte, nowExt string) (*sub_
 		common.DetectSubLangAndStatistics(dialogue.Lines, langDict)
 	}
 	// 从统计出来的字典,找出 Top 1 或者 2 的出来,然后计算出是什么语言的字幕
-	detectLang := common.SubLangStatistics2SubLangType(isDouble, langDict)
+	detectLang := common.SubLangStatistics2SubLangType(float32(countLineFeed), float32(len(matched)), langDict)
 	subFileInfo.Lang = detectLang
 	return &subFileInfo, nil
 }

+ 1 - 7
sub_parser/srt/srt.go

@@ -69,12 +69,6 @@ func (p Parser) DetermineFileTypeFromBytes(inBytes []byte, nowExt string) (*sub_
 		subFileInfo.Dialogues = append(subFileInfo.Dialogues, odl)
 	}
 	// 再分析
-	// 是不是双语字幕,定义,超过 80% 就一定是了(不可能三语吧···)
-	isDouble := false
-	perLines := float32(countLineFeed) / float32(len(matched))
-	if perLines > 0.8 {
-		isDouble = true
-	}
 	// 需要判断每一个 Line 是啥语言,[语言的code]次数
 	var langDict map[int]int
 	langDict = make(map[int]int)
@@ -82,7 +76,7 @@ func (p Parser) DetermineFileTypeFromBytes(inBytes []byte, nowExt string) (*sub_
 		common.DetectSubLangAndStatistics(dialogue.Lines, langDict)
 	}
 	// 从统计出来的字典,找出 Top 1 或者 2 的出来,然后计算出是什么语言的字幕
-	detectLang := common.SubLangStatistics2SubLangType(isDouble, langDict)
+	detectLang := common.SubLangStatistics2SubLangType(float32(countLineFeed), float32(len(matched)), langDict)
 	subFileInfo.Lang = detectLang
 	return &subFileInfo, nil
 }

+ 4 - 2
sub_supplier/shooter/shooter.go

@@ -67,7 +67,7 @@ func (s Supplier) GetSubListFromFile(filePath string) ([]sub_supplier.SubInfo, e
 	if err != nil {
 		return nil, err
 	}
-	for _, shooter := range jsonList {
+	for i, shooter := range jsonList {
 		for _, file := range shooter.Files {
 			subExt := file.Ext
 			if strings.Contains(file.Ext, ".") == false {
@@ -79,11 +79,13 @@ func (s Supplier) GetSubListFromFile(filePath string) ([]sub_supplier.SubInfo, e
 				s.log.Error(err.Error())
 				continue
 			}
-			outSubInfoList = append(outSubInfoList, *sub_supplier.NewSubInfo(fileName, common.ChineseSimple, file.Link, 0, shooter.Delay, subExt, data))
+			outSubInfoList = append(outSubInfoList, *sub_supplier.NewSubInfo(s.GetSupplierName(), int64(i), fileName, common.ChineseSimple, file.Link, 0, shooter.Delay, subExt, data))
 			// 如果够了那么多个字幕就返回
 			if len(outSubInfoList) >= s.topic {
 				return outSubInfoList, nil
 			}
+			// 一层里面,下载一个文件就行了
+			break
 		}
 	}
 	return outSubInfoList, nil

+ 3 - 3
sub_supplier/subhd/subhd.go

@@ -116,12 +116,12 @@ func (s Supplier) GetSubListFromKeyword(keyword string) ([]sub_supplier.SubInfo,
 		return nil, err
 	}
 
-	for _, item := range subList {
+	for i, item := range subList {
 		hdContent, err := s.Step2Ex(browser, item.Url)
 		if err != nil {
 			return nil, err
 		}
-		subInfos = append(subInfos, *sub_supplier.NewSubInfo(hdContent.Filename, common.ChineseSimple, common.AddBaseUrl(common.SubSubHDRootUrl, item.Url), 0, 0, hdContent.Ext, hdContent.Data))
+		subInfos = append(subInfos, *sub_supplier.NewSubInfo(s.GetSupplierName(), int64(i), hdContent.Filename, common.ChineseSimple, common.AddBaseUrl(common.SubSubHDRootUrl, item.Url), 0, 0, hdContent.Ext, hdContent.Data))
 	}
 
 	return subInfos, nil
@@ -423,7 +423,7 @@ search:
 				abs(int(color_a_G)-int(color_b_G)) > threshold ||
 				abs(int(color_a_B)-int(color_b_B)) > threshold {
 				distance += float64(i)
-				s.log.Debug("對比完畢, 偏移量: %v", distance)
+				s.log.Debug("對比完畢, 偏移量:", distance)
 				break search
 			}
 		}

+ 4 - 2
sub_supplier/sublist.go

@@ -3,6 +3,8 @@ package sub_supplier
 import "github.com/allanpk716/ChineseSubFinder/common"
 
 type SubInfo struct {
+	FromWhere	string `json:"from_where"`	// 从哪个网站下载来的
+	TopN		int64  `json:"top_n"`		// 是 Top 几?
 	Name 		string `json:"name"`		// 字幕的名称,这个比较随意,优先是影片的名称,然后才是从网上下载字幕的对应名称
 	Language 	common.Language `json:"language"`	// 字幕的语言
 	FileUrl     string `json:"file-url"`	// 字幕文件下载的路径
@@ -12,7 +14,7 @@ type SubInfo struct {
 	Data		[]byte	`json:"data"`		// 字幕文件的二进制数据
 }
 
-func NewSubInfo(name string, language common.Language, fileUrl string, vote int64, offset int64, ext string, data []byte) *SubInfo {
-	return &SubInfo{Name: name, Language: language, FileUrl: fileUrl, Vote: vote, Offset: offset, Ext: ext, Data: data}
+func NewSubInfo(fromWhere string, topn int64, name string, language common.Language, fileUrl string, vote int64, offset int64, ext string, data []byte) *SubInfo {
+	return &SubInfo{FromWhere: fromWhere, TopN: topn,Name: name, Language: language, FileUrl: fileUrl, Vote: vote, Offset: offset, Ext: ext, Data: data}
 }
 

+ 2 - 2
sub_supplier/xunlei/xunlei.go

@@ -75,7 +75,7 @@ func (s Supplier) GetSubListFromFile(filePath string) ([]sub_supplier.SubInfo, e
 		}
 	}
 	// 再开始下载字幕
-	for _, v := range tmpXunLeiSubListChinese {
+	for i, v := range tmpXunLeiSubListChinese {
 		tmpLang := common.LangConverter(v.Language)
 		data, filename, err := common.DownFile(v.Surl)
 		if err != nil {
@@ -89,7 +89,7 @@ func (s Supplier) GetSubListFromFile(filePath string) ([]sub_supplier.SubInfo, e
 			ext = filepath.Ext(filename)
 		}
 
-		outSubList = append(outSubList, *sub_supplier.NewSubInfo(v.Sname, tmpLang, v.Surl, v.Svote, v.Roffset, ext, data))
+		outSubList = append(outSubList, *sub_supplier.NewSubInfo(s.GetSupplierName(), int64(i), v.Sname, tmpLang, v.Surl, v.Svote, v.Roffset, ext, data))
 	}
 
 

+ 2 - 2
sub_supplier/zimuku/zimuku.go

@@ -132,14 +132,14 @@ func (s Supplier) GetSubListFromKeyword(keyword string) ([]sub_supplier.SubInfo,
 	}
 
 	// 第四级界面,具体字幕下载
-	for _, subInfo := range tmpSubInfo {
+	for i, subInfo := range tmpSubInfo {
 		fileName, data, err := s.Step3(subInfo.SubDownloadPageUrl)
 		if err != nil {
 			s.log.Error(err.Error())
 			continue
 		}
 		// 默认都是包含中文字幕的,然后具体使用的时候再进行区分
-		outSubInfoList = append(outSubInfoList, *sub_supplier.NewSubInfo(fileName, common.ChineseSimple, common.AddBaseUrl(common.SubZiMuKuRootUrl, subInfo.SubDownloadPageUrl), 0,
+		outSubInfoList = append(outSubInfoList, *sub_supplier.NewSubInfo(s.GetSupplierName(), int64(i), fileName, common.ChineseSimple, common.AddBaseUrl(common.SubZiMuKuRootUrl, subInfo.SubDownloadPageUrl), 0,
 			0, filepath.Ext(fileName), data))
 	}