|
|
@@ -2,19 +2,19 @@ package ChineseSubFinder
|
|
|
|
|
|
import (
|
|
|
"github.com/allanpk716/ChineseSubFinder/common"
|
|
|
+ "github.com/allanpk716/ChineseSubFinder/sub_parser"
|
|
|
+ "github.com/allanpk716/ChineseSubFinder/sub_parser/ass"
|
|
|
+ "github.com/allanpk716/ChineseSubFinder/sub_parser/srt"
|
|
|
"github.com/allanpk716/ChineseSubFinder/sub_supplier"
|
|
|
"github.com/allanpk716/ChineseSubFinder/sub_supplier/shooter"
|
|
|
"github.com/allanpk716/ChineseSubFinder/sub_supplier/subhd"
|
|
|
"github.com/allanpk716/ChineseSubFinder/sub_supplier/xunlei"
|
|
|
"github.com/allanpk716/ChineseSubFinder/sub_supplier/zimuku"
|
|
|
- "github.com/go-rod/rod/lib/utils"
|
|
|
- "github.com/mholt/archiver/v3"
|
|
|
"github.com/sirupsen/logrus"
|
|
|
"io/ioutil"
|
|
|
"os"
|
|
|
"path"
|
|
|
"path/filepath"
|
|
|
- "strconv"
|
|
|
"strings"
|
|
|
)
|
|
|
|
|
|
@@ -72,17 +72,16 @@ func (d Downloader) DownloadSub(dir string) error {
|
|
|
return err
|
|
|
}
|
|
|
// 构建每个字幕站点下载者的实例
|
|
|
- var suppliers = make([]sub_supplier.ISupplier, 0)
|
|
|
- suppliers = append(suppliers, shooter.NewSupplier(d.reqParam))
|
|
|
- suppliers = append(suppliers, subhd.NewSupplier(d.reqParam))
|
|
|
- suppliers = append(suppliers, xunlei.NewSupplier(d.reqParam))
|
|
|
- suppliers = append(suppliers, zimuku.NewSupplier(d.reqParam))
|
|
|
+ subSupplierHub := sub_supplier.NewSubSupplierHub(shooter.NewSupplier(d.reqParam),
|
|
|
+ subhd.NewSupplier(d.reqParam),
|
|
|
+ xunlei.NewSupplier(d.reqParam),
|
|
|
+ zimuku.NewSupplier(d.reqParam),
|
|
|
+ )
|
|
|
// TODO 后续再改为每个视频以上的流程都是一个 channel 来做,并且需要控制在一个并发量之下(很可能没必要,毕竟要在弱鸡机器上挂机用的)
|
|
|
// 一个视频文件同时多个站点查询,阻塞完毕后,在进行下一个
|
|
|
for i, oneVideoFullPath := range nowVideoList {
|
|
|
- nowSubInfos := d.downloadSub4OneVideo(oneVideoFullPath, suppliers, i)
|
|
|
// 字幕都下载缓存好了,需要抉择存哪一个,优先选择中文双语的,然后到中文
|
|
|
- organizeSubFiles, err := d.organizeDlSubFiles(nowSubInfos)
|
|
|
+ organizeSubFiles, err := subSupplierHub.DownloadSub(oneVideoFullPath, i)
|
|
|
if err != nil {
|
|
|
d.log.Error(oneVideoFullPath, "Download Sub Error",err)
|
|
|
continue
|
|
|
@@ -101,129 +100,27 @@ func (d Downloader) DownloadSub(dir string) error {
|
|
|
// TODO 这里先处理 Top1 的字幕,后续再考虑怎么觉得 Top N 选择哪一个,很可能选择每个网站 Top 1就行了,具体的过滤逻辑在其内部实现
|
|
|
// 拿到现有的字幕列表,开始抉择
|
|
|
// 先判断当前字幕是什么语言(如果是简体,还需要考虑,判断这个字幕是简体还是繁体)
|
|
|
- //for i, oneSubFileFullPath := range organizeSubFiles {
|
|
|
- //
|
|
|
- //}
|
|
|
- println(videoRootPath)
|
|
|
- // 抉择完毕,需要清理缓存目录
|
|
|
- err = common.ClearTmpFolder()
|
|
|
- if err != nil {
|
|
|
- return err
|
|
|
- }
|
|
|
- }
|
|
|
- return nil
|
|
|
-}
|
|
|
-
|
|
|
-// organizeDlSubFiles 需要从汇总来是网站字幕中,找到合适的
|
|
|
-func (d Downloader) organizeDlSubFiles(subInfos []sub_supplier.SubInfo) ([]string, error) {
|
|
|
-
|
|
|
- // 缓存列表,整理后的字幕列表
|
|
|
- var siteSubInfoDict = make([]string, 0)
|
|
|
- tmpFolderFullPath, err := common.GetTmpFolder()
|
|
|
- if err != nil {
|
|
|
- return nil, err
|
|
|
- }
|
|
|
- // 先清理缓存目录
|
|
|
- err = common.ClearTmpFolder()
|
|
|
- if err != nil {
|
|
|
- return nil, err
|
|
|
- }
|
|
|
- // 第三方的解压库,首先不支持 io.Reader 的操作,也就是得缓存到本地硬盘再读取解压
|
|
|
- // 且使用 walk 会无法解压 rar,得指定具体的实例,太麻烦了,直接用通用的接口得了,就是得都缓存下来再判断
|
|
|
- // 基于以上两点,写了一堆啰嗦的逻辑···
|
|
|
- for _, subInfo := range subInfos {
|
|
|
- // 先存下来,保存是时候需要前缀,前缀就是从那个网站下载来的
|
|
|
- nowFileSaveFullPath := path.Join(tmpFolderFullPath, d.getFrontNameAndOrgName(subInfo))
|
|
|
- err = utils.OutputFile(nowFileSaveFullPath, subInfo.Data)
|
|
|
- if err != nil {
|
|
|
- d.log.Error(subInfo.FromWhere, subInfo.Name, subInfo.TopN, err)
|
|
|
- continue
|
|
|
- }
|
|
|
- nowExt := strings.ToLower(subInfo.Ext)
|
|
|
- if nowExt != ".zip" && nowExt != ".tar" && nowExt != ".rar" && nowExt != ".7z" {
|
|
|
- // 是否是受支持的字幕类型
|
|
|
- if common.IsSubExtWanted(nowExt) == false {
|
|
|
- continue
|
|
|
- }
|
|
|
- // 加入缓存列表
|
|
|
- siteSubInfoDict = append(siteSubInfoDict, nowFileSaveFullPath)
|
|
|
- } else {
|
|
|
- // 那么就是需要解压的文件了
|
|
|
- // 解压,给一个单独的文件夹
|
|
|
- unzipTmpFolder := path.Join(tmpFolderFullPath, subInfo.FromWhere)
|
|
|
- err = archiver.Unarchive(nowFileSaveFullPath, unzipTmpFolder)
|
|
|
- // 解压完成后,遍历受支持的字幕列表,加入缓存列表
|
|
|
+ subParserHub := sub_parser.NewSubParserHub(ass.NewParser(), srt.NewParser())
|
|
|
+ for _, oneSubFileFullPath := range organizeSubFiles {
|
|
|
+ subFileInfo, err := subParserHub.DetermineFileTypeFromFile(oneSubFileFullPath)
|
|
|
if err != nil {
|
|
|
- d.log.Error(subInfo.FromWhere, subInfo.Name, subInfo.TopN, err)
|
|
|
+ d.log.Error(err)
|
|
|
continue
|
|
|
}
|
|
|
- // 搜索这个目录下的所有符合字幕格式的文件
|
|
|
- subFileFullPaths, err := d.searchMatchedSubFile(unzipTmpFolder)
|
|
|
- if err != nil {
|
|
|
- d.log.Error(subInfo.FromWhere, subInfo.Name, subInfo.TopN, err)
|
|
|
+ if subFileInfo == nil {
|
|
|
+ // 说明这个字幕无法解析
|
|
|
+ d.log.Warning(oneSubFileFullPath, "DetermineFileTypeFromFile is nill")
|
|
|
continue
|
|
|
}
|
|
|
- // 这里需要给这些下载到的文件进行改名,加是从那个网站来的前缀,后续好查找
|
|
|
- for _, fileFullPath := range subFileFullPaths {
|
|
|
- newSubName := d.addFrontName(subInfo, filepath.Base(fileFullPath))
|
|
|
- newSubNameFullPath := path.Join(tmpFolderFullPath, newSubName)
|
|
|
- // 改名
|
|
|
- err = os.Rename(fileFullPath, newSubNameFullPath)
|
|
|
- if err != nil {
|
|
|
- d.log.Error(subInfo.FromWhere, subInfo.Name, subInfo.TopN, err)
|
|
|
- continue
|
|
|
- }
|
|
|
- // 加入缓存列表
|
|
|
- siteSubInfoDict = append(siteSubInfoDict, newSubNameFullPath)
|
|
|
- }
|
|
|
}
|
|
|
- }
|
|
|
-
|
|
|
- return siteSubInfoDict, nil
|
|
|
-}
|
|
|
-
|
|
|
-// downloadSub4OneVideo 为这个视频下载字幕,所有网站找到的字幕都会汇总输出
|
|
|
-func (d Downloader) downloadSub4OneVideo(oneVideoFullPath string, suppliers []sub_supplier.ISupplier, i int) []sub_supplier.SubInfo {
|
|
|
- var outSUbInfos = make([]sub_supplier.SubInfo, 0)
|
|
|
- // 同时进行查询
|
|
|
- subInfosChannel := make(chan []sub_supplier.SubInfo)
|
|
|
- d.log.Infoln("DlSub Start", oneVideoFullPath)
|
|
|
- for _, supplier := range suppliers {
|
|
|
- supplier := supplier
|
|
|
- go func() {
|
|
|
- subInfos, err := d.downloadSub4OneSite(oneVideoFullPath, i, supplier)
|
|
|
- if err != nil {
|
|
|
- d.log.Error(err)
|
|
|
- }
|
|
|
- subInfosChannel <- subInfos
|
|
|
- }()
|
|
|
- }
|
|
|
- for i := 0; i < len(suppliers); i++ {
|
|
|
- v, ok := <-subInfosChannel
|
|
|
- if ok == true {
|
|
|
- outSUbInfos = append(outSUbInfos, v...)
|
|
|
- }
|
|
|
- }
|
|
|
- d.log.Infoln(i, "DlSub End", oneVideoFullPath)
|
|
|
- return outSUbInfos
|
|
|
-}
|
|
|
-
|
|
|
-// downloadSub4OneSite 在一个站点下载这个视频的字幕
|
|
|
-func (d Downloader) downloadSub4OneSite(oneVideoFullPath string, i int, supplier sub_supplier.ISupplier) ([]sub_supplier.SubInfo, error) {
|
|
|
- d.log.Infoln(i, supplier.GetSupplierName(), "Start...")
|
|
|
- subInfos, err := supplier.GetSubListFromFile(oneVideoFullPath)
|
|
|
- if err != nil {
|
|
|
- return nil, err
|
|
|
- }
|
|
|
- // 把后缀名给改好
|
|
|
- for x, info := range subInfos {
|
|
|
- tmpSubFileName := info.Name
|
|
|
- if strings.Contains(tmpSubFileName, info.Ext) == false {
|
|
|
- subInfos[x].Name = tmpSubFileName + info.Ext
|
|
|
+ println(videoRootPath)
|
|
|
+ // 抉择完毕,需要清理缓存目录
|
|
|
+ err = common.ClearTmpFolder()
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
}
|
|
|
}
|
|
|
- d.log.Infoln(i, supplier.GetSupplierName(), "End...")
|
|
|
- return subInfos, nil
|
|
|
+ return nil
|
|
|
}
|
|
|
|
|
|
// searchMatchedVideoFile 搜索符合后缀名的视频文件
|
|
|
@@ -253,36 +150,6 @@ func (d Downloader) searchMatchedVideoFile(dir string) ([]string, error) {
|
|
|
return fileFullPathList, nil
|
|
|
}
|
|
|
|
|
|
-// searchMatchedSubFile 搜索符合后缀名的视频文件
|
|
|
-func (d Downloader) searchMatchedSubFile(dir string) ([]string, error) {
|
|
|
- // 这里有个梗,会出现 __MACOSX 这类文件夹,那么里面会有一样的文件,需要用文件大小排除一下,至少大于 1 kb 吧
|
|
|
- var fileFullPathList = make([]string, 0)
|
|
|
- pathSep := string(os.PathSeparator)
|
|
|
- files, err := ioutil.ReadDir(dir)
|
|
|
- if err != nil {
|
|
|
- return nil, err
|
|
|
- }
|
|
|
- for _, curFile := range files {
|
|
|
- fullPath := dir + pathSep + curFile.Name()
|
|
|
- if curFile.IsDir() {
|
|
|
- // 内层的错误就无视了
|
|
|
- oneList, _ := d.searchMatchedSubFile(fullPath)
|
|
|
- if oneList != nil {
|
|
|
- fileFullPathList = append(fileFullPathList, oneList...)
|
|
|
- }
|
|
|
- } else {
|
|
|
- // 这里就是文件了
|
|
|
- if curFile.Size() < 1000 {
|
|
|
- continue
|
|
|
- }
|
|
|
- if common.IsSubExtWanted(filepath.Ext(curFile.Name())) == true {
|
|
|
- fileFullPathList = append(fileFullPathList, fullPath)
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- return fileFullPathList, nil
|
|
|
-}
|
|
|
-
|
|
|
// isWantedVideoExtDef 后缀名是否符合规则
|
|
|
func (d Downloader) isWantedVideoExtDef(fileName string) bool {
|
|
|
fileName = strings.ToLower(filepath.Ext(fileName))
|
|
|
@@ -294,15 +161,6 @@ func (d Downloader) isWantedVideoExtDef(fileName string) bool {
|
|
|
return false
|
|
|
}
|
|
|
|
|
|
-// 返回的名称包含,那个网站下载的,这个网站中排名第几,文件名
|
|
|
-func (d Downloader) getFrontNameAndOrgName(info sub_supplier.SubInfo) string {
|
|
|
- return "[" + info.FromWhere + "]_" + strconv.FormatInt(info.TopN,10) + "_" + info.Name
|
|
|
-}
|
|
|
-
|
|
|
-func (d Downloader) addFrontName(info sub_supplier.SubInfo, orgName string) string {
|
|
|
- return "[" + info.FromWhere + "]_" + strconv.FormatInt(info.TopN,10) + "_" + orgName
|
|
|
-}
|
|
|
-
|
|
|
func (d Downloader) copySubFile2DesFolder(desFolder string, subFiles []string) error {
|
|
|
|
|
|
// 需要进行字幕文件的缓存
|