Browse Source

重构调整逻辑,有待继续实现字幕选择问题

Signed-off-by: allan716 <[email protected]>
allan716 4 years ago
parent
commit
0a0a6d0e27

+ 2 - 2
common/common.go

@@ -6,5 +6,5 @@ const HTMLTimeOut = 60 * time.Second	// HttpClient 超时时间
 
 const DownloadSubsPerSite = 1 // 默认,每个网站下载一个字幕,允许额外传参调整
 
-const DebugFolder = "debugThings"
-const TmpFolder = "tmpThings"
+// DebugFolder and TmpFolder are folder names, written entirely in lower case.
+// NOTE(review): presumably the debug-output and temporary-cache directories — confirm against their users.
+const DebugFolder = "debugthings"
+const TmpFolder = "tmpthings"

+ 14 - 3
common/lang.go

@@ -195,11 +195,22 @@ func SubLangStatistics2SubLangType(countLineFeed, AllLines float32, langDict map
 
 }
 
-// IsChineseSimpleOrTraditional 从字幕的名称中尝试确认是简体还是繁体
-func IsChineseSimpleOrTraditional(inputFileName string) bool {
-	return true
+// IsChineseSimpleOrTraditional guesses from a subtitle file name whether the
+// subtitle is Simplified or Traditional Chinese. Bilingual subtitles are not
+// detected here; a separate parser takes care of that. The result is always
+// one of ChineseSimple, ChineseTraditional or Unknow.
+//
+// The "chs"/"cht" keywords are matched case-insensitively so that names such
+// as "Movie.CHS.srt" are recognised; MatchLangChs and MatchLangCht are matched
+// against the name exactly as given. Simplified markers are checked first, so
+// a name carrying both kinds of marker is reported as ChineseSimple.
+func IsChineseSimpleOrTraditional(inputFileName string) Language {
+	// The keyword constants are lower case, so fold the name once up front.
+	lowerName := strings.ToLower(inputFileName)
+	switch {
+	case strings.Contains(lowerName, SubNameKeywordChineseSimple),
+		strings.Contains(inputFileName, MatchLangChs):
+		return ChineseSimple
+	case strings.Contains(lowerName, SubNameKeywordTraditional),
+		strings.Contains(inputFileName, MatchLangCht):
+		return ChineseTraditional
+	}
+	return Unknow
 }
 
+// File-name keywords that IsChineseSimpleOrTraditional searches for.
+const (
+	SubNameKeywordChineseSimple = "chs" // reported as ChineseSimple
+	SubNameKeywordTraditional	 = "cht" // reported as ChineseTraditional
+)
+
 // Language 语言类型,注意,这里默认还是查找的是中文字幕,只不过下载的时候可能附带了其他的
 type Language int
 const (

+ 16 - 0
common/util.go

@@ -3,6 +3,7 @@ package common
 import (
 	"fmt"
 	"github.com/go-resty/resty/v2"
+	"io"
 	"io/ioutil"
 	"net/http"
 	"os"
@@ -147,6 +148,21 @@ func ClearTmpFolder() error {
 	return nil
 }
 
+// CopyFile copies the contents of the file srcName into the file dstName and
+// returns the number of bytes written.
+//
+// dstName is created with mode 0644 if it does not exist; if it does exist its
+// previous contents are discarded. An error is returned when either file
+// cannot be opened, when the copy fails, or when closing dstName fails.
+func CopyFile(dstName, srcName string) (written int64, err error) {
+	src, err := os.Open(srcName)
+	if err != nil {
+		return 0, err
+	}
+	defer src.Close()
+
+	// O_TRUNC matters: without it, overwriting a longer existing file would
+	// leave the old file's trailing bytes after the copied data.
+	dst, err := os.OpenFile(dstName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+	if err != nil {
+		return 0, err
+	}
+	defer func() {
+		// A failed Close on a written file can mean lost data, so report it
+		// unless an earlier error is already being returned.
+		if cerr := dst.Close(); err == nil {
+			err = cerr
+		}
+	}()
+	return io.Copy(dst, src)
+}
+
 var (
 	defDebugFolder = ""
 	defTmpFolder = ""

+ 80 - 46
downloader.go

@@ -82,30 +82,56 @@ func (d Downloader) DownloadSub(dir string) error {
 	for i, oneVideoFullPath := range nowVideoList {
 		nowSubInfos := d.downloadSub4OneVideo(oneVideoFullPath, suppliers, i)
 		// 字幕都下载缓存好了,需要抉择存哪一个,优先选择中文双语的,然后到中文
-		err = d.chooseAndSaveSubFile(oneVideoFullPath, nowSubInfos)
+		organizeSubFiles, err := d.organizeDlSubFiles(nowSubInfos)
 		if err != nil {
 			d.log.Error(oneVideoFullPath, "Download Sub Error",err)
 			continue
 		}
+		// 得到目标视频文件的根目录
+		videoRootPath := filepath.Dir(oneVideoFullPath)
+		// -------------------------------------------------
+		// 调试缓存,把下载好的字幕写到对应的视频目录下,方便调试
+		if d.reqParam.DebugMode == true {
+			err = d.copySubFile2DesFolder(videoRootPath, organizeSubFiles)
+			if err != nil {
+				d.log.Error(err)
+			}
+		}
+		// -------------------------------------------------
+		// TODO 这里先处理 Top1 的字幕,后续再考虑怎么觉得 Top N 选择哪一个,很可能选择每个网站 Top 1就行了,具体的过滤逻辑在其内部实现
+		// 拿到现有的字幕列表,开始抉择
+		// 先判断当前字幕是什么语言(如果是简体,还需要考虑,判断这个字幕是简体还是繁体)
+		//for i, oneSubFileFullPath := range organizeSubFiles {
+		//
+		//}
+		println(videoRootPath)
+		// 抉择完毕,需要清理缓存目录
+		err = common.ClearTmpFolder()
+		if err != nil {
+			return err
+		}
 	}
 	return nil
 }
 
-// chooseAndSaveSubFile 需要从汇总来是网站字幕中,找到合适的
-func (d Downloader) chooseAndSaveSubFile(oneVideoFullPath string, subInfos []sub_supplier.SubInfo) error {
+// organizeDlSubFiles 需要从汇总来是网站字幕中,找到合适的
+func (d Downloader) organizeDlSubFiles(subInfos []sub_supplier.SubInfo) ([]string, error) {
 
-	// 得到目标视频文件的根目录
-	videoRootPath := filepath.Dir(oneVideoFullPath)
+	// 缓存列表,整理后的字幕列表
+	var siteSubInfoDict = make([]string, 0)
 	tmpFolderFullPath, err := common.GetTmpFolder()
 	if err != nil {
-		return err
+		return nil, err
 	}
-
-	var siteSubInfoDict = make([]string, 0)
-	// 第三方的解压函数,首先不支持 io.Reader 的操作,也就是得缓存到本地硬盘再读取解压
+	// 先清理缓存目录
+	err = common.ClearTmpFolder()
+	if err != nil {
+		return nil, err
+	}
+	// 第三方的解压库,首先不支持 io.Reader 的操作,也就是得缓存到本地硬盘再读取解压
 	// 且使用 walk 会无法解压 rar,得指定具体的实例,太麻烦了,直接用通用的接口得了,就是得都缓存下来再判断
+	// 基于以上两点,写了一堆啰嗦的逻辑···
 	for _, subInfo := range subInfos {
-		// TODO 这里先处理 Top1 的字幕,后续再考虑怎么觉得 Top N 选择哪一个,很可能选择每个网站 Top 1就行了,具体的过滤逻辑在其内部实现
 		// 先存下来,保存是时候需要前缀,前缀就是从那个网站下载来的
 		nowFileSaveFullPath := path.Join(tmpFolderFullPath, d.getFrontNameAndOrgName(subInfo))
 		err = utils.OutputFile(nowFileSaveFullPath, subInfo.Data)
@@ -113,7 +139,6 @@ func (d Downloader) chooseAndSaveSubFile(oneVideoFullPath string, subInfos []sub
 			d.log.Error(subInfo.FromWhere, subInfo.Name, subInfo.TopN, err)
 			continue
 		}
-
 		nowExt := strings.ToLower(subInfo.Ext)
 		if nowExt != ".zip" && nowExt != ".tar" && nowExt != ".rar" && nowExt != ".7z" {
 			// 是否是受支持的字幕类型
@@ -138,28 +163,27 @@ func (d Downloader) chooseAndSaveSubFile(oneVideoFullPath string, subInfos []sub
 				d.log.Error(subInfo.FromWhere, subInfo.Name, subInfo.TopN, err)
 				continue
 			}
+			// 这里需要给这些下载到的文件进行改名,加是从那个网站来的前缀,后续好查找
 			for _, fileFullPath := range subFileFullPaths {
+				newSubName := d.addFrontName(subInfo, filepath.Base(fileFullPath))
+				newSubNameFullPath := path.Join(tmpFolderFullPath, newSubName)
+				// 改名
+				err = os.Rename(fileFullPath, newSubNameFullPath)
+				if err != nil {
+					d.log.Error(subInfo.FromWhere, subInfo.Name, subInfo.TopN, err)
+					continue
+				}
 				// 加入缓存列表
-				siteSubInfoDict = append(siteSubInfoDict, fileFullPath)
+				siteSubInfoDict = append(siteSubInfoDict, newSubNameFullPath)
 			}
 		}
 	}
-	// 拿到现有的字幕列表,开始抉择
-	// 还需要考虑,判断这个字幕是简体还是繁体
-	
-	println(videoRootPath)
 
-	// 抉择完毕,需要清理缓存目录
-	err = common.ClearTmpFolder()
-	if err != nil {
-		return err
-	}
-	return nil
+	return siteSubInfoDict, nil
 }
 
 // downloadSub4OneVideo 为这个视频下载字幕,所有网站找到的字幕都会汇总输出
 func (d Downloader) downloadSub4OneVideo(oneVideoFullPath string, suppliers []sub_supplier.ISupplier, i int) []sub_supplier.SubInfo {
-	ontVideoRootPath := filepath.Dir(oneVideoFullPath)
 	var outSUbInfos = make([]sub_supplier.SubInfo, 0)
 	// 同时进行查询
 	subInfosChannel := make(chan []sub_supplier.SubInfo)
@@ -167,7 +191,7 @@ func (d Downloader) downloadSub4OneVideo(oneVideoFullPath string, suppliers []su
 	for _, supplier := range suppliers {
 		supplier := supplier
 		go func() {
-			subInfos, err := d.downloadSub4OneSite(oneVideoFullPath, i, supplier, ontVideoRootPath)
+			subInfos, err := d.downloadSub4OneSite(oneVideoFullPath, i, supplier)
 			if err != nil {
 				d.log.Error(err)
 			}
@@ -185,7 +209,7 @@ func (d Downloader) downloadSub4OneVideo(oneVideoFullPath string, suppliers []su
 }
 
 // downloadSub4OneSite 在一个站点下载这个视频的字幕
-func (d Downloader) downloadSub4OneSite(oneVideoFullPath string, i int, supplier sub_supplier.ISupplier, ontVideoRootPath string) ([]sub_supplier.SubInfo, error) {
+func (d Downloader) downloadSub4OneSite(oneVideoFullPath string, i int, supplier sub_supplier.ISupplier) ([]sub_supplier.SubInfo, error) {
 	d.log.Infoln(i, supplier.GetSupplierName(), "Start...")
 	subInfos, err := supplier.GetSubListFromFile(oneVideoFullPath)
 	if err != nil {
@@ -198,24 +222,6 @@ func (d Downloader) downloadSub4OneSite(oneVideoFullPath string, i int, supplier
 			subInfos[x].Name = tmpSubFileName + info.Ext
 		}
 	}
-	if d.reqParam.DebugMode == true {
-		// 需要进行字幕文件的缓存
-		// 把缓存的文件夹新建出来
-		desFolderFullPath := path.Join(ontVideoRootPath, SubTmpFolderName)
-		err = os.MkdirAll(desFolderFullPath, os.ModePerm)
-		if err != nil {
-			d.log.Error(err)
-			return subInfos, nil
-		}
-		for x, info := range subInfos {
-			desSubFileFullPath := path.Join(desFolderFullPath, supplier.GetSupplierName() + "_" + strconv.Itoa(x)+"_"+info.Name)
-			err = utils.OutputFile(desSubFileFullPath, info.Data)
-			if err != nil {
-				d.log.Error(err)
-				break
-			}
-		}
-	}
 	d.log.Infoln(i, supplier.GetSupplierName(), "End...")
 	return subInfos, nil
 }
@@ -249,7 +255,7 @@ func (d Downloader) searchMatchedVideoFile(dir string) ([]string, error) {
 
 // searchMatchedSubFile 搜索符合后缀名的视频文件
 func (d Downloader) searchMatchedSubFile(dir string) ([]string, error) {
-
+	// 这里有个梗,会出现 __MACOSX 这类文件夹,那么里面会有一样的文件,需要用文件大小排除一下,至少大于 1 kb 吧
 	var fileFullPathList = make([]string, 0)
 	pathSep := string(os.PathSeparator)
 	files, err := ioutil.ReadDir(dir)
@@ -266,6 +272,9 @@ func (d Downloader) searchMatchedSubFile(dir string) ([]string, error) {
 			}
 		} else {
 			// 这里就是文件了
+			if curFile.Size() < 1000 {
+				continue
+			}
 			if common.IsSubExtWanted(filepath.Ext(curFile.Name())) == true {
 				fileFullPathList = append(fileFullPathList, fullPath)
 			}
@@ -287,7 +296,32 @@ func (d Downloader) isWantedVideoExtDef(fileName string) bool {
 
 // getFrontNameAndOrgName builds the cache file name for a downloaded subtitle:
 // the source site in brackets, its Top-N rank on that site, then info.Name,
 // i.e. "[site]_N_name".
 func (d Downloader) getFrontNameAndOrgName(info sub_supplier.SubInfo) string {
-	return "[" + info.FromWhere + "]" + strconv.FormatInt(info.TopN,10) +info.Name
+	// Reuse addFrontName so the "[site]_N_" prefix is defined in one place only.
+	return d.addFrontName(info, info.Name)
+}
+
+// addFrontName prefixes orgName with the source site and the Top-N rank taken
+// from info, producing "[site]_N_orgName".
+func (d Downloader) addFrontName(info sub_supplier.SubInfo, orgName string) string {
+	rank := strconv.FormatInt(info.TopN, 10)
+	prefix := "[" + info.FromWhere + "]_" + rank + "_"
+	return prefix + orgName
+}
+
+// copySubFile2DesFolder copies every file listed in subFiles into the
+// SubTmpFolderName sub-directory of desFolder, creating that directory first
+// if it does not exist. Each copy keeps the base name of its source file.
+// The first failure stops the remaining copies and is returned.
+func (d Downloader) copySubFile2DesFolder(desFolder string, subFiles []string) error {
+	// These are OS file-system paths, so build them with filepath.Join rather
+	// than the slash-only path.Join.
+	desFolderFullPath := filepath.Join(desFolder, SubTmpFolderName)
+	if err := os.MkdirAll(desFolderFullPath, os.ModePerm); err != nil {
+		return err
+	}
+	for _, subFile := range subFiles {
+		newFn := filepath.Join(desFolderFullPath, filepath.Base(subFile))
+		if _, err := common.CopyFile(newFn, subFile); err != nil {
+			return err
+		}
+	}
+	return nil
 }
 
 const (
@@ -296,5 +330,5 @@ const (
 	VideoExtRmvb = ".rmvb"
 	VideoExtIso = ".iso"
 
-	SubTmpFolderName = "subTmp"
+	SubTmpFolderName = "subtmp"
 )

+ 1 - 1
sub_supplier/shooter/shooter_test.go

@@ -18,6 +18,6 @@ func TestNewSupplier(t *testing.T) {
 	println(outList)
 
 	for i, sublist := range outList {
-		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Vote, sublist.FileUrl, len(sublist.Data))
+		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Score, sublist.FileUrl, len(sublist.Data))
 	}
 }

+ 1 - 1
sub_supplier/subhd/subhd_test.go

@@ -23,6 +23,6 @@ func TestSupplier_GetSubListFromFile(t *testing.T) {
 	}
 
 	for i, sublist := range outList {
-		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Vote, sublist.FileUrl, len(sublist.Data))
+		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Score, sublist.FileUrl, len(sublist.Data))
 	}
 }

+ 11 - 11
sub_supplier/sublist.go

@@ -3,18 +3,18 @@ package sub_supplier
 import "github.com/allanpk716/ChineseSubFinder/common"
 
+// SubInfo holds one subtitle obtained from a supplier site: where it came
+// from, descriptive metadata, and the raw file data.
 type SubInfo struct {
-	FromWhere	string `json:"from_where"`	// 从哪个网站下载来的
-	TopN		int64  `json:"top_n"`		// 是 Top 几?
-	Name 		string `json:"name"`		// 字幕的名称,这个比较随意,优先是影片的名称,然后才是从网上下载字幕的对应名称
-	Language 	common.Language `json:"language"`	// 字幕的语言
-	FileUrl     string `json:"file-url"`	// 字幕文件下载的路径
-	Vote    	int64  `json:"vote"`		// 投票
-	Offset  	int64  `json:"offset"`		// 字幕的偏移
-	Ext			string `json:"ext"`			// 字幕文件的后缀名带点,有可能是直接能用的字幕文件,也可能是压缩包
-	Data		[]byte	`json:"data"`		// 字幕文件的二进制数据
+	FromWhere string          `json:"from_where"` // which site the subtitle was downloaded from
+	TopN      int64           `json:"top_n"`      // its Top-N rank on that site
+	Name      string          `json:"name"`       // subtitle name; loosely defined: preferably the video's name, otherwise the name the subtitle had when downloaded
+	Language  common.Language `json:"language"`   // language of the subtitle
+	FileUrl   string          `json:"file-url"`   // download URL of the subtitle file
+	Score     int64           `json:"score"`      // TODO subtitle score; needs an independent rating system
+	Offset    int64           `json:"offset"`     // subtitle offset
+	Ext       string          `json:"ext"`        // file extension including the dot; may be a directly usable subtitle file or an archive
+	Data      []byte          `json:"data"`       // raw bytes of the subtitle file
 }
 
-func NewSubInfo(fromWhere string, topn int64, name string, language common.Language, fileUrl string, vote int64, offset int64, ext string, data []byte) *SubInfo {
-	return &SubInfo{FromWhere: fromWhere, TopN: topn,Name: name, Language: language, FileUrl: fileUrl, Vote: vote, Offset: offset, Ext: ext, Data: data}
+// NewSubInfo returns a pointer to a SubInfo whose fields are filled from the
+// given arguments, one argument per field.
+func NewSubInfo(fromWhere string, topn int64, name string, language common.Language, fileUrl string, score int64, offset int64, ext string, data []byte) *SubInfo {
+	info := SubInfo{
+		FromWhere: fromWhere,
+		TopN:      topn,
+		Name:      name,
+		Language:  language,
+		FileUrl:   fileUrl,
+		Score:     score,
+		Offset:    offset,
+		Ext:       ext,
+		Data:      data,
+	}
+	return &info
 }
 

+ 1 - 1
sub_supplier/xunlei/xunlei_test.go

@@ -19,6 +19,6 @@ func TestGetList(t *testing.T) {
 	println(outList)
 
 	for i, sublist := range outList {
-		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Vote, len(sublist.Data))
+		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Score, len(sublist.Data))
 	}
 }

+ 1 - 1
sub_supplier/zimuku/zimuku.go

@@ -401,7 +401,7 @@ type SubInfo struct {
 	Ext					string	// 后缀名
 	Score				float32	// 评分
 	DownloadTimes 		int		// 下载的次数
-	Priority			float32	// 优先级,使用评分和次数乘积而来,类似于 Vote 投票
+	Priority			float32	// 优先级,使用评分和次数乘积而来,类似于 Score 投票
 	DetailUrl			string	// 字幕的详情界面,需要再次分析具体的下载地址,地址需要拼接网站的根地址上去
 	SubDownloadPageUrl 	string	// 字幕的具体的下载页面,会有多个下载可用的链接
 	DownloadUrl			string	// 字幕的下载地址

+ 2 - 2
sub_supplier/zimuku/zimuku_test.go

@@ -15,7 +15,7 @@ func TestSupplier_GetSubListFromKeyword(t *testing.T) {
 	}
 	println(outList)
 	for i, sublist := range outList {
-		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Vote, len(sublist.Data))
+		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Score, len(sublist.Data))
 	}
 }
 
@@ -33,6 +33,6 @@ func TestSupplier_GetSubListFromFile(t *testing.T) {
 	}
 	println(outList)
 	for i, sublist := range outList {
-		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Vote, len(sublist.Data))
+		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Score, len(sublist.Data))
 	}
 }