Browse Source

修复 subhd 解析问题,正在做连续剧的字幕下载

Signed-off-by: allan716 <[email protected]>
allan716 4 years ago
parent
commit
09c3aa3e32

+ 5 - 1
common/seriesInfo.go

@@ -1,4 +1,7 @@
 package common
+
+import "time"
+
 /*
 	这里只需要分为三层结构,因为有 sonarr 和 TMM 整理过
 	所以命名很标准,使用 GetVideoInfoFromFileName 读取 SxxExx 问题不大
@@ -18,7 +21,8 @@ type EpisodeInfo struct {
 	Episode    int
 	SubList	   []SubInfo
 	Dir		   string	// 这里需要记录字幕的位置,因为需要在同级目录匹配相应的字幕才行
-	FileFullPath string 	// 视频文件的全路径
+	FileFullPath string // 视频文件的全路径
+	ModifyTime  time.Time	// 修改时间
 }
 
 type SubInfo struct {

+ 2 - 2
downloader_test.go

@@ -9,8 +9,8 @@ func TestDownloader_DownloadSub(t *testing.T) {
 	var err error
 	//dirRoot := "X:\\电影\\Spiral From the Book of Saw (2021)"
 	//dirRoot := "X:\\电影\\Oslo (2021)"
-	//dirRoot := "X:\\电影\\The Devil All the Time (2020)"
-	dirRoot := "X:\\电影\\冰海陷落 (2018)"
+	dirRoot := "X:\\电影\\The Devil All the Time (2020)"
+	//dirRoot := "X:\\电影\\冰海陷落 (2018)"
 
 	dl := NewDownloader(common.ReqParam{
 		SaveMultiSub: true,

+ 11 - 4
model/decode.go

@@ -11,6 +11,7 @@ import (
 	"regexp"
 	"strconv"
 	"strings"
+	"time"
 )
 
 func getImdbAndYearMovieXml(movieFilePath string) (common.VideoInfo, error) {
@@ -122,20 +123,26 @@ func GetImdbInfo(dirPth string) (common.VideoInfo, error) {
 }
 
 // GetVideoInfoFromFileName infers video info (title, season, episode, ...) from a file name via PTN.Parse and also returns the file's modification time obtained with os.Stat.
-func GetVideoInfoFromFileName(videoFileName string) (*PTN.TorrentInfo, error) {
+func GetVideoInfoFromFileName(videoFileName string) (*PTN.TorrentInfo, time.Time, error) {
 
 	parse, err := PTN.Parse(filepath.Base(videoFileName)) // only the base name is parsed; the directory part is ignored
 	if err != nil {
-		return nil, err
+		return nil, time.Time{}, err
 	}
 	compile, err := regexp.Compile(regFixTitle2) // regFixTitle2 is a package-level pattern not shown in this hunk
 	if err != nil {
-		return nil, err
+		return nil, time.Time{}, err
 	}
 	match := compile.ReplaceAllString(parse.Title, "") // remove whatever regFixTitle2 matches from the parsed title
 	match = strings.TrimRight(match, "") // NOTE(review): an empty cutset makes TrimRight a no-op; presumably trimming whitespace was intended — confirm
 	parse.Title = match
-	return parse, nil
+
+	fInfo, err := os.Stat(videoFileName) // NOTE(review): requires a real path on disk; for a name that is not an existing file the whole call fails even though parsing succeeded
+	if err != nil {
+		return nil, time.Time{}, err
+	}
+
+	return parse, fInfo.ModTime(), nil
 }
 
 func GetNumber2Float(input string) (float32, error) {

+ 4 - 4
model/decode_test.go

@@ -48,20 +48,20 @@ func Test_get_IMDB_nfo(t *testing.T) {
 
 func Test_VideoInfo(t *testing.T) { // manual check: parses one hard-coded sample name and prints title, season, episode and modification time
 
-	//subTitle := "X:\\电影\\Spiral From the Book of Saw (2021)\\Spiral From the Book of Saw (2021) WEBDL-1080p.mkv"
+	subTitle := "X:\\电影\\Spiral From the Book of Saw (2021)\\Spiral From the Book of Saw (2021) WEBDL-1080p.mkv"
 	//subTitle := "人之怒 WEBDL-1080p.mkv"
 	//subTitle := "機動戦士Zガンダム WEBDL-1080p.mkv"
 	//subTitle := "机动战士Z高达:星之继承者 (2005) 1080p TrueHD.mkv"
 	//subTitle := "X:\\连续剧\\The Bad Batch\\Season 1\\The Bad Batch - S01E01 - Aftermath WEBDL-1080p.mkv"
-	subTitle := "X:\\连续剧\\Money.Heist\\Season 1\\Money.Heist.S01E01.SPANISH.WEBRip.x264-ION10.zh-cn.ssa"
+	//subTitle := "X:\\连续剧\\Money.Heist\\Season 1\\Money.Heist.S01E01.SPANISH.WEBRip.x264-ION10.zh-cn.ssa"
 	//subTitle := "Spiral.From.the.Book.of.Saw.2021.1080p.WEBRip.x264-RARBG.chi.srt"
 	//subTitle := "Spiral.From.the.Book.of.Saw.2021.1080p.WEBRip.x264-RARBG.eng.srt"
 	//subTitle := "东城梅尔 第一季第一集【YYeTs字幕组 简繁英双语字幕】Mare.of.Easttown.S01E01.Miss.Lady.Hawk.Herself.720p/1080p.AMZN.WEB-DL.DDP5.1.H.264-TEPES"
-	info, err := GetVideoInfoFromFileName(subTitle)
+	info, modifyTime, err := GetVideoInfoFromFileName(subTitle)
 	if err != nil {
 		t.Error(err) // NOTE(review): t.Error does not stop the test; info is nil on the error path, so the println below would panic — t.Fatal looks safer
 	}
-	println("Title:", info.Title, "Season:", info.Season, "Episode:", info.Episode)
+	println("Title:", info.Title, "Season:", info.Season, "Episode:", info.Episode, modifyTime.String())
 }
 
 func TestGetNumber2Float(t *testing.T) {

+ 5 - 1
model/util.go

@@ -184,7 +184,7 @@ func VideoNameSearchKeywordMaker(title string, year string) string {
 	}
 	searchKeyword := title
 	if iYear >= 2020 {
-		searchKeyword = searchKeyword + year
+		searchKeyword = searchKeyword + " " + year
 	}
 
 	return searchKeyword
@@ -271,6 +271,10 @@ func IsWantedVideoExtDef(fileName string) bool {
 	return false
 }
 
+func GetEpisodeKeyName(season, eps int) string { // GetEpisodeKeyName builds the string key identifying one episode from its season and episode numbers.
+	return "S" + strconv.Itoa(season) + "E" +strconv.Itoa(eps) // no zero padding: season=1, eps=2 yields "S1E2"
+}
+
 var (
 	defDebugFolder = ""
 	defTmpFolder = ""

+ 5 - 5
series_helper/seriesHelper.go

@@ -6,7 +6,6 @@ import (
 	"github.com/allanpk716/ChineseSubFinder/sub_parser/ass"
 	"github.com/allanpk716/ChineseSubFinder/sub_parser/srt"
 	"path/filepath"
-	"strconv"
 )
 
 // ReadSeriesInfoFromDir 读取剧集的信息
@@ -45,7 +44,7 @@ func ReadSeriesInfoFromDir(seriesDir string) (*common.SeriesInfo, error) {
 	SubDict := make(map[string][]common.SubInfo)
 	for _, subFile := range subFiles {
 
-		info, err := model.GetVideoInfoFromFileName(subFile)
+		info, _, err := model.GetVideoInfoFromFileName(subFile)
 		if err != nil {
 			model.GetLogger().Errorln(err)
 			continue
@@ -55,7 +54,7 @@ func ReadSeriesInfoFromDir(seriesDir string) (*common.SeriesInfo, error) {
 			model.GetLogger().Errorln(err)
 			continue
 		}
-		epsKey := "S" + strconv.Itoa(info.Season) + "E" +strconv.Itoa(info.Episode)
+		epsKey := model.GetEpisodeKeyName(info.Season, info.Episode)
 		oneFileSubInfo := common.SubInfo{
 			Title: info.Title,
 			Season: info.Season,
@@ -75,12 +74,12 @@ func ReadSeriesInfoFromDir(seriesDir string) (*common.SeriesInfo, error) {
 	EpisodeDict := make(map[string]common.EpisodeInfo)
 	for _, videoFile := range videoFiles {
 		// 正常来说,一集只有一个格式的视频,也就是 S01E01 只有一个,如果有多个则会只保存第一个
-		info, err := model.GetVideoInfoFromFileName(videoFile)
+		info, modifyTime, err := model.GetVideoInfoFromFileName(videoFile)
 		if err != nil {
 			model.GetLogger().Errorln(err)
 			continue
 		}
-		epsKey := "S" + strconv.Itoa(info.Season) + "E" +strconv.Itoa(info.Episode)
+		epsKey := model.GetEpisodeKeyName(info.Season, info.Episode)
 		_, ok := EpisodeDict[epsKey]
 		if ok == false {
 			// 初始化
@@ -90,6 +89,7 @@ func ReadSeriesInfoFromDir(seriesDir string) (*common.SeriesInfo, error) {
 				Episode: info.Episode,
 				Dir: filepath.Dir(videoFile),
 				FileFullPath: videoFile,
+				ModifyTime: modifyTime,
 			}
 			// 需要匹配同级目录下的字幕
 			oneFileEpInfo.SubList = make([]common.SubInfo, 0)

+ 4 - 1
sub_supplier/subSupplierHub.go

@@ -88,6 +88,9 @@ func (d SubSupplierHub) downloadSub4OneVideo(oneVideoFullPath string, i int) []c
 
 // downloadSub4OneSite 在一个站点下载这个视频的字幕
 func (d SubSupplierHub) downloadSub4OneSite(oneVideoFullPath string, i int, supplier _interface.ISupplier) ([]common.SupplierSubInfo, error) {
+	defer func() {
+		d.log.Infoln(i, supplier.GetSupplierName(), "End...")
+	}()
 	d.log.Infoln(i, supplier.GetSupplierName(), "Start...")
 	subInfos, err := supplier.GetSubListFromFile(oneVideoFullPath)
 	if err != nil {
@@ -100,7 +103,7 @@ func (d SubSupplierHub) downloadSub4OneSite(oneVideoFullPath string, i int, supp
 			subInfos[x].Name = tmpSubFileName + info.Ext
 		}
 	}
-	d.log.Infoln(i, supplier.GetSupplierName(), "End...")
+
 	return subInfos, nil
 }
 

+ 26 - 8
sub_supplier/subhd/subhd.go

@@ -67,7 +67,7 @@ func (s Supplier) GetSubListFromFile(filePath string) ([]common.SupplierSubInfo,
 		如果找不到,再靠文件名提取影片名称去查找
 	*/
 	// 得到这个视频文件名中的信息
-	info, err := model.GetVideoInfoFromFileName(filePath)
+	info, _, err := model.GetVideoInfoFromFileName(filePath)
 	if err != nil {
 		return nil, err
 	}
@@ -150,7 +150,7 @@ func (s Supplier) Step0(keyword string) (string, error) {
 		return "", err
 	}
 	// 是否有查找到的结果,至少要有结果。根据这里这样下面才能判断是分析失效了,还是就是没有结果而已
-	re := regexp.MustCompile(`总共\s?<b>(\d+)</b>\s?条`)
+	re := regexp.MustCompile(`共\s*(\d+)\s*条`)
 	matched := re.FindAllStringSubmatch(result, -1)
 	if len(matched) < 1 {
 		return "",  common.SubHDStep0SubCountNotFound
@@ -164,12 +164,30 @@ func (s Supplier) Step0(keyword string) (string, error) {
 		return "", nil
 	}
 	// 这里是确认能继续分析的详细连接
-	re = regexp.MustCompile(`<a\shref="(/d/[\w]+)">\s?<img`)
-	matched = re.FindAllStringSubmatch(result, -1)
-	if len(matched) < 1 || len(matched[0]) < 2{
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(result))
+	if err != nil {
+		return "", err
+	}
+	imgSelection := doc.Find("img.rounded-start")
+	imgUrl, ok := imgSelection.Attr("src")
+	if ok == true{
+		imgName := filepath.Base(imgUrl)
+		imgExt := filepath.Ext(imgUrl)
+		if strings.Contains(imgName, "_") == true {
+			items := strings.Split(imgName, "_")
+			return "/d/" + items[0], nil
+		} else {
+			return "/d/" + strings.ReplaceAll(imgName, imgExt, ""), nil
+		}
+	} else{
 		return "",  common.SubHDStep0HrefIsNull
 	}
-	return matched[0][1], nil
+	//re = regexp.MustCompile(`<a\shref="(/d/[\w]+)">\s?<img`)
+	//matched = re.FindAllStringSubmatch(result, -1)
+	//if len(matched) < 1 || len(matched[0]) < 2{
+	//	return "",  common.SubHDStep0HrefIsNull
+	//}
+	//return matched[0][1], nil
 }
 // Step1 获取影片的详情字幕列表
 func (s Supplier) Step1(detailPageUrl string) ([]HdListItem, error) {
@@ -187,7 +205,7 @@ func (s Supplier) Step1(detailPageUrl string) ([]HdListItem, error) {
 	const subTableKeyword = ".pt-2"
 	const oneSubTrTitleKeyword = "a.link-dark"
 	const oneSubTrDownloadCountKeyword = "div.px-3"
-	const oneSubLangAndTypeKetword = ".text-secondary"
+	const oneSubLangAndTypeKeyword = ".text-secondary"
 
 	doc.Find(subTableKeyword).EachWithBreak(func(i int, tr *goquery.Selection) bool {
 		if tr.Find(oneSubTrTitleKeyword).Size() == 0 {
@@ -201,7 +219,7 @@ func (s Supplier) Step1(detailPageUrl string) ([]HdListItem, error) {
 		// 文件名
 		title := strings.TrimSpace(tr.Find(oneSubTrTitleKeyword).Text())
 		// 字幕类型
-		insideSubType := tr.Find(oneSubLangAndTypeKetword).Text()
+		insideSubType := tr.Find(oneSubLangAndTypeKeyword).Text()
 		if model.IsSubTypeWanted(insideSubType) == false {
 			return true
 		}

+ 70 - 10
sub_supplier/zimuku/zimuku.go

@@ -12,6 +12,7 @@ import (
 	"regexp"
 	"sort"
 	"strings"
+	"time"
 )
 
 type Supplier struct {
@@ -57,7 +58,7 @@ func (s Supplier) GetSubListFromFile4Series(seriesPath string) ([]common.Supplie
 	// 这里打算牺牲效率,提高代码的复用度,不然后续得维护一套电影的查询逻辑,一套剧集的查询逻辑
 	// 比如,其实可以搜索剧集名称,应该可以得到多个季的列表,然后分析再继续
 	// 现在粗暴点,直接一季搜索一次,跟电影的搜索一样,在首个影片就停止,然后继续往下
-
+	AllSeasonSubResult := SubResult{}
 	for value := range seriesInfo.SeasonDict {
 		// 第一级界面,找到影片的详情界面
 		keyword := seriesInfo.Name + " 第" + zh.Uint64(value).String() + "季"
@@ -70,11 +71,63 @@ func (s Supplier) GetSubListFromFile4Series(seriesPath string) ([]common.Supplie
 		if err != nil {
 			return nil, err
 		}
-		println(subResult.SubInfos.Len())
+
+		if AllSeasonSubResult.Title == "" {
+			AllSeasonSubResult = subResult
+		} else {
+			AllSeasonSubResult.SubInfos = append(AllSeasonSubResult.SubInfos, subResult.SubInfos...)
+		}
 	}
+	// 找到最大的优先级的字幕下载
+	sort.Sort(SortByPriority{AllSeasonSubResult.SubInfos})
+	// 字幕很多,考虑效率,需要做成字典
+	// key SxEx - SubInfos
+	var allSubDict = make(map[string]SubInfos)
+	for _, subInfo := range AllSeasonSubResult.SubInfos {
 
+		info, _, err := model.GetVideoInfoFromFileName(subInfo.Name)
+		if err != nil {
+			s.log.Errorln("GetSubListFromFile4Series.GetVideoInfoFromFileName", subInfo.Name, err)
+			continue
+		}
+		epsKey := model.GetEpisodeKeyName(info.Season, info.Episode)
+		_, ok := allSubDict[epsKey]
+		if ok == false {
+			// 初始化
+			allSubDict[epsKey] = SubInfos{}
+		}
+		// 添加
+		allSubDict[epsKey] = append(allSubDict[epsKey], subInfo)
+	}
+	// 本地的视频列表,找到没有字幕的
+	// 需要进行下载字幕的列表
+	var subInfoNeedDownload = make([]SubInfo, 0)
+	currentTime := time.Now()
+	// 30 天
+	dayRange, _ := time.ParseDuration("720h")
+	for _, epsInfo := range seriesInfo.EpList {
+		// 如果没有字幕,则加入下载列表
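+		// 这一集下载后的30天内,都进行字幕的下载 (NOTE(review): the condition below, Add(dayRange).Before(currentTime), is true only when the file is OLDER than 30 days — the opposite of this intent; After(currentTime) looks intended)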
+		if len(epsInfo.SubList) < 1 || epsInfo.ModifyTime.Add(dayRange).Before(currentTime) == true {
+			// 添加
+			info, _, err := model.GetVideoInfoFromFileName(epsInfo.Title)
+			if err != nil {
+				s.log.Errorln("GetSubListFromFile4Series.GetVideoInfoFromFileName", epsInfo.Title, err)
+				continue
+			}
+			epsKey := model.GetEpisodeKeyName(info.Season, info.Episode)
+			// 从一堆字幕里面找合适的
+			value, ok := allSubDict[epsKey]
+			// 是否有
+			if ok == true && len(value) > 0 {
+				subInfoNeedDownload = append(subInfoNeedDownload, value[0])
+			}
+		}
+	}
+	// 剩下的部分跟 GetSubListFromKeyword 一样,就是去下载了
+	outSubInfoList := s.whichSubInfoNeedDownload(subInfoNeedDownload, err)
 
-	return nil, nil
+	return outSubInfoList, nil
 }
 
 func (s Supplier) GetSubListFromFile4Anime(animePath string) ([]common.SupplierSubInfo, error){
@@ -90,7 +143,7 @@ func (s Supplier) GetSubListFromFile(filePath string) ([]common.SupplierSubInfo,
 		如果找不到,再靠文件名提取影片名称去查找
 	*/
 	// 得到这个视频文件名中的信息
-	info, err := model.GetVideoInfoFromFileName(filePath)
+	info, _, err := model.GetVideoInfoFromFileName(filePath)
 	if err != nil {
 		return nil, err
 	}
@@ -145,8 +198,16 @@ func (s Supplier) GetSubListFromKeyword(keyword string) ([]common.SupplierSubInf
 	// 找到最大的优先级的字幕下载
 	sort.Sort(SortByPriority{subResult.SubInfos})
 
-	for i := range subResult.SubInfos {
-		err = s.Step2(&subResult.SubInfos[i])
+	outSubInfoList = s.whichSubInfoNeedDownload(subResult.SubInfos, err)
+
+	return outSubInfoList, nil
+}
+
+func (s Supplier) whichSubInfoNeedDownload(subInfos SubInfos, err error) []common.SupplierSubInfo {
+
+	var outSubInfoList = make([]common.SupplierSubInfo, 0)
+	for i := range subInfos {
+		err = s.Step2(&subInfos[i])
 		if err != nil {
 			s.log.Error(err)
 			continue
@@ -156,7 +217,7 @@ func (s Supplier) GetSubListFromKeyword(keyword string) ([]common.SupplierSubInf
 	// TODO 这里需要考虑,可以设置为高级选项,不够就用 unknow 来补充
 	// 首先过滤出中文的字幕,同时需要满足是支持的字幕
 	var tmpSubInfo = make([]SubInfo, 0)
-	for _, subInfo := range subResult.SubInfos {
+	for _, subInfo := range subInfos {
 		tmpLang := model.LangConverter(subInfo.Lang)
 		if model.HasChineseLang(tmpLang) == true && model.IsSubTypeWanted(subInfo.Ext) == true {
 			tmpSubInfo = append(tmpSubInfo, subInfo)
@@ -164,7 +225,7 @@ func (s Supplier) GetSubListFromKeyword(keyword string) ([]common.SupplierSubInf
 	}
 	// 看字幕够不够
 	if len(tmpSubInfo) < s.topic {
-		for _, subInfo := range subResult.SubInfos {
+		for _, subInfo := range subInfos {
 			if len(tmpSubInfo) >= s.topic {
 				break
 			}
@@ -186,8 +247,7 @@ func (s Supplier) GetSubListFromKeyword(keyword string) ([]common.SupplierSubInf
 		outSubInfoList = append(outSubInfoList, *common.NewSupplierSubInfo(s.GetSupplierName(), int64(i), fileName, common.ChineseSimple, model.AddBaseUrl(common.SubZiMuKuRootUrl, subInfo.SubDownloadPageUrl), 0,
 			0, filepath.Ext(fileName), data))
 	}
-
-	return outSubInfoList, nil
+	return outSubInfoList
 }
 
 // Step0 先在查询界面找到字幕对应第一个影片的详情界面,需要解决自定义错误 ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound