浏览代码

修复 subhd 的问题,以及升级 adblock

Signed-off-by: 716 <[email protected]>
716 3 年之前
父节点
当前提交
b712f67db6

+ 1 - 1
cmd/chinesesubfinder/main.go

@@ -167,7 +167,7 @@ func DownLoadStart(httpProxy string) {
 		commonValue.SubhdCode = ""
 	} else {
 		log.Infoln("GetCode", updateTimeString, code)
-		//commonValue.SubhdCode = code
+		commonValue.SubhdCode = code
 	}
 
 	// 下载实例

+ 3 - 0
internal/downloader.go

@@ -78,6 +78,9 @@ func NewDownloader(inSubFormatter ifaces.ISubFormatter, _reqParam ...types.ReqPa
 	} else {
 		downloader.reqParam = *types.NewReqParam()
 	}
+	// 强制下载线程数为 1:并发过高时请求几乎全部报错
+	downloader.reqParam.Threads = 1
+
 	// 这里就不单独弄一个 reqParam.SubNameFormatter 字段来传递值了,因为 inSubFormatter 就已经知道是什么 formatter 了
 	downloader.subNameFormatter = subcommon.FormatterName(downloader.subFormatter.GetFormatterFormatterName())
 

+ 93 - 87
internal/logic/sub_supplier/subhd/subhd.go

@@ -121,11 +121,15 @@ func (s Supplier) GetSubListFromFile4Series(seriesInfo *series.SeriesInfo) ([]su
 	subInfoNeedDownload := s.whichEpisodeNeedDownloadSub(seriesInfo, subList)
 	// 下载字幕
 	for i, item := range subInfoNeedDownload {
-		hdContent, err := s.step2Ex(browser, item.Url)
+		bok, hdContent, err := s.step2Ex(browser, item.Url)
 		if err != nil {
 			s.log.Errorln("subhd step2Ex", err)
 			continue
 		}
+		if bok == false {
+			s.log.Errorln("subhd step2Ex return false")
+			continue
+		}
 		oneSubInfo := supplier.NewSubInfo(s.GetSupplierName(), int64(i), hdContent.Filename, language.ChineseSimple, my_util.AddBaseUrl(common.SubSubHDRootUrl, item.Url), 0,
 			0, hdContent.Ext, hdContent.Data)
 		oneSubInfo.Season = item.Season
@@ -212,11 +216,15 @@ func (s Supplier) getSubListFromKeyword4Movie(keyword string) ([]supplier.SubInf
 	}
 
 	for i, item := range subList {
-		hdContent, err := s.step2Ex(browser, item.Url)
+		bok, hdContent, err := s.step2Ex(browser, item.Url)
 		time.Sleep(time.Second)
 		if err != nil {
 			s.log.Errorln("subhd step2Ex", err)
-			return nil, err
+			continue
+		}
+		if bok == false {
+			s.log.Errorln("subhd step2Ex return false")
+			continue
 		}
 		subInfos = append(subInfos, *supplier.NewSubInfo(s.GetSupplierName(), int64(i), hdContent.Filename, language.ChineseSimple, my_util.AddBaseUrl(common.SubSubHDRootUrl, item.Url), 0, 0, hdContent.Ext, hdContent.Data))
 	}
@@ -418,7 +426,7 @@ func (s Supplier) step1(browser *rod.Browser, detailPageUrl string, isMovieOrSer
 }
 
 // step2Ex 下载字幕 过防水墙
-func (s Supplier) step2Ex(browser *rod.Browser, subDownloadPageUrl string) (*HdContent, error) {
+func (s Supplier) step2Ex(browser *rod.Browser, subDownloadPageUrl string) (bool, *HdContent, error) {
 	var err error
 	defer func() {
 		if err != nil {
@@ -427,80 +435,32 @@ func (s Supplier) step2Ex(browser *rod.Browser, subDownloadPageUrl string) (*HdC
 	}()
 	subDownloadPageUrl = my_util.AddBaseUrl(common.SubSubHDRootUrl, subDownloadPageUrl)
 
-	pageString, page, err := s.httpGetFromBrowser(browser, subDownloadPageUrl)
+	_, page, err := s.httpGetFromBrowser(browser, subDownloadPageUrl)
 	if err != nil {
-		return nil, err
+		return false, nil, err
 	}
 	defer func() {
 		_ = page.Close()
 	}()
 
-	doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageString))
-	if err != nil {
-		return nil, err
-	}
 	// 需要先判断是否先要输入验证码,然后才到下载界面
-
-	// 是否有腾讯的防水墙
-	hasWaterWall := true
-	waterWall := doc.Find(TCode)
-	if len(waterWall.Nodes) < 1 {
-		hasWaterWall = false
-	}
-	hasDownBtn, BtnElemenString := s.JugDownloadBtn(doc)
-
-	if hasWaterWall == false && hasDownBtn == false {
-		// 都没有,则返回故障,无法下载
-		return nil, common.SubHDStep2ExCannotFindDownloadBtn
-	}
 	// 下载字幕
-	content, err := s.downloadSubFile(browser, page, hasWaterWall, BtnElemenString)
+	bok, content, err := s.downloadSubFile(browser, page)
 	if err != nil {
-		return nil, err
+		return false, nil, err
 	}
-
-	return content, nil
-}
-
-func (s Supplier) JugDownloadBtn(doc *goquery.Document) (bool, string) {
-
-	const btnDown0 = "#down"
-	const btnDown1 = "button.down"
-	const btnDown2 = "button.btn"
-	// 是否有下载按钮
-	hasDownBtn := true
-	downBtn := doc.Find(btnDown0)
-	if len(downBtn.Nodes) < 1 {
-		hasDownBtn = false
-	} else {
-		return true, btnDown0
-	}
-	// 另一种是否有下载按钮的判断
-	if hasDownBtn == false {
-		downBtn = doc.Find(btnDown1)
-		if len(downBtn.Nodes) < 1 {
-			hasDownBtn = false
-		} else {
-			hasDownBtn = true
-			return true, btnDown1
-		}
-	}
-	// 新的一种下载按钮的判断
-	if hasDownBtn == false {
-		downBtn = doc.Find(btnDown2)
-		if len(downBtn.Nodes) < 1 {
-			hasDownBtn = false
-		} else {
-			hasDownBtn = true
-			return hasDownBtn, btnDown2
-		}
+	if bok == false {
+		return false, nil, nil
 	}
 
-	return false, btnDown1
+	return true, content, nil
 }
 
-func (s Supplier) downloadSubFile(browser *rod.Browser, page *rod.Page, hasWaterWall bool, btnElemenString string) (*HdContent, error) {
+func (s Supplier) downloadSubFile(browser *rod.Browser, page *rod.Page) (bool, *HdContent, error) {
+
 	var err error
+	var doc *goquery.Document
+	downloadSuccess := false
 	fileName := ""
 	fileByte := []byte{0}
 	err = rod.Try(func() {
@@ -516,47 +476,91 @@ func (s Supplier) downloadSubFile(browser *rod.Browser, page *rod.Page, hasWater
 			}
 			return b, info.SuggestedFilename, nil
 		}
+		// 初始化页面用于查询元素
+		pString := page.MustHTML()
+		doc, err = goquery.NewDocumentFromReader(strings.NewReader(pString))
+		if err != nil {
+			return
+		}
+		// 移除广告
+		page.MustEval(`testgssdqw = function () { if (document.getElementById("tbkp")) {document.getElementById("tbkp").remove()}; }`)
+		page.MustEval(`testgssdqw()`)
+		// 点击“验证获取下载地址”
+		clickCodeBtn := doc.Find(btnClickCodeBtn)
+		if len(clickCodeBtn.Nodes) < 1 {
+			return
+		}
+		element := page.MustElement(btnClickCodeBtn)
+		BtnCodeText := element.MustText()
+		if strings.Contains(BtnCodeText, "验证") == true {
+			// 那么需要填写验证码
+			element.MustClick()
+			time.Sleep(time.Second * 2)
+			// 填写“验证码”
+			page.MustEval(`$("#gzhcode").attr("value","` + common.SubhdCode + `");`)
+			// 是否有“完成验证”按钮
+			downBtn := doc.Find(btnCommitCode)
+			if len(downBtn.Nodes) < 1 {
+				return
+			}
+			element = page.MustElement(btnCommitCode)
+			benCommit := element.MustText()
+			if strings.Contains(benCommit, "验证") == false {
+				log_helper.GetLogger().Errorln("btn not found 完整验证")
+				return
+			}
+			element.MustClick()
+			time.Sleep(time.Second * 2)
 
-		// 点击下载按钮
-		//var el *rod.Element
-		if hasWaterWall == true {
-			page.MustElement(TCode).MustClick()
+			// 点击下载按钮
+			page.MustElement(btnClickCodeBtn).MustClick()
+		} else if strings.Contains(BtnCodeText, "下载") == true {
+
+			// 直接可以下载
+			element.MustClick()
+			time.Sleep(time.Second * 2)
 		} else {
-			page.MustElement(btnElemenString).MustClick()
-		}
-		// 找到遮挡的信息块,尝试移除
-		//if err != nil {
-		//if strings.Contains(err.Error(), "element covered by") == true {
-		//	println("11")
-		//	var eel *rod.ErrCovered
-		//	if errors.As(err, &eel) == true {
-		//		eel.MustRemove()
-		//		err = el.Click(proto.InputMouseButtonLeft)
-		//		if err != nil {
-		//			print(123)
-		//		}
-		//	}
-		//}
-		//}
+
+			log_helper.GetLogger().Errorln("btn not found 下载验证 or 下载")
+			return
+		}
+		// 更新 page 的实例对应的 doc Content
+		pString = page.MustHTML()
+		doc, err = goquery.NewDocumentFromReader(strings.NewReader(pString))
+		if err != nil {
+			return
+		}
+		// 是否有腾讯的防水墙
+		hasWaterWall := false
+		waterWall := doc.Find(TCode)
+		if len(waterWall.Nodes) >= 1 {
+			hasWaterWall = true
+		}
+		log_helper.GetLogger().Debugln("Need pass WaterWall", hasWaterWall)
 		// 过墙
 		if hasWaterWall == true {
 			s.passWaterWall(page)
 		}
+		time.Sleep(time.Second * 2)
 		fileByte, fileName, err = getDownloadFile()
 		if err != nil {
-			panic(err)
+			return
 		}
+		downloadSuccess = true
 	})
 	if err != nil {
-		return nil, err
+		return false, nil, err
 	}
-
 	var hdContent HdContent
 	hdContent.Filename = fileName
 	hdContent.Ext = filepath.Ext(fileName)
 	hdContent.Data = fileByte
 
-	return &hdContent, nil
+	if downloadSuccess == false {
+		return false, &hdContent, common.SubHDStep2ExCannotFindDownloadBtn
+	}
+
+	return downloadSuccess, &hdContent, nil
 }
 
 func (s Supplier) passWaterWall(page *rod.Page) {
@@ -688,3 +692,5 @@ type HdContent struct {
 }
 
 const TCode = "#TencentCaptcha"
+const btnClickCodeBtn = "button.btn-danger"
+const btnCommitCode = "button.btn-primary"

+ 78 - 58
internal/logic/sub_supplier/subhd/subhd_test.go

@@ -1,79 +1,99 @@
 package subhd
 
 import (
+	"fmt"
+	commonValue "github.com/allanpk716/ChineseSubFinder/internal/common"
+	series_helper2 "github.com/allanpk716/ChineseSubFinder/internal/logic/series_helper"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg/something_static"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg/unit_test_helper"
+	"path/filepath"
 	"testing"
 )
 
 // 无需关注这个测试用例,这个方案暂时弃用
 func TestSupplier_GetSubListFromFile(t *testing.T) {
 
-	////movie1 := "X:\\电影\\The Devil All the Time (2020)\\The Devil All the Time (2020) WEBDL-1080p.mkv"
-	////movie1 := "X:\\电影\\Luca (2021)\\Luca (2021) WEBDL-1080p.mkv"
-	////movie1 := "X:\\电影\\The Boss Baby Family Business (2021)\\The Boss Baby Family Business (2021) WEBDL-1080p.mkv"
-	////movie1 := "X:\\电影\\Oslo (2021)\\Oslo (2021) WEBDL-1080p.mkv"
-	////movie1 := "X:\\电影\\Spiral From the Book of Saw (2021)\\Spiral From the Book of Saw (2021) WEBDL-1080p.mkv"
+	//movie1 := "X:\\电影\\The Devil All the Time (2020)\\The Devil All the Time (2020) WEBDL-1080p.mkv"
+	//movie1 := "X:\\电影\\Luca (2021)\\Luca (2021) WEBDL-1080p.mkv"
+	//movie1 := "X:\\电影\\The Boss Baby Family Business (2021)\\The Boss Baby Family Business (2021) WEBDL-1080p.mkv"
+	//movie1 := "X:\\电影\\Oslo (2021)\\Oslo (2021) WEBDL-1080p.mkv"
+	//movie1 := "X:\\电影\\Spiral From the Book of Saw (2021)\\Spiral From the Book of Saw (2021) WEBDL-1080p.mkv"
 	//movie1 := "X:\\电影\\消失爱人 (2016)\\消失爱人 (2016) 720p AAC.rmvb"
-	////movie1 := "X:\\电影\\机动战士Z高达:星之继承者 (2005)\\机动战士Z高达:星之继承者 (2005) 1080p TrueHD.mkv"
-	//
-	//subhd := NewSupplier()
-	//outList, err := subhd.getSubListFromFile4Movie(movie1)
-	//if err != nil {
-	//	t.Error(err)
-	//}
-	//println(outList)
-	//
-	//if len(outList) == 0 {
-	//	println("now sub found")
-	//}
-	//
-	//for i, sublist := range outList {
-	//	println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Score, sublist.FileUrl, len(sublist.Data))
-	//}
+	//movie1 := "X:\\电影\\机动战士Z高达:星之继承者 (2005)\\机动战士Z高达:星之继承者 (2005) 1080p TrueHD.mkv"
+	getCode()
+	rootDir := unit_test_helper.GetTestDataResourceRootPath([]string{"sub_spplier"}, 5, true)
+	movie1 := filepath.Join(rootDir, "zimuku", "movies", "消失爱人 (2016)", "消失爱人 (2016) 720p AAC.rmvb")
+
+	subhd := NewSupplier()
+	outList, err := subhd.getSubListFromFile4Movie(movie1)
+	if err != nil {
+		t.Error(err)
+	}
+	println(outList)
+
+	if len(outList) == 0 {
+		println("now sub found")
+	}
+
+	for i, sublist := range outList {
+		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Score, sublist.FileUrl, len(sublist.Data))
+	}
 }
 
 // 无需关注这个测试用例,这个方案暂时弃用
 func TestSupplier_GetSubListFromFile4Series(t *testing.T) {
 
-	////ser := "X:\\连续剧\\The Bad Batch"	// tt12708542
-	////ser := "X:\\连续剧\\瑞克和莫蒂 (2013)" //
-	////ser := "X:\\连续剧\\杀死伊芙 (2018)"	// tt7016936
-	////ser := "X:\\连续剧\\Money.Heist"
-	////ser := "X:\\连续剧\\黑钱胜地 (2017)"
-	//
-	//rootDir := unit_test_helper.GetTestDataResourceRootPath([]string{"zimuku"}, 5, true)
-	//ser := filepath.Join(rootDir, "series", "黄石 (2018)")
-	//// 读取本地的视频和字幕信息
-	//seriesInfo, err := series_helper2.ReadSeriesInfoFromDir(ser, nil, false)
-	//if err != nil {
-	//	t.Fatal(err)
-	//}
-	//s := NewSupplier()
-	//outList, err := s.GetSubListFromFile4Series(seriesInfo)
-	//if err != nil {
-	//	t.Fatal(err)
-	//}
-	//println(outList)
-	//for i, sublist := range outList {
-	//	println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Score, len(sublist.Data))
-	//}
+	//ser := "X:\\连续剧\\The Bad Batch"	// tt12708542
+	//ser := "X:\\连续剧\\瑞克和莫蒂 (2013)" //
+	//ser := "X:\\连续剧\\杀死伊芙 (2018)"	// tt7016936
+	//ser := "X:\\连续剧\\Money.Heist"
+	//ser := "X:\\连续剧\\黑钱胜地 (2017)"
+	getCode()
+	rootDir := unit_test_helper.GetTestDataResourceRootPath([]string{"sub_spplier"}, 5, true)
+	ser := filepath.Join(rootDir, "zimuku", "series", "黄石 (2018)")
+	// 读取本地的视频和字幕信息
+	seriesInfo, err := series_helper2.ReadSeriesInfoFromDir(ser, nil, false)
+	if err != nil {
+		t.Fatal(err)
+	}
+	s := NewSupplier()
+	outList, err := s.GetSubListFromFile4Series(seriesInfo)
+	if err != nil {
+		t.Fatal(err)
+	}
+	println(outList)
+	for i, sublist := range outList {
+		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Score, len(sublist.Data))
+	}
 }
 
 // 无需关注这个测试用例,这个方案暂时弃用
 func TestSupplier_getSubListFromKeyword4Movie(t *testing.T) {
 
-	////imdbID := "tt12708542" // 星球大战:残次品
-	////imdbID := "tt7016936" // 杀死伊芙
-	//imdbID := "tt2990738" // 恐怖直播
-	////imdbID := "tt3032476" 	// 风骚律师
-	////imdbID := "tt6468322" 	// 纸钞屋
-	////imdbID := "tt15299712" // 云南虫谷
-	////imdbID := "tt3626476" // Vacation Friends (2021)
-	//subhd := NewSupplier()
-	//subInfos, err := subhd.getSubListFromKeyword4Movie(imdbID)
-	//if err != nil {
-	//	t.Fatal(err)
-	//}
-	//for i, sublist := range subInfos {
-	//	println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Score, len(sublist.Data))
-	//}
+	//imdbID := "tt12708542" // 星球大战:残次品
+	//imdbID := "tt7016936" // 杀死伊芙
+	imdbID := "tt2990738" // 恐怖直播
+	//imdbID := "tt3032476" 	// 风骚律师
+	//imdbID := "tt6468322" 	// 纸钞屋
+	//imdbID := "tt15299712" // 云南虫谷
+	//imdbID := "tt3626476" // Vacation Friends (2021)
+	getCode()
+	subhd := NewSupplier()
+	subInfos, err := subhd.getSubListFromKeyword4Movie(imdbID)
+	if err != nil {
+		t.Fatal(err)
+	}
+	for i, sublist := range subInfos {
+		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Score, len(sublist.Data))
+	}
+}
+
+func getCode() {
+	updateTimeString, code, err := something_static.GetCodeFromWeb()
+	if err != nil {
+		commonValue.SubhdCode = ""
+	} else {
+		commonValue.SubhdCode = code
+	}
+	fmt.Println("UpdateTime", updateTimeString)
 }

+ 4 - 4
internal/logic/sub_supplier/zimuku/zimuku_test.go

@@ -30,8 +30,8 @@ func TestSupplier_GetSubListFromFile(t *testing.T) {
 	//movie1 := "X:\\电影\\机动战士Z高达:星之继承者 (2005)\\机动战士Z高达:星之继承者 (2005) 1080p TrueHD.mkv"
 	//movie1 := "X:\\连续剧\\The Bad Batch\\Season 1\\The Bad Batch - S01E01 - Aftermath WEBDL-1080p.mkv"
 
-	rootDir := unit_test_helper.GetTestDataResourceRootPath([]string{"zimuku"}, 5, true)
-	movie1 := filepath.Join(rootDir, "movies", "The Devil All the Time (2020)", "The Devil All the Time (2020) WEBDL-1080p.mkv")
+	rootDir := unit_test_helper.GetTestDataResourceRootPath([]string{"sub_spplier"}, 5, true)
+	movie1 := filepath.Join(rootDir, "zimuku", "movies", "The Devil All the Time (2020)", "The Devil All the Time (2020) WEBDL-1080p.mkv")
 	s := NewSupplier()
 	outList, err := s.getSubListFromMovie(movie1)
 	if err != nil {
@@ -50,8 +50,8 @@ func TestSupplier_GetSubListFromFile4Series(t *testing.T) {
 	//ser := "X:\\连续剧\\Money.Heist"
 	//ser := "X:\\连续剧\\黄石 (2018)"
 
-	rootDir := unit_test_helper.GetTestDataResourceRootPath([]string{"zimuku"}, 5, true)
-	ser := filepath.Join(rootDir, "series", "黄石 (2018)")
+	rootDir := unit_test_helper.GetTestDataResourceRootPath([]string{"sub_spplier"}, 5, true)
+	ser := filepath.Join(rootDir, "zimuku", "series", "黄石 (2018)")
 	// 读取本地的视频和字幕信息
 	seriesInfo, err := series_helper2.ReadSeriesInfoFromDir(ser, nil, false)
 	if err != nil {

+ 1 - 1
internal/pkg/my_util/util.go

@@ -83,7 +83,7 @@ func DownFile(urlStr string, _reqParam ...types.ReqParam) ([]byte, string, error
 	filename := GetFileName(resp.RawResponse)
 
 	if filename == "" {
-		log_helper.GetLogger().Errorln("DownFile.GetFileName is string.empty", urlStr)
+		log_helper.GetLogger().Warningln("DownFile.GetFileName is string.empty", urlStr)
 	}
 
 	return resp.Body(), filename, nil

二进制
internal/pkg/rod_helper/assets/adblock_v4.34.0.zip → internal/pkg/rod_helper/assets/adblock_4_42_0_0.zip


+ 2 - 1
internal/pkg/rod_helper/rodHelper.go

@@ -188,6 +188,7 @@ func releaseAdblock() (string, error) {
 	if err != nil {
 		return "", err
 	}
+	_ = adblockZipFile.Close()
 
 	r := archiver.NewZip()
 	err = r.Unarchive(outZipFileFPath, desPath)
@@ -202,7 +203,7 @@ const adblockInsideName = "adblock"
 var once sync.Once
 
 // 这个文件内有一个子文件夹 adblock ,制作的时候务必注意
-//go:embed assets/adblock_v4.34.0.zip
+//go:embed assets/adblock_4_42_0_0.zip
 var adblockFolder []byte
 
 var adblockSavePath string