Преглед изворни кода

尝试修复 subhd 的失效的问题,目前定位到下载的时候有遮挡导致失败

Signed-off-by: 716 <[email protected]>
716 пре 4 година
родитељ
комит
0a0803357c

+ 2 - 0
.gitignore

@@ -32,3 +32,5 @@
 /internal/tmpthings
 /TestData/sub_parser/test
 /TestData/sub_format_changer/test
+/internal/logic/sub_supplier/subhd/.rod
+/internal/logic/sub_supplier/subhd/config.yaml

+ 3 - 3
TestCode/test_timeout.go

@@ -1,7 +1,7 @@
 package TestCode
 
 import (
-	"github.com/allanpk716/ChineseSubFinder/internal/pkg"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg/random_useragent"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/rod_helper"
 	"github.com/go-rod/rod/lib/proto"
 	"github.com/panjf2000/ants/v2"
@@ -120,7 +120,7 @@ func goStep(inData InputData) error {
 						return
 					}
 					page.MustSetUserAgent(&proto.NetworkSetUserAgentOverride{
-						UserAgent: pkg.RandomUserAgent(true),
+						UserAgent: random_useragent.RandomUserAgent(true),
 					})
 					err = page.WaitLoad()
 					time.Sleep(10 * time.Second)
@@ -168,7 +168,7 @@ func oneStep(inData InputData) error {
 		return err
 	}
 	page.MustSetUserAgent(&proto.NetworkSetUserAgentOverride{
-		UserAgent: pkg.RandomUserAgent(true),
+		UserAgent: random_useragent.RandomUserAgent(true),
 	})
 	err = page.WaitLoad()
 	time.Sleep(10 * time.Second)

+ 1 - 1
internal/common/selferr.go

@@ -16,7 +16,7 @@ var (
 	ZiMuKuDownloadUrlStep3AllFailed               = errors.New("zimuku download url step3 all failed")
 
 	SubHDStep0SubCountElementNotFound = errors.New("subhd step0 sub count element not found")
-	SubHDStep0ImgParentLessThan2      = errors.New("subhd step0 Img Parent less than 2")
+	SubHDStep0ImgParentLessThan1      = errors.New("subhd step0 Img Parent less than 1")
 	SubHDStep0HrefIsNull              = errors.New("subhd step0 href is Null")
 	SubHDStep2ExCannotFindDownloadBtn = errors.New("subhd step2ex can not find download btn")
 )

+ 99 - 79
internal/logic/sub_supplier/subhd/subhd.go

@@ -2,6 +2,7 @@ package subhd
 
 import (
 	"bytes"
+	"errors"
 	"fmt"
 	"github.com/PuerkitoBio/goquery"
 	"github.com/Tnze/go.num/v2/zh"
@@ -18,6 +19,7 @@ import (
 	"github.com/go-rod/rod"
 	"github.com/go-rod/rod/lib/launcher"
 	"github.com/go-rod/rod/lib/proto"
+
 	"github.com/nfnt/resize"
 	"github.com/sirupsen/logrus"
 	"image/jpeg"
@@ -37,6 +39,7 @@ type Supplier struct {
 	log         *logrus.Logger
 	topic       int
 	rodLauncher *launcher.Launcher
+	tt          time.Duration
 }
 
 func NewSupplier(_reqParam ...types.ReqParam) *Supplier {
@@ -50,6 +53,13 @@ func NewSupplier(_reqParam ...types.ReqParam) *Supplier {
 			sup.topic = sup.reqParam.Topic
 		}
 	}
+
+	// Default timeout is common.HTMLTimeOut (2 * 60s per the original note); debug mode uses common.OneVideoProcessTimeOut (5 min).
+	sup.tt = common.HTMLTimeOut
+	if sup.reqParam.DebugMode == true {
+		sup.tt = common.OneVideoProcessTimeOut
+	}
+
 	return &sup
 }
 
@@ -67,12 +77,20 @@ func (s Supplier) GetSubListFromFile4Movie(filePath string) ([]supplier.SubInfo,
 
 func (s Supplier) GetSubListFromFile4Series(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error) {
 
+	var browser *rod.Browser
+	// TODO 是用本地的 Browser 还是远程的,推荐是远程的
+	browser, err := rod_helper.NewBrowser(s.reqParam.HttpProxy)
+	if err != nil {
+		return nil, err
+	}
+	defer browser.Close()
+
 	var subInfos = make([]supplier.SubInfo, 0)
 	var subList = make([]HdListItem, 0)
 	for value := range seriesInfo.NeedDlSeasonDict {
 		// 第一级界面,找到影片的详情界面
 		keyword := seriesInfo.Name + " 第" + zh.Uint64(value).String() + "季"
-		detailPageUrl, err := s.step0(keyword)
+		detailPageUrl, err := s.step0(browser, keyword)
 		if err != nil {
 			s.log.Errorln("subhd step0", keyword)
 			return nil, err
@@ -82,7 +100,7 @@ func (s Supplier) GetSubListFromFile4Series(seriesInfo *series.SeriesInfo) ([]su
 			s.log.Warning("subhd first search keyword", keyword, "not found")
 			keyword = seriesInfo.Name
 			s.log.Warning("subhd Retry", keyword)
-			detailPageUrl, err = s.step0(keyword)
+			detailPageUrl, err = s.step0(browser, keyword)
 			if err != nil {
 				s.log.Errorln("subhd step0", keyword)
 				return nil, err
@@ -93,7 +111,7 @@ func (s Supplier) GetSubListFromFile4Series(seriesInfo *series.SeriesInfo) ([]su
 			continue
 		}
 		// 列举字幕
-		oneSubList, err := s.step1(detailPageUrl, false)
+		oneSubList, err := s.step1(browser, detailPageUrl, false)
 		if err != nil {
 			s.log.Errorln("subhd step1", keyword)
 			return nil, err
@@ -105,13 +123,6 @@ func (s Supplier) GetSubListFromFile4Series(seriesInfo *series.SeriesInfo) ([]su
 	// 找到那些 Eps 需要下载字幕的
 	subInfoNeedDownload := s.whichEpisodeNeedDownloadSub(seriesInfo, subList)
 	// 下载字幕
-	var browser *rod.Browser
-	// 是用本地的 Browser 还是远程的,推荐是远程的
-	browser, err := rod_helper.NewBrowser(s.reqParam.HttpProxy)
-	if err != nil {
-		return nil, err
-	}
-	defer browser.Close()
 	for i, item := range subInfoNeedDownload {
 		hdContent, err := s.step2Ex(browser, item.Url)
 		if err != nil {
@@ -180,8 +191,16 @@ func (s Supplier) getSubListFromFile4Movie(filePath string) ([]supplier.SubInfo,
 
 func (s Supplier) getSubListFromKeyword4Movie(keyword string) ([]supplier.SubInfo, error) {
 
+	var browser *rod.Browser
+	// TODO 是用本地的 Browser 还是远程的,推荐是远程的
+	browser, err := rod_helper.NewBrowser(s.reqParam.HttpProxy)
+	if err != nil {
+		return nil, err
+	}
+	defer browser.Close()
+
 	var subInfos []supplier.SubInfo
-	detailPageUrl, err := s.step0(keyword)
+	detailPageUrl, err := s.step0(browser, keyword)
 	if err != nil {
 		return nil, err
 	}
@@ -189,18 +208,10 @@ func (s Supplier) getSubListFromKeyword4Movie(keyword string) ([]supplier.SubInf
 	if detailPageUrl == "" {
 		return nil, nil
 	}
-	subList, err := s.step1(detailPageUrl, true)
-	if err != nil {
-		return nil, err
-	}
-
-	var browser *rod.Browser
-	// 是用本地的 Browser 还是远程的,推荐是远程的
-	browser, err = rod_helper.NewBrowser(s.reqParam.HttpProxy)
+	subList, err := s.step1(browser, detailPageUrl, true)
 	if err != nil {
 		return nil, err
 	}
-	defer browser.Close()
 
 	for i, item := range subList {
 		hdContent, err := s.step2Ex(browser, item.Url)
@@ -270,7 +281,7 @@ func (s Supplier) whichEpisodeNeedDownloadSub(seriesInfo *series.SeriesInfo, all
 }
 
 // step0 找到这个影片的详情列表
-func (s Supplier) step0(keyword string) (string, error) {
+func (s Supplier) step0(browser *rod.Browser, keyword string) (string, error) {
 	var err error
 	defer func() {
 		if err != nil {
@@ -278,10 +289,11 @@ func (s Supplier) step0(keyword string) (string, error) {
 		}
 	}()
 
-	result, err := s.httpGet(fmt.Sprintf(common.SubSubHDSearchUrl, url.QueryEscape(keyword)))
+	result, page, err := s.httpGetFromBrowser(browser, fmt.Sprintf(common.SubSubHDSearchUrl, url.QueryEscape(keyword)))
 	if err != nil {
 		return "", err
 	}
+	defer page.Close()
 	// 是否有查找到的结果,至少要有结果。根据这里这样下面才能判断是分析失效了,还是就是没有结果而已
 	re := regexp.MustCompile(`共\s*(\d+)\s*条`)
 	matched := re.FindAllStringSubmatch(result, -1)
@@ -305,8 +317,8 @@ func (s Supplier) step0(keyword string) (string, error) {
 	_, ok := imgSelection.Attr("src")
 	if ok == true {
 
-		if len(imgSelection.Nodes) < 2 {
-			return "", common.SubHDStep0ImgParentLessThan2
+		if len(imgSelection.Nodes) < 1 {
+			return "", common.SubHDStep0ImgParentLessThan1
 		}
 		step1Url := ""
 		if imgSelection.Nodes[0].Parent.Data == "a" {
@@ -330,27 +342,13 @@ func (s Supplier) step0(keyword string) (string, error) {
 			return "", common.SubHDStep0HrefIsNull
 		}
 		return step1Url, nil
-		//imgName := filepath.Base(imgUrl)
-		//imgExt := filepath.Ext(imgUrl)
-		//if strings.Contains(imgName, "_") == true {
-		//	items := strings.Split(imgName, "_")
-		//	return "/d/" + items[0], nil
-		//} else {
-		//	return "/d/" + strings.ReplaceAll(imgName, imgExt, ""), nil
-		//}
 	} else {
 		return "", common.SubHDStep0HrefIsNull
 	}
-	//re = regexp.MustCompile(`<a\shref="(/d/[\w]+)">\s?<img`)
-	//matched = re.FindAllStringSubmatch(result, -1)
-	//if len(matched) < 1 || len(matched[0]) < 2{
-	//	return "",  common.SubHDStep0HrefIsNull
-	//}
-	//return matched[0][1], nil
 }
 
 // step1 获取影片的详情字幕列表
-func (s Supplier) step1(detailPageUrl string, isMovieOrSeries bool) ([]HdListItem, error) {
+func (s Supplier) step1(browser *rod.Browser, detailPageUrl string, isMovieOrSeries bool) ([]HdListItem, error) {
 	var err error
 	defer func() {
 		if err != nil {
@@ -358,10 +356,11 @@ func (s Supplier) step1(detailPageUrl string, isMovieOrSeries bool) ([]HdListIte
 		}
 	}()
 	detailPageUrl = pkg.AddBaseUrl(common.SubSubHDRootUrl, detailPageUrl)
-	result, err := s.httpGet(detailPageUrl)
+	result, page, err := s.httpGetFromBrowser(browser, detailPageUrl)
 	if err != nil {
 		return nil, err
 	}
+	defer page.Close()
 	doc, err := goquery.NewDocumentFromReader(strings.NewReader(result))
 	if err != nil {
 		return nil, err
@@ -425,35 +424,20 @@ func (s Supplier) step2Ex(browser *rod.Browser, subDownloadPageUrl string) (*HdC
 		}
 	}()
 	subDownloadPageUrl = pkg.AddBaseUrl(common.SubSubHDRootUrl, subDownloadPageUrl)
-	// 默认超时是 60s,如果是调试模式则是 5 min
-	tt := common.HTMLTimeOut
-	if s.reqParam.DebugMode == true {
-		tt = common.OneVideoProcessTimeOut
-	}
-	// TODO 考虑后续把浏览器爬虫的逻辑剥离出来,需要替换这个到远程的 Docker 执行
-	page, err := rod_helper.NewPageNavigate(browser, subDownloadPageUrl, tt, 5)
+
+	pageString, page, err := s.httpGetFromBrowser(browser, subDownloadPageUrl)
 	if err != nil {
 		return nil, err
 	}
 	defer page.Close()
-	page.MustSetUserAgent(&proto.NetworkSetUserAgentOverride{
-		UserAgent: pkg.RandomUserAgent(true),
-	})
-	err = page.WaitLoad()
-	if err != nil {
-		return nil, err
-	}
-	pageString, err := page.HTML()
-	if err != nil {
-		return nil, err
-	}
+
 	doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageString))
 	if err != nil {
 		return nil, err
 	}
 	// 是否有腾讯的防水墙
 	hasWaterWall := true
-	waterWall := doc.Find("#TencentCaptcha")
+	waterWall := doc.Find(TCode)
 	if len(waterWall.Nodes) < 1 {
 		hasWaterWall = false
 	}
@@ -473,24 +457,27 @@ func (s Supplier) step2Ex(browser *rod.Browser, subDownloadPageUrl string) (*HdC
 }
 
 func (s Supplier) JugDownloadBtn(doc *goquery.Document) (bool, string) {
+
+	const btnDown_0 = "#down"
+	const btnDown_1 = "button.down"
 	// 是否有下载按钮
 	hasDownBtn := true
-	downBtn := doc.Find("#down")
+	downBtn := doc.Find(btnDown_0)
 	if len(downBtn.Nodes) < 1 {
 		hasDownBtn = false
 	} else {
-		return true, "#down"
+		return true, btnDown_0
 	}
 	// 另一种是否有下载按钮的判断
 	if hasDownBtn == false {
-		downBtn = doc.Find("button.down")
+		downBtn = doc.Find(btnDown_1)
 		if len(downBtn.Nodes) < 1 {
 			hasDownBtn = false
 		} else {
 			hasDownBtn = true
 		}
 	}
-	return hasDownBtn, "button.down"
+	return hasDownBtn, btnDown_1
 }
 
 func (s Supplier) downloadSubFile(browser *rod.Browser, page *rod.Page, hasWaterWall bool, btnElemenString string) (*HdContent, error) {
@@ -512,10 +499,25 @@ func (s Supplier) downloadSubFile(browser *rod.Browser, page *rod.Page, hasWater
 		}
 
 		// 点击下载按钮
+		var el *rod.Element
 		if hasWaterWall == true {
-			page.MustElement("#TencentCaptcha").MustClick()
+			el = page.MustElement(TCode)
 		} else {
-			page.MustElement(btnElemenString).MustClick()
+			el = page.MustElement(btnElemenString)
+		}
+		err = el.Click(proto.InputMouseButtonLeft)
+		if err != nil {
+			if strings.Contains(err.Error(), "element covered by") == true {
+				println("11")
+				var eel *rod.ErrCovered
+				if errors.As(err, &eel) == true {
+					eel.MustRemove()
+					err = el.Click(proto.InputMouseButtonLeft)
+					if err != nil {
+						print(123)
+					}
+				}
+			}
 		}
 		// 过墙
 		if hasWaterWall == true {
@@ -631,24 +633,40 @@ search:
 	}
 }
 
-func (s Supplier) httpGet(inputUrl string) (string, error) {
-	s.reqParam.Referer = inputUrl
-	httpClient := pkg.NewHttpClient(s.reqParam)
-	resp, err := httpClient.R().Get(inputUrl)
+//func (s Supplier) httpGet(inputUrl string) (string, error) {
+//	s.reqParam.Referer = inputUrl
+//	httpClient := pkg.NewHttpClient(s.reqParam)
+//	resp, err := httpClient.R().Get(inputUrl)
+//	if err != nil {
+//		return "", err
+//	}
+//	recvText := resp.String()
+//	//搜索验证 点击继续搜索
+//	if strings.Contains(recvText, "搜索验证") || strings.Contains(recvText, "搜索频率") {
+//		s.log.Debugln("搜索验证 or 搜索频率 reload", inputUrl)
+//		// 每次搜索间隔在 30-40s
+//		time.Sleep(pkg.RandomSecondDuration(30, 40))
+//		return s.httpGet(inputUrl)
+//	}
+//	// 每次搜索间隔在 30-40s
+//	time.Sleep(pkg.RandomSecondDuration(30, 40))
+//	return recvText, nil
+//}
+
+func (s Supplier) httpGetFromBrowser(browser *rod.Browser, inputUrl string) (string, *rod.Page, error) {
+
+	page, err := rod_helper.NewPageNavigate(browser, inputUrl, s.tt, 5)
 	if err != nil {
-		return "", err
+		return "", nil, err
 	}
-	recvText := resp.String()
-	//搜索验证 点击继续搜索
-	if strings.Contains(recvText, "搜索验证") || strings.Contains(recvText, "搜索频率") {
-		s.log.Debugln("搜索验证 or 搜索频率 reload", inputUrl)
-		// 每次搜索间隔在 30-40s
-		time.Sleep(pkg.RandomSecondDuration(30, 40))
-		return s.httpGet(inputUrl)
+	pageString, err := page.HTML()
+	if err != nil {
+		return "", nil, err
 	}
 	// Pause a random 5-10s after each page fetch (this was 30-40s in the old httpGet).
-	time.Sleep(pkg.RandomSecondDuration(30, 40))
-	return recvText, nil
+	time.Sleep(pkg.RandomSecondDuration(5, 10))
+
+	return pageString, page, nil
 }
 
 type HdListItem struct {
@@ -669,3 +687,5 @@ type HdContent struct {
 	Ext      string `json:"ext"`
 	Data     []byte `json:"data"`
 }
+
+const TCode = "#TencentCaptcha" // selector given to doc.Find / page.MustElement to locate the Tencent captcha element

+ 16 - 4
internal/logic/sub_supplier/subhd/subhd_test.go

@@ -2,6 +2,8 @@ package subhd
 
 import (
 	series_helper2 "github.com/allanpk716/ChineseSubFinder/internal/logic/series_helper"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg"
+	"github.com/allanpk716/ChineseSubFinder/internal/types"
 	"testing"
 )
 
@@ -15,7 +17,7 @@ func TestSupplier_GetSubListFromFile(t *testing.T) {
 	//movie1 := "X:\\电影\\消失爱人 (2016)\\消失爱人 (2016) 720p AAC.rmvb"
 	//movie1 := "X:\\电影\\机动战士Z高达:星之继承者 (2005)\\机动战士Z高达:星之继承者 (2005) 1080p TrueHD.mkv"
 	//movie1 := "X:\\连续剧\\The Bad Batch\\Season 1\\The Bad Batch - S01E01 - Aftermath WEBDL-1080p.mkv"
-	subhd := NewSupplier()
+	subhd := NewSupplier(getReqParam())
 	outList, err := subhd.getSubListFromFile4Movie(movie1)
 	if err != nil {
 		t.Error(err)
@@ -44,7 +46,7 @@ func TestSupplier_GetSubListFromFile4Series(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	s := NewSupplier()
+	s := NewSupplier(getReqParam())
 	outList, err := s.GetSubListFromFile4Series(seriesInfo)
 	if err != nil {
 		t.Fatal(err)
@@ -63,8 +65,8 @@ func TestSupplier_getSubListFromKeyword4Movie(t *testing.T) {
 	//imdbID := "tt3032476" 	// 风骚律师
 	//imdbID := "tt6468322" 	// 纸钞屋
 	//imdbID := "tt15299712" // 云南虫谷
-	//imdbID := "tt3626476"	// Vacation Friends (2021)
-	subhd := NewSupplier()
+	//imdbID := "tt3626476" // Vacation Friends (2021)
+	subhd := NewSupplier(getReqParam())
 	subInfos, err := subhd.getSubListFromKeyword4Movie(imdbID)
 	if err != nil {
 		t.Fatal(err)
@@ -73,3 +75,13 @@ func TestSupplier_getSubListFromKeyword4Movie(t *testing.T) {
 		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Score, len(sublist.Data))
 	}
 }
+
+// getReqParam builds the ReqParam handed to NewSupplier in these tests; the
+// HTTP proxy is copied from pkg.GetConfig() only when UseProxy is set.
+func getReqParam() types.ReqParam {
+	var reqParam types.ReqParam
+	if cfg := pkg.GetConfig(); cfg.UseProxy {
+		reqParam.HttpProxy = cfg.HttpProxy
+	}
+	return reqParam
+}

+ 1 - 64
internal/pkg/random.go

@@ -1,7 +1,6 @@
 package pkg
 
 import (
-	browser "github.com/allanpk716/fake-useragent"
 	"math/rand"
 	"time"
 )
@@ -11,68 +10,6 @@ func RandomSecondDuration(min, max int32) time.Duration {
 	return time.Duration(tmp) * time.Second
 }
 
-func RandomUserAgent(UserOrSearchEngine bool) string {
-	if UserOrSearchEngine == true {
-		return browser.Random()
-	} else {
-		// From https://www.cnblogs.com/gengyufei/p/12641200.html
-		return engineUAList[random.Intn(len(engineUAList))]
-	}
-}
-
 var (
-	random       = rand.New(rand.NewSource(time.Now().UnixNano()))
-	engineUAList = []string{
-		// 百度搜索User-Agent:
-		// 百度 PC UA
-		"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
-		"Mozilla/5.0 (compatible; Baiduspider-render/2.0; +http://www.baidu.com/search/spider.html)",
-		// 百度移动 UA
-		"Mozilla/5.0 (Linux;u;Android 4.2.2;zh-cn;) AppleWebKit/534.46 (KHTML,like Gecko) Version/5.1",
-		"Mobile Safari/10600.6.3 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
-		"Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1 (compatible; Baiduspider-render/2.0; +http://www.baidu.com/search/spider.html)",
-		// 百度图片UA
-		//"Baiduspider-image+(+http://www.baidu.com/search/spider.htm)",
-		// 神马搜索User-Agent:
-		// B神马搜索 PC UA
-		"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 YisouSpider/5.0 Safari/537.36",
-		// 神马搜索移动 UA
-		"Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) CriOS/56.0.2924.75 Mobile/14E5239e YisouSpider/5.0 Safari/602.1",
-		// 谷歌User-Agent:
-		// 谷歌 PC UA
-		"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
-		// 谷歌移动UA
-		"AdsBot-Google-Mobile (+http://www.google.com/mobile/adsbot.html) Mozilla (iPhone; U; CPU iPhone OS 3 0 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile Safari",
-		// 谷歌图片UA
-		"Mozilla/5.0 (compatible; Googlebot-Image/1.0; +http://www.google.com/bot.html)",
-		// 搜狗User-Agent:
-		// 搜索 PC UA
-		"Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
-		// 搜狗图片 UA
-		"Sogou Pic Spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
-		// 搜狗新闻UA
-		"Sogou News Spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
-		// 搜狗视频UA
-		"Sogou Video Spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
-		// 360搜索User-Agent:
-		// 360搜索UA
-		"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0);",
-		// 360移动UA
-		"Mozilla/5.0 (Linux; U; Android 4.0.2; en-us; Galaxy Nexus Build/ICL53F) AppleWebKit/534.30 (KHTML, like Gecko)Version/4.0 Mobile Safari/534.30; 360Spider",
-		"Mozilla/5.0 (Linux; U; Android 4.0.2; en-us; Galaxy Nexus Build/ICL53F) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30; HaosouSpider",
-		// 360安全UA
-		"360spider (http://webscan.360.cn)",
-		// 必应User-Agent:
-		"Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
-		// 搜搜User-Agent:
-		// 搜搜UA:
-		"Sosospider+(+http://help.soso.com/webspider.htm)",
-		// 搜搜图片UA:
-		"Sosoimagespider+(+http://help.soso.com/soso-image-spider.htm)",
-		// 雅虎User-Agent:
-		// 雅虎中文UA:
-		"Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)",
-		// 雅虎英文UA:
-		"Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
-	}
+	random = rand.New(rand.NewSource(time.Now().UnixNano()))
 )

+ 73 - 0
internal/pkg/random_useragent/random_useragent.go

@@ -0,0 +1,73 @@
+package random_useragent
+
+import (
+	browser "github.com/allanpk716/fake-useragent"
+	"math/rand"
+	"time"
+)
+
+// RandomUserAgent returns browser.Random() when UserOrSearchEngine is true, else a random engineUAList entry.
+func RandomUserAgent(UserOrSearchEngine bool) string {
+	if !UserOrSearchEngine {
+		// From https://www.cnblogs.com/gengyufei/p/12641200.html
+		return engineUAList[random.Intn(len(engineUAList))]
+	}
+	return browser.Random()
+}
+
+var (
+	random       = rand.New(rand.NewSource(time.Now().UnixNano())) // package-local generator seeded from the current time
+	engineUAList = []string{
+		// Baidu search User-Agent:
+		// Baidu PC UA
+		"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
+		"Mozilla/5.0 (compatible; Baiduspider-render/2.0; +http://www.baidu.com/search/spider.html)",
+		// Baidu mobile UA. NOTE(review): the next two strings look like one UA split into two entries — confirm.
+		"Mozilla/5.0 (Linux;u;Android 4.2.2;zh-cn;) AppleWebKit/534.46 (KHTML,like Gecko) Version/5.1",
+		"Mobile Safari/10600.6.3 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
+		"Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1 (compatible; Baiduspider-render/2.0; +http://www.baidu.com/search/spider.html)",
+		// Baidu image UA
+		//"Baiduspider-image+(+http://www.baidu.com/search/spider.htm)",
+		// Shenma search User-Agent:
+		// Shenma search PC UA
+		"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 YisouSpider/5.0 Safari/537.36",
+		// Shenma search mobile UA
+		"Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) CriOS/56.0.2924.75 Mobile/14E5239e YisouSpider/5.0 Safari/602.1",
+		// Google User-Agent:
+		// Google PC UA
+		"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
+		// Google mobile UA
+		"AdsBot-Google-Mobile (+http://www.google.com/mobile/adsbot.html) Mozilla (iPhone; U; CPU iPhone OS 3 0 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile Safari",
+		// Google image UA
+		"Mozilla/5.0 (compatible; Googlebot-Image/1.0; +http://www.google.com/bot.html)",
+		// Sogou User-Agent:
+		// Sogou PC UA
+		"Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
+		// Sogou image UA
+		"Sogou Pic Spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
+		// Sogou news UA
+		"Sogou News Spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
+		// Sogou video UA
+		"Sogou Video Spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
+		// 360 Search User-Agent:
+		// 360 Search UA
+		"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0);",
+		// 360 mobile UA
+		"Mozilla/5.0 (Linux; U; Android 4.0.2; en-us; Galaxy Nexus Build/ICL53F) AppleWebKit/534.30 (KHTML, like Gecko)Version/4.0 Mobile Safari/534.30; 360Spider",
+		"Mozilla/5.0 (Linux; U; Android 4.0.2; en-us; Galaxy Nexus Build/ICL53F) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30; HaosouSpider",
+		// 360 security UA
+		"360spider (http://webscan.360.cn)",
+		// Bing User-Agent:
+		"Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
+		// Soso User-Agent:
+		// Soso UA:
+		"Sosospider+(+http://help.soso.com/webspider.htm)",
+		// Soso image UA:
+		"Sosoimagespider+(+http://help.soso.com/soso-image-spider.htm)",
+		// Yahoo User-Agent:
+		// Yahoo China UA:
+		"Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)",
+		// Yahoo English UA:
+		"Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
+	}
+)

+ 8 - 0
internal/pkg/rod_helper/rodHelper.go

@@ -4,6 +4,7 @@ import (
 	"context"
 	"crypto/tls"
 	"errors"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg/random_useragent"
 	"github.com/go-rod/rod"
 	"github.com/go-rod/rod/lib/launcher"
 	"github.com/go-rod/rod/lib/proto"
@@ -73,6 +74,9 @@ func NewPageNavigate(browser *rod.Browser, desURL string, timeOut time.Duration,
 	if err != nil {
 		return nil, err
 	}
+	page.MustSetUserAgent(&proto.NetworkSetUserAgentOverride{
+		UserAgent: random_useragent.RandomUserAgent(true),
+	})
 	page = page.Timeout(timeOut)
 	nowRetryTimes := 0
 	for nowRetryTimes <= maxRetryTimes {
@@ -86,6 +90,10 @@ func NewPageNavigate(browser *rod.Browser, desURL string, timeOut time.Duration,
 			return nil, err
 		} else if err == nil {
 			// 没有问题
+			err = page.WaitLoad()
+			if err != nil {
+				return page, nil
+			}
 			return page, nil
 		}
 	}