Browse Source

重构 rod_helper 解析 subhd 使用时候遇到的问题 fix #48

Signed-off-by: allan716 <[email protected]>
allan716 4 years ago
parent
commit
eff654e382

+ 1 - 0
.gitignore

@@ -34,3 +34,4 @@
 /TestData/sub_format_changer/test
 /internal/logic/sub_supplier/subhd/.rod
 /internal/logic/sub_supplier/subhd/config.yaml
+/internal/pkg/rod_helper/Logs

+ 4 - 4
TestCode/test_timeout.go

@@ -92,13 +92,13 @@ func goStep(inData InputData) error {
 				outDataChan <- outData
 			}()
 
-			browser, err := rod_helper.NewBrowser("")
+			browser, err := rod_helper.NewBrowser("", true)
 			if err != nil {
 				println(inData.Index, in, "rod_helper.NewBrowser", err)
 				return
 			}
 			defer func() {
-				browser.Close()
+				_ = browser.Close()
 				println(inData.Index, in, "browser closed")
 			}()
 
@@ -154,13 +154,13 @@ func goStep(inData InputData) error {
 }
 
 func oneStep(inData InputData) error {
-	browser, err := rod_helper.NewBrowser("")
+	browser, err := rod_helper.NewBrowser("", true)
 	if err != nil {
 		println(inData.Index, "rod_helper.NewBrowser", err)
 		return err
 	}
 	defer func() {
-		browser.Close()
+		_ = browser.Close()
 		println(inData.Index, "browser closed")
 	}()
 	page, err := rod_helper.NewPageNavigate(browser, "https://www.baidu.com", 10*time.Second, 5)

+ 2 - 1
cmd/chinesesubfinder/main.go

@@ -7,6 +7,7 @@ import (
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/hot_fix"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/notify_center"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg/rod_helper"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_formatter"
 	"github.com/allanpk716/ChineseSubFinder/internal/types"
 	"github.com/robfig/cron/v3"
@@ -85,7 +86,7 @@ func main() {
 	notify_center.Notify = notify_center.NewNotifyCenter(config.WhenSubSupplierInvalidWebHook)
 
 	// ReloadBrowser 提前把浏览器下载好
-	pkg.ReloadBrowser()
+	rod_helper.ReloadBrowser()
 
 	// 任务还没执行完,下一次执行时间到来,下一次执行就跳过不执行
 	c := cron.New(cron.WithChain(cron.SkipIfStillRunning(cron.DefaultLogger)))

+ 3 - 1
go.mod

@@ -14,8 +14,9 @@ require (
 	github.com/fsnotify/fsnotify v1.4.9 // indirect
 	github.com/go-creed/sat v1.0.3
 	github.com/go-resty/resty/v2 v2.6.0
-	github.com/go-rod/rod v0.97.2
+	github.com/go-rod/rod v0.101.7
 	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
+	github.com/google/uuid v1.3.0 // indirect
 	github.com/jinzhu/now v1.1.2
 	github.com/jonboulle/clockwork v0.2.2 // indirect
 	github.com/klauspost/compress v1.12.2 // indirect
@@ -43,6 +44,7 @@ require (
 	github.com/stretchr/testify v1.7.0 // indirect
 	github.com/t-tomalak/logrus-easy-formatter v0.0.0-20190827215021-c074f06c5816
 	github.com/ulikunitz/xz v0.5.10 // indirect
+	github.com/ysmood/gson v0.7.0 // indirect
 	golang.org/x/net v0.0.0-20210614182718-04defd469f4e
 	golang.org/x/sys v0.0.0-20210616094352-59db8d763f22 // indirect
 	golang.org/x/text v0.3.6

+ 11 - 0
go.sum

@@ -98,6 +98,8 @@ github.com/go-resty/resty/v2 v2.6.0 h1:joIR5PNLM2EFqqESUjCMGXrWmXNHEU9CEiK813oKY
 github.com/go-resty/resty/v2 v2.6.0/go.mod h1:PwvJS6hvaPkjtjNg9ph+VrSD92bi5Zq73w/BIH7cC3Q=
 github.com/go-rod/rod v0.97.2 h1:4AWtucf0fXKbdaEjNNhjIcdYXQyc4+yx8YYI73jyP5A=
 github.com/go-rod/rod v0.97.2/go.mod h1:DgPYd1ql/oCzGxrM5aiCcVM+kA4MFCJ+Mht7ZVBSiG0=
+github.com/go-rod/rod v0.101.7 h1:kbI5CNvcRhf7feybBln4xDutsM0mbsF0ENNZfKcF6WA=
+github.com/go-rod/rod v0.101.7/go.mod h1:N/zlT53CfSpq74nb6rOR0K8UF0SPUPBmzBnArrms+mY=
 github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
 github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
 github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4=
@@ -134,6 +136,8 @@ github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OI
 github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
 github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
 github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
+github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
+github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
 github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
 github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8=
@@ -327,12 +331,19 @@ github.com/ysmood/goob v0.3.0 h1:XZ51cZJ4W3WCoCiUktixzMIQF86W7G5VFL4QQ/Q2uS0=
 github.com/ysmood/goob v0.3.0/go.mod h1:S3lq113Y91y1UBf1wj1pFOxeahvfKkCk6mTWTWbDdWs=
 github.com/ysmood/got v0.9.3 h1:qx51X49jL/WAiqZzPTkPZ0zp5pTmrWJa4zYFTYo0gHI=
 github.com/ysmood/got v0.9.3/go.mod h1:pE1l4LOwOBhQg6A/8IAatkGp7uZjnalzrZolnlhhMgY=
+github.com/ysmood/got v0.15.1/go.mod h1:pE1l4LOwOBhQg6A/8IAatkGp7uZjnalzrZolnlhhMgY=
 github.com/ysmood/gotrace v0.2.0 h1:IkTC6rJREwXSaG8yWK+NFwIJGIsxA1DjC6/gxYyQttE=
 github.com/ysmood/gotrace v0.2.0/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM=
+github.com/ysmood/gotrace v0.2.2/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM=
 github.com/ysmood/gson v0.6.3 h1:4cU+5oOdsyundXHy00t99H0rLXLthuseD3x6W+xmCiU=
 github.com/ysmood/gson v0.6.3/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg=
+github.com/ysmood/gson v0.6.4/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg=
+github.com/ysmood/gson v0.7.0 h1:oQhY2FQtfy3+bgaNeqopd7NGAB6Me+UpG0n7oO4VDko=
+github.com/ysmood/gson v0.7.0/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg=
 github.com/ysmood/leakless v0.6.16 h1:PVS0EY7eFLzkUTqVJbST7cLz9V+58L+4TIlgxmCLi3k=
 github.com/ysmood/leakless v0.6.16/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ=
+github.com/ysmood/leakless v0.7.0 h1:XCGdaPExyoreoQd+H5qgxM3ReNbSPFsEXpSKwbXbwQw=
+github.com/ysmood/leakless v0.7.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
 go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=

+ 41 - 56
internal/logic/sub_supplier/subhd/subhd.go

@@ -2,7 +2,6 @@ package subhd
 
 import (
 	"bytes"
-	"errors"
 	"fmt"
 	"github.com/PuerkitoBio/goquery"
 	"github.com/Tnze/go.num/v2/zh"
@@ -18,8 +17,6 @@ import (
 	"github.com/allanpk716/ChineseSubFinder/internal/types/supplier"
 	"github.com/go-rod/rod"
 	"github.com/go-rod/rod/lib/launcher"
-	"github.com/go-rod/rod/lib/proto"
-
 	"github.com/nfnt/resize"
 	"github.com/sirupsen/logrus"
 	"image/jpeg"
@@ -79,12 +76,13 @@ func (s Supplier) GetSubListFromFile4Series(seriesInfo *series.SeriesInfo) ([]su
 
 	var browser *rod.Browser
 	// TODO 是用本地的 Browser 还是远程的,推荐是远程的
-	browser, err := rod_helper.NewBrowser(s.reqParam.HttpProxy)
+	browser, err := rod_helper.NewBrowser(s.reqParam.HttpProxy, true)
 	if err != nil {
 		return nil, err
 	}
-	defer browser.Close()
-
+	defer func() {
+		_ = browser.Close()
+	}()
 	var subInfos = make([]supplier.SubInfo, 0)
 	var subList = make([]HdListItem, 0)
 	for value := range seriesInfo.NeedDlSeasonDict {
@@ -193,12 +191,13 @@ func (s Supplier) getSubListFromKeyword4Movie(keyword string) ([]supplier.SubInf
 
 	var browser *rod.Browser
 	// TODO 是用本地的 Browser 还是远程的,推荐是远程的
-	browser, err := rod_helper.NewBrowser(s.reqParam.HttpProxy)
+	browser, err := rod_helper.NewBrowser(s.reqParam.HttpProxy, true)
 	if err != nil {
 		return nil, err
 	}
-	defer browser.Close()
-
+	defer func() {
+		_ = browser.Close()
+	}()
 	var subInfos []supplier.SubInfo
 	detailPageUrl, err := s.step0(browser, keyword)
 	if err != nil {
@@ -293,7 +292,9 @@ func (s Supplier) step0(browser *rod.Browser, keyword string) (string, error) {
 	if err != nil {
 		return "", err
 	}
-	defer page.Close()
+	defer func() {
+		_ = page.Close()
+	}()
 	// 是否有查找到的结果,至少要有结果。根据这里这样下面才能判断是分析失效了,还是就是没有结果而已
 	re := regexp.MustCompile(`共\s*(\d+)\s*条`)
 	matched := re.FindAllStringSubmatch(result, -1)
@@ -360,7 +361,9 @@ func (s Supplier) step1(browser *rod.Browser, detailPageUrl string, isMovieOrSer
 	if err != nil {
 		return nil, err
 	}
-	defer page.Close()
+	defer func() {
+		_ = page.Close()
+	}()
 	doc, err := goquery.NewDocumentFromReader(strings.NewReader(result))
 	if err != nil {
 		return nil, err
@@ -429,7 +432,9 @@ func (s Supplier) step2Ex(browser *rod.Browser, subDownloadPageUrl string) (*HdC
 	if err != nil {
 		return nil, err
 	}
-	defer page.Close()
+	defer func() {
+		_ = page.Close()
+	}()
 
 	doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageString))
 	if err != nil {
@@ -458,26 +463,26 @@ func (s Supplier) step2Ex(browser *rod.Browser, subDownloadPageUrl string) (*HdC
 
 func (s Supplier) JugDownloadBtn(doc *goquery.Document) (bool, string) {
 
-	const btnDown_0 = "#down"
-	const btnDown_1 = "button.down"
+	const btnDown0 = "#down"
+	const btnDown1 = "button.down"
 	// 是否有下载按钮
 	hasDownBtn := true
-	downBtn := doc.Find(btnDown_0)
+	downBtn := doc.Find(btnDown0)
 	if len(downBtn.Nodes) < 1 {
 		hasDownBtn = false
 	} else {
-		return true, btnDown_0
+		return true, btnDown0
 	}
 	// 另一种是否有下载按钮的判断
 	if hasDownBtn == false {
-		downBtn = doc.Find(btnDown_1)
+		downBtn = doc.Find(btnDown1)
 		if len(downBtn.Nodes) < 1 {
 			hasDownBtn = false
 		} else {
 			hasDownBtn = true
 		}
 	}
-	return hasDownBtn, btnDown_1
+	return hasDownBtn, btnDown1
 }
 
 func (s Supplier) downloadSubFile(browser *rod.Browser, page *rod.Page, hasWaterWall bool, btnElemenString string) (*HdContent, error) {
@@ -499,26 +504,26 @@ func (s Supplier) downloadSubFile(browser *rod.Browser, page *rod.Page, hasWater
 		}
 
 		// 点击下载按钮
-		var el *rod.Element
+		//var el *rod.Element
 		if hasWaterWall == true {
-			el = page.MustElement(TCode)
+			page.MustElement(TCode).MustClick()
 		} else {
-			el = page.MustElement(btnElemenString)
-		}
-		err = el.Click(proto.InputMouseButtonLeft)
-		if err != nil {
-			if strings.Contains(err.Error(), "element covered by") == true {
-				println("11")
-				var eel *rod.ErrCovered
-				if errors.As(err, &eel) == true {
-					eel.MustRemove()
-					err = el.Click(proto.InputMouseButtonLeft)
-					if err != nil {
-						print(123)
-					}
-				}
-			}
-		}
+			page.MustElement(btnElemenString).MustClick()
+		}
+		// 找到遮挡的信息块,尝试移除
+		//if err != nil {
+		//if strings.Contains(err.Error(), "element covered by") == true {
+		//	println("11")
+		//	var eel *rod.ErrCovered
+		//	if errors.As(err, &eel) == true {
+		//		eel.MustRemove()
+		//		err = el.Click(proto.InputMouseButtonLeft)
+		//		if err != nil {
+		//			print(123)
+		//		}
+		//	}
+		//}
+		//}
 		// 过墙
 		if hasWaterWall == true {
 			s.passWaterWall(page)
@@ -633,26 +638,6 @@ search:
 	}
 }
 
-//func (s Supplier) httpGet(inputUrl string) (string, error) {
-//	s.reqParam.Referer = inputUrl
-//	httpClient := pkg.NewHttpClient(s.reqParam)
-//	resp, err := httpClient.R().Get(inputUrl)
-//	if err != nil {
-//		return "", err
-//	}
-//	recvText := resp.String()
-//	//搜索验证 点击继续搜索
-//	if strings.Contains(recvText, "搜索验证") || strings.Contains(recvText, "搜索频率") {
-//		s.log.Debugln("搜索验证 or 搜索频率 reload", inputUrl)
-//		// 每次搜索间隔在 30-40s
-//		time.Sleep(pkg.RandomSecondDuration(30, 40))
-//		return s.httpGet(inputUrl)
-//	}
-//	// 每次搜索间隔在 30-40s
-//	time.Sleep(pkg.RandomSecondDuration(30, 40))
-//	return recvText, nil
-//}
-
 func (s Supplier) httpGetFromBrowser(browser *rod.Browser, inputUrl string) (string, *rod.Page, error) {
 
 	page, err := rod_helper.NewPageNavigate(browser, inputUrl, s.tt, 5)

+ 7 - 8
internal/pkg/archive_helper/unarchiveFile.go

@@ -3,16 +3,15 @@ package archive_helper
 import (
 	"archive/zip"
 	"bytes"
-	"github.com/bodgit/sevenzip"
-	"golang.org/x/text/encoding/simplifiedchinese"
-	"golang.org/x/text/transform"
-	"io/ioutil"
-
 	"compress/flate"
 	"errors"
+	"github.com/bodgit/sevenzip"
 	"github.com/go-rod/rod/lib/utils"
 	"github.com/mholt/archiver/v3"
+	"golang.org/x/text/encoding/simplifiedchinese"
+	"golang.org/x/text/transform"
 	"io"
+	"io/ioutil"
 	"path"
 	"path/filepath"
 	"strings"
@@ -28,7 +27,7 @@ func UnArchiveFile(fileFullPath, desRootPath string) error {
 			MkdirAll:               true,
 			SelectiveCompression:   true,
 			ContinueOnError:        false,
-			OverwriteExisting:      false,
+			OverwriteExisting:      true,
 			ImplicitTopLevelFolder: false,
 		}
 		err := z.Walk(fileFullPath, func(f archiver.File) error {
@@ -51,7 +50,7 @@ func UnArchiveFile(fileFullPath, desRootPath string) error {
 		z := archiver.Tar{
 			MkdirAll:               true,
 			ContinueOnError:        false,
-			OverwriteExisting:      false,
+			OverwriteExisting:      true,
 			ImplicitTopLevelFolder: false,
 			StripComponents:        1,
 		}
@@ -72,7 +71,7 @@ func UnArchiveFile(fileFullPath, desRootPath string) error {
 		z := archiver.Rar{
 			MkdirAll:               true,
 			ContinueOnError:        false,
-			OverwriteExisting:      false,
+			OverwriteExisting:      true,
 			ImplicitTopLevelFolder: false,
 			StripComponents:        1,
 		}

+ 12 - 1
internal/pkg/pass_water_wall/pass_water_wall.go

@@ -16,11 +16,22 @@ import (
 func SimulationTest() {
 	// 具体的应用见 subhd 的解析器
 	// 感谢 https://www.bigs3.com/article/gorod-crack-slider-captcha/
-	page, err := rod_helper.NewBrowserLoadPage("https://007.qq.com/online.html", "", 10*time.Second, 5)
+	browser, err := rod_helper.NewBrowser("", false)
 	if err != nil {
 		println(err.Error())
 		return
 	}
+	defer func() {
+		_ = browser.Close()
+	}()
+	page, err := rod_helper.NewPageNavigate(browser, "https://007.qq.com/online.html", 10*time.Second, 5)
+	if err != nil {
+		println(err.Error())
+		return
+	}
+	defer func() {
+		_ = page.Close()
+	}()
 	// 切换到可疑用户
 	page.MustElement("#app > section.wp-on-online > div > div > div > div.wp-on-box.col-md-5.col-md-offset-1 > div.wp-onb-tit > a:nth-child(2)").MustClick()
 	//模擬Click點擊 "體驗驗證碼" 按鈕

BIN
internal/pkg/rod_helper/assets/adblock_v4.34.0.zip


+ 87 - 156
internal/pkg/rod_helper/rodHelper.go

@@ -2,31 +2,55 @@ package rod_helper
 
 import (
 	"context"
-	"crypto/tls"
+	_ "embed"
 	"errors"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/random_useragent"
 	"github.com/go-rod/rod"
 	"github.com/go-rod/rod/lib/launcher"
 	"github.com/go-rod/rod/lib/proto"
-	"net/http"
-	"net/url"
+	"github.com/mholt/archiver/v3"
+	"os"
+	"path"
+	"path/filepath"
+	"sync"
 	"time"
 )
 
 /**
- * @Description: 			新建一个支持代理的 browser 对象
+ * @Description: 			新建一个支持代理的 browser 对象,使用完毕后,需要删除 adblockSavePath 文件夹
  * @param httpProxyURL		http://127.0.0.1:10809
  * @return *rod.Browser
  * @return error
  */
-func NewBrowser(httpProxyURL string) (*rod.Browser, error) {
+func NewBrowser(httpProxyURL string, loadAdblock bool) (*rod.Browser, error) {
+
+	var err error
+
+	once.Do(func() {
+		adblockSavePath, err = releaseAdblock()
+		if err != nil {
+			log_helper.GetLogger().Errorln("releaseAdblock", err)
+		}
+	})
 	var browser *rod.Browser
-	err := rod.Try(func() {
-		u := launcher.New().
-			Proxy(httpProxyURL).
-			MustLaunch()
+	err = rod.Try(func() {
+		purl := ""
+		if loadAdblock == true {
+			purl = launcher.New().
+				Delete("disable-extensions").
+				Set("load-extension", adblockSavePath).
+				Proxy(httpProxyURL).
+				Headless(false). // 插件模式需要设置这个
+				MustLaunch()
+		} else {
+			purl = launcher.New().
+				Proxy(httpProxyURL).
+				MustLaunch()
+		}
 
-		browser = rod.New().ControlURL(u).MustConnect()
+		browser = rod.New().ControlURL(purl).MustConnect()
 	})
 	if err != nil {
 		return nil, err
@@ -48,7 +72,7 @@ func NewBrowserFromDocker(httpProxyURL, remoteDockerURL string) (*rod.Browser, e
 	var browser *rod.Browser
 
 	err := rod.Try(func() {
-		l := launcher.MustNewRemote(remoteDockerURL)
+		l := launcher.MustNewManaged(remoteDockerURL)
 		u := l.Proxy(httpProxyURL).MustLaunch()
 		l.Headless(false).XVFB()
 		browser = rod.New().Client(l.Client()).ControlURL(u).MustConnect()
@@ -60,17 +84,9 @@ func NewBrowserFromDocker(httpProxyURL, remoteDockerURL string) (*rod.Browser, e
 	return browser, nil
 }
 
-func NewPage(browser *rod.Browser) (*rod.Page, error) {
-	page, err := browser.Page(proto.TargetCreateTarget{URL: ""})
-	if err != nil {
-		return nil, err
-	}
-	return page, err
-}
-
 func NewPageNavigate(browser *rod.Browser, desURL string, timeOut time.Duration, maxRetryTimes int) (*rod.Page, error) {
 
-	page, err := NewPage(browser)
+	page, err := newPage(browser)
 	if err != nil {
 		return nil, err
 	}
@@ -100,163 +116,78 @@ func NewPageNavigate(browser *rod.Browser, desURL string, timeOut time.Duration,
 	return nil, err
 }
 
-func PageNavigate(page *rod.Page, desURL string, timeOut time.Duration, maxRetryTimes int) (*rod.Page, error) {
-	var err error
-	page = page.Timeout(timeOut)
-	nowRetryTimes := 0
-	for nowRetryTimes <= maxRetryTimes {
-		err = rod.Try(func() {
-			wait := page.MustWaitNavigation()
-			page.MustNavigate(desURL)
-			wait()
-		})
-		if errors.Is(err, context.DeadlineExceeded) {
-			// 超时
-			return nil, err
-		} else if err == nil {
-			// 没有问题
-			return page, nil
-		}
-	}
-	return nil, err
-}
-
-/**
- * @Description: 			访问目标 Url,返回 page,只是这个 page 有效,如果再次出发其他的事件无效
- * @param desURL			目标 Url
- * @param httpProxyURL		http://127.0.0.1:10809
- * @param timeOut			超时时间
- * @param maxRetryTimes		当是非超时 err 的时候,最多可以重试几次
- * @return *rod.Page
- * @return error
- */
-func NewBrowserLoadPage(desURL string, httpProxyURL string, timeOut time.Duration, maxRetryTimes int) (*rod.Page, error) {
-	browser, err := NewBrowser(httpProxyURL)
+// ReloadBrowser 提前把浏览器下载好
+func ReloadBrowser() {
+	newBrowser, err := NewBrowser("", false)
 	if err != nil {
-		return nil, err
+		return
 	}
-	page, err := browser.Page(proto.TargetCreateTarget{URL: ""})
+	defer func() {
+		_ = newBrowser.Close()
+	}()
+	page, err := NewPageNavigate(newBrowser, "https://www.baidu.com", 30*time.Second, 5)
 	if err != nil {
-		return nil, err
+		return
 	}
-	page = page.Timeout(timeOut)
-	nowRetryTimes := 0
-	for nowRetryTimes <= maxRetryTimes {
-		err = rod.Try(func() {
-			wait := page.MustWaitNavigation()
-			page.MustNavigate(desURL)
-			wait()
-		})
-		if errors.Is(err, context.DeadlineExceeded) {
-			// 超时
-			return nil, err
-		} else if err == nil {
-			// 没有问题
-			return page, nil
+	defer func() {
+		_ = page.Close()
+	}()
+}
 
-		}
+func CleearExtensionsCache() {
+	if adblockSavePath != "" {
+		_ = os.RemoveAll(adblockSavePath) // best-effort cleanup; the unpacked extension is a non-empty directory, which os.Remove cannot delete
+	}
-	return nil, err
+}
 
-/**
- * @Description: 			访问目标 Url,返回 page,只是这个 page 有效,如果再次出发其他的事件无效
- * @param desURL			目标 Url
- * @param httpProxyURL		http://127.0.0.1:10809
- * @param timeOut			超时时间
- * @param maxRetryTimes		当是非超时 err 的时候,最多可以重试几次
- * @return *rod.Page
- * @return error
- */
-func NewBrowserLoadPageFromRemoteDocker(desURL string, httpProxyURL, remoteDockerURL string, timeOut time.Duration, maxRetryTimes int) (*rod.Page, error) {
-	browser, err := NewBrowserFromDocker(httpProxyURL, remoteDockerURL)
-	if err != nil {
-		return nil, err
-	}
+func newPage(browser *rod.Browser) (*rod.Page, error) {
 	page, err := browser.Page(proto.TargetCreateTarget{URL: ""})
 	if err != nil {
 		return nil, err
 	}
-	page = page.Timeout(timeOut)
-	nowRetryTimes := 0
-	for nowRetryTimes <= maxRetryTimes {
-		err = rod.Try(func() {
-			wait := page.MustWaitNavigation()
-			page.MustNavigate(desURL)
-			wait()
-		})
-		if errors.Is(err, context.DeadlineExceeded) {
-			// 超时
-			return nil, err
-		} else if err == nil {
-			// 没有问题
-			break
-		}
-	}
-
-	return page, nil
+	return page, err
 }
 
-/**
- * @Description:			访问目标 Url,返回 page,只是这个 page 有效,如果再次出发其他的事件无效
- * @param desURL			目标 Url
- * @param httpProxyURL		http://127.0.0.1:10809
- * @param timeOut			超时时间
- * @param maxRetryTimes		当是非超时 err 的时候,最多可以重试几次
- * @return *rod.Page
- * @return error
- */
-func NewBrowserLoadPageByHijackRequests(desURL string, httpProxyURL string, timeOut time.Duration, maxRetryTimes int) (*rod.Page, error) {
+// releaseAdblock 从程序中释放 adblock 插件出来到本地路径
+func releaseAdblock() (string, error) {
 
-	var page *rod.Page
-	var err error
-	// 创建一个 page
-	browser := rod.New()
-	err = browser.Connect()
+	adblockFolderPath := filepath.Join(os.TempDir(), "chinesesubfinder")
+	err := os.MkdirAll(path.Join(adblockFolderPath), os.ModePerm)
 	if err != nil {
-		return nil, err
+		return "", err
 	}
-	page, err = browser.Page(proto.TargetCreateTarget{URL: ""})
+
+	_ = pkg.ClearFolder(adblockFolderPath)
+
+	outZipFileFPath := path.Join(adblockFolderPath, "adblock.zip")
+	adblockZipFile, err := os.Create(outZipFileFPath)
 	if err != nil {
-		return nil, err
+		return "", err
 	}
-	page = page.Timeout(timeOut)
-	// 设置代理
-	router := page.HijackRequests()
-	defer router.Stop()
-
-	err = rod.Try(func() {
-		router.MustAdd("*", func(ctx *rod.Hijack) {
-			px, _ := url.Parse(httpProxyURL)
-			ctx.LoadResponse(&http.Client{
-				Transport: &http.Transport{
-					Proxy:           http.ProxyURL(px),
-					TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
-				},
-				Timeout: timeOut,
-			}, true)
-		})
-	})
+	defer func() {
+		_ = adblockZipFile.Close()
+		_ = os.Remove(outZipFileFPath)
+	}()
+	_, err = adblockZipFile.Write(adblockFolder)
 	if err != nil {
-		return nil, err
+		return "", err
 	}
-	go router.Run()
 
-	nowRetryTimes := 0
-	for nowRetryTimes <= maxRetryTimes {
-		err = rod.Try(func() {
-			page.MustNavigate(desURL).MustWaitLoad()
-		})
-		if errors.Is(err, context.DeadlineExceeded) {
-			// 超时
-			return nil, err
-		} else if err == nil {
-			// 没有问题
-			break
-		}
-		time.Sleep(time.Second)
-		nowRetryTimes++
+	desPath := path.Join(adblockFolderPath, "RunAdblock")
+	r := archiver.NewZip()
+	err = r.Unarchive(outZipFileFPath, desPath)
+	if err != nil {
+		return "", err
 	}
-
-	return page, nil
+	return path.Join(desPath, adblockInsideName), err
 }
+
+const adblockInsideName = "adblock"
+
+var once sync.Once
+
+// 这个文件内有一个子文件夹 adblock ,制作的时候务必注意
+//go:embed assets/adblock_v4.34.0.zip
+var adblockFolder []byte
+
+var adblockSavePath string

+ 15 - 45
internal/pkg/rod_helper/rodHelper_test.go

@@ -3,61 +3,25 @@ package rod_helper
 import (
 	"github.com/go-rod/rod/lib/proto"
 	"testing"
-	"time"
 )
 
-func TestLoadPage(t *testing.T) {
-	desURL := "https://www.wikipedia.org/"
-	httpProxyURL := "http://127.0.0.1:10809"
-	_, err := NewBrowserLoadPage(desURL, httpProxyURL, 10*time.Second, 5)
-	if err != nil {
-		t.Fatal(err)
-	}
-}
-
-func TestLoadPageFromRemoteDocker(t *testing.T) {
-	desURL := "https://www.wikipedia.org/"
-	httpProxyURL := "http://127.0.0.1:10809"
-	remoteDockerURL := "ws://192.168.50.135:9222"
-	_, err := NewBrowserLoadPageFromRemoteDocker(desURL, httpProxyURL, remoteDockerURL, 10*time.Second, 5)
-	if err != nil {
-		t.Fatal(err)
-	}
-}
-
-func TestLoadPageByHijackRequests(t *testing.T) {
-	desURL := "https://www.wikipedia.org/"
-	httpProxyURL := "http://127.0.0.1:10809"
-	_, err := NewBrowserLoadPageByHijackRequests(desURL, httpProxyURL, 10*time.Second, 5)
-	if err != nil {
-		t.Fatal(err)
-	}
-}
-
 func TestNewBrowser(t *testing.T) {
 	desURL := "https://www.wikipedia.org/"
 	httpProxyURL := "http://127.0.0.1:10809"
-	browser, err := NewBrowser(httpProxyURL)
+	browser, err := NewBrowser(httpProxyURL, true)
 	if err != nil {
 		t.Fatal(err)
 	}
-	_, err = browser.Page(proto.TargetCreateTarget{URL: desURL})
+	defer func() {
+		_ = browser.Close()
+	}()
+	page, err := browser.Page(proto.TargetCreateTarget{URL: desURL})
 	if err != nil {
 		t.Fatal(err)
 	}
-	//err = rod.Try(func() {
-	//	page.MustElement("#searchInput").MustInput("earth")
-	//	page.MustElement("#search-form > fieldset > button").MustClick()
-	//
-	//	el := page.MustElement("#mw-content-text > div.mw-parser-output > table.infobox > tbody > tr:nth-child(1) > td > a > img")
-	//	err = utils.OutputFile("b.png", el.MustResource())
-	//	if err != nil {
-	//		t.Fatal(err)
-	//	}
-	//})
-	//if err != nil {
-	//	t.Fatal(err)
-	//}
+	defer func() {
+		_ = page.Close()
+	}()
 }
 
 func TestNewBrowserFromDocker(t *testing.T) {
@@ -69,8 +33,14 @@ func TestNewBrowserFromDocker(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	_, err = browser.Page(proto.TargetCreateTarget{URL: desURL})
+	defer func() {
+		_ = browser.Close()
+	}()
+	page, err := browser.Page(proto.TargetCreateTarget{URL: desURL})
 	if err != nil {
 		t.Fatal(err)
 	}
+	defer func() {
+		_ = page.Close()
+	}()
 }

+ 0 - 12
internal/pkg/util.go

@@ -4,7 +4,6 @@ import (
 	"fmt"
 	"github.com/allanpk716/ChineseSubFinder/internal/common"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
-	"github.com/allanpk716/ChineseSubFinder/internal/pkg/rod_helper"
 	"github.com/allanpk716/ChineseSubFinder/internal/types"
 	browser "github.com/allanpk716/fake-useragent"
 	"github.com/go-resty/resty/v2"
@@ -19,7 +18,6 @@ import (
 	"runtime"
 	"strconv"
 	"strings"
-	"time"
 )
 
 // NewHttpClient 新建一个 resty 的对象
@@ -303,16 +301,6 @@ func GetEpisodeKeyName(season, eps int) string {
 	return "S" + strconv.Itoa(season) + "E" + strconv.Itoa(eps)
 }
 
-// ReloadBrowser 提前把浏览器下载好
-func ReloadBrowser() {
-	// TODO 考虑后续把浏览器爬虫的逻辑剥离出来,需要替换这个到远程的 Docker 执行
-	page, err := rod_helper.NewBrowserLoadPage("https://www.baidu.com", "", 300*time.Second, 2)
-	if err != nil {
-		return
-	}
-	defer page.Close()
-}
-
 // CopyFile copies a single file from src to dst
 func CopyFile(src, dst string) error {
 	var err error