Browse Source

新增 go-rod 封装

Signed-off-by: 716 <[email protected]>
716 4 years ago
parent
commit
581ffa0d63
6 changed files with 347 additions and 47 deletions
  1. 1 0
      .gitignore
  2. 254 0
      common/rodHelper.go
  3. 76 0
      common/rodHelper_test.go
  4. 1 2
      go.mod
  5. 10 10
      go.sum
  6. 5 35
      sub_supplier/subhd/subhd.go

+ 1 - 0
.gitignore

@@ -1 +1,2 @@
 /.idea
+/sub_supplier/subhd/result.png

+ 254 - 0
common/rodHelper.go

@@ -0,0 +1,254 @@
+package common
+
+import (
+	"context"
+	"crypto/tls"
+	"errors"
+	"github.com/go-rod/rod"
+	"github.com/go-rod/rod/lib/launcher"
+	"github.com/go-rod/rod/lib/proto"
+	"net/http"
+	"net/url"
+	"time"
+)
+
+/**
+ * @Description: 			Launch a local browser that uses the given proxy and connect to it.
+ * @param httpProxyURL		proxy address handed to the launcher, e.g. http://127.0.0.1:10809
+ * @return *rod.Browser		the connected browser, nil when launching or connecting failed
+ * @return error			the failure raised by MustLaunch / MustConnect, as reported by rod.Try
+ */
+func NewBrowser(httpProxyURL string) (*rod.Browser, error) {
+	var connected *rod.Browser
+
+	// The Must* helpers panic on failure; running them under rod.Try
+	// hands that failure back as an ordinary error value.
+	launchAndConnect := func() {
+		controlURL := launcher.New().Proxy(httpProxyURL).MustLaunch()
+		connected = rod.New().ControlURL(controlURL).MustConnect()
+	}
+	if err := rod.Try(launchAndConnect); err != nil {
+		return nil, err
+	}
+
+	return connected, nil
+}
+
+/**
+ * @Description: 			Create a browser object through a remote launcher (e.g. one running in Docker) and connect to it.
+ * @param httpProxyURL		proxy address set on the launcher, e.g. http://127.0.0.1:10809
+ * @param remoteDockerURL	address of the remote launcher service passed to launcher.MustNewRemote
+ * @return *rod.Browser		the connected browser, nil when any Must* step failed
+ * @return error			the failure raised inside rod.Try
+ */
+func NewBrowserFromDocker(httpProxyURL, remoteDockerURL string) (*rod.Browser, error) {
+	var browser *rod.Browser
+
+	err := rod.Try(func() {
+		l := launcher.MustNewRemote(remoteDockerURL)
+		u := l.Proxy(httpProxyURL).MustLaunch()
+		l.Headless(false).XVFB() // NOTE(review): these flags are set after MustLaunch was already called; presumably they still reach the remote side through l.Client() below — confirm against the go-rod version in go.mod
+		browser = rod.New().Client(l.Client()).ControlURL(u).MustConnect()
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	return browser, nil
+}
+
+/**
+ * @Description: 			Open a new page with an empty URL in the given browser.
+ * @param browser			connected browser that creates the page
+ * @return *rod.Page		the new page, nil when creation failed
+ * @return error			the error returned by browser.Page
+ */
+func NewPage(browser *rod.Browser) (*rod.Page, error) {
+	blankTarget := proto.TargetCreateTarget{URL: ""}
+	created, createErr := browser.Page(blankTarget)
+	if createErr == nil {
+		return created, nil
+	}
+	return nil, createErr
+}
+
+/**
+ * @Description: 			Open a new page in browser and navigate it to desURL, retrying non-timeout failures.
+ * @param browser			connected browser that creates the page
+ * @param desURL			target URL
+ * @param timeOut			timeout applied once to the page via page.Timeout, so it is shared by all attempts
+ * @param maxRetryTimes		how many extra attempts are made after a non-timeout error; negative values are treated as 0
+ * @return *rod.Page		the navigated page (still carrying the timeout context), nil on failure
+ * @return error			the timeout error, or the last navigation error once the retries are used up
+ */
+func NewPageNavigate(browser *rod.Browser, desURL string, timeOut time.Duration, maxRetryTimes int) (*rod.Page, error) {
+	rawPage, err := NewPage(browser)
+	if err != nil {
+		return nil, err
+	}
+	if maxRetryTimes < 0 {
+		// Always try at least once instead of returning (nil, nil).
+		maxRetryTimes = 0
+	}
+
+	page := rawPage.Timeout(timeOut)
+	// The attempt counter must advance on every failure, otherwise a
+	// persistent non-timeout error would keep this loop spinning forever.
+	for attempt := 0; attempt <= maxRetryTimes; attempt++ {
+		err = rod.Try(func() {
+			wait := page.MustWaitNavigation()
+			page.MustNavigate(desURL)
+			wait()
+		})
+		if err == nil {
+			return page, nil
+		}
+		if errors.Is(err, context.DeadlineExceeded) {
+			// Timed out: the deadline is shared, so retrying cannot succeed.
+			break
+		}
+	}
+
+	// The caller never receives the page on failure, so release it here.
+	// Best effort only: the navigation error is the one worth reporting.
+	_ = rawPage.Close()
+	return nil, err
+}
+
+/**
+ * @Description: 			Navigate an existing page to desURL, retrying non-timeout failures.
+ * @param page				page to navigate; it stays owned by the caller and is never closed here
+ * @param desURL			target URL
+ * @param timeOut			timeout applied once to the page via page.Timeout, so it is shared by all attempts
+ * @param maxRetryTimes		how many extra attempts are made after a non-timeout error; negative values are treated as 0
+ * @return *rod.Page		the navigated page (carrying the timeout context), nil on failure
+ * @return error			the timeout error, or the last navigation error once the retries are used up
+ */
+func PageNavigate(page *rod.Page, desURL string, timeOut time.Duration, maxRetryTimes int) (*rod.Page, error) {
+	if maxRetryTimes < 0 {
+		// Always try at least once instead of returning (nil, nil).
+		maxRetryTimes = 0
+	}
+
+	var err error
+	page = page.Timeout(timeOut)
+	// The attempt counter must advance on every failure, otherwise a
+	// persistent non-timeout error would keep this loop spinning forever.
+	for attempt := 0; attempt <= maxRetryTimes; attempt++ {
+		err = rod.Try(func() {
+			wait := page.MustWaitNavigation()
+			page.MustNavigate(desURL)
+			wait()
+		})
+		if err == nil {
+			return page, nil
+		}
+		if errors.Is(err, context.DeadlineExceeded) {
+			// Timed out: the deadline is shared, so retrying cannot succeed.
+			return nil, err
+		}
+	}
+	return nil, err
+}
+
+/**
+ * @Description: 			Launch a local browser behind the proxy, open a page and navigate it to desURL.
+ *							Only the returned page is usable; it keeps the timeout context, so events
+ *							triggered on it after the deadline will fail.
+ * @param desURL			target URL
+ * @param httpProxyURL		http://127.0.0.1:10809
+ * @param timeOut			timeout applied once to the page via page.Timeout, so it is shared by all attempts
+ * @param maxRetryTimes		how many extra attempts are made after a non-timeout error; negative values are treated as 0
+ * @return *rod.Page		the navigated page, nil on failure
+ * @return error			the launch/page error, the timeout error, or the last navigation error
+ */
+func NewBrowserLoadPage(desURL string, httpProxyURL string, timeOut time.Duration, maxRetryTimes int) (*rod.Page, error) {
+	browser, err := NewBrowser(httpProxyURL)
+	if err != nil {
+		return nil, err
+	}
+	if maxRetryTimes < 0 {
+		// Always try at least once instead of returning (nil, nil).
+		maxRetryTimes = 0
+	}
+
+	page, err := browser.Page(proto.TargetCreateTarget{URL: ""})
+	if err == nil {
+		page = page.Timeout(timeOut)
+		// The attempt counter must advance on every failure, otherwise a
+		// persistent non-timeout error would keep this loop spinning forever.
+		for attempt := 0; attempt <= maxRetryTimes; attempt++ {
+			err = rod.Try(func() {
+				wait := page.MustWaitNavigation()
+				page.MustNavigate(desURL)
+				wait()
+			})
+			if err == nil {
+				return page, nil
+			}
+			if errors.Is(err, context.DeadlineExceeded) {
+				// Timed out: the deadline is shared, so retrying cannot succeed.
+				break
+			}
+		}
+	}
+
+	// Nothing is handed back on failure, so shut the browser down here rather
+	// than leaking it. Best effort only: the original error is what matters.
+	_ = browser.Close()
+	return nil, err
+}
+
+/**
+ * @Description: 			Create a browser through a remote launcher, open a page and navigate it to desURL.
+ *							Only the returned page is usable; it keeps the timeout context, so events
+ *							triggered on it after the deadline will fail.
+ * @param desURL			target URL
+ * @param httpProxyURL		http://127.0.0.1:10809
+ * @param remoteDockerURL	address of the remote launcher service
+ * @param timeOut			timeout applied once to the page via page.Timeout, so it is shared by all attempts
+ * @param maxRetryTimes		how many extra attempts are made after a non-timeout error; negative values are treated as 0
+ * @return *rod.Page		the navigated page, nil on failure
+ * @return error			the launch/page error, the timeout error, or the last navigation error
+ */
+func NewBrowserLoadPageFromRemoteDocker(desURL string, httpProxyURL, remoteDockerURL string, timeOut time.Duration, maxRetryTimes int) (*rod.Page, error) {
+	browser, err := NewBrowserFromDocker(httpProxyURL, remoteDockerURL)
+	if err != nil {
+		return nil, err
+	}
+	if maxRetryTimes < 0 {
+		// Always try at least once instead of skipping the navigation.
+		maxRetryTimes = 0
+	}
+
+	page, err := browser.Page(proto.TargetCreateTarget{URL: ""})
+	if err == nil {
+		page = page.Timeout(timeOut)
+		// The attempt counter must advance on every failure, otherwise a
+		// persistent non-timeout error would keep this loop spinning forever.
+		for attempt := 0; attempt <= maxRetryTimes; attempt++ {
+			err = rod.Try(func() {
+				wait := page.MustWaitNavigation()
+				page.MustNavigate(desURL)
+				wait()
+			})
+			if err == nil {
+				return page, nil
+			}
+			if errors.Is(err, context.DeadlineExceeded) {
+				// Timed out: the deadline is shared, so retrying cannot succeed.
+				break
+			}
+		}
+	}
+
+	// Report the last error instead of returning a page that never loaded,
+	// and release the browser since nothing is handed back.
+	// Best effort only: the original error is what matters.
+	_ = browser.Close()
+	return nil, err
+}
+
+/**
+ * @Description:			Load desURL in a default-launched browser while routing every request of the
+ *							page through an HTTP client that uses the proxy (request hijacking).
+ *							The hijack router is stopped when this function returns, so only the
+ *							initial page load goes through the proxy; events triggered later do not.
+ * @param desURL			target URL
+ * @param httpProxyURL		http://127.0.0.1:10809; an empty string means "no proxy"
+ * @param timeOut			timeout for the page (shared by all attempts) and for each proxied HTTP request
+ * @param maxRetryTimes		how many extra attempts are made after a non-timeout error; negative values are treated as 0
+ * @return *rod.Page		the loaded page, nil on failure
+ * @return error			proxy URL / connect / page error, the timeout error, or the last navigation error
+ */
+func NewBrowserLoadPageByHijackRequests(desURL string, httpProxyURL string, timeOut time.Duration, maxRetryTimes int) (*rod.Page, error) {
+	// Resolve the proxy once up front so a malformed URL is reported to the
+	// caller instead of being silently ignored on every hijacked request.
+	var proxyFunc func(*http.Request) (*url.URL, error)
+	if httpProxyURL != "" {
+		px, err := url.Parse(httpProxyURL)
+		if err != nil {
+			return nil, err
+		}
+		proxyFunc = http.ProxyURL(px)
+	}
+	// One client is shared by all hijacked requests.
+	client := &http.Client{
+		Transport: &http.Transport{
+			Proxy: proxyFunc,
+			// NOTE(review): certificate verification is disabled for every
+			// proxied request; keep only if the proxy really requires it.
+			TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
+		},
+		Timeout: timeOut,
+	}
+	if maxRetryTimes < 0 {
+		// Always try at least once instead of skipping the navigation.
+		maxRetryTimes = 0
+	}
+
+	// Create a page.
+	browser := rod.New()
+	if err := browser.Connect(); err != nil {
+		return nil, err
+	}
+	loaded := false
+	// Registered before the router's defer so it runs after router.Stop().
+	defer func() {
+		if !loaded {
+			// Nothing is handed back on failure; do not leak the browser.
+			// Best effort only: the original error is what matters.
+			_ = browser.Close()
+		}
+	}()
+
+	page, err := browser.Page(proto.TargetCreateTarget{URL: ""})
+	if err != nil {
+		return nil, err
+	}
+	page = page.Timeout(timeOut)
+
+	// Set up the proxy by hijacking every request of the page.
+	router := page.HijackRequests()
+	defer router.Stop()
+
+	err = rod.Try(func() {
+		router.MustAdd("*", func(ctx *rod.Hijack) {
+			ctx.LoadResponse(client, true)
+		})
+	})
+	if err != nil {
+		return nil, err
+	}
+	go router.Run()
+
+	for attempt := 0; attempt <= maxRetryTimes; attempt++ {
+		if attempt > 0 {
+			// Short pause between retries.
+			time.Sleep(time.Second)
+		}
+		err = rod.Try(func() {
+			page.MustNavigate(desURL).MustWaitLoad()
+		})
+		if err == nil {
+			loaded = true
+			return page, nil
+		}
+		if errors.Is(err, context.DeadlineExceeded) {
+			// Timed out: the deadline is shared, so retrying cannot succeed.
+			return nil, err
+		}
+	}
+
+	// Retries used up: report the last error instead of a page that never loaded.
+	return nil, err
+}

+ 76 - 0
common/rodHelper_test.go

@@ -0,0 +1,76 @@
+package common
+
+import (
+	"github.com/go-rod/rod/lib/proto"
+	"testing"
+	"time"
+)
+
+func TestLoadPage(t *testing.T) {
+	desURL := "https://www.wikipedia.org/"
+	httpProxyURL := "http://127.0.0.1:10809"
+	_, err := NewBrowserLoadPage(desURL, httpProxyURL, 10*time.Second, 5)
+	if err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestLoadPageFromRemoteDocker(t *testing.T) {
+	desURL := "https://www.wikipedia.org/"
+	httpProxyURL := "http://127.0.0.1:10809"
+	remoteDockerURL := "ws://192.168.50.135:9222"
+	_, err := NewBrowserLoadPageFromRemoteDocker(desURL, httpProxyURL, remoteDockerURL, 10*time.Second, 5)
+	if err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestLoadPageByHijackRequests(t *testing.T) {
+	desURL := "https://www.wikipedia.org/"
+	httpProxyURL := "http://127.0.0.1:10809"
+	_, err := NewBrowserLoadPageByHijackRequests(desURL, httpProxyURL, 10*time.Second, 5)
+	if err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestNewBrowser(t *testing.T) {
+	desURL := "https://www.wikipedia.org/"
+	httpProxyURL := "http://127.0.0.1:10809"
+	browser, err := NewBrowser(httpProxyURL)
+	if err != nil {
+		t.Fatal(err)
+	}
+	_, err = browser.Page(proto.TargetCreateTarget{URL: desURL})
+	if err != nil {
+		t.Fatal(err)
+	}
+	//err = rod.Try(func() {
+	//	page.MustElement("#searchInput").MustInput("earth")
+	//	page.MustElement("#search-form > fieldset > button").MustClick()
+	//
+	//	el := page.MustElement("#mw-content-text > div.mw-parser-output > table.infobox > tbody > tr:nth-child(1) > td > a > img")
+	//	err = utils.OutputFile("b.png", el.MustResource())
+	//	if err != nil {
+	//		t.Fatal(err)
+	//	}
+	//})
+	//if err != nil {
+	//	t.Fatal(err)
+	//}
+}
+
+func TestNewBrowserFromDocker(t *testing.T) {
+	desURL := "https://www.wikipedia.org/"
+	httpProxyURL := "http://127.0.0.1:10809"
+	remoteDockerURL := "ws://192.168.50.135:9222"
+
+	browser, err := NewBrowserFromDocker(httpProxyURL, remoteDockerURL)
+	if err != nil {
+		t.Fatal(err)
+	}
+	_, err = browser.Page(proto.TargetCreateTarget{URL: desURL})
+	if err != nil {
+		t.Fatal(err)
+	}
+}

+ 1 - 2
go.mod

@@ -4,10 +4,9 @@ go 1.15
 
 require (
 	github.com/PuerkitoBio/goquery v1.6.1
-	github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394 // indirect
 	github.com/beevik/etree v1.1.0
 	github.com/go-resty/resty/v2 v2.6.0
-	github.com/go-rod/rod v0.100.0
+	github.com/go-rod/rod v0.97.2
 	github.com/middelink/go-parse-torrent-name v0.0.0-20190301154245-3ff4efacd4c4
 	github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646
 )

+ 10 - 10
go.sum

@@ -2,26 +2,26 @@ github.com/PuerkitoBio/goquery v1.6.1 h1:FgjbQZKl5HTmcn4sKBgvx8vv63nhyhIpv7lJpFG
 github.com/PuerkitoBio/goquery v1.6.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
 github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
 github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
-github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394 h1:OYA+5W64v3OgClL+IrOD63t4i/RW7RqrAVl9LTZ9UqQ=
-github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394/go.mod h1:Q8n74mJTIgjX4RBBcHnJ05h//6/k6foqmgE45jTQtxg=
 github.com/beevik/etree v1.1.0 h1:T0xke/WvNtMoCqgzPhkX2r4rjY3GDZFi+FjpRZY2Jbs=
 github.com/beevik/etree v1.1.0/go.mod h1:r8Aw8JqVegEf0w2fDnATrX9VpkMcyFeM0FhwO62wh+A=
 github.com/go-resty/resty/v2 v2.6.0 h1:joIR5PNLM2EFqqESUjCMGXrWmXNHEU9CEiK813oKYS4=
 github.com/go-resty/resty/v2 v2.6.0/go.mod h1:PwvJS6hvaPkjtjNg9ph+VrSD92bi5Zq73w/BIH7cC3Q=
-github.com/go-rod/rod v0.100.0 h1:tEKIb5wS3pGUpW4oJPYDxOKmRXaZbd6S+YVjJ6BHBBY=
-github.com/go-rod/rod v0.100.0/go.mod h1:h9igqSGReLmOWyHtdf0AtUd0mdkHFu3gFwBeV+stleM=
+github.com/go-rod/rod v0.97.2 h1:4AWtucf0fXKbdaEjNNhjIcdYXQyc4+yx8YYI73jyP5A=
+github.com/go-rod/rod v0.97.2/go.mod h1:DgPYd1ql/oCzGxrM5aiCcVM+kA4MFCJ+Mht7ZVBSiG0=
 github.com/middelink/go-parse-torrent-name v0.0.0-20190301154245-3ff4efacd4c4 h1:C/VViMMbR/4Ti2aXrWpKy34S05cRaVd6EvV9BFR3qJ8=
 github.com/middelink/go-parse-torrent-name v0.0.0-20190301154245-3ff4efacd4c4/go.mod h1:H66QhXPJpUSdWschhL6u//v3ge96/qMnQ9mWp3efbxA=
 github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ=
 github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8=
 github.com/ysmood/goob v0.3.0 h1:XZ51cZJ4W3WCoCiUktixzMIQF86W7G5VFL4QQ/Q2uS0=
 github.com/ysmood/goob v0.3.0/go.mod h1:S3lq113Y91y1UBf1wj1pFOxeahvfKkCk6mTWTWbDdWs=
-github.com/ysmood/got v0.12.0/go.mod h1:pE1l4LOwOBhQg6A/8IAatkGp7uZjnalzrZolnlhhMgY=
-github.com/ysmood/gotrace v0.2.2/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM=
-github.com/ysmood/gson v0.6.4 h1:Yb6tosv6bk59HqjZu2/7o4BFherpYEMkDkXmlhgryZ4=
-github.com/ysmood/gson v0.6.4/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg=
-github.com/ysmood/leakless v0.7.0 h1:XCGdaPExyoreoQd+H5qgxM3ReNbSPFsEXpSKwbXbwQw=
-github.com/ysmood/leakless v0.7.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ=
+github.com/ysmood/got v0.9.3 h1:qx51X49jL/WAiqZzPTkPZ0zp5pTmrWJa4zYFTYo0gHI=
+github.com/ysmood/got v0.9.3/go.mod h1:pE1l4LOwOBhQg6A/8IAatkGp7uZjnalzrZolnlhhMgY=
+github.com/ysmood/gotrace v0.2.0 h1:IkTC6rJREwXSaG8yWK+NFwIJGIsxA1DjC6/gxYyQttE=
+github.com/ysmood/gotrace v0.2.0/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM=
+github.com/ysmood/gson v0.6.3 h1:4cU+5oOdsyundXHy00t99H0rLXLthuseD3x6W+xmCiU=
+github.com/ysmood/gson v0.6.3/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg=
+github.com/ysmood/leakless v0.6.16 h1:PVS0EY7eFLzkUTqVJbST7cLz9V+58L+4TIlgxmCLi3k=
+github.com/ysmood/leakless v0.6.16/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=

+ 5 - 35
sub_supplier/subhd/subhd.go

@@ -6,8 +6,6 @@ import (
 	"github.com/PuerkitoBio/goquery"
 	"github.com/allanpk716/ChineseSubFinder/common"
 	"github.com/allanpk716/ChineseSubFinder/sub_supplier"
-	"github.com/go-rod/rod"
-	"github.com/go-rod/rod/lib/devices"
 	"github.com/go-rod/rod/lib/launcher"
 	"github.com/nfnt/resize"
 	"image/jpeg"
@@ -275,41 +273,13 @@ func (s Supplier) httpPost(url string, postData map[string]string, referer strin
 }
 
 // Simulation 模拟滑动过防水墙
-func (s Supplier) Simulation() {
+func (s Supplier) SimulationTest() {
 	// 感谢 https://www.bigs3.com/article/gorod-crack-slider-captcha/
-	//設定設備參數
-	screen := devices.Device{
-		Title:          "Laptop with MDPI screen",
-		Capabilities:   []string{"touch", "mobile"},
-		UserAgent:      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36",
-		Screen: devices.Screen{
-			DevicePixelRatio: 1,
-			Horizontal: devices.ScreenSize{
-				Width:  1280,
-				Height: 720,
-			},
-		},
+	page, err := common.NewBrowserLoadPage("https://007.qq.com/online.html", "", 10*time.Second, 5)
+	if err != nil {
+		println(err.Error())
+		return
 	}
-	//設定啓動器
-	s.rodlauncher = launcher.New().
-		Set("mute-audio").
-		Set("default-browser-check").
-		Set("disable-gpu").
-		Set("disable-web-security").
-		Set("no-sandbox").
-		//關閉無頭模式,顯示瀏覽器窗體
-		Delete("headless")
-
-	//debug url
-	launchers := s.rodlauncher.MustLaunch()
-	fmt.Printf("debug url: %s\n", launchers)
-	//連接到瀏覽器
-	browser := rod.New().ControlURL(launchers).MustConnect()
-	//新開一個Pages
-	page := browser.DefaultDevice(screen).MustPage("")
-	//跳轉到目標網域
-	page.MustNavigate("https://007.qq.com/online.html").MustWaitLoad()
-
 	// 切换到可疑用户
 	page.MustElement("#app > section.wp-on-online > div > div > div > div.wp-on-box.col-md-5.col-md-offset-1 > div.wp-onb-tit > a:nth-child(2)").MustClick()
 	//模擬Click點擊 "體驗驗證碼" 按鈕