Browse Source

完成 subhd 过墙逻辑,就等网站恢复,再测试

Signed-off-by: allan716 <[email protected]>
allan716 4 years ago
parent
commit
f67932c71d

+ 1 - 0
common/pass_water_wall.go

@@ -12,6 +12,7 @@ import (
 
 // SimulationTest 模拟滑动过防水墙
 func SimulationTest() {
+	// 具体的应用见 subhd 的解析器
 	// 感谢 https://www.bigs3.com/article/gorod-crack-slider-captcha/
 	page, err := NewBrowserLoadPage("https://007.qq.com/online.html", "", 10*time.Second, 5)
 	if err != nil {

+ 3 - 2
common/pass_water_wall_test.go

@@ -1,8 +1,9 @@
 package common
 
-import "testing"
+import (
+	"testing"
+)
 
 func TestSimulationTest(t *testing.T) {
-
 	SimulationTest()
 }

+ 1 - 0
common/selferr.go

@@ -20,4 +20,5 @@ var(
 	SubHDStep2DTokenIsNull = errors.New("subhd step2 dToken is null")
 	SubHDStep2ResultIsNullOrNotTrue = errors.New("subhd step2 result is null or not true")
 	SubHDStep2PostResultGetUrlNotFound= errors.New("subhd step2 post result get url not found")
+	SubHDStep2ExCannotFindDownloadBtn= errors.New("subhd step2ex can not find download btn")
 )

+ 1 - 1
common/urls.go

@@ -7,6 +7,6 @@ const (
 	SubZiMuKuRootUrl  = "http://zmk.pw"
 	SubZiMuKuSearchUrl  = SubZiMuKuRootUrl + "/search"
 
-	SubSubHDRootUrl  = "https://subhd.tv"
+	SubSubHDRootUrl  = "https://subhd.cc"
 	SubSubHDSearchUrl  = SubSubHDRootUrl + "/search/%s"
 )

+ 131 - 65
sub_supplier/subhd/subhd.go

@@ -6,11 +6,14 @@ import (
 	"github.com/PuerkitoBio/goquery"
 	"github.com/allanpk716/ChineseSubFinder/common"
 	"github.com/allanpk716/ChineseSubFinder/sub_supplier"
+	"github.com/go-rod/rod"
 	"github.com/go-rod/rod/lib/launcher"
 	"github.com/nfnt/resize"
 	"image/jpeg"
+	"io/ioutil"
 	"math"
 	"net/url"
+	"os"
 	"path/filepath"
 	"regexp"
 	"strings"
@@ -91,20 +94,20 @@ func (s Supplier) GetSubListFromKeyword(keyword string) ([]sub_supplier.SubInfo,
 	}
 
 	for _, item := range subList {
-		s.Step2Ex(item.Url)
-		//if err != nil {
-		//	return nil, err
-		//}
-		//var subInfo sub_supplier.SubInfo
-		//subInfo.Name = hdContent.Filename
-		//subInfo.Ext = hdContent.Ext
-		//subInfo.Language = common.ChineseSimple
-		//subInfo.Vote = 0
-		//subInfo.FileUrl = common.AddBaseUrl(common.SubSubHDRootUrl, item.Url)
-		//subInfo.Offset = 0
-		//subInfo.Data = hdContent.Data
-		//
-		//subInfos = append(subInfos, subInfo)
+		hdContent, err := s.Step2Ex(item.Url)
+		if err != nil {
+			return nil, err
+		}
+		var subInfo sub_supplier.SubInfo
+		subInfo.Name = hdContent.Filename
+		subInfo.Ext = hdContent.Ext
+		subInfo.Language = common.ChineseSimple
+		subInfo.Vote = 0
+		subInfo.FileUrl = common.AddBaseUrl(common.SubSubHDRootUrl, item.Url)
+		subInfo.Offset = 0
+		subInfo.Data = hdContent.Data
+
+		subInfos = append(subInfos, subInfo)
 	}
 
 	return subInfos, nil
@@ -117,7 +120,7 @@ func (s Supplier) Step0(keyword string) (string, error) {
 	if err != nil {
 		return "", err
 	}
-	re := regexp.MustCompile(`<a\shref="(/d/[\w]+)"><img`)
+	re := regexp.MustCompile(`<a\shref="(/d/[\w]+)">\s?<div`)
 	matched := re.FindAllStringSubmatch(result, -1)
 	if len(matched) < 1 || len(matched[0]) < 2{
 		return "",  common.SubHDStep0HrefIsNull
@@ -136,41 +139,25 @@ func (s Supplier) Step1(detailPageUrl string) ([]HdListItem, error) {
 		return nil, err
 	}
 	var lists []HdListItem
-	doc.Find(".table-sm tr").EachWithBreak(func(i int, tr *goquery.Selection) bool {
-		if tr.Find("a.text-dark").Size() == 0 {
+	doc.Find("div.bg-white div.row").EachWithBreak(func(i int, tr *goquery.Selection) bool {
+		if tr.Find("a.link-dark").Size() == 0 {
 			return true
 		}
-		downUrl, exists := tr.Find("a.text-dark").Eq(0).Attr("href")
+		downUrl, exists := tr.Find("a.link-dark").Eq(0).Attr("href")
 		if !exists {
 			return true
 		}
-		title := strings.TrimSpace(tr.Find("a.text-dark").Text())
+		title := strings.TrimSpace(tr.Find("a.link-dark").Text())
 
-		downCount, err := common.GetNumber2int(tr.Find("td.p-3").Eq(1).Text())
+		downCount, err := common.GetNumber2int(tr.Find("div.px-3").Eq(1).Text())
 		if err != nil {
 			return true
 		}
 
-		ext := ""
-		tr.Find(".text-secondary span").Each(func(a_i int, a_lb *goquery.Selection) {
-			ext += a_lb.Text() + ","
-		})
-		extLen := len(ext)
-		if len(ext) > 0 {
-			ext = ext[0 : extLen - 3]
-		}
-
-		authorInfo := tr.Find("a.text-dark").Eq(2).Text()
-
-		rate := ""
-
 		listItem := HdListItem{}
 		listItem.Url = downUrl
 		listItem.BaseUrl = common.SubSubHDRootUrl
 		listItem.Title = title
-		listItem.Ext = ext
-		listItem.AuthorInfo = authorInfo
-		listItem.Rate = rate
 		listItem.DownCount = downCount
 
 		if len(lists) > s.topic {
@@ -184,7 +171,7 @@ func (s Supplier) Step1(detailPageUrl string) ([]HdListItem, error) {
 
 	return lists, nil
 }
-// Step2 下载字幕
+// Deprecated: Step2 downloads a subtitle but is no longer used; see Step2Ex.
 func (s Supplier) Step2(subDownloadPageUrl string) (*HdContent, error) {
 	subDownloadPageUrl = common.AddBaseUrl(common.SubSubHDRootUrl, subDownloadPageUrl)
 	result, err := s.httpGet(subDownloadPageUrl)
@@ -244,37 +231,119 @@ func (s Supplier) Step2(subDownloadPageUrl string) (*HdContent, error) {
 }
 
 // Step2Ex 下载字幕 过防水墙
-func (s Supplier) Step2Ex(subDownloadPageUrl string)  {
+//
+// Step2Ex loads the subtitle download page in a rod-driven browser, checks the
+// page for the Tencent slider captcha ("#TencentCaptcha") and the "#down"
+// button, and hands the page to downloadSubFile to fetch the file.
+//
+// subDownloadPageUrl is combined with common.SubSubHDRootUrl via
+// common.AddBaseUrl. It returns the downloaded content, or
+// common.SubHDStep2ExCannotFindDownloadBtn when neither element is present.
+func (s Supplier) Step2Ex(subDownloadPageUrl string) (*HdContent, error) {
 	subDownloadPageUrl = common.AddBaseUrl(common.SubSubHDRootUrl, subDownloadPageUrl)
 
-	page, err := common.NewBrowserLoadPage(subDownloadPageUrl, s.reqParam.HttpProxy, 10*time.Second, 5)
+	// TODO: when deployed with docker, allow connecting to a remotely launched browser.
+	browser, err := common.NewBrowser(s.reqParam.HttpProxy)
+	if err != nil {
+		return nil, err
+	}
+	defer func() { _ = browser.Close() }() // NOTE(review): assumes NewBrowser starts a dedicated instance; release it on every return path
+	// TODO: make the rod timeout and retry count configurable.
+	page, err := common.NewPageNavigate(browser, subDownloadPageUrl, 10*time.Second, 5)
+	if err != nil {
+		return nil, err
+	}
+	if err = page.WaitLoad(); err != nil {
+		return nil, err
+	}
+	pageString, err := page.HTML()
 	if err != nil {
-		println(err.Error())
-		return
+		return nil, err
+	}
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageString))
+	if err != nil {
+		return nil, err
+	}
+	// Does the page embed the Tencent slider captcha (the "water wall")?
+	hasWaterWall := len(doc.Find("#TencentCaptcha").Nodes) > 0
+	// Does the page expose the download button?
+	hasDownBtn := len(doc.Find("#down").Nodes) > 0
+	if !hasWaterWall && !hasDownBtn {
+		// Neither element is present, so the download cannot be triggered.
+		return nil, common.SubHDStep2ExCannotFindDownloadBtn
+	}
+	// Download the subtitle, solving the captcha when one is present.
+	content, err := s.downloadSubFile(browser, page, hasWaterWall)
+	if err != nil {
+		return nil, err
 	}
-	// 切换到可疑用户
-	page.MustElement("#app > section.wp-on-online > div > div > div > div.wp-on-box.col-md-5.col-md-offset-1 > div.wp-onb-tit > a:nth-child(2)").MustClick()
-	//模擬Click點擊 "體驗驗證碼" 按鈕
-	page.MustElement("#code").MustClick()
+
+	return content, nil
+}
+
+// downloadSubFile clicks the "#down" button on page, runs passWaterWall first
+// when hasWaterWall is set, and returns the file the browser downloaded.
+// The whole interaction runs inside rod.Try, so a panic raised by the Must*
+// helpers (or by the read failure below) is returned as an error.
+func (s Supplier) downloadSubFile(browser *rod.Browser, page *rod.Page, hasWaterWall bool) (*HdContent, error) {
+	var (
+		name string
+		data []byte
+	)
+	tryErr := rod.Try(func() {
+		// Set up the download wait before the click that triggers the download.
+		downloadDir := filepath.Join(os.TempDir(), "rod", "downloads")
+		waitDownload := browser.WaitDownload(downloadDir)
+
+		// Click the download button.
+		page.MustElement("#down").MustClick()
+		// Get past the slider captcha.
+		if hasWaterWall {
+			s.passWaterWall(page)
+		}
+
+		// waitDownload yields the download info; the file is read from downloadDir/<GUID>.
+		info := waitDownload()
+		savedPath := filepath.Join(downloadDir, info.GUID)
+		// The bytes are kept in memory, so the temporary file is removed afterwards.
+		defer func() { _ = os.Remove(savedPath) }()
+		content, readErr := ioutil.ReadFile(savedPath)
+		if readErr != nil {
+			panic(readErr)
+		}
+		data, name = content, info.SuggestedFilename
+	})
+	if tryErr != nil {
+		return nil, tryErr
+	}
+
+	return &HdContent{
+		Filename: name,
+		Ext:      filepath.Ext(name),
+		Data:     data,
+	}, nil
+}
+
+func (s Supplier) passWaterWall(page *rod.Page)  {
 	//等待驗證碼窗體載入
 	page.MustElement("#tcaptcha_iframe").MustWaitLoad()
 	//進入到iframe
 	iframe := page.MustElement("#tcaptcha_iframe").MustFrame()
 	//等待拖動條加載, 延遲500秒檢測變化, 以確認加載完畢
-	iframe.MustElement("#tcaptcha_drag_button").WaitStable(500 * time.Millisecond)
+	err := iframe.MustElement("#tcaptcha_drag_button").WaitStable(500 * time.Millisecond)
+	if err != nil {
+		panic(err)
+	}
 	//等待缺口圖像載入
 	iframe.MustElement("#slideBg").MustWaitLoad()
-
-
 	//取得帶缺口圖像
 	shadowbg := iframe.MustElement("#slideBg").MustResource()
 	//取得原始圖像
 	src := iframe.MustElement("#slideBg").MustProperty("src")
-	fullbg, fileName, err := common.DownFile(strings.Replace(src.String(), "img_index=1", "img_index=0", 1))
+	fullbg, _, err := common.DownFile(strings.Replace(src.String(), "img_index=1", "img_index=0", 1))
 	if err != nil {
-		return
+		panic(err)
 	}
-	println(fileName)
 	//取得img展示的真實尺寸
 	bgbox := iframe.MustElement("#slideBg").MustShape().Box()
 	height, width := uint(math.Round(bgbox.Height)), uint(math.Round(bgbox.Width))
@@ -286,22 +355,16 @@ func (s Supplier) Step2Ex(subDownloadPageUrl string)  {
 
 	//啓始left,排除干擾部份,所以右移10個像素
 	left := fullbg_img.Bounds().Min.X + 10
-
 	//啓始top, 排除干擾部份, 所以下移10個像素
 	top := fullbg_img.Bounds().Min.Y + 10
-
 	//最大left, 排除干擾部份, 所以左移10個像素
 	maxleft := fullbg_img.Bounds().Max.X - 10
-
 	//最大top, 排除干擾部份, 所以上移10個像素
 	maxtop := fullbg_img.Bounds().Max.Y - 10
-
 	//rgb比较阈值, 超出此阈值及代表找到缺口位置
 	threshold := 20
-
 	//缺口偏移, 拖動按鈕初始會偏移27.5
 	distance := -27.5
-
 	//取絕對值方法
 	abs := func(n int) int {
 		if n < 0 {
@@ -314,28 +377,30 @@ search:
 		for j := top; j <= maxtop; j++ {
 			color_a_R, color_a_G, color_a_B, _ := fullbg_img.At(i, j).RGBA()
 			color_b_R, color_b_G, color_b_B, _ := shadowbg_img.At(i, j).RGBA()
-			color_a_R, color_a_G, color_a_B = color_a_R >> 8, color_a_G >> 8, color_a_B >> 8
-			color_b_R, color_b_G, color_b_B = color_b_R >> 8, color_b_G >> 8, color_b_B >> 8
-			if abs(int(color_a_R) - int(color_b_R)) > threshold ||
-				abs(int(color_a_G) - int(color_b_G)) > threshold ||
-				abs(int(color_a_B) - int(color_b_B)) > threshold {
+			color_a_R, color_a_G, color_a_B = color_a_R>>8, color_a_G>>8, color_a_B>>8
+			color_b_R, color_b_G, color_b_B = color_b_R>>8, color_b_G>>8, color_b_B>>8
+			if abs(int(color_a_R)-int(color_b_R)) > threshold ||
+				abs(int(color_a_G)-int(color_b_G)) > threshold ||
+				abs(int(color_a_B)-int(color_b_B)) > threshold {
 				distance += float64(i)
 				fmt.Printf("info: 對比完畢, 偏移量: %v\n", distance)
 				break search
 			}
 		}
 	}
-
 	//獲取拖動按鈕形狀
 	dragbtnbox := iframe.MustElement("#tcaptcha_drag_thumb").MustShape().Box()
 	//启用滑鼠功能
 	mouse := page.Mouse
 	//模擬滑鼠移動至拖動按鈕處, 右移3的原因: 拖動按鈕比滑塊圖大3個像素
-	mouse.MustMove(dragbtnbox.X + 3, dragbtnbox.Y + (dragbtnbox.Height / 2))
+	mouse.MustMove(dragbtnbox.X+3, dragbtnbox.Y+(dragbtnbox.Height/2))
 	//按下滑鼠左鍵
 	mouse.MustDown("left")
 	//開始拖動
-	mouse.Move(dragbtnbox.X + distance, dragbtnbox.Y + (dragbtnbox.Height / 2), 20)
+	err = mouse.Move(dragbtnbox.X+distance, dragbtnbox.Y+(dragbtnbox.Height/2), 20)
+	if err != nil {
+		panic(err)
+	}
 	//鬆開滑鼠左鍵, 拖动完毕
 	mouse.MustUp("left")
 	//截圖保存
@@ -357,6 +422,7 @@ func (s Supplier) httpGet(url string) (string, error) {
 	return resp.String(), nil
 }
 
+// Deprecated: httpPost is no longer used.
 func (s Supplier) httpPost(url string, postData map[string]string, referer string) (string, error) {
 
 	s.reqParam.Referer = referer

+ 10 - 5
sub_supplier/subhd/subhd_test.go

@@ -6,6 +6,16 @@ import (
 )
 
 func TestSupplier_GetSubListFromFile(t *testing.T) {
+	//httpProxy := "127.0.0.1:10809"
+	////testUrl := "https://github.com/go-rod/rod/issues?q=page+string%28%29+"
+	////page, err := common.NewBrowserLoadPage(testUrl, httpProxy, 10*time.Second, 5)
+	//page, err := common.NewBrowserLoadPage(common.SubSubHDRootUrl, "", 10*time.Second, 5)
+	//if err != nil {
+	//	return
+	//}
+	//page = page.MustWaitLoad()
+	//htmlString := page.MustHTML()
+	//println(htmlString)
 
 	movie1 := "X:\\电影\\消失爱人 (2016)\\消失爱人 (2016) 720p AAC.rmvb"
 	//movie1 := "X:\\电影\\机动战士Z高达:星之继承者 (2005)\\机动战士Z高达:星之继承者 (2005) 1080p TrueHD.mkv"
@@ -20,9 +30,4 @@ func TestSupplier_GetSubListFromFile(t *testing.T) {
 	for i, sublist := range outList {
 		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Vote, sublist.FileUrl, len(sublist.Data))
 	}
-}
-
-func TestSupplier_Simulation(t *testing.T) {
-	shooter := NewSupplier()
-	shooter.SimulationTest()
 }