|
|
@@ -6,11 +6,14 @@ import (
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
"github.com/allanpk716/ChineseSubFinder/common"
|
|
|
"github.com/allanpk716/ChineseSubFinder/sub_supplier"
|
|
|
+ "github.com/go-rod/rod"
|
|
|
"github.com/go-rod/rod/lib/launcher"
|
|
|
"github.com/nfnt/resize"
|
|
|
"image/jpeg"
|
|
|
+ "io/ioutil"
|
|
|
"math"
|
|
|
"net/url"
|
|
|
+ "os"
|
|
|
"path/filepath"
|
|
|
"regexp"
|
|
|
"strings"
|
|
|
@@ -91,20 +94,20 @@ func (s Supplier) GetSubListFromKeyword(keyword string) ([]sub_supplier.SubInfo,
|
|
|
}
|
|
|
|
|
|
for _, item := range subList {
|
|
|
- s.Step2Ex(item.Url)
|
|
|
- //if err != nil {
|
|
|
- // return nil, err
|
|
|
- //}
|
|
|
- //var subInfo sub_supplier.SubInfo
|
|
|
- //subInfo.Name = hdContent.Filename
|
|
|
- //subInfo.Ext = hdContent.Ext
|
|
|
- //subInfo.Language = common.ChineseSimple
|
|
|
- //subInfo.Vote = 0
|
|
|
- //subInfo.FileUrl = common.AddBaseUrl(common.SubSubHDRootUrl, item.Url)
|
|
|
- //subInfo.Offset = 0
|
|
|
- //subInfo.Data = hdContent.Data
|
|
|
- //
|
|
|
- //subInfos = append(subInfos, subInfo)
|
|
|
+ hdContent, err := s.Step2Ex(item.Url)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ var subInfo sub_supplier.SubInfo
|
|
|
+ subInfo.Name = hdContent.Filename
|
|
|
+ subInfo.Ext = hdContent.Ext
|
|
|
+ subInfo.Language = common.ChineseSimple
|
|
|
+ subInfo.Vote = 0
|
|
|
+ subInfo.FileUrl = common.AddBaseUrl(common.SubSubHDRootUrl, item.Url)
|
|
|
+ subInfo.Offset = 0
|
|
|
+ subInfo.Data = hdContent.Data
|
|
|
+
|
|
|
+ subInfos = append(subInfos, subInfo)
|
|
|
}
|
|
|
|
|
|
return subInfos, nil
|
|
|
@@ -117,7 +120,7 @@ func (s Supplier) Step0(keyword string) (string, error) {
|
|
|
if err != nil {
|
|
|
return "", err
|
|
|
}
|
|
|
- re := regexp.MustCompile(`<a\shref="(/d/[\w]+)"><img`)
|
|
|
+ re := regexp.MustCompile(`<a\shref="(/d/[\w]+)">\s?<div`)
|
|
|
matched := re.FindAllStringSubmatch(result, -1)
|
|
|
if len(matched) < 1 || len(matched[0]) < 2{
|
|
|
return "", common.SubHDStep0HrefIsNull
|
|
|
@@ -136,41 +139,25 @@ func (s Supplier) Step1(detailPageUrl string) ([]HdListItem, error) {
|
|
|
return nil, err
|
|
|
}
|
|
|
var lists []HdListItem
|
|
|
- doc.Find(".table-sm tr").EachWithBreak(func(i int, tr *goquery.Selection) bool {
|
|
|
- if tr.Find("a.text-dark").Size() == 0 {
|
|
|
+ doc.Find("div.bg-white div.row").EachWithBreak(func(i int, tr *goquery.Selection) bool {
|
|
|
+ if tr.Find("a.link-dark").Size() == 0 {
|
|
|
return true
|
|
|
}
|
|
|
- downUrl, exists := tr.Find("a.text-dark").Eq(0).Attr("href")
|
|
|
+ downUrl, exists := tr.Find("a.link-dark").Eq(0).Attr("href")
|
|
|
if !exists {
|
|
|
return true
|
|
|
}
|
|
|
- title := strings.TrimSpace(tr.Find("a.text-dark").Text())
|
|
|
+ title := strings.TrimSpace(tr.Find("a.link-dark").Text())
|
|
|
|
|
|
- downCount, err := common.GetNumber2int(tr.Find("td.p-3").Eq(1).Text())
|
|
|
+ downCount, err := common.GetNumber2int(tr.Find("div.px-3").Eq(1).Text())
|
|
|
if err != nil {
|
|
|
return true
|
|
|
}
|
|
|
|
|
|
- ext := ""
|
|
|
- tr.Find(".text-secondary span").Each(func(a_i int, a_lb *goquery.Selection) {
|
|
|
- ext += a_lb.Text() + ","
|
|
|
- })
|
|
|
- extLen := len(ext)
|
|
|
- if len(ext) > 0 {
|
|
|
- ext = ext[0 : extLen - 3]
|
|
|
- }
|
|
|
-
|
|
|
- authorInfo := tr.Find("a.text-dark").Eq(2).Text()
|
|
|
-
|
|
|
- rate := ""
|
|
|
-
|
|
|
listItem := HdListItem{}
|
|
|
listItem.Url = downUrl
|
|
|
listItem.BaseUrl = common.SubSubHDRootUrl
|
|
|
listItem.Title = title
|
|
|
- listItem.Ext = ext
|
|
|
- listItem.AuthorInfo = authorInfo
|
|
|
- listItem.Rate = rate
|
|
|
listItem.DownCount = downCount
|
|
|
|
|
|
if len(lists) > s.topic {
|
|
|
@@ -184,7 +171,7 @@ func (s Supplier) Step1(detailPageUrl string) ([]HdListItem, error) {
|
|
|
|
|
|
return lists, nil
|
|
|
}
|
|
|
-// Step2 下载字幕
|
|
|
+// Step2 下载字幕,没用了,弃了
|
|
|
func (s Supplier) Step2(subDownloadPageUrl string) (*HdContent, error) {
|
|
|
subDownloadPageUrl = common.AddBaseUrl(common.SubSubHDRootUrl, subDownloadPageUrl)
|
|
|
result, err := s.httpGet(subDownloadPageUrl)
|
|
|
@@ -244,37 +231,119 @@ func (s Supplier) Step2(subDownloadPageUrl string) (*HdContent, error) {
|
|
|
}
|
|
|
|
|
|
// Step2Ex 下载字幕 过防水墙
|
|
|
-func (s Supplier) Step2Ex(subDownloadPageUrl string) {
|
|
|
+func (s Supplier) Step2Ex(subDownloadPageUrl string) (*HdContent, error) {
|
|
|
subDownloadPageUrl = common.AddBaseUrl(common.SubSubHDRootUrl, subDownloadPageUrl)
|
|
|
|
|
|
- page, err := common.NewBrowserLoadPage(subDownloadPageUrl, s.reqParam.HttpProxy, 10*time.Second, 5)
|
|
|
+ // TODO 后面如果用 docker 部署,需要允许改为远程 browser 启动
|
|
|
+ browser, err := common.NewBrowser(s.reqParam.HttpProxy)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ // TODO 需要提取出 rod 的超时时间和重试次数
|
|
|
+ page, err := common.NewPageNavigate(browser, subDownloadPageUrl, 10*time.Second, 5)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ err = page.WaitLoad()
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ pageString, err := page.HTML()
|
|
|
if err != nil {
|
|
|
- println(err.Error())
|
|
|
- return
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageString))
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ // 是否有腾讯的防水墙
|
|
|
+ hasWaterWall := true
|
|
|
+ waterWall := doc.Find("#TencentCaptcha")
|
|
|
+ if len(waterWall.Nodes) < 1 {
|
|
|
+ hasWaterWall = false
|
|
|
+ }
|
|
|
+ // 是否有下载按钮
|
|
|
+ hasDownBtn := true
|
|
|
+ downBtn := doc.Find("#down")
|
|
|
+ if len(downBtn.Nodes) < 1 {
|
|
|
+ hasDownBtn = false
|
|
|
+ }
|
|
|
+ if hasWaterWall == false && hasDownBtn == false {
|
|
|
+ // 都没有,则返回故障,无法下载
|
|
|
+ return nil, common.SubHDStep2ExCannotFindDownloadBtn
|
|
|
+ }
|
|
|
+ // 下载字幕
|
|
|
+ content, err2 := s.downloadSubFile(browser, page, hasWaterWall)
|
|
|
+ if err2 != nil {
|
|
|
+ return content, err2
|
|
|
}
|
|
|
- // 切换到可疑用户
|
|
|
- page.MustElement("#app > section.wp-on-online > div > div > div > div.wp-on-box.col-md-5.col-md-offset-1 > div.wp-onb-tit > a:nth-child(2)").MustClick()
|
|
|
- //模擬Click點擊 "體驗驗證碼" 按鈕
|
|
|
- page.MustElement("#code").MustClick()
|
|
|
+
|
|
|
+ return nil, nil
|
|
|
+}
|
|
|
+
|
|
|
+func (s Supplier) downloadSubFile(browser *rod.Browser, page *rod.Page, hasWaterWall bool) (*HdContent, error) {
|
|
|
+ var err error
|
|
|
+ fileName := ""
|
|
|
+ fileByte := []byte{0}
|
|
|
+ err = rod.Try(func() {
|
|
|
+ //wait := browser.MustWaitDownload()
|
|
|
+
|
|
|
+ tmpDir := filepath.Join(os.TempDir(), "rod", "downloads")
|
|
|
+ wait := browser.WaitDownload(tmpDir)
|
|
|
+ getDownloadFile:= func() ([]byte, string, error) {
|
|
|
+ info := wait()
|
|
|
+ path := filepath.Join(tmpDir, info.GUID)
|
|
|
+ defer func() { _ = os.Remove(path) }()
|
|
|
+ b, err := ioutil.ReadFile(path)
|
|
|
+ if err != nil {
|
|
|
+ return nil, "", err
|
|
|
+ }
|
|
|
+ return b,info.SuggestedFilename, nil
|
|
|
+ }
|
|
|
+
|
|
|
+ // 点击下载按钮
|
|
|
+ page.MustElement("#down").MustClick()
|
|
|
+ // 过墙
|
|
|
+ if hasWaterWall == true {
|
|
|
+ s.passWaterWall(page)
|
|
|
+ }
|
|
|
+ fileByte, fileName, err = getDownloadFile()
|
|
|
+ if err != nil {
|
|
|
+ panic(err)
|
|
|
+ }
|
|
|
+ })
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+
|
|
|
+ var hdContent HdContent
|
|
|
+ hdContent.Filename = fileName
|
|
|
+ hdContent.Ext = filepath.Ext(fileName)
|
|
|
+ hdContent.Data = fileByte
|
|
|
+
|
|
|
+ return &hdContent, nil
|
|
|
+}
|
|
|
+
|
|
|
+func (s Supplier) passWaterWall(page *rod.Page) {
|
|
|
//等待驗證碼窗體載入
|
|
|
page.MustElement("#tcaptcha_iframe").MustWaitLoad()
|
|
|
//進入到iframe
|
|
|
iframe := page.MustElement("#tcaptcha_iframe").MustFrame()
|
|
|
//等待拖動條加載, 延遲500毫秒檢測變化, 以確認加載完畢
|
|
|
- iframe.MustElement("#tcaptcha_drag_button").WaitStable(500 * time.Millisecond)
|
|
|
+ err := iframe.MustElement("#tcaptcha_drag_button").WaitStable(500 * time.Millisecond)
|
|
|
+ if err != nil {
|
|
|
+ panic(err)
|
|
|
+ }
|
|
|
//等待缺口圖像載入
|
|
|
iframe.MustElement("#slideBg").MustWaitLoad()
|
|
|
-
|
|
|
-
|
|
|
//取得帶缺口圖像
|
|
|
shadowbg := iframe.MustElement("#slideBg").MustResource()
|
|
|
//取得原始圖像
|
|
|
src := iframe.MustElement("#slideBg").MustProperty("src")
|
|
|
- fullbg, fileName, err := common.DownFile(strings.Replace(src.String(), "img_index=1", "img_index=0", 1))
|
|
|
+ fullbg, _, err := common.DownFile(strings.Replace(src.String(), "img_index=1", "img_index=0", 1))
|
|
|
if err != nil {
|
|
|
- return
|
|
|
+ panic(err)
|
|
|
}
|
|
|
- println(fileName)
|
|
|
//取得img展示的真實尺寸
|
|
|
bgbox := iframe.MustElement("#slideBg").MustShape().Box()
|
|
|
height, width := uint(math.Round(bgbox.Height)), uint(math.Round(bgbox.Width))
|
|
|
@@ -286,22 +355,16 @@ func (s Supplier) Step2Ex(subDownloadPageUrl string) {
|
|
|
|
|
|
//啓始left,排除干擾部份,所以右移10個像素
|
|
|
left := fullbg_img.Bounds().Min.X + 10
|
|
|
-
|
|
|
//啓始top, 排除干擾部份, 所以下移10個像素
|
|
|
top := fullbg_img.Bounds().Min.Y + 10
|
|
|
-
|
|
|
//最大left, 排除干擾部份, 所以左移10個像素
|
|
|
maxleft := fullbg_img.Bounds().Max.X - 10
|
|
|
-
|
|
|
//最大top, 排除干擾部份, 所以上移10個像素
|
|
|
maxtop := fullbg_img.Bounds().Max.Y - 10
|
|
|
-
|
|
|
//rgb比较阈值, 超出此阈值即代表找到缺口位置
|
|
|
threshold := 20
|
|
|
-
|
|
|
//缺口偏移, 拖動按鈕初始會偏移27.5
|
|
|
distance := -27.5
|
|
|
-
|
|
|
//取絕對值方法
|
|
|
abs := func(n int) int {
|
|
|
if n < 0 {
|
|
|
@@ -314,28 +377,30 @@ search:
|
|
|
for j := top; j <= maxtop; j++ {
|
|
|
color_a_R, color_a_G, color_a_B, _ := fullbg_img.At(i, j).RGBA()
|
|
|
color_b_R, color_b_G, color_b_B, _ := shadowbg_img.At(i, j).RGBA()
|
|
|
- color_a_R, color_a_G, color_a_B = color_a_R >> 8, color_a_G >> 8, color_a_B >> 8
|
|
|
- color_b_R, color_b_G, color_b_B = color_b_R >> 8, color_b_G >> 8, color_b_B >> 8
|
|
|
- if abs(int(color_a_R) - int(color_b_R)) > threshold ||
|
|
|
- abs(int(color_a_G) - int(color_b_G)) > threshold ||
|
|
|
- abs(int(color_a_B) - int(color_b_B)) > threshold {
|
|
|
+ color_a_R, color_a_G, color_a_B = color_a_R>>8, color_a_G>>8, color_a_B>>8
|
|
|
+ color_b_R, color_b_G, color_b_B = color_b_R>>8, color_b_G>>8, color_b_B>>8
|
|
|
+ if abs(int(color_a_R)-int(color_b_R)) > threshold ||
|
|
|
+ abs(int(color_a_G)-int(color_b_G)) > threshold ||
|
|
|
+ abs(int(color_a_B)-int(color_b_B)) > threshold {
|
|
|
distance += float64(i)
|
|
|
fmt.Printf("info: 對比完畢, 偏移量: %v\n", distance)
|
|
|
break search
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
//獲取拖動按鈕形狀
|
|
|
dragbtnbox := iframe.MustElement("#tcaptcha_drag_thumb").MustShape().Box()
|
|
|
//启用滑鼠功能
|
|
|
mouse := page.Mouse
|
|
|
//模擬滑鼠移動至拖動按鈕處, 右移3的原因: 拖動按鈕比滑塊圖大3個像素
|
|
|
- mouse.MustMove(dragbtnbox.X + 3, dragbtnbox.Y + (dragbtnbox.Height / 2))
|
|
|
+ mouse.MustMove(dragbtnbox.X+3, dragbtnbox.Y+(dragbtnbox.Height/2))
|
|
|
//按下滑鼠左鍵
|
|
|
mouse.MustDown("left")
|
|
|
//開始拖動
|
|
|
- mouse.Move(dragbtnbox.X + distance, dragbtnbox.Y + (dragbtnbox.Height / 2), 20)
|
|
|
+ err = mouse.Move(dragbtnbox.X+distance, dragbtnbox.Y+(dragbtnbox.Height/2), 20)
|
|
|
+ if err != nil {
|
|
|
+ panic(err)
|
|
|
+ }
|
|
|
//鬆開滑鼠左鍵, 拖动完毕
|
|
|
mouse.MustUp("left")
|
|
|
//截圖保存
|
|
|
@@ -357,6 +422,7 @@ func (s Supplier) httpGet(url string) (string, error) {
|
|
|
return resp.String(), nil
|
|
|
}
|
|
|
|
|
|
+//httpPost 没用了,弃了
|
|
|
func (s Supplier) httpPost(url string, postData map[string]string, referer string) (string, error) {
|
|
|
|
|
|
s.reqParam.Referer = referer
|