subhd.go 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722
  1. package subhd
  2. import (
  3. "bytes"
  4. "fmt"
  5. "github.com/PuerkitoBio/goquery"
  6. "github.com/Tnze/go.num/v2/zh"
  7. "github.com/allanpk716/ChineseSubFinder/internal/common"
  8. "github.com/allanpk716/ChineseSubFinder/internal/pkg/decode"
  9. "github.com/allanpk716/ChineseSubFinder/internal/pkg/folder_helper"
  10. "github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
  11. "github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
  12. "github.com/allanpk716/ChineseSubFinder/internal/pkg/notify_center"
  13. "github.com/allanpk716/ChineseSubFinder/internal/pkg/rod_helper"
  14. "github.com/allanpk716/ChineseSubFinder/internal/pkg/settings"
  15. "github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub"
  16. "github.com/allanpk716/ChineseSubFinder/internal/pkg/url_connectedness_helper"
  17. "github.com/allanpk716/ChineseSubFinder/internal/types/language"
  18. "github.com/allanpk716/ChineseSubFinder/internal/types/series"
  19. "github.com/allanpk716/ChineseSubFinder/internal/types/supplier"
  20. "github.com/go-rod/rod"
  21. "github.com/go-rod/rod/lib/launcher"
  22. "github.com/huandu/go-clone"
  23. "github.com/nfnt/resize"
  24. "github.com/sirupsen/logrus"
  25. "image/jpeg"
  26. "math"
  27. "net/url"
  28. "os"
  29. "path/filepath"
  30. "regexp"
  31. "strings"
  32. "time"
  33. )
// Supplier is the SubHD subtitle supplier. Instances are built by NewSupplier,
// which fills every field below except rodLauncher.
type Supplier struct {
	// settings is the private copy of the settings handed to NewSupplier.
	settings settings.Settings
	// log is the logger obtained from log_helper.GetLogger.
	log *logrus.Logger
	// topic is the cap step1 applies to the number of subtitles collected for a movie.
	topic int
	// NOTE(review): rodLauncher is neither assigned nor read in this file.
	rodLauncher *launcher.Launcher
	// tt is the timeout passed to rod_helper.NewPageNavigate.
	tt time.Duration
	// debugMode mirrors AdvancedSettings.DebugMode.
	debugMode bool
	// httpProxyAddress is the proxy handed to the browser; empty when the proxy is disabled.
	httpProxyAddress string
}
  43. func NewSupplier(_settings settings.Settings) *Supplier {
  44. sup := Supplier{}
  45. sup.log = log_helper.GetLogger()
  46. sup.topic = common.DownloadSubsPerSite
  47. sup.settings = clone.Clone(_settings).(settings.Settings)
  48. if sup.settings.AdvancedSettings.Topic > 0 && sup.settings.AdvancedSettings.Topic != sup.topic {
  49. sup.topic = sup.settings.AdvancedSettings.Topic
  50. }
  51. // 默认超时是 2 * 60s,如果是调试模式则是 5 min
  52. sup.tt = common.HTMLTimeOut
  53. sup.debugMode = sup.settings.AdvancedSettings.DebugMode
  54. if sup.debugMode == true {
  55. sup.tt = common.OneMovieProcessTimeOut
  56. }
  57. // 判断是否启用代理
  58. if sup.settings.AdvancedSettings.ProxySettings.UseHttpProxy == true {
  59. sup.httpProxyAddress = sup.settings.AdvancedSettings.ProxySettings.HttpProxyAddress
  60. } else {
  61. sup.httpProxyAddress = ""
  62. }
  63. return &sup
  64. }
  65. func (s Supplier) CheckAlive() (bool, int64) {
  66. proxyStatus, proxySpeed, err := url_connectedness_helper.UrlConnectednessTest(common.SubSubHDRootUrl, s.settings.AdvancedSettings.ProxySettings.HttpProxyAddress)
  67. if err != nil {
  68. s.log.Errorln(s.GetSupplierName(), "CheckAlive", "Error", err)
  69. return false, 0
  70. }
  71. if proxyStatus == false {
  72. s.log.Errorln(s.GetSupplierName(), "CheckAlive", "Status != 200")
  73. return false, proxySpeed
  74. }
  75. return true, proxySpeed
  76. }
// GetSupplierName returns the identifier of this supplier, common.SubSiteSubHd.
func (s Supplier) GetSupplierName() string {
	return common.SubSiteSubHd
}
// GetSubListFromFile4Movie returns the subtitles found on SubHD for the movie
// file at filePath. It is the exported entry point and simply delegates to
// getSubListFromFile4Movie.
func (s Supplier) GetSubListFromFile4Movie(filePath string) ([]supplier.SubInfo, error) {
	return s.getSubListFromFile4Movie(filePath)
}
  83. func (s Supplier) GetSubListFromFile4Series(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error) {
  84. var browser *rod.Browser
  85. // TODO 是用本地的 Browser 还是远程的,推荐是远程的
  86. browser, err := rod_helper.NewBrowser(s.httpProxyAddress, true)
  87. if err != nil {
  88. return nil, err
  89. }
  90. defer func() {
  91. _ = browser.Close()
  92. }()
  93. var subInfos = make([]supplier.SubInfo, 0)
  94. var subList = make([]HdListItem, 0)
  95. for value := range seriesInfo.NeedDlSeasonDict {
  96. // 第一级界面,找到影片的详情界面
  97. keyword := seriesInfo.Name + " 第" + zh.Uint64(value).String() + "季"
  98. detailPageUrl, err := s.step0(browser, keyword)
  99. if err != nil {
  100. s.log.Errorln("subhd step0", keyword)
  101. return nil, err
  102. }
  103. if detailPageUrl == "" {
  104. // 如果只是搜索不到,则继续换关键词
  105. s.log.Warning("subhd first search keyword", keyword, "not found")
  106. keyword = seriesInfo.Name
  107. s.log.Warning("subhd Retry", keyword)
  108. detailPageUrl, err = s.step0(browser, keyword)
  109. if err != nil {
  110. s.log.Errorln("subhd step0", keyword)
  111. return nil, err
  112. }
  113. }
  114. if detailPageUrl == "" {
  115. s.log.Warning("subhd search keyword", keyword, "not found")
  116. continue
  117. }
  118. // 列举字幕
  119. oneSubList, err := s.step1(browser, detailPageUrl, false)
  120. if err != nil {
  121. s.log.Errorln("subhd step1", keyword)
  122. return nil, err
  123. }
  124. subList = append(subList, oneSubList...)
  125. }
  126. // 与剧集需要下载的集 List 进行比较,找到需要下载的列表
  127. // 找到那些 Eps 需要下载字幕的
  128. subInfoNeedDownload := s.whichEpisodeNeedDownloadSub(seriesInfo, subList)
  129. // 下载字幕
  130. for i, item := range subInfoNeedDownload {
  131. bok, hdContent, err := s.step2Ex(browser, item.Url)
  132. if err != nil {
  133. s.log.Errorln("subhd step2Ex", err)
  134. continue
  135. }
  136. if bok == false {
  137. s.log.Errorln("subhd step2Ex return false")
  138. continue
  139. }
  140. oneSubInfo := supplier.NewSubInfo(s.GetSupplierName(), int64(i), hdContent.Filename, language.ChineseSimple, my_util.AddBaseUrl(common.SubSubHDRootUrl, item.Url), 0,
  141. 0, hdContent.Ext, hdContent.Data)
  142. oneSubInfo.Season = item.Season
  143. oneSubInfo.Episode = item.Episode
  144. subInfos = append(subInfos, *oneSubInfo)
  145. }
  146. return subInfos, nil
  147. }
  148. func (s Supplier) GetSubListFromFile4Anime(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error) {
  149. panic("not implemented")
  150. }
  151. func (s Supplier) getSubListFromFile4Movie(filePath string) ([]supplier.SubInfo, error) {
  152. /*
  153. 虽然是传入视频文件路径,但是其实需要读取对应的视频文件目录下的
  154. movie.xml 以及 *.nfo,找到 IMDB id
  155. 优先通过 IMDB id 去查找字幕
  156. 如果找不到,再靠文件名提取影片名称去查找
  157. */
  158. // 得到这个视频文件名中的信息
  159. info, _, err := decode.GetVideoInfoFromFileFullPath(filePath)
  160. if err != nil {
  161. return nil, err
  162. }
  163. // 找到这个视频文件,尝试得到 IMDB ID
  164. // 目前测试来看,加入 年 这个关键词去搜索,对 2020 年后的影片有利,因为网站有统一的详细页面了,而之前的,没有,会影响识别
  165. // 所以,year >= 2020 年,则可以多加一个关键词(年)去搜索影片
  166. imdbInfo, err := decode.GetImdbInfo4Movie(filePath)
  167. if err != nil {
  168. // 允许的错误,跳过,继续进行文件名的搜索
  169. s.log.Errorln("model.GetImdbInfo", err)
  170. }
  171. var subInfoList []supplier.SubInfo
  172. if imdbInfo.ImdbId != "" {
  173. // 先用 imdb id 找
  174. subInfoList, err = s.getSubListFromKeyword4Movie(imdbInfo.ImdbId)
  175. if err != nil {
  176. // 允许的错误,跳过,继续进行文件名的搜索
  177. s.log.Errorln(s.GetSupplierName(), "keyword:", imdbInfo.ImdbId)
  178. s.log.Errorln("getSubListFromKeyword4Movie", "IMDBID can not found sub", filePath, err)
  179. }
  180. // 如果有就优先返回
  181. if len(subInfoList) > 0 {
  182. return subInfoList, nil
  183. }
  184. }
  185. // 如果没有,那么就用文件名查找
  186. searchKeyword := my_util.VideoNameSearchKeywordMaker(info.Title, imdbInfo.Year)
  187. subInfoList, err = s.getSubListFromKeyword4Movie(searchKeyword)
  188. if err != nil {
  189. s.log.Errorln(s.GetSupplierName(), "keyword:", searchKeyword)
  190. return nil, err
  191. }
  192. return subInfoList, nil
  193. }
  194. func (s Supplier) getSubListFromKeyword4Movie(keyword string) ([]supplier.SubInfo, error) {
  195. var browser *rod.Browser
  196. // TODO 是用本地的 Browser 还是远程的,推荐是远程的
  197. browser, err := rod_helper.NewBrowser(s.httpProxyAddress, true)
  198. if err != nil {
  199. return nil, err
  200. }
  201. defer func() {
  202. _ = browser.Close()
  203. }()
  204. var subInfos []supplier.SubInfo
  205. detailPageUrl, err := s.step0(browser, keyword)
  206. if err != nil {
  207. return nil, err
  208. }
  209. // 没有搜索到字幕
  210. if detailPageUrl == "" {
  211. return nil, nil
  212. }
  213. subList, err := s.step1(browser, detailPageUrl, true)
  214. if err != nil {
  215. return nil, err
  216. }
  217. for i, item := range subList {
  218. bok, hdContent, err := s.step2Ex(browser, item.Url)
  219. time.Sleep(time.Second)
  220. if err != nil {
  221. s.log.Errorln("subhd step2Ex", err)
  222. continue
  223. }
  224. if bok == false {
  225. s.log.Errorln("subhd step2Ex return false")
  226. continue
  227. }
  228. subInfos = append(subInfos, *supplier.NewSubInfo(s.GetSupplierName(), int64(i), hdContent.Filename, language.ChineseSimple, my_util.AddBaseUrl(common.SubSubHDRootUrl, item.Url), 0, 0, hdContent.Ext, hdContent.Data))
  229. }
  230. return subInfos, nil
  231. }
  232. func (s Supplier) whichEpisodeNeedDownloadSub(seriesInfo *series.SeriesInfo, allSubList []HdListItem) []HdListItem {
  233. // 字幕很多,考虑效率,需要做成字典
  234. // key SxEx - SubInfos
  235. var allSubDict = make(map[string][]HdListItem)
  236. // 全季的字幕列表
  237. var oneSeasonSubDict = make(map[string][]HdListItem)
  238. for _, subInfo := range allSubList {
  239. _, season, episode, err := decode.GetSeasonAndEpisodeFromSubFileName(subInfo.Title)
  240. if err != nil {
  241. s.log.Errorln("whichEpisodeNeedDownloadSub.GetVideoInfoFromFileFullPath", subInfo.Title, err)
  242. continue
  243. }
  244. subInfo.Season = season
  245. subInfo.Episode = episode
  246. epsKey := my_util.GetEpisodeKeyName(season, episode)
  247. _, ok := allSubDict[epsKey]
  248. if ok == false {
  249. // 初始化
  250. allSubDict[epsKey] = make([]HdListItem, 0)
  251. if season != 0 && episode == 0 {
  252. oneSeasonSubDict[epsKey] = make([]HdListItem, 0)
  253. }
  254. }
  255. // 添加
  256. allSubDict[epsKey] = append(allSubDict[epsKey], subInfo)
  257. if season != 0 && episode == 0 {
  258. oneSeasonSubDict[epsKey] = append(oneSeasonSubDict[epsKey], subInfo)
  259. }
  260. }
  261. // 本地的视频列表,找到没有字幕的
  262. // 需要进行下载字幕的列表
  263. var subInfoNeedDownload = make([]HdListItem, 0)
  264. // 有那些 Eps 需要下载的,按 SxEx 反回 epsKey
  265. for epsKey, epsInfo := range seriesInfo.NeedDlEpsKeyList {
  266. // 从一堆字幕里面找合适的
  267. value, ok := allSubDict[epsKey]
  268. // 是否有
  269. if ok == true && len(value) > 0 {
  270. value[0].Season = epsInfo.Season
  271. value[0].Episode = epsInfo.Episode
  272. subInfoNeedDownload = append(subInfoNeedDownload, value[0])
  273. } else {
  274. s.log.Infoln("SubHD Not Find Sub can be download", epsInfo.Title, epsInfo.Season, epsInfo.Episode)
  275. }
  276. }
  277. // 全季的字幕列表,也拼进去,后面进行下载
  278. for _, infos := range oneSeasonSubDict {
  279. subInfoNeedDownload = append(subInfoNeedDownload, infos[0])
  280. }
  281. // 返回前,需要把每一个 Eps 的 Season Episode 信息填充到每个 SubInfo 中
  282. return subInfoNeedDownload
  283. }
  284. // step0 找到这个影片的详情列表
  285. func (s Supplier) step0(browser *rod.Browser, keyword string) (string, error) {
  286. var err error
  287. defer func() {
  288. if err != nil {
  289. notify_center.Notify.Add("subhd_step0", err.Error())
  290. }
  291. }()
  292. result, page, err := s.httpGetFromBrowser(browser, fmt.Sprintf(common.SubSubHDSearchUrl, url.QueryEscape(keyword)))
  293. if err != nil {
  294. return "", err
  295. }
  296. defer func() {
  297. _ = page.Close()
  298. }()
  299. // 是否有查找到的结果,至少要有结果。根据这里这样下面才能判断是分析失效了,还是就是没有结果而已
  300. re := regexp.MustCompile(`共\s*(\d+)\s*条`)
  301. matched := re.FindAllStringSubmatch(result, -1)
  302. if matched == nil || len(matched) < 1 {
  303. return "", common.SubHDStep0SubCountElementNotFound
  304. }
  305. subCount, err := decode.GetNumber2int(matched[0][0])
  306. if err != nil {
  307. return "", err
  308. }
  309. // 如果所搜没有找到字幕,就要返回
  310. if subCount < 1 {
  311. return "", nil
  312. }
  313. // 这里是确认能继续分析的详细连接
  314. doc, err := goquery.NewDocumentFromReader(strings.NewReader(result))
  315. if err != nil {
  316. return "", err
  317. }
  318. imgSelection := doc.Find("img.rounded-start")
  319. _, ok := imgSelection.Attr("src")
  320. if ok == true {
  321. if len(imgSelection.Nodes) < 1 {
  322. return "", common.SubHDStep0ImgParentLessThan1
  323. }
  324. step1Url := ""
  325. if imgSelection.Nodes[0].Parent.Data == "a" {
  326. // 第一个父级是不是超链接
  327. for _, attribute := range imgSelection.Nodes[0].Parent.Attr {
  328. if attribute.Key == "href" {
  329. step1Url = attribute.Val
  330. break
  331. }
  332. }
  333. } else if imgSelection.Nodes[0].Parent.Parent.Data == "a" {
  334. // 第二个父级是不是超链接
  335. for _, attribute := range imgSelection.Nodes[0].Parent.Parent.Attr {
  336. if attribute.Key == "href" {
  337. step1Url = attribute.Val
  338. break
  339. }
  340. }
  341. }
  342. if step1Url == "" {
  343. return "", common.SubHDStep0HrefIsNull
  344. }
  345. return step1Url, nil
  346. } else {
  347. return "", common.SubHDStep0HrefIsNull
  348. }
  349. }
  350. // step1 获取影片的详情字幕列表
  351. func (s Supplier) step1(browser *rod.Browser, detailPageUrl string, isMovieOrSeries bool) ([]HdListItem, error) {
  352. var err error
  353. defer func() {
  354. if err != nil {
  355. notify_center.Notify.Add("subhd_step1", err.Error())
  356. }
  357. }()
  358. detailPageUrl = my_util.AddBaseUrl(common.SubSubHDRootUrl, detailPageUrl)
  359. result, page, err := s.httpGetFromBrowser(browser, detailPageUrl)
  360. if err != nil {
  361. return nil, err
  362. }
  363. defer func() {
  364. _ = page.Close()
  365. }()
  366. doc, err := goquery.NewDocumentFromReader(strings.NewReader(result))
  367. if err != nil {
  368. return nil, err
  369. }
  370. var lists []HdListItem
  371. const subTableKeyword = ".pt-2"
  372. const oneSubTrTitleKeyword = "a.link-dark"
  373. const oneSubTrDownloadCountKeyword = "div.px-3"
  374. const oneSubLangAndTypeKeyword = ".text-secondary"
  375. doc.Find(subTableKeyword).EachWithBreak(func(i int, tr *goquery.Selection) bool {
  376. if tr.Find(oneSubTrTitleKeyword).Size() == 0 {
  377. return true
  378. }
  379. // 文件的下载页面,还需要分析
  380. downUrl, exists := tr.Find(oneSubTrTitleKeyword).Eq(0).Attr("href")
  381. if !exists {
  382. return true
  383. }
  384. // 文件名
  385. title := strings.TrimSpace(tr.Find(oneSubTrTitleKeyword).Text())
  386. // 字幕类型
  387. insideSubType := tr.Find(oneSubLangAndTypeKeyword).Text()
  388. if sub_parser_hub.IsSubTypeWanted(insideSubType) == false {
  389. return true
  390. }
  391. // 下载的次数
  392. downCount, err := decode.GetNumber2int(tr.Find(oneSubTrDownloadCountKeyword).Eq(1).Text())
  393. if err != nil {
  394. return true
  395. }
  396. listItem := HdListItem{}
  397. listItem.Url = downUrl
  398. listItem.BaseUrl = common.SubSubHDRootUrl
  399. listItem.Title = title
  400. listItem.DownCount = downCount
  401. // 电影,就需要第一个
  402. // 连续剧,需要多个
  403. if isMovieOrSeries == true {
  404. if len(lists) >= s.topic {
  405. return false
  406. }
  407. }
  408. lists = append(lists, listItem)
  409. return true
  410. })
  411. return lists, nil
  412. }
  413. // step2Ex 下载字幕 过防水墙
  414. func (s Supplier) step2Ex(browser *rod.Browser, subDownloadPageUrl string) (bool, *HdContent, error) {
  415. var err error
  416. defer func() {
  417. if err != nil {
  418. notify_center.Notify.Add("subhd_step2Ex", err.Error())
  419. }
  420. }()
  421. subDownloadPageUrl = my_util.AddBaseUrl(common.SubSubHDRootUrl, subDownloadPageUrl)
  422. _, page, err := s.httpGetFromBrowser(browser, subDownloadPageUrl)
  423. if err != nil {
  424. return false, nil, err
  425. }
  426. defer func() {
  427. _ = page.Close()
  428. }()
  429. // 需要先判断是否先要输入验证码,然后才到下载界面
  430. // 下载字幕
  431. bok, content, err := s.downloadSubFile(browser, page)
  432. if err != nil {
  433. return false, nil, err
  434. }
  435. if bok == false {
  436. return false, nil, nil
  437. }
  438. return true, content, nil
  439. }
  440. func (s Supplier) downloadSubFile(browser *rod.Browser, page *rod.Page) (bool, *HdContent, error) {
  441. var err error
  442. var doc *goquery.Document
  443. downloadSuccess := false
  444. fileName := ""
  445. fileByte := []byte{0}
  446. err = rod.Try(func() {
  447. tmpDir := filepath.Join(os.TempDir(), "rod", "downloads")
  448. wait := browser.WaitDownload(tmpDir)
  449. getDownloadFile := func() ([]byte, string, error) {
  450. info := wait()
  451. downloadPath := filepath.Join(tmpDir, info.GUID)
  452. defer func() { _ = os.Remove(downloadPath) }()
  453. b, err := os.ReadFile(downloadPath)
  454. if err != nil {
  455. return nil, "", err
  456. }
  457. return b, info.SuggestedFilename, nil
  458. }
  459. // 初始化页面用于查询元素
  460. pString := page.MustHTML()
  461. doc, err = goquery.NewDocumentFromReader(strings.NewReader(pString))
  462. if err != nil {
  463. return
  464. }
  465. // 移除广告
  466. page.MustEval(`testgssdqw = function () { if (document.getElementById("tbkp")) {document.getElementById("tbkp").remove()}; }`)
  467. page.MustEval(`testgssdqw()`)
  468. // 点击“验证获取下载地址”
  469. clickCodeBtn := doc.Find(btnClickCodeBtn)
  470. if len(clickCodeBtn.Nodes) < 1 {
  471. return
  472. }
  473. element := page.MustElement(btnClickCodeBtn)
  474. BtnCodeText := element.MustText()
  475. if strings.Contains(BtnCodeText, "验证") == true {
  476. // 那么需要填写验证码
  477. element.MustClick()
  478. time.Sleep(time.Second * 2)
  479. // 填写“验证码”
  480. page.MustEval(`$("#gzhcode").attr("value","` + common.SubhdCode + `");`)
  481. // 是否有“完成验证”按钮
  482. downBtn := doc.Find(btnCommitCode)
  483. if len(downBtn.Nodes) < 1 {
  484. return
  485. }
  486. element = page.MustElement(btnCommitCode)
  487. benCommit := element.MustText()
  488. if strings.Contains(benCommit, "验证") == false {
  489. log_helper.GetLogger().Errorln("btn not found 完整验证")
  490. return
  491. }
  492. element.MustClick()
  493. time.Sleep(time.Second * 2)
  494. // 点击下载按钮
  495. page.MustElement(btnClickCodeBtn).MustClick()
  496. } else if strings.Contains(BtnCodeText, "下载") == true {
  497. // 直接可以下载
  498. element.MustClick()
  499. time.Sleep(time.Second * 2)
  500. } else {
  501. log_helper.GetLogger().Errorln("btn not found 下载验证 or 下载")
  502. return
  503. }
  504. // 更新 page 的实例对应的 doc Content
  505. pString = page.MustHTML()
  506. doc, err = goquery.NewDocumentFromReader(strings.NewReader(pString))
  507. if err != nil {
  508. return
  509. }
  510. // 是否有腾讯的防水墙
  511. hasWaterWall := false
  512. waterWall := doc.Find(TCode)
  513. if len(waterWall.Nodes) >= 1 {
  514. hasWaterWall = true
  515. }
  516. log_helper.GetLogger().Debugln("Need pass WaterWall", hasWaterWall)
  517. // 过墙
  518. if hasWaterWall == true {
  519. s.passWaterWall(page)
  520. }
  521. time.Sleep(time.Second * 2)
  522. fileByte, fileName, err = getDownloadFile()
  523. if err != nil {
  524. return
  525. }
  526. downloadSuccess = true
  527. })
  528. if err != nil {
  529. return false, nil, err
  530. }
  531. var hdContent HdContent
  532. hdContent.Filename = fileName
  533. hdContent.Ext = filepath.Ext(fileName)
  534. hdContent.Data = fileByte
  535. if downloadSuccess == false {
  536. return false, &hdContent, common.SubHDStep2ExCannotFindDownloadBtn
  537. }
  538. return downloadSuccess, &hdContent, nil
  539. }
  540. func (s Supplier) passWaterWall(page *rod.Page) {
  541. //等待驗證碼窗體載入
  542. page.MustElement("#tcaptcha_iframe").MustWaitLoad()
  543. //進入到iframe
  544. iframe := page.MustElement("#tcaptcha_iframe").MustFrame()
  545. //等待拖動條加載, 延遲500秒檢測變化, 以確認加載完畢
  546. iframe.MustElement("#tcaptcha_drag_button").MustWaitStable()
  547. //等待缺口圖像載入
  548. slideBgEl := iframe.MustElement("#slideBg").MustWaitLoad()
  549. slideBgEl = slideBgEl.MustWaitStable()
  550. //取得帶缺口圖像
  551. shadowbg := slideBgEl.MustResource()
  552. // 取得原始圖像
  553. src := slideBgEl.MustProperty("src")
  554. fullbg, _, err := my_util.DownFile(strings.Replace(src.String(), "img_index=1", "img_index=0", 1))
  555. if err != nil {
  556. panic(err)
  557. }
  558. //取得img展示的真實尺寸
  559. shape, err := slideBgEl.Shape()
  560. if err != nil {
  561. panic(err)
  562. }
  563. bgbox := shape.Box()
  564. height, width := uint(math.Round(bgbox.Height)), uint(math.Round(bgbox.Width))
  565. //裁剪圖像
  566. shadowbgImg, _ := jpeg.Decode(bytes.NewReader(shadowbg))
  567. shadowbgImg = resize.Resize(width, height, shadowbgImg, resize.Lanczos3)
  568. fullbgImg, _ := jpeg.Decode(bytes.NewReader(fullbg))
  569. fullbgImg = resize.Resize(width, height, fullbgImg, resize.Lanczos3)
  570. //啓始left,排除干擾部份,所以右移10個像素
  571. left := fullbgImg.Bounds().Min.X + 10
  572. //啓始top, 排除干擾部份, 所以下移10個像素
  573. top := fullbgImg.Bounds().Min.Y + 10
  574. //最大left, 排除干擾部份, 所以左移10個像素
  575. maxleft := fullbgImg.Bounds().Max.X - 10
  576. //最大top, 排除干擾部份, 所以上移10個像素
  577. maxtop := fullbgImg.Bounds().Max.Y - 10
  578. //rgb比较阈值, 超出此阈值及代表找到缺口位置
  579. threshold := 20
  580. //缺口偏移, 拖動按鈕初始會偏移27.5
  581. distance := -27.5
  582. //取絕對值方法
  583. abs := func(n int) int {
  584. if n < 0 {
  585. return -n
  586. }
  587. return n
  588. }
  589. search:
  590. for i := left; i <= maxleft; i++ {
  591. for j := top; j <= maxtop; j++ {
  592. colorAR, colorAG, colorAB, _ := fullbgImg.At(i, j).RGBA()
  593. colorBR, colorBG, colorBB, _ := shadowbgImg.At(i, j).RGBA()
  594. colorAR, colorAG, colorAB = colorAR>>8, colorAG>>8, colorAB>>8
  595. colorBR, colorBG, colorBB = colorBR>>8, colorBG>>8, colorBB>>8
  596. if abs(int(colorAR)-int(colorBR)) > threshold ||
  597. abs(int(colorAG)-int(colorBG)) > threshold ||
  598. abs(int(colorAB)-int(colorBB)) > threshold {
  599. distance += float64(i)
  600. s.log.Debugln("對比完畢, 偏移量:", distance)
  601. break search
  602. }
  603. }
  604. }
  605. //獲取拖動按鈕形狀
  606. dragBtnBox := iframe.MustElement("#tcaptcha_drag_thumb").MustShape().Box()
  607. //启用滑鼠功能
  608. mouse := page.Mouse
  609. //模擬滑鼠移動至拖動按鈕處, 右移3的原因: 拖動按鈕比滑塊圖大3個像素
  610. mouse.MustMove(dragBtnBox.X+3, dragBtnBox.Y+(dragBtnBox.Height/2))
  611. //按下滑鼠左鍵
  612. mouse.MustDown("left")
  613. //開始拖動
  614. err = mouse.Move(dragBtnBox.X+distance, dragBtnBox.Y+(dragBtnBox.Height/2), 20)
  615. if err != nil {
  616. s.log.Errorln("mouse.Move", err)
  617. }
  618. //鬆開滑鼠左鍵, 拖动完毕
  619. mouse.MustUp("left")
  620. if s.debugMode == true {
  621. //截圖保存
  622. nowProcessRoot, err := folder_helper.GetRootDebugFolder()
  623. if err == nil {
  624. page.MustScreenshot(filepath.Join(nowProcessRoot, "result.png"))
  625. } else {
  626. s.log.Errorln("model.GetDebugFolder", err)
  627. }
  628. }
  629. }
  630. func (s Supplier) httpGetFromBrowser(browser *rod.Browser, inputUrl string) (string, *rod.Page, error) {
  631. page, err := rod_helper.NewPageNavigate(browser, inputUrl, s.tt, 5)
  632. if err != nil {
  633. return "", nil, err
  634. }
  635. pageString, err := page.HTML()
  636. if err != nil {
  637. return "", nil, err
  638. }
  639. // 每次搜索间隔
  640. if s.debugMode == true {
  641. time.Sleep(my_util.RandomSecondDuration(5, 10))
  642. } else {
  643. time.Sleep(my_util.RandomSecondDuration(5, 20))
  644. }
  645. return pageString, page, nil
  646. }
// HdListItem describes one subtitle entry listed on a SubHD detail page.
// In this file step1 fills Url, BaseUrl, Title and DownCount, and
// whichEpisodeNeedDownloadSub fills Season and Episode; the remaining fields
// are not written here.
type HdListItem struct {
	Url        string `json:"url"`        // href of the subtitle's download page
	BaseUrl    string `json:"baseUrl"`    // site root the Url belongs to
	Title      string `json:"title"`      // trimmed text of the title link
	Ext        string `json:"ext"`
	AuthorInfo string `json:"authorInfo"`
	Lang       string `json:"lang"`
	Rate       string `json:"rate"`
	DownCount  int    `json:"downCount"`  // download counter parsed from the page
	Season     int    // season number; the original note says the default is -1, but the zero value is 0 - TODO confirm
	Episode    int    // episode number; same remark as for Season
}
// HdContent is a downloaded subtitle file as produced by downloadSubFile.
type HdContent struct {
	Filename string `json:"filename"` // file name suggested by the browser for the download
	Ext      string `json:"ext"`      // extension of Filename, as returned by filepath.Ext
	Data     []byte `json:"data"`     // raw bytes of the downloaded file
}
  664. const TCode = "#TencentCaptcha"
  665. const btnClickCodeBtn = "button.btn-danger"
  666. const btnCommitCode = "button.btn-primary"