subhd.go 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718
  1. package subhd
  2. import (
  3. "bytes"
  4. "fmt"
  5. "github.com/PuerkitoBio/goquery"
  6. "github.com/Tnze/go.num/v2/zh"
  7. "github.com/allanpk716/ChineseSubFinder/internal/logic/file_downloader"
  8. "github.com/allanpk716/ChineseSubFinder/internal/logic/task_queue"
  9. pkgcommon "github.com/allanpk716/ChineseSubFinder/internal/pkg/common"
  10. "github.com/allanpk716/ChineseSubFinder/internal/pkg/decode"
  11. "github.com/allanpk716/ChineseSubFinder/internal/pkg/global_value"
  12. "github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
  13. "github.com/allanpk716/ChineseSubFinder/internal/pkg/notify_center"
  14. "github.com/allanpk716/ChineseSubFinder/internal/pkg/rod_helper"
  15. "github.com/allanpk716/ChineseSubFinder/internal/pkg/settings"
  16. "github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub"
  17. "github.com/allanpk716/ChineseSubFinder/internal/pkg/url_connectedness_helper"
  18. common2 "github.com/allanpk716/ChineseSubFinder/internal/types/common"
  19. "github.com/allanpk716/ChineseSubFinder/internal/types/language"
  20. "github.com/allanpk716/ChineseSubFinder/internal/types/series"
  21. "github.com/allanpk716/ChineseSubFinder/internal/types/supplier"
  22. "github.com/go-rod/rod"
  23. "github.com/nfnt/resize"
  24. "github.com/sirupsen/logrus"
  25. "image/jpeg"
  26. "math"
  27. "net/url"
  28. "os"
  29. "path/filepath"
  30. "regexp"
  31. "strings"
  32. "time"
  33. )
  34. type Supplier struct {
  35. settings *settings.Settings
  36. log *logrus.Logger
  37. fileDownloader *file_downloader.FileDownloader
  38. topic int
  39. tt time.Duration
  40. debugMode bool
  41. isAlive bool
  42. }
  43. func NewSupplier(fileDownloader *file_downloader.FileDownloader) *Supplier {
  44. sup := Supplier{}
  45. sup.log = fileDownloader.Log
  46. sup.fileDownloader = fileDownloader
  47. sup.topic = common2.DownloadSubsPerSite
  48. sup.settings = fileDownloader.Settings
  49. if sup.settings.AdvancedSettings.Topic > 0 && sup.settings.AdvancedSettings.Topic != sup.topic {
  50. sup.topic = sup.settings.AdvancedSettings.Topic
  51. }
  52. sup.isAlive = true // 默认是可以使用的,如果 check 后,再调整状态
  53. // 默认超时是 2 * 60s,如果是调试模式则是 5 min
  54. sup.tt = common2.HTMLTimeOut
  55. sup.debugMode = sup.settings.AdvancedSettings.DebugMode
  56. if sup.debugMode == true {
  57. sup.tt = common2.OneMovieProcessTimeOut
  58. }
  59. return &sup
  60. }
  61. func (s *Supplier) CheckAlive() (bool, int64) {
  62. proxyStatus, proxySpeed, err := url_connectedness_helper.UrlConnectednessTest(s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl,
  63. s.settings.AdvancedSettings.ProxySettings.GetLocalHttpProxyUrl())
  64. if err != nil {
  65. s.log.Errorln(s.GetSupplierName(), "CheckAlive", "Error", err)
  66. s.isAlive = false
  67. return false, 0
  68. }
  69. if proxyStatus == false {
  70. s.log.Errorln(s.GetSupplierName(), "CheckAlive", "Status != 200")
  71. s.isAlive = false
  72. return false, proxySpeed
  73. }
  74. s.isAlive = true
  75. return true, proxySpeed
  76. }
  77. func (s *Supplier) IsAlive() bool {
  78. return s.isAlive
  79. }
  80. func (s *Supplier) OverDailyDownloadLimit() bool {
  81. // 需要查询今天的限额
  82. count, err := task_queue.GetDailyDownloadCount(s.GetSupplierName(),
  83. my_util.GetPublicIP(s.log, s.settings.AdvancedSettings.TaskQueue, s.settings.AdvancedSettings.ProxySettings))
  84. if err != nil {
  85. s.log.Errorln(s.GetSupplierName(), "GetDailyDownloadCount", err)
  86. return true
  87. }
  88. if count > s.settings.AdvancedSettings.SuppliersSettings.Zimuku.DailyDownloadLimit {
  89. s.log.Warningln(s.GetSupplierName(), "DailyDownloadLimit:", s.settings.AdvancedSettings.SuppliersSettings.SubHD.DailyDownloadLimit, "Now Is:", count)
  90. return true
  91. }
  92. // 没有超限
  93. return false
  94. }
  95. func (s *Supplier) GetLogger() *logrus.Logger {
  96. return s.log
  97. }
  98. func (s *Supplier) GetSettings() *settings.Settings {
  99. return s.settings
  100. }
  101. func (s *Supplier) GetSupplierName() string {
  102. return common2.SubSiteSubHd
  103. }
  104. func (s *Supplier) GetSubListFromFile4Movie(filePath string) ([]supplier.SubInfo, error) {
  105. return s.getSubListFromFile4Movie(filePath)
  106. }
  107. func (s *Supplier) GetSubListFromFile4Series(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error) {
  108. var browser *rod.Browser
  109. // TODO 是用本地的 Browser 还是远程的,推荐是远程的
  110. browser, err := rod_helper.NewBrowserEx(true, s.settings)
  111. if err != nil {
  112. return nil, err
  113. }
  114. defer func() {
  115. _ = browser.Close()
  116. }()
  117. var subInfos = make([]supplier.SubInfo, 0)
  118. var subList = make([]HdListItem, 0)
  119. for value := range seriesInfo.NeedDlSeasonDict {
  120. // 第一级界面,找到影片的详情界面
  121. keyword := seriesInfo.Name + " 第" + zh.Uint64(value).String() + "季"
  122. detailPageUrl, err := s.step0(browser, keyword)
  123. if err != nil {
  124. s.log.Errorln("subhd step0", keyword)
  125. return nil, err
  126. }
  127. if detailPageUrl == "" {
  128. // 如果只是搜索不到,则继续换关键词
  129. s.log.Warning("subhd first search keyword", keyword, "not found")
  130. keyword = seriesInfo.Name
  131. s.log.Warning("subhd Retry", keyword)
  132. detailPageUrl, err = s.step0(browser, keyword)
  133. if err != nil {
  134. s.log.Errorln("subhd step0", keyword)
  135. return nil, err
  136. }
  137. }
  138. if detailPageUrl == "" {
  139. s.log.Warning("subhd search keyword", keyword, "not found")
  140. continue
  141. }
  142. // 列举字幕
  143. oneSubList, err := s.step1(browser, detailPageUrl, false)
  144. if err != nil {
  145. s.log.Errorln("subhd step1", keyword)
  146. return nil, err
  147. }
  148. subList = append(subList, oneSubList...)
  149. }
  150. // 与剧集需要下载的集 List 进行比较,找到需要下载的列表
  151. // 找到那些 Eps 需要下载字幕的
  152. subInfoNeedDownload := s.whichEpisodeNeedDownloadSub(seriesInfo, subList)
  153. // 下载字幕
  154. for i, item := range subInfoNeedDownload {
  155. pkgcommon.SetSubScanJobStatusScanSeriesSub(i+1, len(seriesInfo.NeedDlEpsKeyList),
  156. fmt.Sprintf("%v - S%v-E%v", item.Title, item.Season, item.Episode))
  157. subInfo, err := s.fileDownloader.GetEx(s.GetSupplierName(), browser, item.Url, int64(i), item.Season, item.Episode, s.DownFile)
  158. if err != nil {
  159. s.log.Errorln(s.GetSupplierName(), "GetEx", subInfo.Name, err)
  160. continue
  161. }
  162. subInfos = append(subInfos, *subInfo)
  163. }
  164. return subInfos, nil
  165. }
  166. func (s *Supplier) GetSubListFromFile4Anime(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error) {
  167. panic("not implemented")
  168. }
  169. func (s *Supplier) getSubListFromFile4Movie(filePath string) ([]supplier.SubInfo, error) {
  170. /*
  171. 虽然是传入视频文件路径,但是其实需要读取对应的视频文件目录下的
  172. movie.xml 以及 *.nfo,找到 IMDB id
  173. 优先通过 IMDB id 去查找字幕
  174. 如果找不到,再靠文件名提取影片名称去查找
  175. */
  176. // 得到这个视频文件名中的信息
  177. info, _, err := decode.GetVideoInfoFromFileFullPath(filePath)
  178. if err != nil {
  179. return nil, err
  180. }
  181. // 找到这个视频文件,尝试得到 IMDB ID
  182. // 目前测试来看,加入 年 这个关键词去搜索,对 2020 年后的影片有利,因为网站有统一的详细页面了,而之前的,没有,会影响识别
  183. // 所以,year >= 2020 年,则可以多加一个关键词(年)去搜索影片
  184. imdbInfo, err := decode.GetImdbInfo4Movie(filePath)
  185. if err != nil {
  186. // 允许的错误,跳过,继续进行文件名的搜索
  187. s.log.Errorln("model.GetImdbInfo", err)
  188. }
  189. var subInfoList []supplier.SubInfo
  190. if imdbInfo.ImdbId != "" {
  191. // 先用 imdb id 找
  192. subInfoList, err = s.getSubListFromKeyword4Movie(imdbInfo.ImdbId)
  193. if err != nil {
  194. // 允许的错误,跳过,继续进行文件名的搜索
  195. s.log.Errorln(s.GetSupplierName(), "keyword:", imdbInfo.ImdbId)
  196. s.log.Errorln("getSubListFromKeyword4Movie", "IMDBID can not found sub", filePath, err)
  197. }
  198. // 如果有就优先返回
  199. if len(subInfoList) > 0 {
  200. return subInfoList, nil
  201. }
  202. }
  203. // 如果没有,那么就用文件名查找
  204. searchKeyword := my_util.VideoNameSearchKeywordMaker(s.log, info.Title, imdbInfo.Year)
  205. subInfoList, err = s.getSubListFromKeyword4Movie(searchKeyword)
  206. if err != nil {
  207. s.log.Errorln(s.GetSupplierName(), "keyword:", searchKeyword)
  208. return nil, err
  209. }
  210. return subInfoList, nil
  211. }
  212. func (s *Supplier) getSubListFromKeyword4Movie(keyword string) ([]supplier.SubInfo, error) {
  213. var browser *rod.Browser
  214. // TODO 是用本地的 Browser 还是远程的,推荐是远程的
  215. browser, err := rod_helper.NewBrowserEx(true, s.settings)
  216. if err != nil {
  217. return nil, err
  218. }
  219. defer func() {
  220. _ = browser.Close()
  221. }()
  222. var subInfos []supplier.SubInfo
  223. detailPageUrl, err := s.step0(browser, keyword)
  224. if err != nil {
  225. return nil, err
  226. }
  227. // 没有搜索到字幕
  228. if detailPageUrl == "" {
  229. return nil, nil
  230. }
  231. subList, err := s.step1(browser, detailPageUrl, true)
  232. if err != nil {
  233. return nil, err
  234. }
  235. for i, item := range subList {
  236. subInfo, err := s.fileDownloader.GetEx(s.GetSupplierName(), browser, item.Url, int64(i), 0, 0, s.DownFile)
  237. if err != nil {
  238. s.log.Errorln(s.GetSupplierName(), "GetEx", subInfo.Name, err)
  239. continue
  240. }
  241. subInfos = append(subInfos, *subInfo)
  242. }
  243. return subInfos, nil
  244. }
  245. func (s *Supplier) whichEpisodeNeedDownloadSub(seriesInfo *series.SeriesInfo, allSubList []HdListItem) []HdListItem {
  246. // 字幕很多,考虑效率,需要做成字典
  247. // key SxEx - SubInfos
  248. var allSubDict = make(map[string][]HdListItem)
  249. // 全季的字幕列表
  250. var oneSeasonSubDict = make(map[string][]HdListItem)
  251. for _, subInfo := range allSubList {
  252. _, season, episode, err := decode.GetSeasonAndEpisodeFromSubFileName(subInfo.Title)
  253. if err != nil {
  254. s.log.Errorln("whichEpisodeNeedDownloadSub.GetVideoInfoFromFileFullPath", subInfo.Title, err)
  255. continue
  256. }
  257. subInfo.Season = season
  258. subInfo.Episode = episode
  259. epsKey := my_util.GetEpisodeKeyName(season, episode)
  260. _, ok := allSubDict[epsKey]
  261. if ok == false {
  262. // 初始化
  263. allSubDict[epsKey] = make([]HdListItem, 0)
  264. if season != 0 && episode == 0 {
  265. oneSeasonSubDict[epsKey] = make([]HdListItem, 0)
  266. }
  267. }
  268. // 添加
  269. allSubDict[epsKey] = append(allSubDict[epsKey], subInfo)
  270. if season != 0 && episode == 0 {
  271. oneSeasonSubDict[epsKey] = append(oneSeasonSubDict[epsKey], subInfo)
  272. }
  273. }
  274. // 本地的视频列表,找到没有字幕的
  275. // 需要进行下载字幕的列表
  276. var subInfoNeedDownload = make([]HdListItem, 0)
  277. // 有那些 Eps 需要下载的,按 SxEx 反回 epsKey
  278. for epsKey, epsInfo := range seriesInfo.NeedDlEpsKeyList {
  279. // 从一堆字幕里面找合适的
  280. value, ok := allSubDict[epsKey]
  281. // 是否有
  282. if ok == true && len(value) > 0 {
  283. value[0].Season = epsInfo.Season
  284. value[0].Episode = epsInfo.Episode
  285. subInfoNeedDownload = append(subInfoNeedDownload, value[0])
  286. } else {
  287. s.log.Infoln("SubHD Not Find Sub can be download", epsInfo.Title, epsInfo.Season, epsInfo.Episode)
  288. }
  289. }
  290. // 全季的字幕列表,也拼进去,后面进行下载
  291. for _, infos := range oneSeasonSubDict {
  292. subInfoNeedDownload = append(subInfoNeedDownload, infos[0])
  293. }
  294. // 返回前,需要把每一个 Eps 的 Season Episode 信息填充到每个 SubInfo 中
  295. return subInfoNeedDownload
  296. }
  297. // step0 找到这个影片的详情列表
  298. func (s *Supplier) step0(browser *rod.Browser, keyword string) (string, error) {
  299. var err error
  300. defer func() {
  301. if err != nil {
  302. notify_center.Notify.Add("subhd_step0", err.Error())
  303. }
  304. }()
  305. result, page, err := rod_helper.HttpGetFromBrowser(browser, fmt.Sprintf(s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl+common2.SubSubHDSearchUrl, url.QueryEscape(keyword)), s.tt)
  306. if err != nil {
  307. return "", err
  308. }
  309. defer func() {
  310. _ = page.Close()
  311. }()
  312. // 是否有查找到的结果,至少要有结果。根据这里这样下面才能判断是分析失效了,还是就是没有结果而已
  313. re := regexp.MustCompile(`共\s*(\d+)\s*条`)
  314. matched := re.FindAllStringSubmatch(result, -1)
  315. if matched == nil || len(matched) < 1 {
  316. return "", common2.SubHDStep0SubCountElementNotFound
  317. }
  318. subCount, err := decode.GetNumber2int(matched[0][0])
  319. if err != nil {
  320. return "", err
  321. }
  322. // 如果所搜没有找到字幕,就要返回
  323. if subCount < 1 {
  324. return "", nil
  325. }
  326. // 这里是确认能继续分析的详细连接
  327. doc, err := goquery.NewDocumentFromReader(strings.NewReader(result))
  328. if err != nil {
  329. return "", err
  330. }
  331. imgSelection := doc.Find("img.rounded-start")
  332. _, ok := imgSelection.Attr("src")
  333. if ok == true {
  334. if len(imgSelection.Nodes) < 1 {
  335. return "", common2.SubHDStep0ImgParentLessThan1
  336. }
  337. step1Url := ""
  338. if imgSelection.Nodes[0].Parent.Data == "a" {
  339. // 第一个父级是不是超链接
  340. for _, attribute := range imgSelection.Nodes[0].Parent.Attr {
  341. if attribute.Key == "href" {
  342. step1Url = attribute.Val
  343. break
  344. }
  345. }
  346. } else if imgSelection.Nodes[0].Parent.Parent.Data == "a" {
  347. // 第二个父级是不是超链接
  348. for _, attribute := range imgSelection.Nodes[0].Parent.Parent.Attr {
  349. if attribute.Key == "href" {
  350. step1Url = attribute.Val
  351. break
  352. }
  353. }
  354. }
  355. if step1Url == "" {
  356. return "", common2.SubHDStep0HrefIsNull
  357. }
  358. return step1Url, nil
  359. } else {
  360. return "", common2.SubHDStep0HrefIsNull
  361. }
  362. }
  363. // step1 获取影片的详情字幕列表
  364. func (s *Supplier) step1(browser *rod.Browser, detailPageUrl string, isMovieOrSeries bool) ([]HdListItem, error) {
  365. var err error
  366. defer func() {
  367. if err != nil {
  368. notify_center.Notify.Add("subhd_step1", err.Error())
  369. }
  370. }()
  371. detailPageUrl = my_util.AddBaseUrl(s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl, detailPageUrl)
  372. result, page, err := rod_helper.HttpGetFromBrowser(browser, detailPageUrl, s.tt)
  373. if err != nil {
  374. return nil, err
  375. }
  376. defer func() {
  377. _ = page.Close()
  378. }()
  379. doc, err := goquery.NewDocumentFromReader(strings.NewReader(result))
  380. if err != nil {
  381. return nil, err
  382. }
  383. var lists []HdListItem
  384. const subTableKeyword = ".pt-2"
  385. const oneSubTrTitleKeyword = "a.link-dark"
  386. const oneSubTrDownloadCountKeyword = "div.px-3"
  387. const oneSubLangAndTypeKeyword = ".text-secondary"
  388. doc.Find(subTableKeyword).EachWithBreak(func(i int, tr *goquery.Selection) bool {
  389. if tr.Find(oneSubTrTitleKeyword).Size() == 0 {
  390. return true
  391. }
  392. // 文件的下载页面,还需要分析
  393. downUrl, exists := tr.Find(oneSubTrTitleKeyword).Eq(0).Attr("href")
  394. if !exists {
  395. return true
  396. }
  397. // 文件名
  398. title := strings.TrimSpace(tr.Find(oneSubTrTitleKeyword).Text())
  399. // 字幕类型
  400. insideSubType := tr.Find(oneSubLangAndTypeKeyword).Text()
  401. if sub_parser_hub.IsSubTypeWanted(insideSubType) == false {
  402. return true
  403. }
  404. // 下载的次数
  405. downCount, err := decode.GetNumber2int(tr.Find(oneSubTrDownloadCountKeyword).Eq(1).Text())
  406. if err != nil {
  407. return true
  408. }
  409. listItem := HdListItem{}
  410. listItem.Url = downUrl
  411. listItem.BaseUrl = s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl
  412. listItem.Title = title
  413. listItem.DownCount = downCount
  414. // 电影,就需要第一个
  415. // 连续剧,需要多个
  416. if isMovieOrSeries == true {
  417. if len(lists) >= s.topic {
  418. return false
  419. }
  420. }
  421. lists = append(lists, listItem)
  422. return true
  423. })
  424. return lists, nil
  425. }
  426. // DownFile 下载字幕 过防水墙
  427. func (s *Supplier) DownFile(browser *rod.Browser, subDownloadPageUrl string, TopN int64, Season, Episode int) (*supplier.SubInfo, error) {
  428. var err error
  429. defer func() {
  430. if err != nil {
  431. notify_center.Notify.Add("subhd_DownFile", err.Error())
  432. }
  433. }()
  434. subDownloadPageFullUrl := my_util.AddBaseUrl(s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl, subDownloadPageUrl)
  435. _, page, err := rod_helper.HttpGetFromBrowser(browser, subDownloadPageFullUrl, s.tt)
  436. if err != nil {
  437. return nil, err
  438. }
  439. defer func() {
  440. _ = page.Close()
  441. }()
  442. // 需要先判断是否先要输入验证码,然后才到下载界面
  443. // 下载字幕
  444. subInfo, err := s.downloadSubFile(browser, page, subDownloadPageUrl)
  445. if err != nil {
  446. return nil, err
  447. }
  448. subInfo.TopN = TopN
  449. subInfo.Season = Season
  450. subInfo.Episode = Episode
  451. return subInfo, nil
  452. }
  453. func (s *Supplier) downloadSubFile(browser *rod.Browser, page *rod.Page, subDownloadPageUrl string) (*supplier.SubInfo, error) {
  454. var err error
  455. var doc *goquery.Document
  456. downloadSuccess := false
  457. fileName := ""
  458. fileByte := []byte{0}
  459. err = rod.Try(func() {
  460. tmpDir := filepath.Join(global_value.DefTmpFolder(), "downloads")
  461. wait := browser.WaitDownload(tmpDir)
  462. getDownloadFile := func() ([]byte, string, error) {
  463. info := wait()
  464. downloadPath := filepath.Join(tmpDir, info.GUID)
  465. defer func() { _ = os.Remove(downloadPath) }()
  466. b, err := os.ReadFile(downloadPath)
  467. if err != nil {
  468. return nil, "", err
  469. }
  470. return b, info.SuggestedFilename, nil
  471. }
  472. // 初始化页面用于查询元素
  473. pString := page.MustHTML()
  474. doc, err = goquery.NewDocumentFromReader(strings.NewReader(pString))
  475. if err != nil {
  476. return
  477. }
  478. // 移除广告
  479. page.MustEval(`testgssdqw = function () { if (document.getElementById("tbkp")) {document.getElementById("tbkp").remove()}; }`)
  480. page.MustEval(`testgssdqw()`)
  481. // 点击“验证获取下载地址”
  482. clickCodeBtn := doc.Find(btnClickCodeBtn)
  483. if len(clickCodeBtn.Nodes) < 1 {
  484. return
  485. }
  486. element := page.MustElement(btnClickCodeBtn)
  487. BtnCodeText := element.MustText()
  488. if strings.Contains(BtnCodeText, "验证") == true {
  489. // 那么需要填写验证码
  490. element.MustClick()
  491. time.Sleep(time.Second * 2)
  492. // 填写“验证码”
  493. page.MustEval(`$("#gzhcode").attr("value","` + common2.SubhdCode + `");`)
  494. // 是否有“完成验证”按钮
  495. downBtn := doc.Find(btnCommitCode)
  496. if len(downBtn.Nodes) < 1 {
  497. return
  498. }
  499. element = page.MustElement(btnCommitCode)
  500. benCommit := element.MustText()
  501. if strings.Contains(benCommit, "验证") == false {
  502. s.log.Errorln("btn not found 完整验证")
  503. return
  504. }
  505. element.MustClick()
  506. time.Sleep(time.Second * 2)
  507. // 点击下载按钮
  508. page.MustElement(btnClickCodeBtn).MustClick()
  509. } else if strings.Contains(BtnCodeText, "下载") == true {
  510. // 直接可以下载
  511. element.MustClick()
  512. time.Sleep(time.Second * 2)
  513. } else {
  514. s.log.Errorln("btn not found 下载验证 or 下载")
  515. return
  516. }
  517. // 更新 page 的实例对应的 doc Content
  518. pString = page.MustHTML()
  519. doc, err = goquery.NewDocumentFromReader(strings.NewReader(pString))
  520. if err != nil {
  521. return
  522. }
  523. // 是否有腾讯的防水墙
  524. hasWaterWall := false
  525. waterWall := doc.Find(TCode)
  526. if len(waterWall.Nodes) >= 1 {
  527. hasWaterWall = true
  528. }
  529. s.log.Debugln("Need pass WaterWall", hasWaterWall)
  530. // 过墙
  531. if hasWaterWall == true {
  532. s.passWaterWall(page)
  533. }
  534. time.Sleep(time.Second * 2)
  535. fileByte, fileName, err = getDownloadFile()
  536. if err != nil {
  537. return
  538. }
  539. downloadSuccess = true
  540. })
  541. if err != nil {
  542. return nil, err
  543. }
  544. inSubInfo := supplier.NewSubInfo(s.GetSupplierName(), 1, fileName, language.ChineseSimple, subDownloadPageUrl, 0, 0, filepath.Ext(fileName), fileByte)
  545. if downloadSuccess == false {
  546. return nil, common2.SubHDStep2ExCannotFindDownloadBtn
  547. }
  548. return inSubInfo, nil
  549. }
  550. func (s *Supplier) passWaterWall(page *rod.Page) {
  551. //等待驗證碼窗體載入
  552. page.MustElement("#tcaptcha_iframe").MustWaitLoad()
  553. //進入到iframe
  554. iframe := page.MustElement("#tcaptcha_iframe").MustFrame()
  555. //等待拖動條加載, 延遲500秒檢測變化, 以確認加載完畢
  556. iframe.MustElement("#tcaptcha_drag_button").MustWaitStable()
  557. //等待缺口圖像載入
  558. slideBgEl := iframe.MustElement("#slideBg").MustWaitLoad()
  559. slideBgEl = slideBgEl.MustWaitStable()
  560. //取得帶缺口圖像
  561. shadowbg := slideBgEl.MustResource()
  562. // 取得原始圖像
  563. src := slideBgEl.MustProperty("src")
  564. fullbg, _, err := my_util.DownFile(s.log, strings.Replace(src.String(), "img_index=1", "img_index=0", 1))
  565. if err != nil {
  566. panic(err)
  567. }
  568. //取得img展示的真實尺寸
  569. shape, err := slideBgEl.Shape()
  570. if err != nil {
  571. panic(err)
  572. }
  573. bgbox := shape.Box()
  574. height, width := uint(math.Round(bgbox.Height)), uint(math.Round(bgbox.Width))
  575. //裁剪圖像
  576. shadowbgImg, _ := jpeg.Decode(bytes.NewReader(shadowbg))
  577. shadowbgImg = resize.Resize(width, height, shadowbgImg, resize.Lanczos3)
  578. fullbgImg, _ := jpeg.Decode(bytes.NewReader(fullbg))
  579. fullbgImg = resize.Resize(width, height, fullbgImg, resize.Lanczos3)
  580. //啓始left,排除干擾部份,所以右移10個像素
  581. left := fullbgImg.Bounds().Min.X + 10
  582. //啓始top, 排除干擾部份, 所以下移10個像素
  583. top := fullbgImg.Bounds().Min.Y + 10
  584. //最大left, 排除干擾部份, 所以左移10個像素
  585. maxleft := fullbgImg.Bounds().Max.X - 10
  586. //最大top, 排除干擾部份, 所以上移10個像素
  587. maxtop := fullbgImg.Bounds().Max.Y - 10
  588. //rgb比较阈值, 超出此阈值及代表找到缺口位置
  589. threshold := 20
  590. //缺口偏移, 拖動按鈕初始會偏移27.5
  591. distance := -27.5
  592. //取絕對值方法
  593. abs := func(n int) int {
  594. if n < 0 {
  595. return -n
  596. }
  597. return n
  598. }
  599. search:
  600. for i := left; i <= maxleft; i++ {
  601. for j := top; j <= maxtop; j++ {
  602. colorAR, colorAG, colorAB, _ := fullbgImg.At(i, j).RGBA()
  603. colorBR, colorBG, colorBB, _ := shadowbgImg.At(i, j).RGBA()
  604. colorAR, colorAG, colorAB = colorAR>>8, colorAG>>8, colorAB>>8
  605. colorBR, colorBG, colorBB = colorBR>>8, colorBG>>8, colorBB>>8
  606. if abs(int(colorAR)-int(colorBR)) > threshold ||
  607. abs(int(colorAG)-int(colorBG)) > threshold ||
  608. abs(int(colorAB)-int(colorBB)) > threshold {
  609. distance += float64(i)
  610. s.log.Debugln("對比完畢, 偏移量:", distance)
  611. break search
  612. }
  613. }
  614. }
  615. //獲取拖動按鈕形狀
  616. dragBtnBox := iframe.MustElement("#tcaptcha_drag_thumb").MustShape().Box()
  617. //启用滑鼠功能
  618. mouse := page.Mouse
  619. //模擬滑鼠移動至拖動按鈕處, 右移3的原因: 拖動按鈕比滑塊圖大3個像素
  620. mouse.MustMove(dragBtnBox.X+3, dragBtnBox.Y+(dragBtnBox.Height/2))
  621. //按下滑鼠左鍵
  622. mouse.MustDown("left")
  623. //開始拖動
  624. err = mouse.Move(dragBtnBox.X+distance, dragBtnBox.Y+(dragBtnBox.Height/2), 20)
  625. if err != nil {
  626. s.log.Errorln("mouse.Move", err)
  627. }
  628. //鬆開滑鼠左鍵, 拖动完毕
  629. mouse.MustUp("left")
  630. if s.debugMode == true {
  631. //截圖保存
  632. page.MustScreenshot(global_value.DefDebugFolder(), "result.png")
  633. }
  634. }
  // HdListItem is one subtitle row scraped from a SubHD detail page.
  type HdListItem struct {
  	// Url is the href of the subtitle's download page as found in the listing.
  	Url string `json:"url"`
  	// BaseUrl is the SubHD root URL the listing was loaded from.
  	BaseUrl string `json:"baseUrl"`
  	// Title is the subtitle name shown in the listing.
  	Title      string `json:"title"`
  	Ext        string `json:"ext"`
  	AuthorInfo string `json:"authorInfo"`
  	Lang       string `json:"lang"`
  	Rate       string `json:"rate"`
  	// DownCount is the download counter parsed from the listing.
  	DownCount int `json:"downCount"`
  	// Season is the season number.
  	// NOTE(review): the original comment says the default is -1, but the zero value is 0 — confirm.
  	Season int
  	// Episode is the episode number (same note on the default as Season).
  	Episode int
  }
  647. //type HdContent struct {
  648. // Filename string `json:"filename"`
  649. // Ext string `json:"ext"`
  650. // Data []byte `json:"data"`
  651. //}
  // CSS selectors used on the SubHD download page.
  const (
  	// TCode matches the Tencent captcha element.
  	TCode = "#TencentCaptcha"
  	// btnClickCodeBtn matches the red button that starts verification or the download.
  	btnClickCodeBtn = "button.btn-danger"
  	// btnCommitCode matches the button that commits the verification code.
  	btnCommitCode = "button.btn-primary"
  )