subhd.go 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735
  1. package subhd
  2. import (
  3. "bytes"
  4. "fmt"
  5. "github.com/PuerkitoBio/goquery"
  6. "github.com/Tnze/go.num/v2/zh"
  7. "github.com/allanpk716/ChineseSubFinder/internal/logic/task_queue"
  8. pkgcommon "github.com/allanpk716/ChineseSubFinder/internal/pkg/common"
  9. "github.com/allanpk716/ChineseSubFinder/internal/pkg/decode"
  10. "github.com/allanpk716/ChineseSubFinder/internal/pkg/global_value"
  11. "github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
  12. "github.com/allanpk716/ChineseSubFinder/internal/pkg/notify_center"
  13. "github.com/allanpk716/ChineseSubFinder/internal/pkg/rod_helper"
  14. "github.com/allanpk716/ChineseSubFinder/internal/pkg/settings"
  15. "github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub"
  16. "github.com/allanpk716/ChineseSubFinder/internal/pkg/url_connectedness_helper"
  17. common2 "github.com/allanpk716/ChineseSubFinder/internal/types/common"
  18. "github.com/allanpk716/ChineseSubFinder/internal/types/language"
  19. "github.com/allanpk716/ChineseSubFinder/internal/types/series"
  20. "github.com/allanpk716/ChineseSubFinder/internal/types/supplier"
  21. "github.com/go-rod/rod"
  22. "github.com/nfnt/resize"
  23. "github.com/sirupsen/logrus"
  24. "image/jpeg"
  25. "math"
  26. "net/url"
  27. "os"
  28. "path/filepath"
  29. "regexp"
  30. "strings"
  31. "time"
  32. )
  33. type Supplier struct {
  34. settings *settings.Settings
  35. log *logrus.Logger
  36. topic int
  37. tt time.Duration
  38. debugMode bool
  39. isAlive bool
  40. }
  41. func NewSupplier(_settings *settings.Settings, _logger *logrus.Logger) *Supplier {
  42. sup := Supplier{}
  43. sup.log = _logger
  44. sup.topic = common2.DownloadSubsPerSite
  45. sup.settings = _settings
  46. if sup.settings.AdvancedSettings.Topic > 0 && sup.settings.AdvancedSettings.Topic != sup.topic {
  47. sup.topic = sup.settings.AdvancedSettings.Topic
  48. }
  49. sup.isAlive = true // 默认是可以使用的,如果 check 后,再调整状态
  50. // 默认超时是 2 * 60s,如果是调试模式则是 5 min
  51. sup.tt = common2.HTMLTimeOut
  52. sup.debugMode = sup.settings.AdvancedSettings.DebugMode
  53. if sup.debugMode == true {
  54. sup.tt = common2.OneMovieProcessTimeOut
  55. }
  56. return &sup
  57. }
  58. func (s *Supplier) CheckAlive() (bool, int64) {
  59. proxyStatus, proxySpeed, err := url_connectedness_helper.UrlConnectednessTest(s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl, s.settings.AdvancedSettings.ProxySettings.HttpProxyAddress)
  60. if err != nil {
  61. s.log.Errorln(s.GetSupplierName(), "CheckAlive", "Error", err)
  62. s.isAlive = false
  63. return false, 0
  64. }
  65. if proxyStatus == false {
  66. s.log.Errorln(s.GetSupplierName(), "CheckAlive", "Status != 200")
  67. s.isAlive = false
  68. return false, proxySpeed
  69. }
  70. s.isAlive = true
  71. return true, proxySpeed
  72. }
  73. func (s *Supplier) IsAlive() bool {
  74. return s.isAlive
  75. }
  76. func (s *Supplier) OverDailyDownloadLimit() bool {
  77. // 需要查询今天的限额
  78. count, err := task_queue.GetDailyDownloadCount(s.GetSupplierName(),
  79. my_util.GetPublicIP(s.settings.AdvancedSettings.TaskQueue, s.settings.AdvancedSettings.ProxySettings))
  80. if err != nil {
  81. s.log.Errorln(s.GetSupplierName(), "GetDailyDownloadCount", err)
  82. return true
  83. }
  84. if count > s.settings.AdvancedSettings.SuppliersSettings.Zimuku.DailyDownloadLimit {
  85. s.log.Warningln(s.GetSupplierName(), "DailyDownloadLimit:", s.settings.AdvancedSettings.SuppliersSettings.SubHD.DailyDownloadLimit, "Now Is:", count)
  86. return true
  87. }
  88. // 没有超限
  89. return false
  90. }
  91. func (s *Supplier) GetSupplierName() string {
  92. return common2.SubSiteSubHd
  93. }
  94. func (s *Supplier) GetSubListFromFile4Movie(filePath string) ([]supplier.SubInfo, error) {
  95. return s.getSubListFromFile4Movie(filePath)
  96. }
  97. func (s *Supplier) GetSubListFromFile4Series(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error) {
  98. var browser *rod.Browser
  99. // TODO 是用本地的 Browser 还是远程的,推荐是远程的
  100. browser, err := rod_helper.NewBrowserEx(true, s.settings)
  101. if err != nil {
  102. return nil, err
  103. }
  104. defer func() {
  105. _ = browser.Close()
  106. }()
  107. var subInfos = make([]supplier.SubInfo, 0)
  108. var subList = make([]HdListItem, 0)
  109. for value := range seriesInfo.NeedDlSeasonDict {
  110. // 第一级界面,找到影片的详情界面
  111. keyword := seriesInfo.Name + " 第" + zh.Uint64(value).String() + "季"
  112. detailPageUrl, err := s.step0(browser, keyword)
  113. if err != nil {
  114. s.log.Errorln("subhd step0", keyword)
  115. return nil, err
  116. }
  117. if detailPageUrl == "" {
  118. // 如果只是搜索不到,则继续换关键词
  119. s.log.Warning("subhd first search keyword", keyword, "not found")
  120. keyword = seriesInfo.Name
  121. s.log.Warning("subhd Retry", keyword)
  122. detailPageUrl, err = s.step0(browser, keyword)
  123. if err != nil {
  124. s.log.Errorln("subhd step0", keyword)
  125. return nil, err
  126. }
  127. }
  128. if detailPageUrl == "" {
  129. s.log.Warning("subhd search keyword", keyword, "not found")
  130. continue
  131. }
  132. // 列举字幕
  133. oneSubList, err := s.step1(browser, detailPageUrl, false)
  134. if err != nil {
  135. s.log.Errorln("subhd step1", keyword)
  136. return nil, err
  137. }
  138. subList = append(subList, oneSubList...)
  139. }
  140. // 与剧集需要下载的集 List 进行比较,找到需要下载的列表
  141. // 找到那些 Eps 需要下载字幕的
  142. subInfoNeedDownload := s.whichEpisodeNeedDownloadSub(seriesInfo, subList)
  143. // 下载字幕
  144. for i, item := range subInfoNeedDownload {
  145. pkgcommon.SetSubScanJobStatusScanSeriesSub(i+1, len(seriesInfo.NeedDlEpsKeyList),
  146. fmt.Sprintf("%v - S%v-E%v", item.Title, item.Season, item.Episode))
  147. bok, hdContent, err := s.step2Ex(browser, item.Url)
  148. if err != nil {
  149. s.log.Errorln("subhd step2Ex", err)
  150. continue
  151. }
  152. if bok == false {
  153. s.log.Errorln("subhd step2Ex return false")
  154. continue
  155. }
  156. oneSubInfo := supplier.NewSubInfo(s.GetSupplierName(),
  157. int64(i),
  158. hdContent.Filename,
  159. language.ChineseSimple,
  160. my_util.AddBaseUrl(s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl, item.Url),
  161. 0,
  162. 0, hdContent.Ext, hdContent.Data)
  163. oneSubInfo.Season = item.Season
  164. oneSubInfo.Episode = item.Episode
  165. subInfos = append(subInfos, *oneSubInfo)
  166. }
  167. return subInfos, nil
  168. }
  169. func (s *Supplier) GetSubListFromFile4Anime(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error) {
  170. panic("not implemented")
  171. }
  172. func (s *Supplier) getSubListFromFile4Movie(filePath string) ([]supplier.SubInfo, error) {
  173. /*
  174. 虽然是传入视频文件路径,但是其实需要读取对应的视频文件目录下的
  175. movie.xml 以及 *.nfo,找到 IMDB id
  176. 优先通过 IMDB id 去查找字幕
  177. 如果找不到,再靠文件名提取影片名称去查找
  178. */
  179. // 得到这个视频文件名中的信息
  180. info, _, err := decode.GetVideoInfoFromFileFullPath(filePath)
  181. if err != nil {
  182. return nil, err
  183. }
  184. // 找到这个视频文件,尝试得到 IMDB ID
  185. // 目前测试来看,加入 年 这个关键词去搜索,对 2020 年后的影片有利,因为网站有统一的详细页面了,而之前的,没有,会影响识别
  186. // 所以,year >= 2020 年,则可以多加一个关键词(年)去搜索影片
  187. imdbInfo, err := decode.GetImdbInfo4Movie(filePath)
  188. if err != nil {
  189. // 允许的错误,跳过,继续进行文件名的搜索
  190. s.log.Errorln("model.GetImdbInfo", err)
  191. }
  192. var subInfoList []supplier.SubInfo
  193. if imdbInfo.ImdbId != "" {
  194. // 先用 imdb id 找
  195. subInfoList, err = s.getSubListFromKeyword4Movie(imdbInfo.ImdbId)
  196. if err != nil {
  197. // 允许的错误,跳过,继续进行文件名的搜索
  198. s.log.Errorln(s.GetSupplierName(), "keyword:", imdbInfo.ImdbId)
  199. s.log.Errorln("getSubListFromKeyword4Movie", "IMDBID can not found sub", filePath, err)
  200. }
  201. // 如果有就优先返回
  202. if len(subInfoList) > 0 {
  203. return subInfoList, nil
  204. }
  205. }
  206. // 如果没有,那么就用文件名查找
  207. searchKeyword := my_util.VideoNameSearchKeywordMaker(s.log, info.Title, imdbInfo.Year)
  208. subInfoList, err = s.getSubListFromKeyword4Movie(searchKeyword)
  209. if err != nil {
  210. s.log.Errorln(s.GetSupplierName(), "keyword:", searchKeyword)
  211. return nil, err
  212. }
  213. return subInfoList, nil
  214. }
  215. func (s *Supplier) getSubListFromKeyword4Movie(keyword string) ([]supplier.SubInfo, error) {
  216. var browser *rod.Browser
  217. // TODO 是用本地的 Browser 还是远程的,推荐是远程的
  218. browser, err := rod_helper.NewBrowserEx(true, s.settings)
  219. if err != nil {
  220. return nil, err
  221. }
  222. defer func() {
  223. _ = browser.Close()
  224. }()
  225. var subInfos []supplier.SubInfo
  226. detailPageUrl, err := s.step0(browser, keyword)
  227. if err != nil {
  228. return nil, err
  229. }
  230. // 没有搜索到字幕
  231. if detailPageUrl == "" {
  232. return nil, nil
  233. }
  234. subList, err := s.step1(browser, detailPageUrl, true)
  235. if err != nil {
  236. return nil, err
  237. }
  238. for i, item := range subList {
  239. bok, hdContent, err := s.step2Ex(browser, item.Url)
  240. time.Sleep(time.Second)
  241. if err != nil {
  242. s.log.Errorln("subhd step2Ex", err)
  243. continue
  244. }
  245. if bok == false {
  246. s.log.Errorln("subhd step2Ex return false")
  247. continue
  248. }
  249. subInfos = append(subInfos, *supplier.NewSubInfo(s.GetSupplierName(), int64(i), hdContent.Filename, language.ChineseSimple,
  250. my_util.AddBaseUrl(s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl, item.Url),
  251. 0,
  252. 0,
  253. hdContent.Ext,
  254. hdContent.Data))
  255. }
  256. return subInfos, nil
  257. }
  258. func (s *Supplier) whichEpisodeNeedDownloadSub(seriesInfo *series.SeriesInfo, allSubList []HdListItem) []HdListItem {
  259. // 字幕很多,考虑效率,需要做成字典
  260. // key SxEx - SubInfos
  261. var allSubDict = make(map[string][]HdListItem)
  262. // 全季的字幕列表
  263. var oneSeasonSubDict = make(map[string][]HdListItem)
  264. for _, subInfo := range allSubList {
  265. _, season, episode, err := decode.GetSeasonAndEpisodeFromSubFileName(subInfo.Title)
  266. if err != nil {
  267. s.log.Errorln("whichEpisodeNeedDownloadSub.GetVideoInfoFromFileFullPath", subInfo.Title, err)
  268. continue
  269. }
  270. subInfo.Season = season
  271. subInfo.Episode = episode
  272. epsKey := my_util.GetEpisodeKeyName(season, episode)
  273. _, ok := allSubDict[epsKey]
  274. if ok == false {
  275. // 初始化
  276. allSubDict[epsKey] = make([]HdListItem, 0)
  277. if season != 0 && episode == 0 {
  278. oneSeasonSubDict[epsKey] = make([]HdListItem, 0)
  279. }
  280. }
  281. // 添加
  282. allSubDict[epsKey] = append(allSubDict[epsKey], subInfo)
  283. if season != 0 && episode == 0 {
  284. oneSeasonSubDict[epsKey] = append(oneSeasonSubDict[epsKey], subInfo)
  285. }
  286. }
  287. // 本地的视频列表,找到没有字幕的
  288. // 需要进行下载字幕的列表
  289. var subInfoNeedDownload = make([]HdListItem, 0)
  290. // 有那些 Eps 需要下载的,按 SxEx 反回 epsKey
  291. for epsKey, epsInfo := range seriesInfo.NeedDlEpsKeyList {
  292. // 从一堆字幕里面找合适的
  293. value, ok := allSubDict[epsKey]
  294. // 是否有
  295. if ok == true && len(value) > 0 {
  296. value[0].Season = epsInfo.Season
  297. value[0].Episode = epsInfo.Episode
  298. subInfoNeedDownload = append(subInfoNeedDownload, value[0])
  299. } else {
  300. s.log.Infoln("SubHD Not Find Sub can be download", epsInfo.Title, epsInfo.Season, epsInfo.Episode)
  301. }
  302. }
  303. // 全季的字幕列表,也拼进去,后面进行下载
  304. for _, infos := range oneSeasonSubDict {
  305. subInfoNeedDownload = append(subInfoNeedDownload, infos[0])
  306. }
  307. // 返回前,需要把每一个 Eps 的 Season Episode 信息填充到每个 SubInfo 中
  308. return subInfoNeedDownload
  309. }
  310. // step0 找到这个影片的详情列表
  311. func (s *Supplier) step0(browser *rod.Browser, keyword string) (string, error) {
  312. var err error
  313. defer func() {
  314. if err != nil {
  315. notify_center.Notify.Add("subhd_step0", err.Error())
  316. }
  317. }()
  318. result, page, err := rod_helper.HttpGetFromBrowser(browser, fmt.Sprintf(s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl+common2.SubSubHDSearchUrl, url.QueryEscape(keyword)), s.tt)
  319. if err != nil {
  320. return "", err
  321. }
  322. defer func() {
  323. _ = page.Close()
  324. }()
  325. // 是否有查找到的结果,至少要有结果。根据这里这样下面才能判断是分析失效了,还是就是没有结果而已
  326. re := regexp.MustCompile(`共\s*(\d+)\s*条`)
  327. matched := re.FindAllStringSubmatch(result, -1)
  328. if matched == nil || len(matched) < 1 {
  329. return "", common2.SubHDStep0SubCountElementNotFound
  330. }
  331. subCount, err := decode.GetNumber2int(matched[0][0])
  332. if err != nil {
  333. return "", err
  334. }
  335. // 如果所搜没有找到字幕,就要返回
  336. if subCount < 1 {
  337. return "", nil
  338. }
  339. // 这里是确认能继续分析的详细连接
  340. doc, err := goquery.NewDocumentFromReader(strings.NewReader(result))
  341. if err != nil {
  342. return "", err
  343. }
  344. imgSelection := doc.Find("img.rounded-start")
  345. _, ok := imgSelection.Attr("src")
  346. if ok == true {
  347. if len(imgSelection.Nodes) < 1 {
  348. return "", common2.SubHDStep0ImgParentLessThan1
  349. }
  350. step1Url := ""
  351. if imgSelection.Nodes[0].Parent.Data == "a" {
  352. // 第一个父级是不是超链接
  353. for _, attribute := range imgSelection.Nodes[0].Parent.Attr {
  354. if attribute.Key == "href" {
  355. step1Url = attribute.Val
  356. break
  357. }
  358. }
  359. } else if imgSelection.Nodes[0].Parent.Parent.Data == "a" {
  360. // 第二个父级是不是超链接
  361. for _, attribute := range imgSelection.Nodes[0].Parent.Parent.Attr {
  362. if attribute.Key == "href" {
  363. step1Url = attribute.Val
  364. break
  365. }
  366. }
  367. }
  368. if step1Url == "" {
  369. return "", common2.SubHDStep0HrefIsNull
  370. }
  371. return step1Url, nil
  372. } else {
  373. return "", common2.SubHDStep0HrefIsNull
  374. }
  375. }
  376. // step1 获取影片的详情字幕列表
  377. func (s *Supplier) step1(browser *rod.Browser, detailPageUrl string, isMovieOrSeries bool) ([]HdListItem, error) {
  378. var err error
  379. defer func() {
  380. if err != nil {
  381. notify_center.Notify.Add("subhd_step1", err.Error())
  382. }
  383. }()
  384. detailPageUrl = my_util.AddBaseUrl(s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl, detailPageUrl)
  385. result, page, err := rod_helper.HttpGetFromBrowser(browser, detailPageUrl, s.tt)
  386. if err != nil {
  387. return nil, err
  388. }
  389. defer func() {
  390. _ = page.Close()
  391. }()
  392. doc, err := goquery.NewDocumentFromReader(strings.NewReader(result))
  393. if err != nil {
  394. return nil, err
  395. }
  396. var lists []HdListItem
  397. const subTableKeyword = ".pt-2"
  398. const oneSubTrTitleKeyword = "a.link-dark"
  399. const oneSubTrDownloadCountKeyword = "div.px-3"
  400. const oneSubLangAndTypeKeyword = ".text-secondary"
  401. doc.Find(subTableKeyword).EachWithBreak(func(i int, tr *goquery.Selection) bool {
  402. if tr.Find(oneSubTrTitleKeyword).Size() == 0 {
  403. return true
  404. }
  405. // 文件的下载页面,还需要分析
  406. downUrl, exists := tr.Find(oneSubTrTitleKeyword).Eq(0).Attr("href")
  407. if !exists {
  408. return true
  409. }
  410. // 文件名
  411. title := strings.TrimSpace(tr.Find(oneSubTrTitleKeyword).Text())
  412. // 字幕类型
  413. insideSubType := tr.Find(oneSubLangAndTypeKeyword).Text()
  414. if sub_parser_hub.IsSubTypeWanted(insideSubType) == false {
  415. return true
  416. }
  417. // 下载的次数
  418. downCount, err := decode.GetNumber2int(tr.Find(oneSubTrDownloadCountKeyword).Eq(1).Text())
  419. if err != nil {
  420. return true
  421. }
  422. listItem := HdListItem{}
  423. listItem.Url = downUrl
  424. listItem.BaseUrl = s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl
  425. listItem.Title = title
  426. listItem.DownCount = downCount
  427. // 电影,就需要第一个
  428. // 连续剧,需要多个
  429. if isMovieOrSeries == true {
  430. if len(lists) >= s.topic {
  431. return false
  432. }
  433. }
  434. lists = append(lists, listItem)
  435. return true
  436. })
  437. return lists, nil
  438. }
  439. // step2Ex 下载字幕 过防水墙
  440. func (s *Supplier) step2Ex(browser *rod.Browser, subDownloadPageUrl string) (bool, *HdContent, error) {
  441. var err error
  442. defer func() {
  443. if err != nil {
  444. notify_center.Notify.Add("subhd_step2Ex", err.Error())
  445. }
  446. }()
  447. subDownloadPageUrl = my_util.AddBaseUrl(s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl, subDownloadPageUrl)
  448. _, page, err := rod_helper.HttpGetFromBrowser(browser, subDownloadPageUrl, s.tt)
  449. if err != nil {
  450. return false, nil, err
  451. }
  452. defer func() {
  453. _ = page.Close()
  454. }()
  455. // 需要先判断是否先要输入验证码,然后才到下载界面
  456. // 下载字幕
  457. bok, content, err := s.downloadSubFile(browser, page)
  458. if err != nil {
  459. return false, nil, err
  460. }
  461. if bok == false {
  462. return false, nil, nil
  463. }
  464. return true, content, nil
  465. }
  466. func (s *Supplier) downloadSubFile(browser *rod.Browser, page *rod.Page) (bool, *HdContent, error) {
  467. var err error
  468. var doc *goquery.Document
  469. downloadSuccess := false
  470. fileName := ""
  471. fileByte := []byte{0}
  472. err = rod.Try(func() {
  473. tmpDir := filepath.Join(global_value.DefTmpFolder(), "downloads")
  474. wait := browser.WaitDownload(tmpDir)
  475. getDownloadFile := func() ([]byte, string, error) {
  476. info := wait()
  477. downloadPath := filepath.Join(tmpDir, info.GUID)
  478. defer func() { _ = os.Remove(downloadPath) }()
  479. b, err := os.ReadFile(downloadPath)
  480. if err != nil {
  481. return nil, "", err
  482. }
  483. return b, info.SuggestedFilename, nil
  484. }
  485. // 初始化页面用于查询元素
  486. pString := page.MustHTML()
  487. doc, err = goquery.NewDocumentFromReader(strings.NewReader(pString))
  488. if err != nil {
  489. return
  490. }
  491. // 移除广告
  492. page.MustEval(`testgssdqw = function () { if (document.getElementById("tbkp")) {document.getElementById("tbkp").remove()}; }`)
  493. page.MustEval(`testgssdqw()`)
  494. // 点击“验证获取下载地址”
  495. clickCodeBtn := doc.Find(btnClickCodeBtn)
  496. if len(clickCodeBtn.Nodes) < 1 {
  497. return
  498. }
  499. element := page.MustElement(btnClickCodeBtn)
  500. BtnCodeText := element.MustText()
  501. if strings.Contains(BtnCodeText, "验证") == true {
  502. // 那么需要填写验证码
  503. element.MustClick()
  504. time.Sleep(time.Second * 2)
  505. // 填写“验证码”
  506. page.MustEval(`$("#gzhcode").attr("value","` + common2.SubhdCode + `");`)
  507. // 是否有“完成验证”按钮
  508. downBtn := doc.Find(btnCommitCode)
  509. if len(downBtn.Nodes) < 1 {
  510. return
  511. }
  512. element = page.MustElement(btnCommitCode)
  513. benCommit := element.MustText()
  514. if strings.Contains(benCommit, "验证") == false {
  515. s.log.Errorln("btn not found 完整验证")
  516. return
  517. }
  518. element.MustClick()
  519. time.Sleep(time.Second * 2)
  520. // 点击下载按钮
  521. page.MustElement(btnClickCodeBtn).MustClick()
  522. } else if strings.Contains(BtnCodeText, "下载") == true {
  523. // 直接可以下载
  524. element.MustClick()
  525. time.Sleep(time.Second * 2)
  526. } else {
  527. s.log.Errorln("btn not found 下载验证 or 下载")
  528. return
  529. }
  530. // 更新 page 的实例对应的 doc Content
  531. pString = page.MustHTML()
  532. doc, err = goquery.NewDocumentFromReader(strings.NewReader(pString))
  533. if err != nil {
  534. return
  535. }
  536. // 是否有腾讯的防水墙
  537. hasWaterWall := false
  538. waterWall := doc.Find(TCode)
  539. if len(waterWall.Nodes) >= 1 {
  540. hasWaterWall = true
  541. }
  542. s.log.Debugln("Need pass WaterWall", hasWaterWall)
  543. // 过墙
  544. if hasWaterWall == true {
  545. s.passWaterWall(page)
  546. }
  547. time.Sleep(time.Second * 2)
  548. fileByte, fileName, err = getDownloadFile()
  549. if err != nil {
  550. return
  551. }
  552. downloadSuccess = true
  553. })
  554. if err != nil {
  555. return false, nil, err
  556. }
  557. var hdContent HdContent
  558. hdContent.Filename = fileName
  559. hdContent.Ext = filepath.Ext(fileName)
  560. hdContent.Data = fileByte
  561. if downloadSuccess == false {
  562. return false, &hdContent, common2.SubHDStep2ExCannotFindDownloadBtn
  563. }
  564. // 下载成功需要统计到今天的次数中
  565. _, err = task_queue.AddDailyDownloadCount(s.GetSupplierName(),
  566. my_util.GetPublicIP(s.settings.AdvancedSettings.TaskQueue, s.settings.AdvancedSettings.ProxySettings))
  567. if err != nil {
  568. s.log.Warningln(s.GetSupplierName(), "getSubListFromFile.AddDailyDownloadCount", err)
  569. }
  570. return downloadSuccess, &hdContent, nil
  571. }
  572. func (s *Supplier) passWaterWall(page *rod.Page) {
  573. //等待驗證碼窗體載入
  574. page.MustElement("#tcaptcha_iframe").MustWaitLoad()
  575. //進入到iframe
  576. iframe := page.MustElement("#tcaptcha_iframe").MustFrame()
  577. //等待拖動條加載, 延遲500秒檢測變化, 以確認加載完畢
  578. iframe.MustElement("#tcaptcha_drag_button").MustWaitStable()
  579. //等待缺口圖像載入
  580. slideBgEl := iframe.MustElement("#slideBg").MustWaitLoad()
  581. slideBgEl = slideBgEl.MustWaitStable()
  582. //取得帶缺口圖像
  583. shadowbg := slideBgEl.MustResource()
  584. // 取得原始圖像
  585. src := slideBgEl.MustProperty("src")
  586. fullbg, _, err := my_util.DownFile(s.log, strings.Replace(src.String(), "img_index=1", "img_index=0", 1))
  587. if err != nil {
  588. panic(err)
  589. }
  590. //取得img展示的真實尺寸
  591. shape, err := slideBgEl.Shape()
  592. if err != nil {
  593. panic(err)
  594. }
  595. bgbox := shape.Box()
  596. height, width := uint(math.Round(bgbox.Height)), uint(math.Round(bgbox.Width))
  597. //裁剪圖像
  598. shadowbgImg, _ := jpeg.Decode(bytes.NewReader(shadowbg))
  599. shadowbgImg = resize.Resize(width, height, shadowbgImg, resize.Lanczos3)
  600. fullbgImg, _ := jpeg.Decode(bytes.NewReader(fullbg))
  601. fullbgImg = resize.Resize(width, height, fullbgImg, resize.Lanczos3)
  602. //啓始left,排除干擾部份,所以右移10個像素
  603. left := fullbgImg.Bounds().Min.X + 10
  604. //啓始top, 排除干擾部份, 所以下移10個像素
  605. top := fullbgImg.Bounds().Min.Y + 10
  606. //最大left, 排除干擾部份, 所以左移10個像素
  607. maxleft := fullbgImg.Bounds().Max.X - 10
  608. //最大top, 排除干擾部份, 所以上移10個像素
  609. maxtop := fullbgImg.Bounds().Max.Y - 10
  610. //rgb比较阈值, 超出此阈值及代表找到缺口位置
  611. threshold := 20
  612. //缺口偏移, 拖動按鈕初始會偏移27.5
  613. distance := -27.5
  614. //取絕對值方法
  615. abs := func(n int) int {
  616. if n < 0 {
  617. return -n
  618. }
  619. return n
  620. }
  621. search:
  622. for i := left; i <= maxleft; i++ {
  623. for j := top; j <= maxtop; j++ {
  624. colorAR, colorAG, colorAB, _ := fullbgImg.At(i, j).RGBA()
  625. colorBR, colorBG, colorBB, _ := shadowbgImg.At(i, j).RGBA()
  626. colorAR, colorAG, colorAB = colorAR>>8, colorAG>>8, colorAB>>8
  627. colorBR, colorBG, colorBB = colorBR>>8, colorBG>>8, colorBB>>8
  628. if abs(int(colorAR)-int(colorBR)) > threshold ||
  629. abs(int(colorAG)-int(colorBG)) > threshold ||
  630. abs(int(colorAB)-int(colorBB)) > threshold {
  631. distance += float64(i)
  632. s.log.Debugln("對比完畢, 偏移量:", distance)
  633. break search
  634. }
  635. }
  636. }
  637. //獲取拖動按鈕形狀
  638. dragBtnBox := iframe.MustElement("#tcaptcha_drag_thumb").MustShape().Box()
  639. //启用滑鼠功能
  640. mouse := page.Mouse
  641. //模擬滑鼠移動至拖動按鈕處, 右移3的原因: 拖動按鈕比滑塊圖大3個像素
  642. mouse.MustMove(dragBtnBox.X+3, dragBtnBox.Y+(dragBtnBox.Height/2))
  643. //按下滑鼠左鍵
  644. mouse.MustDown("left")
  645. //開始拖動
  646. err = mouse.Move(dragBtnBox.X+distance, dragBtnBox.Y+(dragBtnBox.Height/2), 20)
  647. if err != nil {
  648. s.log.Errorln("mouse.Move", err)
  649. }
  650. //鬆開滑鼠左鍵, 拖动完毕
  651. mouse.MustUp("left")
  652. if s.debugMode == true {
  653. //截圖保存
  654. page.MustScreenshot(global_value.DefDebugFolder(), "result.png")
  655. }
  656. }
  // HdListItem describes one subtitle entry listed on a SubHD detail page.
  // Within this file only Url, BaseUrl, Title, DownCount, Season and Episode
  // are filled in.
  type HdListItem struct {
  	// Url is the link to the subtitle's download page, as found in the listing.
  	Url string `json:"url"`
  	// BaseUrl is the SubHD root URL in effect when the entry was listed.
  	BaseUrl string `json:"baseUrl"`
  	// Title is the trimmed text of the entry's title link.
  	Title      string `json:"title"`
  	Ext        string `json:"ext"`
  	AuthorInfo string `json:"authorInfo"`
  	Lang       string `json:"lang"`
  	Rate       string `json:"rate"`
  	// DownCount is the download counter parsed from the listing row.
  	DownCount int `json:"downCount"`
  	// Season number.
  	// NOTE(review): the original comment says the default is -1, but nothing
  	// in this file sets that.
  	Season int
  	// Episode number; the same note about a -1 default applies.
  	Episode int
  }
  // HdContent carries one downloaded subtitle file.
  type HdContent struct {
  	// Filename is the file name the browser suggested for the download.
  	Filename string `json:"filename"`
  	// Ext is the extension of Filename, as returned by filepath.Ext.
  	Ext string `json:"ext"`
  	// Data holds the raw bytes of the downloaded file.
  	Data []byte `json:"data"`
  }
  // CSS selectors used while driving the SubHD download page.
  const (
  	// TCode matches the Tencent captcha element.
  	TCode = "#TencentCaptcha"
  	// btnClickCodeBtn matches the verify/download button.
  	btnClickCodeBtn = "button.btn-danger"
  	// btnCommitCode matches the button that submits the verification code.
  	btnCommitCode = "button.btn-primary"
  )