// subhd.go (23 KB)

  1. package subhd
  2. import (
  3. "bytes"
  4. "fmt"
  5. "github.com/PuerkitoBio/goquery"
  6. "github.com/Tnze/go.num/v2/zh"
  7. "github.com/allanpk716/ChineseSubFinder/internal/logic/file_downloader"
  8. "github.com/allanpk716/ChineseSubFinder/internal/logic/task_queue"
  9. pkgcommon "github.com/allanpk716/ChineseSubFinder/internal/pkg/common"
  10. "github.com/allanpk716/ChineseSubFinder/internal/pkg/decode"
  11. "github.com/allanpk716/ChineseSubFinder/internal/pkg/global_value"
  12. "github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
  13. "github.com/allanpk716/ChineseSubFinder/internal/pkg/notify_center"
  14. "github.com/allanpk716/ChineseSubFinder/internal/pkg/rod_helper"
  15. "github.com/allanpk716/ChineseSubFinder/internal/pkg/settings"
  16. "github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub"
  17. "github.com/allanpk716/ChineseSubFinder/internal/pkg/url_connectedness_helper"
  18. common2 "github.com/allanpk716/ChineseSubFinder/internal/types/common"
  19. "github.com/allanpk716/ChineseSubFinder/internal/types/language"
  20. "github.com/allanpk716/ChineseSubFinder/internal/types/series"
  21. "github.com/allanpk716/ChineseSubFinder/internal/types/supplier"
  22. "github.com/go-rod/rod"
  23. "github.com/nfnt/resize"
  24. "github.com/sirupsen/logrus"
  25. "image/jpeg"
  26. "math"
  27. "net/url"
  28. "os"
  29. "path/filepath"
  30. "regexp"
  31. "strings"
  32. "time"
  33. )
  34. type Supplier struct {
  35. settings *settings.Settings
  36. log *logrus.Logger
  37. fileDownloader *file_downloader.FileDownloader
  38. topic int
  39. tt time.Duration
  40. debugMode bool
  41. isAlive bool
  42. }
  43. func NewSupplier(fileDownloader *file_downloader.FileDownloader) *Supplier {
  44. sup := Supplier{}
  45. sup.log = fileDownloader.Log
  46. sup.fileDownloader = fileDownloader
  47. sup.topic = common2.DownloadSubsPerSite
  48. sup.settings = fileDownloader.Settings
  49. if sup.settings.AdvancedSettings.Topic > 0 && sup.settings.AdvancedSettings.Topic != sup.topic {
  50. sup.topic = sup.settings.AdvancedSettings.Topic
  51. }
  52. sup.isAlive = true // 默认是可以使用的,如果 check 后,再调整状态
  53. // 默认超时是 2 * 60s,如果是调试模式则是 5 min
  54. sup.tt = common2.HTMLTimeOut
  55. sup.debugMode = sup.settings.AdvancedSettings.DebugMode
  56. if sup.debugMode == true {
  57. sup.tt = common2.OneMovieProcessTimeOut
  58. }
  59. return &sup
  60. }
  61. func (s *Supplier) CheckAlive() (bool, int64) {
  62. proxyStatus, proxySpeed, err := url_connectedness_helper.UrlConnectednessTest(s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl, s.settings.AdvancedSettings.ProxySettings.HttpProxyAddress)
  63. if err != nil {
  64. s.log.Errorln(s.GetSupplierName(), "CheckAlive", "Error", err)
  65. s.isAlive = false
  66. return false, 0
  67. }
  68. if proxyStatus == false {
  69. s.log.Errorln(s.GetSupplierName(), "CheckAlive", "Status != 200")
  70. s.isAlive = false
  71. return false, proxySpeed
  72. }
  73. s.isAlive = true
  74. return true, proxySpeed
  75. }
  76. func (s *Supplier) IsAlive() bool {
  77. return s.isAlive
  78. }
  79. func (s *Supplier) OverDailyDownloadLimit() bool {
  80. // 需要查询今天的限额
  81. count, err := task_queue.GetDailyDownloadCount(s.GetSupplierName(),
  82. my_util.GetPublicIP(s.settings.AdvancedSettings.TaskQueue, s.settings.AdvancedSettings.ProxySettings))
  83. if err != nil {
  84. s.log.Errorln(s.GetSupplierName(), "GetDailyDownloadCount", err)
  85. return true
  86. }
  87. if count > s.settings.AdvancedSettings.SuppliersSettings.Zimuku.DailyDownloadLimit {
  88. s.log.Warningln(s.GetSupplierName(), "DailyDownloadLimit:", s.settings.AdvancedSettings.SuppliersSettings.SubHD.DailyDownloadLimit, "Now Is:", count)
  89. return true
  90. }
  91. // 没有超限
  92. return false
  93. }
  94. func (s *Supplier) GetLogger() *logrus.Logger {
  95. return s.log
  96. }
  97. func (s *Supplier) GetSettings() *settings.Settings {
  98. return s.settings
  99. }
  100. func (s *Supplier) GetSupplierName() string {
  101. return common2.SubSiteSubHd
  102. }
  103. func (s *Supplier) GetSubListFromFile4Movie(filePath string) ([]supplier.SubInfo, error) {
  104. return s.getSubListFromFile4Movie(filePath)
  105. }
  106. func (s *Supplier) GetSubListFromFile4Series(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error) {
  107. var browser *rod.Browser
  108. // TODO 是用本地的 Browser 还是远程的,推荐是远程的
  109. browser, err := rod_helper.NewBrowserEx(true, s.settings)
  110. if err != nil {
  111. return nil, err
  112. }
  113. defer func() {
  114. _ = browser.Close()
  115. }()
  116. var subInfos = make([]supplier.SubInfo, 0)
  117. var subList = make([]HdListItem, 0)
  118. for value := range seriesInfo.NeedDlSeasonDict {
  119. // 第一级界面,找到影片的详情界面
  120. keyword := seriesInfo.Name + " 第" + zh.Uint64(value).String() + "季"
  121. detailPageUrl, err := s.step0(browser, keyword)
  122. if err != nil {
  123. s.log.Errorln("subhd step0", keyword)
  124. return nil, err
  125. }
  126. if detailPageUrl == "" {
  127. // 如果只是搜索不到,则继续换关键词
  128. s.log.Warning("subhd first search keyword", keyword, "not found")
  129. keyword = seriesInfo.Name
  130. s.log.Warning("subhd Retry", keyword)
  131. detailPageUrl, err = s.step0(browser, keyword)
  132. if err != nil {
  133. s.log.Errorln("subhd step0", keyword)
  134. return nil, err
  135. }
  136. }
  137. if detailPageUrl == "" {
  138. s.log.Warning("subhd search keyword", keyword, "not found")
  139. continue
  140. }
  141. // 列举字幕
  142. oneSubList, err := s.step1(browser, detailPageUrl, false)
  143. if err != nil {
  144. s.log.Errorln("subhd step1", keyword)
  145. return nil, err
  146. }
  147. subList = append(subList, oneSubList...)
  148. }
  149. // 与剧集需要下载的集 List 进行比较,找到需要下载的列表
  150. // 找到那些 Eps 需要下载字幕的
  151. subInfoNeedDownload := s.whichEpisodeNeedDownloadSub(seriesInfo, subList)
  152. // 下载字幕
  153. for i, item := range subInfoNeedDownload {
  154. pkgcommon.SetSubScanJobStatusScanSeriesSub(i+1, len(seriesInfo.NeedDlEpsKeyList),
  155. fmt.Sprintf("%v - S%v-E%v", item.Title, item.Season, item.Episode))
  156. bok, hdContent, err := s.step2Ex(browser, item.Url)
  157. if err != nil {
  158. s.log.Errorln("subhd step2Ex", err)
  159. continue
  160. }
  161. if bok == false {
  162. s.log.Errorln("subhd step2Ex return false")
  163. continue
  164. }
  165. oneSubInfo := supplier.NewSubInfo(s.GetSupplierName(),
  166. int64(i),
  167. hdContent.Filename,
  168. language.ChineseSimple,
  169. my_util.AddBaseUrl(s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl, item.Url),
  170. 0,
  171. 0, hdContent.Ext, hdContent.Data)
  172. oneSubInfo.Season = item.Season
  173. oneSubInfo.Episode = item.Episode
  174. subInfos = append(subInfos, *oneSubInfo)
  175. }
  176. return subInfos, nil
  177. }
  178. func (s *Supplier) GetSubListFromFile4Anime(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error) {
  179. panic("not implemented")
  180. }
  181. func (s *Supplier) getSubListFromFile4Movie(filePath string) ([]supplier.SubInfo, error) {
  182. /*
  183. 虽然是传入视频文件路径,但是其实需要读取对应的视频文件目录下的
  184. movie.xml 以及 *.nfo,找到 IMDB id
  185. 优先通过 IMDB id 去查找字幕
  186. 如果找不到,再靠文件名提取影片名称去查找
  187. */
  188. // 得到这个视频文件名中的信息
  189. info, _, err := decode.GetVideoInfoFromFileFullPath(filePath)
  190. if err != nil {
  191. return nil, err
  192. }
  193. // 找到这个视频文件,尝试得到 IMDB ID
  194. // 目前测试来看,加入 年 这个关键词去搜索,对 2020 年后的影片有利,因为网站有统一的详细页面了,而之前的,没有,会影响识别
  195. // 所以,year >= 2020 年,则可以多加一个关键词(年)去搜索影片
  196. imdbInfo, err := decode.GetImdbInfo4Movie(filePath)
  197. if err != nil {
  198. // 允许的错误,跳过,继续进行文件名的搜索
  199. s.log.Errorln("model.GetImdbInfo", err)
  200. }
  201. var subInfoList []supplier.SubInfo
  202. if imdbInfo.ImdbId != "" {
  203. // 先用 imdb id 找
  204. subInfoList, err = s.getSubListFromKeyword4Movie(imdbInfo.ImdbId)
  205. if err != nil {
  206. // 允许的错误,跳过,继续进行文件名的搜索
  207. s.log.Errorln(s.GetSupplierName(), "keyword:", imdbInfo.ImdbId)
  208. s.log.Errorln("getSubListFromKeyword4Movie", "IMDBID can not found sub", filePath, err)
  209. }
  210. // 如果有就优先返回
  211. if len(subInfoList) > 0 {
  212. return subInfoList, nil
  213. }
  214. }
  215. // 如果没有,那么就用文件名查找
  216. searchKeyword := my_util.VideoNameSearchKeywordMaker(s.log, info.Title, imdbInfo.Year)
  217. subInfoList, err = s.getSubListFromKeyword4Movie(searchKeyword)
  218. if err != nil {
  219. s.log.Errorln(s.GetSupplierName(), "keyword:", searchKeyword)
  220. return nil, err
  221. }
  222. return subInfoList, nil
  223. }
  224. func (s *Supplier) getSubListFromKeyword4Movie(keyword string) ([]supplier.SubInfo, error) {
  225. var browser *rod.Browser
  226. // TODO 是用本地的 Browser 还是远程的,推荐是远程的
  227. browser, err := rod_helper.NewBrowserEx(true, s.settings)
  228. if err != nil {
  229. return nil, err
  230. }
  231. defer func() {
  232. _ = browser.Close()
  233. }()
  234. var subInfos []supplier.SubInfo
  235. detailPageUrl, err := s.step0(browser, keyword)
  236. if err != nil {
  237. return nil, err
  238. }
  239. // 没有搜索到字幕
  240. if detailPageUrl == "" {
  241. return nil, nil
  242. }
  243. subList, err := s.step1(browser, detailPageUrl, true)
  244. if err != nil {
  245. return nil, err
  246. }
  247. for i, item := range subList {
  248. bok, hdContent, err := s.step2Ex(browser, item.Url)
  249. time.Sleep(time.Second)
  250. if err != nil {
  251. s.log.Errorln("subhd step2Ex", err)
  252. continue
  253. }
  254. if bok == false {
  255. s.log.Errorln("subhd step2Ex return false")
  256. continue
  257. }
  258. subInfos = append(subInfos, *supplier.NewSubInfo(s.GetSupplierName(), int64(i), hdContent.Filename, language.ChineseSimple,
  259. my_util.AddBaseUrl(s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl, item.Url),
  260. 0,
  261. 0,
  262. hdContent.Ext,
  263. hdContent.Data))
  264. }
  265. return subInfos, nil
  266. }
  267. func (s *Supplier) whichEpisodeNeedDownloadSub(seriesInfo *series.SeriesInfo, allSubList []HdListItem) []HdListItem {
  268. // 字幕很多,考虑效率,需要做成字典
  269. // key SxEx - SubInfos
  270. var allSubDict = make(map[string][]HdListItem)
  271. // 全季的字幕列表
  272. var oneSeasonSubDict = make(map[string][]HdListItem)
  273. for _, subInfo := range allSubList {
  274. _, season, episode, err := decode.GetSeasonAndEpisodeFromSubFileName(subInfo.Title)
  275. if err != nil {
  276. s.log.Errorln("whichEpisodeNeedDownloadSub.GetVideoInfoFromFileFullPath", subInfo.Title, err)
  277. continue
  278. }
  279. subInfo.Season = season
  280. subInfo.Episode = episode
  281. epsKey := my_util.GetEpisodeKeyName(season, episode)
  282. _, ok := allSubDict[epsKey]
  283. if ok == false {
  284. // 初始化
  285. allSubDict[epsKey] = make([]HdListItem, 0)
  286. if season != 0 && episode == 0 {
  287. oneSeasonSubDict[epsKey] = make([]HdListItem, 0)
  288. }
  289. }
  290. // 添加
  291. allSubDict[epsKey] = append(allSubDict[epsKey], subInfo)
  292. if season != 0 && episode == 0 {
  293. oneSeasonSubDict[epsKey] = append(oneSeasonSubDict[epsKey], subInfo)
  294. }
  295. }
  296. // 本地的视频列表,找到没有字幕的
  297. // 需要进行下载字幕的列表
  298. var subInfoNeedDownload = make([]HdListItem, 0)
  299. // 有那些 Eps 需要下载的,按 SxEx 反回 epsKey
  300. for epsKey, epsInfo := range seriesInfo.NeedDlEpsKeyList {
  301. // 从一堆字幕里面找合适的
  302. value, ok := allSubDict[epsKey]
  303. // 是否有
  304. if ok == true && len(value) > 0 {
  305. value[0].Season = epsInfo.Season
  306. value[0].Episode = epsInfo.Episode
  307. subInfoNeedDownload = append(subInfoNeedDownload, value[0])
  308. } else {
  309. s.log.Infoln("SubHD Not Find Sub can be download", epsInfo.Title, epsInfo.Season, epsInfo.Episode)
  310. }
  311. }
  312. // 全季的字幕列表,也拼进去,后面进行下载
  313. for _, infos := range oneSeasonSubDict {
  314. subInfoNeedDownload = append(subInfoNeedDownload, infos[0])
  315. }
  316. // 返回前,需要把每一个 Eps 的 Season Episode 信息填充到每个 SubInfo 中
  317. return subInfoNeedDownload
  318. }
  319. // step0 找到这个影片的详情列表
  320. func (s *Supplier) step0(browser *rod.Browser, keyword string) (string, error) {
  321. var err error
  322. defer func() {
  323. if err != nil {
  324. notify_center.Notify.Add("subhd_step0", err.Error())
  325. }
  326. }()
  327. result, page, err := rod_helper.HttpGetFromBrowser(browser, fmt.Sprintf(s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl+common2.SubSubHDSearchUrl, url.QueryEscape(keyword)), s.tt)
  328. if err != nil {
  329. return "", err
  330. }
  331. defer func() {
  332. _ = page.Close()
  333. }()
  334. // 是否有查找到的结果,至少要有结果。根据这里这样下面才能判断是分析失效了,还是就是没有结果而已
  335. re := regexp.MustCompile(`共\s*(\d+)\s*条`)
  336. matched := re.FindAllStringSubmatch(result, -1)
  337. if matched == nil || len(matched) < 1 {
  338. return "", common2.SubHDStep0SubCountElementNotFound
  339. }
  340. subCount, err := decode.GetNumber2int(matched[0][0])
  341. if err != nil {
  342. return "", err
  343. }
  344. // 如果所搜没有找到字幕,就要返回
  345. if subCount < 1 {
  346. return "", nil
  347. }
  348. // 这里是确认能继续分析的详细连接
  349. doc, err := goquery.NewDocumentFromReader(strings.NewReader(result))
  350. if err != nil {
  351. return "", err
  352. }
  353. imgSelection := doc.Find("img.rounded-start")
  354. _, ok := imgSelection.Attr("src")
  355. if ok == true {
  356. if len(imgSelection.Nodes) < 1 {
  357. return "", common2.SubHDStep0ImgParentLessThan1
  358. }
  359. step1Url := ""
  360. if imgSelection.Nodes[0].Parent.Data == "a" {
  361. // 第一个父级是不是超链接
  362. for _, attribute := range imgSelection.Nodes[0].Parent.Attr {
  363. if attribute.Key == "href" {
  364. step1Url = attribute.Val
  365. break
  366. }
  367. }
  368. } else if imgSelection.Nodes[0].Parent.Parent.Data == "a" {
  369. // 第二个父级是不是超链接
  370. for _, attribute := range imgSelection.Nodes[0].Parent.Parent.Attr {
  371. if attribute.Key == "href" {
  372. step1Url = attribute.Val
  373. break
  374. }
  375. }
  376. }
  377. if step1Url == "" {
  378. return "", common2.SubHDStep0HrefIsNull
  379. }
  380. return step1Url, nil
  381. } else {
  382. return "", common2.SubHDStep0HrefIsNull
  383. }
  384. }
  385. // step1 获取影片的详情字幕列表
  386. func (s *Supplier) step1(browser *rod.Browser, detailPageUrl string, isMovieOrSeries bool) ([]HdListItem, error) {
  387. var err error
  388. defer func() {
  389. if err != nil {
  390. notify_center.Notify.Add("subhd_step1", err.Error())
  391. }
  392. }()
  393. detailPageUrl = my_util.AddBaseUrl(s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl, detailPageUrl)
  394. result, page, err := rod_helper.HttpGetFromBrowser(browser, detailPageUrl, s.tt)
  395. if err != nil {
  396. return nil, err
  397. }
  398. defer func() {
  399. _ = page.Close()
  400. }()
  401. doc, err := goquery.NewDocumentFromReader(strings.NewReader(result))
  402. if err != nil {
  403. return nil, err
  404. }
  405. var lists []HdListItem
  406. const subTableKeyword = ".pt-2"
  407. const oneSubTrTitleKeyword = "a.link-dark"
  408. const oneSubTrDownloadCountKeyword = "div.px-3"
  409. const oneSubLangAndTypeKeyword = ".text-secondary"
  410. doc.Find(subTableKeyword).EachWithBreak(func(i int, tr *goquery.Selection) bool {
  411. if tr.Find(oneSubTrTitleKeyword).Size() == 0 {
  412. return true
  413. }
  414. // 文件的下载页面,还需要分析
  415. downUrl, exists := tr.Find(oneSubTrTitleKeyword).Eq(0).Attr("href")
  416. if !exists {
  417. return true
  418. }
  419. // 文件名
  420. title := strings.TrimSpace(tr.Find(oneSubTrTitleKeyword).Text())
  421. // 字幕类型
  422. insideSubType := tr.Find(oneSubLangAndTypeKeyword).Text()
  423. if sub_parser_hub.IsSubTypeWanted(insideSubType) == false {
  424. return true
  425. }
  426. // 下载的次数
  427. downCount, err := decode.GetNumber2int(tr.Find(oneSubTrDownloadCountKeyword).Eq(1).Text())
  428. if err != nil {
  429. return true
  430. }
  431. listItem := HdListItem{}
  432. listItem.Url = downUrl
  433. listItem.BaseUrl = s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl
  434. listItem.Title = title
  435. listItem.DownCount = downCount
  436. // 电影,就需要第一个
  437. // 连续剧,需要多个
  438. if isMovieOrSeries == true {
  439. if len(lists) >= s.topic {
  440. return false
  441. }
  442. }
  443. lists = append(lists, listItem)
  444. return true
  445. })
  446. return lists, nil
  447. }
  448. // step2Ex 下载字幕 过防水墙
  449. func (s *Supplier) step2Ex(browser *rod.Browser, subDownloadPageUrl string) (bool, *HdContent, error) {
  450. var err error
  451. defer func() {
  452. if err != nil {
  453. notify_center.Notify.Add("subhd_step2Ex", err.Error())
  454. }
  455. }()
  456. subDownloadPageUrl = my_util.AddBaseUrl(s.settings.AdvancedSettings.SuppliersSettings.SubHD.RootUrl, subDownloadPageUrl)
  457. _, page, err := rod_helper.HttpGetFromBrowser(browser, subDownloadPageUrl, s.tt)
  458. if err != nil {
  459. return false, nil, err
  460. }
  461. defer func() {
  462. _ = page.Close()
  463. }()
  464. // 需要先判断是否先要输入验证码,然后才到下载界面
  465. // 下载字幕
  466. bok, content, err := s.downloadSubFile(browser, page)
  467. if err != nil {
  468. return false, nil, err
  469. }
  470. if bok == false {
  471. return false, nil, nil
  472. }
  473. return true, content, nil
  474. }
  475. func (s *Supplier) downloadSubFile(browser *rod.Browser, page *rod.Page) (bool, *HdContent, error) {
  476. var err error
  477. var doc *goquery.Document
  478. downloadSuccess := false
  479. fileName := ""
  480. fileByte := []byte{0}
  481. err = rod.Try(func() {
  482. tmpDir := filepath.Join(global_value.DefTmpFolder(), "downloads")
  483. wait := browser.WaitDownload(tmpDir)
  484. getDownloadFile := func() ([]byte, string, error) {
  485. info := wait()
  486. downloadPath := filepath.Join(tmpDir, info.GUID)
  487. defer func() { _ = os.Remove(downloadPath) }()
  488. b, err := os.ReadFile(downloadPath)
  489. if err != nil {
  490. return nil, "", err
  491. }
  492. return b, info.SuggestedFilename, nil
  493. }
  494. // 初始化页面用于查询元素
  495. pString := page.MustHTML()
  496. doc, err = goquery.NewDocumentFromReader(strings.NewReader(pString))
  497. if err != nil {
  498. return
  499. }
  500. // 移除广告
  501. page.MustEval(`testgssdqw = function () { if (document.getElementById("tbkp")) {document.getElementById("tbkp").remove()}; }`)
  502. page.MustEval(`testgssdqw()`)
  503. // 点击“验证获取下载地址”
  504. clickCodeBtn := doc.Find(btnClickCodeBtn)
  505. if len(clickCodeBtn.Nodes) < 1 {
  506. return
  507. }
  508. element := page.MustElement(btnClickCodeBtn)
  509. BtnCodeText := element.MustText()
  510. if strings.Contains(BtnCodeText, "验证") == true {
  511. // 那么需要填写验证码
  512. element.MustClick()
  513. time.Sleep(time.Second * 2)
  514. // 填写“验证码”
  515. page.MustEval(`$("#gzhcode").attr("value","` + common2.SubhdCode + `");`)
  516. // 是否有“完成验证”按钮
  517. downBtn := doc.Find(btnCommitCode)
  518. if len(downBtn.Nodes) < 1 {
  519. return
  520. }
  521. element = page.MustElement(btnCommitCode)
  522. benCommit := element.MustText()
  523. if strings.Contains(benCommit, "验证") == false {
  524. s.log.Errorln("btn not found 完整验证")
  525. return
  526. }
  527. element.MustClick()
  528. time.Sleep(time.Second * 2)
  529. // 点击下载按钮
  530. page.MustElement(btnClickCodeBtn).MustClick()
  531. } else if strings.Contains(BtnCodeText, "下载") == true {
  532. // 直接可以下载
  533. element.MustClick()
  534. time.Sleep(time.Second * 2)
  535. } else {
  536. s.log.Errorln("btn not found 下载验证 or 下载")
  537. return
  538. }
  539. // 更新 page 的实例对应的 doc Content
  540. pString = page.MustHTML()
  541. doc, err = goquery.NewDocumentFromReader(strings.NewReader(pString))
  542. if err != nil {
  543. return
  544. }
  545. // 是否有腾讯的防水墙
  546. hasWaterWall := false
  547. waterWall := doc.Find(TCode)
  548. if len(waterWall.Nodes) >= 1 {
  549. hasWaterWall = true
  550. }
  551. s.log.Debugln("Need pass WaterWall", hasWaterWall)
  552. // 过墙
  553. if hasWaterWall == true {
  554. s.passWaterWall(page)
  555. }
  556. time.Sleep(time.Second * 2)
  557. fileByte, fileName, err = getDownloadFile()
  558. if err != nil {
  559. return
  560. }
  561. downloadSuccess = true
  562. })
  563. if err != nil {
  564. return false, nil, err
  565. }
  566. var hdContent HdContent
  567. hdContent.Filename = fileName
  568. hdContent.Ext = filepath.Ext(fileName)
  569. hdContent.Data = fileByte
  570. if downloadSuccess == false {
  571. return false, &hdContent, common2.SubHDStep2ExCannotFindDownloadBtn
  572. }
  573. // 下载成功需要统计到今天的次数中
  574. _, err = task_queue.AddDailyDownloadCount(s.GetSupplierName(),
  575. my_util.GetPublicIP(s.settings.AdvancedSettings.TaskQueue, s.settings.AdvancedSettings.ProxySettings))
  576. if err != nil {
  577. s.log.Warningln(s.GetSupplierName(), "getSubListFromFile.AddDailyDownloadCount", err)
  578. }
  579. return downloadSuccess, &hdContent, nil
  580. }
  581. func (s *Supplier) passWaterWall(page *rod.Page) {
  582. //等待驗證碼窗體載入
  583. page.MustElement("#tcaptcha_iframe").MustWaitLoad()
  584. //進入到iframe
  585. iframe := page.MustElement("#tcaptcha_iframe").MustFrame()
  586. //等待拖動條加載, 延遲500秒檢測變化, 以確認加載完畢
  587. iframe.MustElement("#tcaptcha_drag_button").MustWaitStable()
  588. //等待缺口圖像載入
  589. slideBgEl := iframe.MustElement("#slideBg").MustWaitLoad()
  590. slideBgEl = slideBgEl.MustWaitStable()
  591. //取得帶缺口圖像
  592. shadowbg := slideBgEl.MustResource()
  593. // 取得原始圖像
  594. src := slideBgEl.MustProperty("src")
  595. fullbg, _, err := my_util.DownFile(s.log, strings.Replace(src.String(), "img_index=1", "img_index=0", 1))
  596. if err != nil {
  597. panic(err)
  598. }
  599. //取得img展示的真實尺寸
  600. shape, err := slideBgEl.Shape()
  601. if err != nil {
  602. panic(err)
  603. }
  604. bgbox := shape.Box()
  605. height, width := uint(math.Round(bgbox.Height)), uint(math.Round(bgbox.Width))
  606. //裁剪圖像
  607. shadowbgImg, _ := jpeg.Decode(bytes.NewReader(shadowbg))
  608. shadowbgImg = resize.Resize(width, height, shadowbgImg, resize.Lanczos3)
  609. fullbgImg, _ := jpeg.Decode(bytes.NewReader(fullbg))
  610. fullbgImg = resize.Resize(width, height, fullbgImg, resize.Lanczos3)
  611. //啓始left,排除干擾部份,所以右移10個像素
  612. left := fullbgImg.Bounds().Min.X + 10
  613. //啓始top, 排除干擾部份, 所以下移10個像素
  614. top := fullbgImg.Bounds().Min.Y + 10
  615. //最大left, 排除干擾部份, 所以左移10個像素
  616. maxleft := fullbgImg.Bounds().Max.X - 10
  617. //最大top, 排除干擾部份, 所以上移10個像素
  618. maxtop := fullbgImg.Bounds().Max.Y - 10
  619. //rgb比较阈值, 超出此阈值及代表找到缺口位置
  620. threshold := 20
  621. //缺口偏移, 拖動按鈕初始會偏移27.5
  622. distance := -27.5
  623. //取絕對值方法
  624. abs := func(n int) int {
  625. if n < 0 {
  626. return -n
  627. }
  628. return n
  629. }
  630. search:
  631. for i := left; i <= maxleft; i++ {
  632. for j := top; j <= maxtop; j++ {
  633. colorAR, colorAG, colorAB, _ := fullbgImg.At(i, j).RGBA()
  634. colorBR, colorBG, colorBB, _ := shadowbgImg.At(i, j).RGBA()
  635. colorAR, colorAG, colorAB = colorAR>>8, colorAG>>8, colorAB>>8
  636. colorBR, colorBG, colorBB = colorBR>>8, colorBG>>8, colorBB>>8
  637. if abs(int(colorAR)-int(colorBR)) > threshold ||
  638. abs(int(colorAG)-int(colorBG)) > threshold ||
  639. abs(int(colorAB)-int(colorBB)) > threshold {
  640. distance += float64(i)
  641. s.log.Debugln("對比完畢, 偏移量:", distance)
  642. break search
  643. }
  644. }
  645. }
  646. //獲取拖動按鈕形狀
  647. dragBtnBox := iframe.MustElement("#tcaptcha_drag_thumb").MustShape().Box()
  648. //启用滑鼠功能
  649. mouse := page.Mouse
  650. //模擬滑鼠移動至拖動按鈕處, 右移3的原因: 拖動按鈕比滑塊圖大3個像素
  651. mouse.MustMove(dragBtnBox.X+3, dragBtnBox.Y+(dragBtnBox.Height/2))
  652. //按下滑鼠左鍵
  653. mouse.MustDown("left")
  654. //開始拖動
  655. err = mouse.Move(dragBtnBox.X+distance, dragBtnBox.Y+(dragBtnBox.Height/2), 20)
  656. if err != nil {
  657. s.log.Errorln("mouse.Move", err)
  658. }
  659. //鬆開滑鼠左鍵, 拖动完毕
  660. mouse.MustUp("left")
  661. if s.debugMode == true {
  662. //截圖保存
  663. page.MustScreenshot(global_value.DefDebugFolder(), "result.png")
  664. }
  665. }
  // HdListItem describes one subtitle entry listed on a SubHD detail page.
  type HdListItem struct {
  	// Url is the link to the subtitle's download page, as found in the listing.
  	Url string `json:"url"`
  	// BaseUrl is the SubHD root URL the entry was read from.
  	BaseUrl string `json:"baseUrl"`
  	// Title is the subtitle's display title.
  	Title      string `json:"title"`
  	Ext        string `json:"ext"`
  	AuthorInfo string `json:"authorInfo"`
  	Lang       string `json:"lang"`
  	Rate       string `json:"rate"`
  	// DownCount is the download count shown in the listing.
  	DownCount int `json:"downCount"`
  	// Season is the season number. NOTE(review): the original note says the
  	// default is -1, but nothing in this file sets it; the zero value is 0.
  	Season int
  	// Episode is the episode number; the same caveat as Season applies.
  	Episode int
  }
  // HdContent is a subtitle file downloaded from SubHD.
  type HdContent struct {
  	// Filename is the file name suggested by the download.
  	Filename string `json:"filename"`
  	// Ext is the extension of Filename, including the leading dot.
  	Ext string `json:"ext"`
  	// Data is the raw content of the downloaded file.
  	Data []byte `json:"data"`
  }
  // CSS selectors used on the SubHD download page.
  const (
  	// TCode selects the Tencent captcha element.
  	TCode = "#TencentCaptcha"
  	// btnClickCodeBtn selects the "verify" / "download" button.
  	btnClickCodeBtn = "button.btn-danger"
  	// btnCommitCode selects the button that submits the verification code.
  	btnCommitCode = "button.btn-primary"
  )