a4k.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. package a4k
  2. import (
  3. "errors"
  4. "fmt"
  5. "github.com/Tnze/go.num/v2/zh"
  6. "github.com/allanpk716/ChineseSubFinder/internal/pkg/logic/file_downloader"
  7. "github.com/allanpk716/ChineseSubFinder/internal/pkg/mix_media_info"
  8. "github.com/go-resty/resty/v2"
  9. "github.com/jinzhu/now"
  10. "strconv"
  11. "strings"
  12. "time"
  13. "github.com/allanpk716/ChineseSubFinder/internal/pkg/decode"
  14. "github.com/PuerkitoBio/goquery"
  15. "github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
  16. "github.com/allanpk716/ChineseSubFinder/internal/pkg/settings"
  17. common2 "github.com/allanpk716/ChineseSubFinder/internal/types/common"
  18. "github.com/allanpk716/ChineseSubFinder/internal/types/series"
  19. "github.com/allanpk716/ChineseSubFinder/internal/types/supplier"
  20. "github.com/sirupsen/logrus"
  21. )
  22. type Supplier struct {
  23. settings *settings.Settings
  24. log *logrus.Logger
  25. fileDownloader *file_downloader.FileDownloader
  26. topic int
  27. isAlive bool
  28. }
  29. func NewSupplier(fileDownloader *file_downloader.FileDownloader) *Supplier {
  30. sup := Supplier{}
  31. sup.log = fileDownloader.Log
  32. sup.fileDownloader = fileDownloader
  33. sup.topic = common2.DownloadSubsPerSite
  34. sup.isAlive = true // 默认是可以使用的,如果 check 后,再调整状态
  35. sup.settings = fileDownloader.Settings
  36. if sup.settings.AdvancedSettings.Topic > 0 && sup.settings.AdvancedSettings.Topic != sup.topic {
  37. sup.topic = sup.settings.AdvancedSettings.Topic
  38. }
  39. return &sup
  40. }
  41. func (s *Supplier) CheckAlive() (bool, int64) {
  42. // 计算当前时间
  43. startT := time.Now()
  44. httpClient, err := my_util.NewHttpClient(s.settings.AdvancedSettings.ProxySettings)
  45. if err != nil {
  46. s.log.Errorln(s.GetSupplierName(), "CheckAlive.NewHttpClient", err)
  47. return false, 0
  48. }
  49. searPageUrl := fmt.Sprintf(s.settings.AdvancedSettings.SuppliersSettings.A4k.RootUrl)
  50. resp, err := httpClient.R().Get(searPageUrl)
  51. if err != nil {
  52. s.log.Errorln(s.GetSupplierName(), "CheckAlive.Get", err)
  53. return false, 0
  54. }
  55. if resp.StatusCode() != 200 {
  56. s.log.Errorln(s.GetSupplierName(), "CheckAlive.StatusCode", resp.StatusCode())
  57. return false, 0
  58. }
  59. s.isAlive = true
  60. return true, time.Since(startT).Milliseconds()
  61. }
  62. func (s *Supplier) IsAlive() bool {
  63. return s.isAlive
  64. }
  65. func (s *Supplier) OverDailyDownloadLimit() bool {
  66. if s.settings.AdvancedSettings.SuppliersSettings.A4k.DailyDownloadLimit == 0 {
  67. s.log.Warningln(s.GetSupplierName(), "DailyDownloadLimit is 0, will Skip Download")
  68. return true
  69. }
  70. // 对于这个接口暂时没有限制
  71. return false
  72. }
  73. func (s *Supplier) GetLogger() *logrus.Logger {
  74. return s.log
  75. }
  76. func (s *Supplier) GetSettings() *settings.Settings {
  77. return s.settings
  78. }
  79. func (s *Supplier) GetSupplierName() string {
  80. return common2.SubSiteA4K
  81. }
  82. func (s *Supplier) GetSubListFromFile4Movie(videoFPath string) ([]supplier.SubInfo, error) {
  83. defer func() {
  84. s.log.Debugln(s.GetSupplierName(), videoFPath, "End...")
  85. }()
  86. s.log.Debugln(s.GetSupplierName(), videoFPath, "Start...")
  87. outSubInfos := make([]supplier.SubInfo, 0)
  88. mediaInfo, err := mix_media_info.GetMixMediaInfo(s.log, s.fileDownloader.SubtitleBestApi,
  89. videoFPath, true, s.settings.AdvancedSettings.ProxySettings)
  90. if err != nil {
  91. s.log.Errorln(s.GetSupplierName(), "GetMixMediaInfo", err)
  92. return nil, err
  93. }
  94. // 需要找到中文名称去搜索
  95. keyWord, err := mix_media_info.KeyWordSelect(mediaInfo, videoFPath, true, "cn")
  96. if err != nil {
  97. s.log.Errorln(s.GetSupplierName(), "keyWordSelect", err)
  98. return nil, err
  99. }
  100. airTime, err := now.Parse(mediaInfo.Year)
  101. if err != nil {
  102. s.log.Errorln(s.GetSupplierName(), "Parse airTime", err)
  103. return nil, err
  104. }
  105. searchKeyword := fmt.Sprintf("%s %d", keyWord, airTime.Year())
  106. s.log.Infoln(s.GetSupplierName(), "searchKeyword", searchKeyword)
  107. searchResultItems, err := s.searchKeyword(searchKeyword, true)
  108. if err != nil {
  109. return nil, err
  110. }
  111. if len(searchResultItems) == 0 {
  112. // 没有找到则返回
  113. s.log.Infoln(s.GetSupplierName(), "searchKeyword", searchKeyword, "not found")
  114. return nil, nil
  115. }
  116. // 开启下载
  117. downloadCounter := 0
  118. for _, searchResultItem := range searchResultItems {
  119. downloadPageUrl := s.settings.AdvancedSettings.SuppliersSettings.A4k.RootUrl + searchResultItem.RUrl
  120. subInfo, err := s.downloadSub(videoFPath, downloadPageUrl, 0, 0)
  121. if err != nil {
  122. s.log.Errorln(s.GetSupplierName(), "downloadSub", err)
  123. return nil, err
  124. }
  125. outSubInfos = append(outSubInfos, *subInfo)
  126. downloadCounter++
  127. if downloadCounter >= s.topic {
  128. break
  129. }
  130. }
  131. return outSubInfos, nil
  132. }
  133. func (s *Supplier) GetSubListFromFile4Series(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error) {
  134. defer func() {
  135. s.log.Debugln(s.GetSupplierName(), seriesInfo.Name, "End...")
  136. }()
  137. s.log.Debugln(s.GetSupplierName(), seriesInfo.Name, "Start...")
  138. // 搜索的策略应该是 绝命律师 第五季 or 绝命律师 S05E06 这两种方式,优先后者,具体去搜索,如果找不到然后再切换关键词为全季
  139. outSubInfos := make([]supplier.SubInfo, 0)
  140. // 这里拿到的 seriesInfo ,里面包含了,需要下载字幕的 Eps 信息
  141. for _, episodeInfo := range seriesInfo.NeedDlEpsKeyList {
  142. mediaInfo, err := mix_media_info.GetMixMediaInfo(s.log, s.fileDownloader.SubtitleBestApi,
  143. episodeInfo.FileFullPath, false, s.settings.AdvancedSettings.ProxySettings)
  144. if err != nil {
  145. s.log.Errorln(s.GetSupplierName(), "GetMixMediaInfo", err)
  146. return nil, err
  147. }
  148. // 需要找到中文名称去搜索
  149. keyWord, err := mix_media_info.KeyWordSelect(mediaInfo, episodeInfo.FileFullPath, false, "cn")
  150. if err != nil {
  151. s.log.Errorln(s.GetSupplierName(), "keyWordSelect", err)
  152. return nil, err
  153. }
  154. // 第一次搜索 黄石 S04E01
  155. s.log.Infoln(s.GetSupplierName(), "searchKeyword", keyWord)
  156. searchResultItems, err := s.searchKeyword(keyWord, false)
  157. if err != nil {
  158. return nil, err
  159. }
  160. if len(searchResultItems) == 0 {
  161. // 没有找到则更换关键词
  162. // 黄石 第四季
  163. keyWord, err = mix_media_info.KeyWordSelect(mediaInfo, episodeInfo.FileFullPath, true, "cn")
  164. if err != nil {
  165. s.log.Errorln(s.GetSupplierName(), "keyWordSelect", err)
  166. return nil, err
  167. }
  168. searchKeyword := fmt.Sprintf("%s %s", keyWord, " 第"+zh.Uint64(episodeInfo.Season).String()+"季")
  169. s.log.Infoln(s.GetSupplierName(), "searchKeyword", searchKeyword)
  170. searchResultItems, err = s.searchKeyword(searchKeyword, false)
  171. if err != nil {
  172. return nil, err
  173. }
  174. if len(searchResultItems) == 0 {
  175. // 没有找到则返回
  176. s.log.Infoln(s.GetSupplierName(), episodeInfo.Season, episodeInfo.Episode, "no sub found")
  177. return nil, nil
  178. }
  179. }
  180. // 开启下载
  181. downloadCounter := 0
  182. for _, searchResultItem := range searchResultItems {
  183. if episodeInfo.Season == searchResultItem.Season && episodeInfo.Episode == searchResultItem.Episode {
  184. // Season 和 Eps 匹配上再继续下载
  185. } else if episodeInfo.Season == searchResultItem.Season && searchResultItem.IsFullSeason == true {
  186. // Season 匹配上,Eps 为 0 则下载,全季
  187. }
  188. downloadPageUrl := s.settings.AdvancedSettings.SuppliersSettings.A4k.RootUrl + searchResultItem.RUrl
  189. // 注意这里传入的 Season Episode 是这个字幕下载时候解析出来的信息
  190. subInfo, err := s.downloadSub(episodeInfo.FileFullPath, downloadPageUrl, searchResultItem.Season, searchResultItem.Episode)
  191. if err != nil {
  192. s.log.Errorln(s.GetSupplierName(), "downloadSub", err)
  193. return nil, err
  194. }
  195. outSubInfos = append(outSubInfos, *subInfo)
  196. // 连续剧的时候至多下载 5 个即可
  197. downloadCounter++
  198. if downloadCounter >= 5 {
  199. break
  200. }
  201. }
  202. }
  203. return outSubInfos, nil
  204. }
  205. func (s *Supplier) GetSubListFromFile4Anime(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error) {
  206. panic("not implemented")
  207. }
  208. // searchKeyword 通过关键词获取所有的字幕列表
  209. func (s *Supplier) searchKeyword(keyword string, isMovie bool) (searchResultItems []SearchResultItem, err error) {
  210. var totalPage int
  211. totalPage = 0
  212. searchResultItems = make([]SearchResultItem, 0)
  213. // 先获取第一页
  214. nowPageIndex := 0
  215. for {
  216. var nowSearchResultItem []SearchResultItem
  217. if nowPageIndex == 0 {
  218. // 第一页才有获取总页数的操作
  219. nowSearchResultItem, totalPage, err = s.listPageItems(keyword, nowPageIndex, isMovie)
  220. } else {
  221. // 其他页面,跳过总页数的获取逻辑
  222. nowSearchResultItem, _, err = s.listPageItems(keyword, nowPageIndex, isMovie)
  223. }
  224. if err != nil {
  225. return
  226. }
  227. searchResultItems = append(searchResultItems, nowSearchResultItem...)
  228. if totalPage == 0 {
  229. // 说明只有一页
  230. break
  231. }
  232. nowPageIndex++
  233. if nowPageIndex > totalPage {
  234. // 超过总页数
  235. break
  236. }
  237. }
  238. return
  239. }
  240. func (s *Supplier) listPageItems(keyword string, pageIndex int, isMovie bool) (searchResultItems []SearchResultItem, totalPage int, err error) {
  241. defer func() {
  242. time.Sleep(time.Second * 10)
  243. }()
  244. searchResultItems = make([]SearchResultItem, 0)
  245. httpClient, err := my_util.NewHttpClient(s.settings.AdvancedSettings.ProxySettings)
  246. if err != nil {
  247. err = errors.New("NewHttpClient error:" + err.Error())
  248. return
  249. }
  250. // 先对第一页进行分析
  251. searPageUrl := fmt.Sprintf(s.settings.AdvancedSettings.SuppliersSettings.A4k.RootUrl+"/search?term=%s&page=%d", keyword, pageIndex)
  252. resp, err := httpClient.R().Get(searPageUrl)
  253. if err != nil {
  254. err = errors.New("http get error:" + err.Error())
  255. return
  256. }
  257. var doc *goquery.Document
  258. doc, err = goquery.NewDocumentFromReader(strings.NewReader(resp.String()))
  259. if err != nil {
  260. err = errors.New("goquery NewDocumentFromReader error:" + err.Error())
  261. return
  262. }
  263. doc.Find(".sub-item-list li.item div.content h3").EachWithBreak(func(i int, selection *goquery.Selection) bool {
  264. subA := selection.Find("a")
  265. if subA == nil {
  266. s.log.Errorln(".sub-item-list li.item div.content h3 a is nil")
  267. return false
  268. }
  269. title := subA.Text()
  270. hrefUrl, ok := subA.Attr("href")
  271. if ok == false {
  272. s.log.Errorln("a href is nil")
  273. return false
  274. }
  275. var isFullSeason bool
  276. var season int
  277. var eps int
  278. isFullSeason, season, eps, err = decode.GetSeasonAndEpisodeFromSubFileName(title)
  279. if err != nil {
  280. s.log.Errorln("decode.GetSeasonAndEpisodeFromSubFileName error:" + err.Error())
  281. return false
  282. }
  283. searchResultItems = append(searchResultItems, SearchResultItem{
  284. Title: title,
  285. IsMovie: isMovie,
  286. RUrl: hrefUrl,
  287. Season: season,
  288. Episode: eps,
  289. IsFullSeason: isFullSeason,
  290. })
  291. return true
  292. })
  293. if len(searchResultItems) < 1 {
  294. // 说明没有搜索的结果
  295. return
  296. }
  297. err = nil
  298. totalPage = 0
  299. if pageIndex != 0 {
  300. // 没有必要分析最后一页的信息
  301. return
  302. }
  303. // 判断一共有多少页,一页就是 0,第二页就是 1,以此类推
  304. lastPageSelection := doc.Find("a.pager__item--last")
  305. if pageIndex == 0 && lastPageSelection == nil {
  306. // 说明只有一页的结果
  307. return
  308. }
  309. if lastPageSelection != nil {
  310. // 说明至少有两页
  311. lastPageHrefUrl, ok := lastPageSelection.Attr("href")
  312. if pageIndex == 0 && ok == false {
  313. // 说明只有一页的结果
  314. return
  315. }
  316. if ok == false {
  317. err = errors.New("last page a href is nil")
  318. return
  319. }
  320. if strings.Contains(lastPageHrefUrl, pageTageName) == false {
  321. err = errors.New("last page a href is not correct, not found page tag")
  322. return
  323. }
  324. lastPageParts := strings.Split(lastPageHrefUrl, pageTageName)
  325. if len(lastPageParts) != 2 {
  326. err = errors.New("last page a href is not correct, split parts error")
  327. return
  328. }
  329. totalPage, err = strconv.Atoi(lastPageParts[1])
  330. if err != nil {
  331. err = errors.New("last page a href is not correct, convert to int error")
  332. return
  333. }
  334. }
  335. return
  336. }
  337. func (s Supplier) downloadSub(videoFileName, downloadPageUrl string, season, eps int) (subInfo *supplier.SubInfo, err error) {
  338. defer func() {
  339. time.Sleep(time.Second * 5)
  340. }()
  341. var httpClient *resty.Client
  342. httpClient, err = my_util.NewHttpClient(s.settings.AdvancedSettings.ProxySettings)
  343. if err != nil {
  344. err = errors.New("NewHttpClient error:" + err.Error())
  345. return
  346. }
  347. // 先对第一页进行分析
  348. var resp *resty.Response
  349. resp, err = httpClient.R().Get(downloadPageUrl)
  350. if err != nil {
  351. err = errors.New("http get error:" + err.Error())
  352. return
  353. }
  354. var doc *goquery.Document
  355. doc, err = goquery.NewDocumentFromReader(strings.NewReader(resp.String()))
  356. if err != nil {
  357. err = errors.New("goquery NewDocumentFromReader error:" + err.Error())
  358. return
  359. }
  360. // 找到下载的 btn
  361. downloadBtSelection := doc.Find("div.buttons a.green")
  362. if downloadBtSelection == nil {
  363. err = errors.New("download btn is nil")
  364. return
  365. }
  366. downloadBtHrefUrl, ok := downloadBtSelection.Attr("href")
  367. if ok == false {
  368. err = errors.New("download btn href is nil")
  369. return
  370. }
  371. // 开始下载
  372. downloadFileUrl := s.settings.AdvancedSettings.SuppliersSettings.A4k.RootUrl + downloadBtHrefUrl
  373. subInfo, err = s.fileDownloader.GetA4k(s.GetSupplierName(), 0, season, eps, videoFileName, downloadFileUrl)
  374. if err != nil {
  375. err = errors.New("fileDownloader.Get error:" + err.Error())
  376. return
  377. }
  378. return
  379. }
  // SearchResultItem is one subtitle entry parsed from an A4K search result page.
  type SearchResultItem struct {
  	// Title is the link text of the entry.
  	Title string `json:"title"`
  	// RUrl is the entry's href; callers prefix it with the A4K root URL.
  	RUrl string `json:"r_url"`
  	// IsMovie records whether the entry came from a movie search.
  	IsMovie bool `json:"is_movie"`
  	// Season is the season number decoded from Title.
  	Season int `json:"season"`
  	// Episode is the episode number decoded from Title.
  	Episode int `json:"episode"`
  	// IsFullSeason is the full-season flag decoded from Title.
  	IsFullSeason bool `json:"is_full_season"`
  }
  // pageTageName is the query fragment that precedes the page index in the
  // href of the pager's "last page" link.
  const pageTageName = "&page="