zimuku.go 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792
  1. package zimuku
  2. import (
  3. "errors"
  4. "fmt"
  5. "net/url"
  6. "os"
  7. "path/filepath"
  8. "regexp"
  9. "sort"
  10. "strings"
  11. "time"
  12. "github.com/allanpk716/ChineseSubFinder/pkg/search"
  13. "github.com/allanpk716/ChineseSubFinder/pkg"
  14. "github.com/allanpk716/ChineseSubFinder/pkg/types/common"
  15. language2 "github.com/allanpk716/ChineseSubFinder/pkg/types/language"
  16. "github.com/allanpk716/ChineseSubFinder/pkg/types/series"
  17. "github.com/allanpk716/ChineseSubFinder/pkg/types/supplier"
  18. "github.com/allanpk716/ChineseSubFinder/pkg/rod_helper"
  19. "github.com/allanpk716/ChineseSubFinder/pkg/logic/file_downloader"
  20. "github.com/PuerkitoBio/goquery"
  21. "github.com/Tnze/go.num/v2/zh"
  22. "github.com/allanpk716/ChineseSubFinder/pkg/decode"
  23. "github.com/allanpk716/ChineseSubFinder/pkg/language"
  24. "github.com/allanpk716/ChineseSubFinder/pkg/notify_center"
  25. "github.com/allanpk716/ChineseSubFinder/pkg/settings"
  26. "github.com/allanpk716/ChineseSubFinder/pkg/sub_parser_hub"
  27. "github.com/go-rod/rod"
  28. "github.com/sirupsen/logrus"
  29. )
  30. type Supplier struct {
  31. settings *settings.Settings
  32. log *logrus.Logger
  33. fileDownloader *file_downloader.FileDownloader
  34. tt time.Duration
  35. debugMode bool
  36. httpProxyAddress string
  37. topic int
  38. isAlive bool
  39. }
  40. func NewSupplier(fileDownloader *file_downloader.FileDownloader) *Supplier {
  41. sup := Supplier{}
  42. sup.log = fileDownloader.Log
  43. sup.fileDownloader = fileDownloader
  44. sup.topic = common.DownloadSubsPerSite
  45. sup.isAlive = true // 默认是可以使用的,如果 check 后,再调整状态
  46. sup.settings = fileDownloader.Settings
  47. if sup.settings.AdvancedSettings.Topic > 0 && sup.settings.AdvancedSettings.Topic != sup.topic {
  48. sup.topic = sup.settings.AdvancedSettings.Topic
  49. }
  50. // 默认超时是 2 * 60s,如果是调试模式则是 5 min
  51. sup.tt = common.BrowserTimeOut
  52. sup.debugMode = sup.settings.AdvancedSettings.DebugMode
  53. if sup.debugMode == true {
  54. sup.tt = common.OneMovieProcessTimeOut
  55. }
  56. // 判断是否启用代理
  57. sup.httpProxyAddress = sup.settings.AdvancedSettings.ProxySettings.GetLocalHttpProxyUrl()
  58. return &sup
  59. }
  60. func (s *Supplier) CheckAlive(proxySettings ...*settings.ProxySettings) (bool, int64) {
  61. // TODO 是用本地的 Browser 还是远程的,推荐是远程的
  62. opt := rod_helper.NewBrowserOptions(s.log, true, s.settings)
  63. opt.SetPreLoadUrl(s.settings.AdvancedSettings.SuppliersSettings.Zimuku.RootUrl)
  64. browser, err := rod_helper.NewBrowserEx(opt)
  65. if err != nil {
  66. return false, 0
  67. }
  68. defer func() {
  69. _ = browser.Close()
  70. }()
  71. begin := time.Now() //判断代理访问时间
  72. _, page, err := rod_helper.HttpGetFromBrowser(browser, s.settings.AdvancedSettings.SuppliersSettings.Zimuku.RootUrl, 15*time.Second)
  73. if err != nil {
  74. return false, 0
  75. }
  76. _ = page.Close()
  77. speed := time.Now().Sub(begin).Nanoseconds() / 1000 / 1000 //ms
  78. s.isAlive = true
  79. return true, speed
  80. }
  81. func (s *Supplier) IsAlive() bool {
  82. return s.isAlive
  83. }
  84. func (s *Supplier) OverDailyDownloadLimit() bool {
  85. return true
  86. if s.settings.AdvancedSettings.SuppliersSettings.Zimuku.DailyDownloadLimit == 0 {
  87. s.log.Warningln(s.GetSupplierName(), "DailyDownloadLimit is 0, will Skip Download")
  88. return true
  89. }
  90. // 需要查询今天的限额
  91. count, err := s.fileDownloader.CacheCenter.DailyDownloadCountGet(s.GetSupplierName(),
  92. pkg.GetPublicIP(s.log, s.settings.AdvancedSettings.TaskQueue, s.settings.AdvancedSettings.ProxySettings))
  93. if err != nil {
  94. s.log.Errorln(s.GetSupplierName(), "DailyDownloadCountGet", err)
  95. return true
  96. }
  97. if count >= s.settings.AdvancedSettings.SuppliersSettings.Zimuku.DailyDownloadLimit {
  98. // 超限了
  99. s.log.Warningln(s.GetSupplierName(), "DailyDownloadLimit:", s.settings.AdvancedSettings.SuppliersSettings.Zimuku.DailyDownloadLimit, "Now Is:", count)
  100. return true
  101. } else {
  102. // 没有超限
  103. s.log.Infoln(s.GetSupplierName(), "DailyDownloadLimit:", s.settings.AdvancedSettings.SuppliersSettings.Zimuku.DailyDownloadLimit, "Now Is:", count)
  104. return false
  105. }
  106. }
  107. func (s *Supplier) GetLogger() *logrus.Logger {
  108. return s.log
  109. }
  110. func (s *Supplier) GetSettings() *settings.Settings {
  111. return s.settings
  112. }
  113. func (s *Supplier) GetSupplierName() string {
  114. return common.SubSiteZiMuKu
  115. }
  116. func (s *Supplier) GetSubListFromFile4Movie(filePath string) ([]supplier.SubInfo, error) {
  117. // TODO 是用本地的 Browser 还是远程的,推荐是远程的
  118. opt := rod_helper.NewBrowserOptions(s.log, true, s.settings)
  119. opt.SetPreLoadUrl(s.settings.AdvancedSettings.SuppliersSettings.Zimuku.RootUrl)
  120. browser, err := rod_helper.NewBrowserEx(opt)
  121. if err != nil {
  122. return nil, err
  123. }
  124. defer func() {
  125. _ = browser.Close()
  126. }()
  127. return s.getSubListFromMovie(browser, filePath)
  128. }
  129. func (s *Supplier) GetSubListFromFile4Series(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error) {
  130. defer func() {
  131. s.log.Debugln(s.GetSupplierName(), seriesInfo.Name, "End...")
  132. }()
  133. s.log.Debugln(s.GetSupplierName(), seriesInfo.Name, "Start...")
  134. var err error
  135. // TODO 是用本地的 Browser 还是远程的,推荐是远程的
  136. opt := rod_helper.NewBrowserOptions(s.log, true, s.settings)
  137. opt.SetPreLoadUrl(s.settings.AdvancedSettings.SuppliersSettings.Zimuku.RootUrl)
  138. browser, err := rod_helper.NewBrowserEx(opt)
  139. if err != nil {
  140. return nil, err
  141. }
  142. defer func() {
  143. _ = browser.Close()
  144. }()
  145. /*
  146. 去网站搜索的时候,有个比较由意思的逻辑,有些剧集,哪怕只有一季,sonarr 也会给它命名为 Season 1
  147. 但是在 zimuku 搜索的时候,如果你加上 XXX 第一季 就搜索不出来,那么目前比较可行的办法是查询两次
  148. 第一次优先查询 XXX 第一季 ,如果返回的列表是空的,那么再查询 XXX
  149. */
  150. // 这里打算牺牲效率,提高代码的复用度,不然后续得维护一套电影的查询逻辑,一套剧集的查询逻辑
  151. // 比如,其实可以搜索剧集名称,应该可以得到多个季的列表,然后分析再继续
  152. // 现在粗暴点,直接一季搜索一次,跟电影的搜索一样,在首个影片就停止,然后继续往下
  153. AllSeasonSubResult := SubResult{}
  154. for value := range seriesInfo.NeedDlSeasonDict {
  155. /*
  156. 经过网友的测试反馈,每一季 zimuku 是支持这一季的第一集的 IMDB ID 可以搜索到这一季的信息 #253
  157. 1. 那么在搜索某一集的时候,需要根据这一集去找这一季的第一集,然后读取它的 IMDB ID 信息,然后优先用于搜索这一季的信息
  158. 2. 如果是搜索季,就直接推算到达季文件夹的位置,搜索所有文件找到第一集,获取它的 IMDB ID
  159. 是不是有点绕···
  160. */
  161. findSeasonFirstEpsIMDBId := ""
  162. videoList, err := search.MatchedVideoFile(s.log, seriesInfo.DirPath)
  163. if err != nil {
  164. s.log.Errorln("GetSubListFromFile4Series.MatchedVideoFile, Season:", value, "Error:", err)
  165. continue
  166. }
  167. for _, oneVideoFPath := range videoList {
  168. oneVideoInfo, err := decode.GetVideoNfoInfo4OneSeriesEpisode(oneVideoFPath)
  169. if err != nil {
  170. s.log.Errorln("GetVideoInfoFromFileName", oneVideoInfo, err)
  171. continue
  172. }
  173. if oneVideoInfo.Season == value && oneVideoInfo.Episode == 1 {
  174. // 这一季的第一集
  175. findSeasonFirstEpsIMDBId = oneVideoInfo.ImdbId
  176. break
  177. }
  178. }
  179. filmDetailPageUrl := ""
  180. if findSeasonFirstEpsIMDBId != "" {
  181. // 第一级界面,找到影片的详情界面
  182. // 使用上面得到的这一季第一集的 IMDB ID 进行搜索这一季的信息
  183. keyword := findSeasonFirstEpsIMDBId
  184. s.log.Debugln(s.GetSupplierName(), "step 0", "1 times", "keyword:", keyword)
  185. filmDetailPageUrl, err = s.step0(browser, keyword)
  186. if err != nil {
  187. s.log.Errorln(s.GetSupplierName(), "step 0", "0 times", "keyword:", keyword, err)
  188. // 如果只是搜索不到,则继续换关键词
  189. if err != common.ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound {
  190. s.log.Errorln(s.GetSupplierName(), "ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound", keyword, err)
  191. continue
  192. }
  193. }
  194. }
  195. // 如果上面找到了,那么 filmDetailPageUrl 就应该不为空,如果没有找到就是空
  196. if filmDetailPageUrl == "" {
  197. // 第一级界面,找到影片的详情界面
  198. keyword := seriesInfo.Name + " 第" + zh.Uint64(value).String() + "季"
  199. s.log.Debugln(s.GetSupplierName(), "step 0", "0 times", "keyword:", keyword)
  200. filmDetailPageUrl, err = s.step0(browser, keyword)
  201. if err != nil {
  202. s.log.Errorln(s.GetSupplierName(), "step 0", "0 times", "keyword:", keyword, err)
  203. // 如果只是搜索不到,则继续换关键词
  204. if err != common.ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound {
  205. s.log.Errorln(s.GetSupplierName(), "ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound", keyword, err)
  206. continue
  207. }
  208. // 直接更换为这个剧目的 Name 的搜索,不带季度关键词信息
  209. keyword = seriesInfo.Name
  210. s.log.Debugln(s.GetSupplierName(), "step 0", "1 times", "keyword:", keyword)
  211. filmDetailPageUrl, err = s.step0(browser, keyword)
  212. if err != nil {
  213. s.log.Errorln(s.GetSupplierName(), "1 times", "keyword:", keyword, err)
  214. continue
  215. }
  216. }
  217. }
  218. // 第二级界面,有多少个字幕
  219. s.log.Debugln(s.GetSupplierName(), "step 1", filmDetailPageUrl)
  220. subResult, err := s.step1(browser, filmDetailPageUrl)
  221. if err != nil {
  222. s.log.Errorln(s.GetSupplierName(), "step 1", filmDetailPageUrl, err)
  223. continue
  224. }
  225. if AllSeasonSubResult.Title == "" {
  226. AllSeasonSubResult = subResult
  227. } else {
  228. AllSeasonSubResult.SubInfos = append(AllSeasonSubResult.SubInfos, subResult.SubInfos...)
  229. }
  230. }
  231. // 找到最大的优先级的字幕下载
  232. sort.Sort(SortByPriority{AllSeasonSubResult.SubInfos})
  233. // 找到那些 Eps 需要下载字幕的
  234. subInfoNeedDownload := s.whichEpisodeNeedDownloadSub(seriesInfo, AllSeasonSubResult)
  235. // 剩下的部分跟 GetSubListFroKeyword 一样,就是去下载了
  236. outSubInfoList := s.whichSubInfoNeedDownload(browser, subInfoNeedDownload, err)
  237. // 返回前,需要把每一个 Eps 的 Season Episode 信息填充到每个 SubInfo 中
  238. return outSubInfoList, nil
  239. }
  240. func (s *Supplier) GetSubListFromFile4Anime(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error) {
  241. panic("not implemented")
  242. }
  243. func (s *Supplier) getSubListFromMovie(browser *rod.Browser, fileFPath string) ([]supplier.SubInfo, error) {
  244. defer func() {
  245. s.log.Debugln(s.GetSupplierName(), fileFPath, "End...")
  246. }()
  247. s.log.Debugln(s.GetSupplierName(), fileFPath, "Start...")
  248. /*
  249. 虽然是传入视频文件路径,但是其实需要读取对应的视频文件目录下的
  250. movie.xml 以及 *.nfo,找到 IMDB id
  251. 优先通过 IMDB id 去查找字幕
  252. 如果找不到,再靠文件名提取影片名称去查找
  253. */
  254. // 找到这个视频文件,尝试得到 IMDB ID
  255. // 目前测试来看,加入 年 这个关键词去搜索,对 2020 年后的影片有利,因为网站有统一的详细页面了,而之前的,没有,会影响识别
  256. // 所以,year >= 2020 年,则可以多加一个关键词(年)去搜索影片
  257. imdbInfo, err := decode.GetVideoNfoInfo4Movie(fileFPath)
  258. if err != nil {
  259. // 允许的错误,跳过,继续进行文件名的搜索
  260. s.log.Errorln("model.GetImdbInfo", err)
  261. } else {
  262. s.log.Debugln(s.GetSupplierName(), fileFPath, "GetVideoNfoInfo4Movie -> Title:", imdbInfo.Title)
  263. s.log.Debugln(s.GetSupplierName(), fileFPath, "GetVideoNfoInfo4Movie -> OriginalTitle:", imdbInfo.OriginalTitle)
  264. s.log.Debugln(s.GetSupplierName(), fileFPath, "GetVideoNfoInfo4Movie -> Year:", imdbInfo.Year)
  265. s.log.Debugln(s.GetSupplierName(), fileFPath, "GetVideoNfoInfo4Movie -> ImdbId:", imdbInfo.ImdbId)
  266. }
  267. var subInfoList []supplier.SubInfo
  268. if imdbInfo.ImdbId != "" {
  269. // 先用 imdb id 找
  270. s.log.Debugln(s.GetSupplierName(), fileFPath, "getSubListFromKeyword -> Search By IMDB ID:", imdbInfo.ImdbId)
  271. subInfoList, err = s.getSubListFromKeyword(browser, imdbInfo.ImdbId)
  272. if err != nil {
  273. // 允许的错误,跳过,继续进行文件名的搜索
  274. s.log.Errorln(s.GetSupplierName(), "keyword:", imdbInfo.ImdbId)
  275. s.log.Errorln("getSubListFromKeyword", "IMDBID can not found sub", fileFPath, err)
  276. }
  277. s.log.Debugln(s.GetSupplierName(), fileFPath, "getSubListFromKeyword -> Search By IMDB ID, subInfoList Count:", len(subInfoList))
  278. // 如果有就优先返回
  279. if len(subInfoList) > 0 {
  280. return subInfoList, nil
  281. }
  282. }
  283. // 如果没有,那么就用文件名查找
  284. searchKeyword := search.VideoNameSearchKeywordMaker(s.log, imdbInfo.Title, imdbInfo.Year)
  285. s.log.Debugln(s.GetSupplierName(), fileFPath, "VideoNameSearchKeywordMaker Keyword:", searchKeyword)
  286. subInfoList, err = s.getSubListFromKeyword(browser, searchKeyword)
  287. if err != nil {
  288. s.log.Errorln(s.GetSupplierName(), "keyword:", searchKeyword)
  289. return nil, err
  290. }
  291. s.log.Debugln(s.GetSupplierName(), fileFPath, "getSubListFromKeyword -> Search By Keyword, subInfoList Count:", len(subInfoList))
  292. return subInfoList, nil
  293. }
  294. // getSubListFromKeyword 目前是给电影使用的,搜索返回的字幕列表可能很多,需要挑选一下,比如 Top 1 下来就好了
  295. func (s *Supplier) getSubListFromKeyword(browser *rod.Browser, keyword string) ([]supplier.SubInfo, error) {
  296. s.log.Infoln("Search Keyword:", keyword)
  297. var outSubInfoList []supplier.SubInfo
  298. // 第一级界面,找到影片的详情界面
  299. filmDetailPageUrl, err := s.step0(browser, keyword)
  300. if err != nil {
  301. return nil, err
  302. }
  303. s.log.Debugln(s.GetSupplierName(), "getSubListFromKeyword -> step0 -> filmDetailPageUrl:", filmDetailPageUrl)
  304. // 第二级界面,有多少个字幕
  305. subResult, err := s.step1(browser, filmDetailPageUrl)
  306. if err != nil {
  307. return nil, err
  308. }
  309. // 第三级界面,单个字幕详情
  310. // 找到最大的优先级的字幕下载
  311. sort.Sort(SortByPriority{subResult.SubInfos})
  312. // 强制把找到的列表缩少到 Top 5
  313. subResult.SubInfos = subResult.SubInfos[:5]
  314. s.log.Debugln(s.GetSupplierName(), "getSubListFromKeyword -> step1 -> subResult.Title:", subResult.Title)
  315. s.log.Debugln(s.GetSupplierName(), "getSubListFromKeyword -> step1 -> subResult.OtherName:", subResult.OtherName)
  316. for i, info := range subResult.SubInfos {
  317. s.log.Debugln(s.GetSupplierName(), "getSubListFromKeyword -> step1 -> info.Name", i, info.Name)
  318. s.log.Debugln(s.GetSupplierName(), "getSubListFromKeyword -> step1 -> info.DownloadUrl:", i, info.DownloadUrl)
  319. s.log.Debugln(s.GetSupplierName(), "getSubListFromKeyword -> step1 -> info.DetailUrl:", i, info.DetailUrl)
  320. s.log.Debugln(s.GetSupplierName(), "getSubListFromKeyword -> step1 -> info.DownloadTimes:", i, info.DownloadTimes)
  321. }
  322. outSubInfoList = s.whichSubInfoNeedDownload(browser, subResult.SubInfos, err)
  323. return outSubInfoList, nil
  324. }
  325. func (s *Supplier) whichEpisodeNeedDownloadSub(seriesInfo *series.SeriesInfo, AllSeasonSubResult SubResult) []SubInfo {
  326. // 字幕很多,考虑效率,需要做成字典
  327. // key SxEx - SubInfos
  328. var allSubDict = make(map[string]SubInfos)
  329. // 全季的字幕列表
  330. var oneSeasonSubDict = make(map[string]SubInfos)
  331. for _, subInfo := range AllSeasonSubResult.SubInfos {
  332. _, season, episode, err := decode.GetSeasonAndEpisodeFromSubFileName(subInfo.Name)
  333. if err != nil {
  334. s.log.Errorln("whichEpisodeNeedDownloadSub.GetVideoInfoFromFileFullPath", subInfo.Name, err)
  335. continue
  336. }
  337. subInfo.Season = season
  338. subInfo.Episode = episode
  339. epsKey := pkg.GetEpisodeKeyName(season, episode)
  340. _, ok := allSubDict[epsKey]
  341. if ok == false {
  342. // 初始化
  343. allSubDict[epsKey] = SubInfos{}
  344. if season != 0 && episode == 0 {
  345. oneSeasonSubDict[epsKey] = SubInfos{}
  346. }
  347. }
  348. // 添加
  349. allSubDict[epsKey] = append(allSubDict[epsKey], subInfo)
  350. if season != 0 && episode == 0 {
  351. oneSeasonSubDict[epsKey] = append(oneSeasonSubDict[epsKey], subInfo)
  352. }
  353. }
  354. // 本地的视频列表,找到没有字幕的
  355. // 需要进行下载字幕的列表
  356. var subInfoNeedDownload = make([]SubInfo, 0)
  357. // 有那些 Eps 需要下载的,按 SxEx 反回 epsKey
  358. for epsKey, epsInfo := range seriesInfo.NeedDlEpsKeyList {
  359. // 从一堆字幕里面找合适的
  360. value, ok := allSubDict[epsKey]
  361. // 是否有
  362. if ok == true && len(value) > 0 {
  363. value[0].Season = epsInfo.Season
  364. value[0].Episode = epsInfo.Episode
  365. subInfoNeedDownload = append(subInfoNeedDownload, value[0])
  366. }
  367. }
  368. // 全季的字幕列表,也拼进去,后面进行下载
  369. for _, infos := range oneSeasonSubDict {
  370. if len(infos) < 1 {
  371. continue
  372. }
  373. subInfoNeedDownload = append(subInfoNeedDownload, infos[0])
  374. }
  375. // 返回前,需要把每一个 Eps 的 Season Episode 信息填充到每个 SubInfo 中
  376. return subInfoNeedDownload
  377. }
  378. func (s *Supplier) whichSubInfoNeedDownload(browser *rod.Browser, subInfos SubInfos, err error) []supplier.SubInfo {
  379. var outSubInfoList = make([]supplier.SubInfo, 0)
  380. for i := range subInfos {
  381. err = s.step2(browser, &subInfos[i])
  382. if err != nil {
  383. s.log.Error(s.GetSupplierName(), "step 2", subInfos[i].Name, err)
  384. continue
  385. }
  386. s.log.Debugln(s.GetSupplierName(), "whichSubInfoNeedDownload -> step2 -> info.SubDownloadPageUrl:", i, subInfos[i].SubDownloadPageUrl)
  387. }
  388. // TODO 这里需要考虑,可以设置为高级选项,不够就用 unknow 来补充
  389. // 首先过滤出中文的字幕,同时需要满足是支持的字幕
  390. var tmpSubInfo = make([]SubInfo, 0)
  391. for _, subInfo := range subInfos {
  392. tmpLang := language.LangConverter4Sub_Supplier(subInfo.Lang)
  393. if language.HasChineseLang(tmpLang) == true && sub_parser_hub.IsSubTypeWanted(subInfo.Ext) == true {
  394. tmpSubInfo = append(tmpSubInfo, subInfo)
  395. }
  396. }
  397. // 看字幕够不够
  398. if len(tmpSubInfo) < s.topic {
  399. for _, subInfo := range subInfos {
  400. if len(tmpSubInfo) >= s.topic {
  401. break
  402. }
  403. tmpLang := language.LangConverter4Sub_Supplier(subInfo.Lang)
  404. if language.HasChineseLang(tmpLang) == false {
  405. tmpSubInfo = append(tmpSubInfo, subInfo)
  406. }
  407. }
  408. }
  409. s.log.Debugln(s.GetSupplierName(), "step2 -> tmpSubInfo.Count", len(tmpSubInfo))
  410. for i, info := range tmpSubInfo {
  411. s.log.Debugln(s.GetSupplierName(), "ChineseSubs -> tmpSubInfo.Name:", i, info.Name)
  412. s.log.Debugln(s.GetSupplierName(), "ChineseSubs -> tmpSubInfo.DownloadUrl:", i, info.DownloadUrl)
  413. s.log.Debugln(s.GetSupplierName(), "ChineseSubs -> tmpSubInfo.DetailUrl:", i, info.DetailUrl)
  414. s.log.Debugln(s.GetSupplierName(), "ChineseSubs -> tmpSubInfo.DownloadTimes:", i, info.DownloadTimes)
  415. s.log.Debugln(s.GetSupplierName(), "ChineseSubs -> tmpSubInfo.SubDownloadPageUrl:", i, info.SubDownloadPageUrl)
  416. }
  417. // 看字幕是不是太多了,超出 topic 的限制了
  418. if len(tmpSubInfo) > s.topic {
  419. tmpSubInfo = tmpSubInfo[:s.topic]
  420. }
  421. s.log.Debugln(s.GetSupplierName(), "step2 -> tmpSubInfo.Count with topic limit", len(tmpSubInfo))
  422. for i, info := range tmpSubInfo {
  423. s.log.Debugln(s.GetSupplierName(), "ChineseSubs -> tmpSubInfo.Name:", i, info.Name)
  424. s.log.Debugln(s.GetSupplierName(), "ChineseSubs -> tmpSubInfo.DownloadUrl:", i, info.DownloadUrl)
  425. s.log.Debugln(s.GetSupplierName(), "ChineseSubs -> tmpSubInfo.DetailUrl:", i, info.DetailUrl)
  426. s.log.Debugln(s.GetSupplierName(), "ChineseSubs -> tmpSubInfo.DownloadTimes:", i, info.DownloadTimes)
  427. s.log.Debugln(s.GetSupplierName(), "ChineseSubs -> tmpSubInfo.SubDownloadPageUrl:", i, info.SubDownloadPageUrl)
  428. }
  429. // 第四级界面,具体字幕下载
  430. for i, subInfo := range tmpSubInfo {
  431. s.log.Debugln(s.GetSupplierName(), "GetEx:", i, subInfo.SubDownloadPageUrl)
  432. getSubInfo, err := s.fileDownloader.GetEx(s.GetSupplierName(), browser, subInfo.SubDownloadPageUrl, int64(i), subInfo.Season, subInfo.Episode, s.DownFile)
  433. if err != nil {
  434. s.log.Errorln(s.GetSupplierName(), "GetEx", "GetEx", subInfo.Name, subInfo.Season, subInfo.Episode, err)
  435. continue
  436. }
  437. outSubInfoList = append(outSubInfoList, *getSubInfo)
  438. }
  439. for i, info := range outSubInfoList {
  440. s.log.Debugln(s.GetSupplierName(), "DownFile -> Downloaded File Info", i, "FileName:", info.Name, "FileUrl:", info.FileUrl)
  441. }
  442. // 返回前,需要把每一个 Eps 的 Season Episode 信息填充到每个 SubInfo 中
  443. return outSubInfoList
  444. }
  445. // step0 先在查询界面找到字幕对应第一个影片的详情界面,需要解决自定义错误 ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound
  446. func (s *Supplier) step0(browser *rod.Browser, keyword string) (string, error) {
  447. var err error
  448. defer func() {
  449. if err != nil {
  450. notify_center.Notify.Add("zimuku_step0", err.Error())
  451. }
  452. }()
  453. desUrl := fmt.Sprintf(s.settings.AdvancedSettings.SuppliersSettings.Zimuku.RootUrl+common.SubZiMuKuSearchFormatUrl, url.QueryEscape(keyword))
  454. result, page, err := rod_helper.HttpGetFromBrowser(browser, desUrl, s.tt)
  455. if err != nil {
  456. return "", err
  457. }
  458. defer func() {
  459. _ = page.Close()
  460. }()
  461. // 找到对应影片的详情界面
  462. re := regexp.MustCompile(`<p\s+class="tt\s+clearfix"><a\s+href="(/subs/[\w]+\.html)"\s+target="_blank"><b>(.*?)</b></a></p>`)
  463. matched := re.FindAllStringSubmatch(result, -1)
  464. if matched == nil || len(matched) < 1 {
  465. return "", common.ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound
  466. }
  467. // 影片的详情界面 url
  468. filmDetailPageUrl := matched[0][1]
  469. return filmDetailPageUrl, nil
  470. }
  471. // step1 分析详情界面,找到有多少个字幕
  472. func (s *Supplier) step1(browser *rod.Browser, filmDetailPageUrl string) (SubResult, error) {
  473. var err error
  474. defer func() {
  475. if err != nil {
  476. notify_center.Notify.Add("zimuku_step1", err.Error())
  477. }
  478. }()
  479. var subResult SubResult
  480. subResult.SubInfos = SubInfos{}
  481. filmDetailPageUrl = pkg.AddBaseUrl(s.settings.AdvancedSettings.SuppliersSettings.Zimuku.RootUrl, filmDetailPageUrl)
  482. result, page, err := rod_helper.HttpGetFromBrowser(browser, filmDetailPageUrl, s.tt)
  483. if err != nil {
  484. return subResult, err
  485. }
  486. defer func() {
  487. _ = page.Close()
  488. }()
  489. doc, err := goquery.NewDocumentFromReader(strings.NewReader(result))
  490. if err != nil {
  491. return SubResult{}, err
  492. }
  493. counterIndex := 3
  494. // 先找到页面”下载“关键词是第几列,然后下面的下载量才能正确的解析。否则,电影是[3],而在剧集中,因为多了字幕组的筛选,则为[4]
  495. doc.Find("#subtb thead tr th").Each(func(i int, th *goquery.Selection) {
  496. if th.Text() == "下载" {
  497. counterIndex = i
  498. }
  499. })
  500. doc.Find("#subtb tbody tr").Each(func(i int, tr *goquery.Selection) {
  501. // 字幕下载页面地址
  502. href, exists := tr.Find("a").Attr("href")
  503. if !exists {
  504. return
  505. }
  506. // 标题
  507. title, exists := tr.Find("a").Attr("title")
  508. if !exists {
  509. return
  510. }
  511. // 扩展名
  512. ext := tr.Find(".label-info").Text()
  513. // 作者信息
  514. authorInfos := tr.Find(".gray")
  515. authorInfo := ""
  516. authorInfos.Each(func(a_i int, a_lb *goquery.Selection) {
  517. authorInfo += a_lb.Text() + ","
  518. })
  519. authorInfoLen := len(authorInfo)
  520. if authorInfoLen > 0 {
  521. authorInfo = authorInfo[0 : authorInfoLen-3]
  522. }
  523. // 语言
  524. lang, exists := tr.Find("img").First().Attr("alt")
  525. if !exists {
  526. lang = ""
  527. }
  528. // 投票
  529. rate, exists := tr.Find(".rating-star").First().Attr("data-original-title")
  530. if !exists {
  531. rate = ""
  532. }
  533. vote, err := decode.GetNumber2Float(rate)
  534. if err != nil {
  535. return
  536. }
  537. // 下载次数统计
  538. downCountNub := 0
  539. downCount := tr.Find("td").Eq(counterIndex).Text()
  540. if strings.Contains(downCount, "万") {
  541. fNumb, err := decode.GetNumber2Float(downCount)
  542. if err != nil {
  543. return
  544. }
  545. downCountNub = int(fNumb * 10000)
  546. } else {
  547. downCountNub, err = decode.GetNumber2int(downCount)
  548. if err != nil {
  549. return
  550. }
  551. }
  552. var subInfo SubInfo
  553. subResult.Title = title
  554. subInfo.Name = title
  555. subInfo.DetailUrl = href
  556. subInfo.Ext = ext
  557. subInfo.AuthorInfo = authorInfo
  558. subInfo.Lang = lang
  559. subInfo.DownloadTimes = downCountNub
  560. subInfo.Score = vote
  561. // 计算优先级
  562. subInfo.Priority = subInfo.Score * float32(subInfo.DownloadTimes)
  563. subResult.SubInfos = append(subResult.SubInfos, subInfo)
  564. })
  565. return subResult, nil
  566. }
  567. // step2 第二级界面,单个字幕详情,需要判断 ZiMuKuDownloadUrlStep2NotFound 这个自定义错误
  568. func (s *Supplier) step2(browser *rod.Browser, subInfo *SubInfo) error {
  569. var err error
  570. defer func() {
  571. if err != nil {
  572. notify_center.Notify.Add("zimuku_step2", err.Error())
  573. }
  574. }()
  575. detailUrl := pkg.AddBaseUrl(s.settings.AdvancedSettings.SuppliersSettings.Zimuku.RootUrl, subInfo.DetailUrl)
  576. result, page, err := rod_helper.HttpGetFromBrowser(browser, detailUrl, s.tt)
  577. if err != nil {
  578. return err
  579. }
  580. defer func() {
  581. _ = page.Close()
  582. }()
  583. // 找到下载地址
  584. re := regexp.MustCompile(`<a\s+id="down1"\s+href="([^"]*/dld/[\w]+\.html)"`)
  585. matched := re.FindAllStringSubmatch(result, -1)
  586. if matched == nil || len(matched) == 0 || len(matched[0]) == 0 {
  587. s.log.Warnln("Step2,sub download url not found", detailUrl)
  588. return common.ZiMuKuDownloadUrlStep2NotFound
  589. }
  590. if strings.Contains(matched[0][1], "://") {
  591. subInfo.SubDownloadPageUrl = matched[0][1]
  592. } else {
  593. subInfo.SubDownloadPageUrl = fmt.Sprintf("%s%s", s.settings.AdvancedSettings.SuppliersSettings.Zimuku.RootUrl, matched[0][1])
  594. }
  595. return nil
  596. }
  597. // DownFile 第三级界面,具体字幕下载 ZiMuKuDownloadUrlStep3NotFound ZiMuKuDownloadUrlDownFileFailed
  598. func (s *Supplier) DownFile(browser *rod.Browser, subDownloadPageUrl string, TopN int64, Season, Episode int) (*supplier.SubInfo, error) {
  599. var err error
  600. defer func() {
  601. if err != nil {
  602. notify_center.Notify.Add("zimuku_DownFile", err.Error())
  603. }
  604. }()
  605. subDownloadPageFullUrl := pkg.AddBaseUrl(s.settings.AdvancedSettings.SuppliersSettings.Zimuku.RootUrl, subDownloadPageUrl)
  606. result, page, err := rod_helper.HttpGetFromBrowser(browser, subDownloadPageFullUrl, s.tt)
  607. if err != nil {
  608. return nil, err
  609. }
  610. defer func() {
  611. _ = page.Close()
  612. }()
  613. re := regexp.MustCompile(`<li><a\s+rel="nofollow"\s+href="([^"]*/download/[^"]+)"`)
  614. matched := re.FindAllStringSubmatch(result, -1)
  615. if matched == nil || len(matched) == 0 || len(matched[0]) == 0 {
  616. s.log.Debugln("Step3,sub download url not found", subDownloadPageFullUrl)
  617. return nil, common.ZiMuKuDownloadUrlStep3NotFound
  618. }
  619. fileName := ""
  620. fileByte := []byte{0}
  621. downloadSuccess := false
  622. err = rod.Try(func() {
  623. tmpDir := filepath.Join(pkg.DefTmpFolder(), "downloads")
  624. wait := browser.Timeout(30 * time.Second).WaitDownload(tmpDir)
  625. getDownloadFile := func() ([]byte, string, error) {
  626. info := wait()
  627. if info == nil {
  628. return nil, "", errors.New("download sub timeout")
  629. }
  630. downloadPath := filepath.Join(tmpDir, info.GUID)
  631. defer func() { _ = os.Remove(downloadPath) }()
  632. b, err := os.ReadFile(downloadPath)
  633. if err != nil {
  634. return nil, "", err
  635. }
  636. return b, info.SuggestedFilename, nil
  637. }
  638. // 初始化页面用于查询元素
  639. element := page.MustElement(btnClickDownload)
  640. // 直接可以下载
  641. element.MustClick()
  642. fileByte, fileName, err = getDownloadFile()
  643. if err != nil {
  644. return
  645. }
  646. downloadSuccess = true
  647. })
  648. if err != nil {
  649. s.log.Errorln("ZiMuKu DownFile DownloadFile", err)
  650. return nil, err
  651. }
  652. if downloadSuccess == true {
  653. s.log.Debugln("Step3,DownFile, FileName:", fileName, "DataLen:", len(fileByte))
  654. inSubInfo := supplier.NewSubInfo(s.GetSupplierName(), 1, fileName, language2.ChineseSimple,
  655. subDownloadPageUrl, 0, 0, filepath.Ext(fileName), fileByte)
  656. inSubInfo.TopN = TopN
  657. inSubInfo.Season = Season
  658. inSubInfo.Episode = Episode
  659. return inSubInfo, nil
  660. } else {
  661. s.log.Debugln("Step3,sub download url not found", subDownloadPageFullUrl)
  662. return nil, common.ZiMuKuDownloadUrlDownFileFailed
  663. }
  664. }
  665. type SubResult struct {
  666. Title string // 字幕的标题
  667. OtherName string // 影片又名
  668. SubInfos SubInfos // 字幕的列表
  669. }
  670. type SubInfo struct {
  671. Name string // 字幕的名称
  672. Lang string // 语言
  673. AuthorInfo string // 作者
  674. Ext string // 后缀名
  675. Score float32 // 评分
  676. DownloadTimes int // 下载的次数
  677. Priority float32 // 优先级,使用评分和次数乘积而来,类似于 Score 投票
  678. DetailUrl string // 字幕的详情界面,需要再次分析具体的下载地址,地址需要拼接网站的根地址上去
  679. SubDownloadPageUrl string // 字幕的具体的下载页面,会有多个下载可用的链接
  680. DownloadUrl string // 字幕的下载地址
  681. Season int // 第几季,默认-1
  682. Episode int // 第几集,默认-1
  683. }
  684. // SubInfos 实现自定义排序
  685. type SubInfos []SubInfo
  686. func (s SubInfos) Len() int {
  687. return len(s)
  688. }
  689. func (s SubInfos) Less(i, j int) bool {
  690. return s[i].Priority > s[j].Priority
  691. }
  692. func (s SubInfos) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
  693. type SortByPriority struct{ SubInfos }
  694. // Less 根据元素的优先级降序排序
  695. func (s SortByPriority) Less(i, j int) bool {
  696. return s.SubInfos[i].Priority > s.SubInfos[j].Priority
  697. }
  698. const btnClickDownload = "a.btn-danger"