zimuku.go 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685
  1. package zimuku
  2. import (
  3. "fmt"
  4. "github.com/PuerkitoBio/goquery"
  5. "github.com/Tnze/go.num/v2/zh"
  6. "github.com/allanpk716/ChineseSubFinder/internal/common"
  7. pkgcommon "github.com/allanpk716/ChineseSubFinder/internal/pkg/common"
  8. "github.com/allanpk716/ChineseSubFinder/internal/pkg/decode"
  9. "github.com/allanpk716/ChineseSubFinder/internal/pkg/global_value"
  10. "github.com/allanpk716/ChineseSubFinder/internal/pkg/language"
  11. "github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
  12. "github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
  13. "github.com/allanpk716/ChineseSubFinder/internal/pkg/notify_center"
  14. "github.com/allanpk716/ChineseSubFinder/internal/pkg/rod_helper"
  15. "github.com/allanpk716/ChineseSubFinder/internal/pkg/settings"
  16. "github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub"
  17. language2 "github.com/allanpk716/ChineseSubFinder/internal/types/language"
  18. "github.com/allanpk716/ChineseSubFinder/internal/types/series"
  19. "github.com/allanpk716/ChineseSubFinder/internal/types/supplier"
  20. "github.com/go-rod/rod"
  21. "github.com/huandu/go-clone"
  22. "github.com/sirupsen/logrus"
  23. "net/url"
  24. "os"
  25. "path/filepath"
  26. "regexp"
  27. "sort"
  28. "strings"
  29. "time"
  30. )
  31. type Supplier struct {
  32. settings settings.Settings
  33. log *logrus.Logger
  34. tt time.Duration
  35. debugMode bool
  36. httpProxyAddress string
  37. topic int
  38. isAlive bool
  39. }
  40. func NewSupplier(_settings settings.Settings) *Supplier {
  41. sup := Supplier{}
  42. sup.log = log_helper.GetLogger()
  43. sup.topic = common.DownloadSubsPerSite
  44. sup.isAlive = true // 默认是可以使用的,如果 check 后,再调整状态
  45. sup.settings = clone.Clone(_settings).(settings.Settings)
  46. if sup.settings.AdvancedSettings.Topic > 0 && sup.settings.AdvancedSettings.Topic != sup.topic {
  47. sup.topic = sup.settings.AdvancedSettings.Topic
  48. }
  49. // 默认超时是 2 * 60s,如果是调试模式则是 5 min
  50. sup.tt = common.HTMLTimeOut
  51. sup.debugMode = sup.settings.AdvancedSettings.DebugMode
  52. if sup.debugMode == true {
  53. sup.tt = common.OneMovieProcessTimeOut
  54. }
  55. // 判断是否启用代理
  56. if sup.settings.AdvancedSettings.ProxySettings.UseHttpProxy == true {
  57. sup.httpProxyAddress = sup.settings.AdvancedSettings.ProxySettings.HttpProxyAddress
  58. } else {
  59. sup.httpProxyAddress = ""
  60. }
  61. return &sup
  62. }
  63. func (s *Supplier) CheckAlive() (bool, int64) {
  64. // TODO 是用本地的 Browser 还是远程的,推荐是远程的
  65. browser, err := rod_helper.NewBrowserEx(true, s.settings, common.SubZiMuKuRootUrl)
  66. if err != nil {
  67. return false, 0
  68. }
  69. defer func() {
  70. _ = browser.Close()
  71. }()
  72. begin := time.Now() //判断代理访问时间
  73. _, page, err := rod_helper.HttpGetFromBrowser(browser, common.SubZiMuKuRootUrl, 15*time.Second)
  74. if err != nil {
  75. return false, 0
  76. }
  77. _ = page.Close()
  78. speed := time.Now().Sub(begin).Nanoseconds() / 1000 / 1000 //ms
  79. s.isAlive = true
  80. return true, speed
  81. }
  82. func (s *Supplier) IsAlive() bool {
  83. return s.isAlive
  84. }
  85. func (s Supplier) GetSupplierName() string {
  86. return common.SubSiteZiMuKu
  87. }
  88. func (s Supplier) GetSubListFromFile4Movie(filePath string) ([]supplier.SubInfo, error) {
  89. // TODO 是用本地的 Browser 还是远程的,推荐是远程的
  90. browser, err := rod_helper.NewBrowserEx(true, s.settings, common.SubZiMuKuRootUrl)
  91. if err != nil {
  92. return nil, err
  93. }
  94. defer func() {
  95. _ = browser.Close()
  96. }()
  97. return s.getSubListFromMovie(browser, filePath)
  98. }
  99. func (s Supplier) GetSubListFromFile4Series(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error) {
  100. defer func() {
  101. s.log.Debugln(s.GetSupplierName(), seriesInfo.Name, "End...")
  102. }()
  103. s.log.Debugln(s.GetSupplierName(), seriesInfo.Name, "Start...")
  104. var err error
  105. // TODO 是用本地的 Browser 还是远程的,推荐是远程的
  106. browser, err := rod_helper.NewBrowserEx(true, s.settings, common.SubZiMuKuRootUrl)
  107. if err != nil {
  108. return nil, err
  109. }
  110. defer func() {
  111. _ = browser.Close()
  112. }()
  113. /*
  114. 去网站搜索的时候,有个比较由意思的逻辑,有些剧集,哪怕只有一季,sonarr 也会给它命名为 Season 1
  115. 但是在 zimuku 搜索的时候,如果你加上 XXX 第一季 就搜索不出来,那么目前比较可行的办法是查询两次
  116. 第一次优先查询 XXX 第一季 ,如果返回的列表是空的,那么再查询 XXX
  117. */
  118. // 这里打算牺牲效率,提高代码的复用度,不然后续得维护一套电影的查询逻辑,一套剧集的查询逻辑
  119. // 比如,其实可以搜索剧集名称,应该可以得到多个季的列表,然后分析再继续
  120. // 现在粗暴点,直接一季搜索一次,跟电影的搜索一样,在首个影片就停止,然后继续往下
  121. AllSeasonSubResult := SubResult{}
  122. for value := range seriesInfo.SeasonDict {
  123. // 第一级界面,找到影片的详情界面
  124. keyword := seriesInfo.Name + " 第" + zh.Uint64(value).String() + "季"
  125. s.log.Debugln(s.GetSupplierName(), "step 0", "0 times", "keyword:", keyword)
  126. filmDetailPageUrl, err := s.step0(browser, keyword)
  127. if err != nil {
  128. s.log.Errorln(s.GetSupplierName(), "step 0", "0 times", "keyword:", keyword, err)
  129. // 如果只是搜索不到,则继续换关键词
  130. if err != common.ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound {
  131. s.log.Errorln(s.GetSupplierName(), "ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound", keyword, err)
  132. continue
  133. }
  134. keyword = seriesInfo.Name
  135. s.log.Debugln(s.GetSupplierName(), "step 0", "1 times", "keyword:", keyword)
  136. filmDetailPageUrl, err = s.step0(browser, keyword)
  137. if err != nil {
  138. s.log.Errorln(s.GetSupplierName(), "1 times", "keyword:", keyword, err)
  139. continue
  140. }
  141. }
  142. // 第二级界面,有多少个字幕
  143. s.log.Debugln(s.GetSupplierName(), "step 1", filmDetailPageUrl)
  144. subResult, err := s.step1(browser, filmDetailPageUrl)
  145. if err != nil {
  146. s.log.Errorln(s.GetSupplierName(), "step 1", filmDetailPageUrl, err)
  147. continue
  148. }
  149. if AllSeasonSubResult.Title == "" {
  150. AllSeasonSubResult = subResult
  151. } else {
  152. AllSeasonSubResult.SubInfos = append(AllSeasonSubResult.SubInfos, subResult.SubInfos...)
  153. }
  154. }
  155. // 找到最大的优先级的字幕下载
  156. sort.Sort(SortByPriority{AllSeasonSubResult.SubInfos})
  157. // 找到那些 Eps 需要下载字幕的
  158. subInfoNeedDownload := s.whichEpisodeNeedDownloadSub(seriesInfo, AllSeasonSubResult)
  159. // 剩下的部分跟 GetSubListFroKeyword 一样,就是去下载了
  160. outSubInfoList := s.whichSubInfoNeedDownload(browser, subInfoNeedDownload, err)
  161. // 返回前,需要把每一个 Eps 的 Season Episode 信息填充到每个 SubInfo 中
  162. return outSubInfoList, nil
  163. }
  164. func (s Supplier) GetSubListFromFile4Anime(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error) {
  165. panic("not implemented")
  166. }
  167. func (s Supplier) getSubListFromMovie(browser *rod.Browser, fileFPath string) ([]supplier.SubInfo, error) {
  168. defer func() {
  169. s.log.Debugln(s.GetSupplierName(), fileFPath, "End...")
  170. }()
  171. s.log.Debugln(s.GetSupplierName(), fileFPath, "Start...")
  172. /*
  173. 虽然是传入视频文件路径,但是其实需要读取对应的视频文件目录下的
  174. movie.xml 以及 *.nfo,找到 IMDB id
  175. 优先通过 IMDB id 去查找字幕
  176. 如果找不到,再靠文件名提取影片名称去查找
  177. */
  178. // 得到这个视频文件名中的信息
  179. info, _, err := decode.GetVideoInfoFromFileFullPath(fileFPath)
  180. if err != nil {
  181. return nil, err
  182. }
  183. s.log.Debugln(s.GetSupplierName(), fileFPath, "GetVideoInfoFromFileFullPath -> Title:", info.Title)
  184. // 找到这个视频文件,尝试得到 IMDB ID
  185. // 目前测试来看,加入 年 这个关键词去搜索,对 2020 年后的影片有利,因为网站有统一的详细页面了,而之前的,没有,会影响识别
  186. // 所以,year >= 2020 年,则可以多加一个关键词(年)去搜索影片
  187. imdbInfo, err := decode.GetImdbInfo4Movie(fileFPath)
  188. if err != nil {
  189. // 允许的错误,跳过,继续进行文件名的搜索
  190. s.log.Errorln("model.GetImdbInfo", err)
  191. } else {
  192. s.log.Debugln(s.GetSupplierName(), fileFPath, "GetImdbInfo4Movie -> Title:", imdbInfo.Title)
  193. s.log.Debugln(s.GetSupplierName(), fileFPath, "GetImdbInfo4Movie -> OriginalTitle:", imdbInfo.OriginalTitle)
  194. s.log.Debugln(s.GetSupplierName(), fileFPath, "GetImdbInfo4Movie -> Year:", imdbInfo.Year)
  195. s.log.Debugln(s.GetSupplierName(), fileFPath, "GetImdbInfo4Movie -> ImdbId:", imdbInfo.ImdbId)
  196. }
  197. var subInfoList []supplier.SubInfo
  198. if imdbInfo.ImdbId != "" {
  199. // 先用 imdb id 找
  200. s.log.Debugln(s.GetSupplierName(), fileFPath, "getSubListFromKeyword -> Search By IMDB ID:", imdbInfo.ImdbId)
  201. subInfoList, err = s.getSubListFromKeyword(browser, imdbInfo.ImdbId)
  202. if err != nil {
  203. // 允许的错误,跳过,继续进行文件名的搜索
  204. s.log.Errorln(s.GetSupplierName(), "keyword:", imdbInfo.ImdbId)
  205. s.log.Errorln("getSubListFromKeyword", "IMDBID can not found sub", fileFPath, err)
  206. }
  207. s.log.Debugln(s.GetSupplierName(), fileFPath, "getSubListFromKeyword -> Search By IMDB ID, subInfoList Count:", len(subInfoList))
  208. // 如果有就优先返回
  209. if len(subInfoList) > 0 {
  210. return subInfoList, nil
  211. }
  212. }
  213. // 如果没有,那么就用文件名查找
  214. searchKeyword := my_util.VideoNameSearchKeywordMaker(s.log, info.Title, imdbInfo.Year)
  215. s.log.Debugln(s.GetSupplierName(), fileFPath, "VideoNameSearchKeywordMaker Keyword:", searchKeyword)
  216. subInfoList, err = s.getSubListFromKeyword(browser, searchKeyword)
  217. if err != nil {
  218. s.log.Errorln(s.GetSupplierName(), "keyword:", searchKeyword)
  219. return nil, err
  220. }
  221. s.log.Debugln(s.GetSupplierName(), fileFPath, "getSubListFromKeyword -> Search By Keyword, subInfoList Count:", len(subInfoList))
  222. return subInfoList, nil
  223. }
  224. func (s Supplier) getSubListFromKeyword(browser *rod.Browser, keyword string) ([]supplier.SubInfo, error) {
  225. var outSubInfoList []supplier.SubInfo
  226. // 第一级界面,找到影片的详情界面
  227. filmDetailPageUrl, err := s.step0(browser, keyword)
  228. if err != nil {
  229. return nil, err
  230. }
  231. s.log.Debugln(s.GetSupplierName(), "getSubListFromKeyword -> step0 -> filmDetailPageUrl:", filmDetailPageUrl)
  232. // 第二级界面,有多少个字幕
  233. subResult, err := s.step1(browser, filmDetailPageUrl)
  234. if err != nil {
  235. return nil, err
  236. }
  237. // 第三级界面,单个字幕详情
  238. // 找到最大的优先级的字幕下载
  239. sort.Sort(SortByPriority{subResult.SubInfos})
  240. s.log.Debugln(s.GetSupplierName(), "getSubListFromKeyword -> step1 -> subResult.Title:", subResult.Title)
  241. s.log.Debugln(s.GetSupplierName(), "getSubListFromKeyword -> step1 -> subResult.OtherName:", subResult.OtherName)
  242. for i, info := range subResult.SubInfos {
  243. s.log.Debugln(s.GetSupplierName(), "getSubListFromKeyword -> step1 -> info.Name", i, info.Name)
  244. s.log.Debugln(s.GetSupplierName(), "getSubListFromKeyword -> step1 -> info.DownloadUrl:", i, info.DownloadUrl)
  245. s.log.Debugln(s.GetSupplierName(), "getSubListFromKeyword -> step1 -> info.DetailUrl:", i, info.DetailUrl)
  246. s.log.Debugln(s.GetSupplierName(), "getSubListFromKeyword -> step1 -> info.DownloadTimes:", i, info.DownloadTimes)
  247. }
  248. outSubInfoList = s.whichSubInfoNeedDownload(browser, subResult.SubInfos, err)
  249. return outSubInfoList, nil
  250. }
  251. func (s Supplier) whichEpisodeNeedDownloadSub(seriesInfo *series.SeriesInfo, AllSeasonSubResult SubResult) []SubInfo {
  252. // 字幕很多,考虑效率,需要做成字典
  253. // key SxEx - SubInfos
  254. var allSubDict = make(map[string]SubInfos)
  255. // 全季的字幕列表
  256. var oneSeasonSubDict = make(map[string]SubInfos)
  257. for _, subInfo := range AllSeasonSubResult.SubInfos {
  258. _, season, episode, err := decode.GetSeasonAndEpisodeFromSubFileName(subInfo.Name)
  259. if err != nil {
  260. s.log.Errorln("whichEpisodeNeedDownloadSub.GetVideoInfoFromFileFullPath", subInfo.Name, err)
  261. continue
  262. }
  263. subInfo.Season = season
  264. subInfo.Episode = episode
  265. epsKey := my_util.GetEpisodeKeyName(season, episode)
  266. _, ok := allSubDict[epsKey]
  267. if ok == false {
  268. // 初始化
  269. allSubDict[epsKey] = SubInfos{}
  270. if season != 0 && episode == 0 {
  271. oneSeasonSubDict[epsKey] = SubInfos{}
  272. }
  273. }
  274. // 添加
  275. allSubDict[epsKey] = append(allSubDict[epsKey], subInfo)
  276. if season != 0 && episode == 0 {
  277. oneSeasonSubDict[epsKey] = append(oneSeasonSubDict[epsKey], subInfo)
  278. }
  279. }
  280. // 本地的视频列表,找到没有字幕的
  281. // 需要进行下载字幕的列表
  282. var subInfoNeedDownload = make([]SubInfo, 0)
  283. // 有那些 Eps 需要下载的,按 SxEx 反回 epsKey
  284. for epsKey, epsInfo := range seriesInfo.NeedDlEpsKeyList {
  285. // 从一堆字幕里面找合适的
  286. value, ok := allSubDict[epsKey]
  287. // 是否有
  288. if ok == true && len(value) > 0 {
  289. value[0].Season = epsInfo.Season
  290. value[0].Episode = epsInfo.Episode
  291. subInfoNeedDownload = append(subInfoNeedDownload, value[0])
  292. } else {
  293. s.log.Infoln(s.GetSupplierName(), "Not Find Sub can be download",
  294. epsInfo.Title, epsInfo.Season, epsInfo.Episode)
  295. }
  296. }
  297. // 全季的字幕列表,也拼进去,后面进行下载
  298. for _, infos := range oneSeasonSubDict {
  299. subInfoNeedDownload = append(subInfoNeedDownload, infos[0])
  300. }
  301. // 返回前,需要把每一个 Eps 的 Season Episode 信息填充到每个 SubInfo 中
  302. return subInfoNeedDownload
  303. }
  304. func (s Supplier) whichSubInfoNeedDownload(browser *rod.Browser, subInfos SubInfos, err error) []supplier.SubInfo {
  305. var outSubInfoList = make([]supplier.SubInfo, 0)
  306. for i := range subInfos {
  307. pkgcommon.SetSubScanJobStatusScanSeriesSub(i+1, len(subInfos),
  308. fmt.Sprintf("%v - S%v-E%v", subInfos[i].Name, subInfos[i].Season, subInfos[i].Episode))
  309. err = s.step2(browser, &subInfos[i])
  310. if err != nil {
  311. s.log.Error(s.GetSupplierName(), "step 2", subInfos[i].Name, err)
  312. continue
  313. }
  314. s.log.Debugln(s.GetSupplierName(), "whichSubInfoNeedDownload -> step2 -> info.SubDownloadPageUrl:", i, subInfos[i].SubDownloadPageUrl)
  315. }
  316. // TODO 这里需要考虑,可以设置为高级选项,不够就用 unknow 来补充
  317. // 首先过滤出中文的字幕,同时需要满足是支持的字幕
  318. var tmpSubInfo = make([]SubInfo, 0)
  319. for _, subInfo := range subInfos {
  320. tmpLang := language.LangConverter4Sub_Supplier(subInfo.Lang)
  321. if language.HasChineseLang(tmpLang) == true && sub_parser_hub.IsSubTypeWanted(subInfo.Ext) == true {
  322. tmpSubInfo = append(tmpSubInfo, subInfo)
  323. }
  324. }
  325. // 看字幕够不够
  326. if len(tmpSubInfo) < s.topic {
  327. for _, subInfo := range subInfos {
  328. if len(tmpSubInfo) >= s.topic {
  329. break
  330. }
  331. tmpLang := language.LangConverter4Sub_Supplier(subInfo.Lang)
  332. if language.HasChineseLang(tmpLang) == false {
  333. tmpSubInfo = append(tmpSubInfo, subInfo)
  334. }
  335. }
  336. }
  337. s.log.Debugln(s.GetSupplierName(), "step2 -> tmpSubInfo.Count", len(tmpSubInfo))
  338. for i, info := range tmpSubInfo {
  339. s.log.Debugln(s.GetSupplierName(), "ChineseSubs -> tmpSubInfo.Name:", i, info.Name)
  340. s.log.Debugln(s.GetSupplierName(), "ChineseSubs -> tmpSubInfo.DownloadUrl:", i, info.DownloadUrl)
  341. s.log.Debugln(s.GetSupplierName(), "ChineseSubs -> tmpSubInfo.DetailUrl:", i, info.DetailUrl)
  342. s.log.Debugln(s.GetSupplierName(), "ChineseSubs -> tmpSubInfo.DownloadTimes:", i, info.DownloadTimes)
  343. s.log.Debugln(s.GetSupplierName(), "ChineseSubs -> tmpSubInfo.SubDownloadPageUrl:", i, info.SubDownloadPageUrl)
  344. }
  345. // 第四级界面,具体字幕下载
  346. for i, subInfo := range tmpSubInfo {
  347. s.log.Debugln(s.GetSupplierName(), "step3 -> subInfo.SubDownloadPageUrl:", i, subInfo.SubDownloadPageUrl)
  348. fileName, data, err := s.step3(browser, subInfo.SubDownloadPageUrl)
  349. if err != nil {
  350. s.log.Error(s.GetSupplierName(), "step 3", err)
  351. continue
  352. }
  353. // 默认都是包含中文字幕的,然后具体使用的时候再进行区分
  354. oneSubInfo := supplier.NewSubInfo(s.GetSupplierName(), int64(i), fileName, language2.ChineseSimple, my_util.AddBaseUrl(common.SubZiMuKuRootUrl, subInfo.SubDownloadPageUrl), 0,
  355. 0, filepath.Ext(fileName), data)
  356. oneSubInfo.Season = subInfo.Season
  357. oneSubInfo.Episode = subInfo.Episode
  358. outSubInfoList = append(outSubInfoList, *oneSubInfo)
  359. }
  360. for i, info := range outSubInfoList {
  361. s.log.Debugln(s.GetSupplierName(), "step3 -> Downloaded File Info", i, "FileName:", info.Name, "FileUrl:", info.FileUrl)
  362. }
  363. // 返回前,需要把每一个 Eps 的 Season Episode 信息填充到每个 SubInfo 中
  364. return outSubInfoList
  365. }
  366. // step0 先在查询界面找到字幕对应第一个影片的详情界面,需要解决自定义错误 ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound
  367. func (s Supplier) step0(browser *rod.Browser, keyword string) (string, error) {
  368. var err error
  369. defer func() {
  370. if err != nil {
  371. notify_center.Notify.Add("zimuku_step0", err.Error())
  372. }
  373. }()
  374. desUrl := fmt.Sprintf(common.SubZiMuKuSearchFormatUrl, url.QueryEscape(keyword))
  375. result, page, err := rod_helper.HttpGetFromBrowser(browser, desUrl, s.tt)
  376. if err != nil {
  377. return "", err
  378. }
  379. defer func() {
  380. _ = page.Close()
  381. }()
  382. // 找到对应影片的详情界面
  383. re := regexp.MustCompile(`<p\s+class="tt\s+clearfix"><a\s+href="(/subs/[\w]+\.html)"\s+target="_blank"><b>(.*?)</b></a></p>`)
  384. matched := re.FindAllStringSubmatch(result, -1)
  385. if matched == nil || len(matched) < 1 {
  386. return "", common.ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound
  387. }
  388. // 影片的详情界面 url
  389. filmDetailPageUrl := matched[0][1]
  390. return filmDetailPageUrl, nil
  391. }
  392. // step1 分析详情界面,找到有多少个字幕
  393. func (s Supplier) step1(browser *rod.Browser, filmDetailPageUrl string) (SubResult, error) {
  394. var err error
  395. defer func() {
  396. if err != nil {
  397. notify_center.Notify.Add("zimuku_step1", err.Error())
  398. }
  399. }()
  400. var subResult SubResult
  401. subResult.SubInfos = SubInfos{}
  402. filmDetailPageUrl = my_util.AddBaseUrl(common.SubZiMuKuRootUrl, filmDetailPageUrl)
  403. result, page, err := rod_helper.HttpGetFromBrowser(browser, filmDetailPageUrl, s.tt)
  404. if err != nil {
  405. return subResult, err
  406. }
  407. defer func() {
  408. _ = page.Close()
  409. }()
  410. doc, err := goquery.NewDocumentFromReader(strings.NewReader(result))
  411. if err != nil {
  412. return SubResult{}, err
  413. }
  414. counterIndex := 3
  415. // 先找到页面”下载“关键词是第几列,然后下面的下载量才能正确的解析。否则,电影是[3],而在剧集中,因为多了字幕组的筛选,则为[4]
  416. doc.Find("#subtb thead tr th").Each(func(i int, th *goquery.Selection) {
  417. if th.Text() == "下载" {
  418. counterIndex = i
  419. }
  420. })
  421. doc.Find("#subtb tbody tr").Each(func(i int, tr *goquery.Selection) {
  422. // 字幕下载页面地址
  423. href, exists := tr.Find("a").Attr("href")
  424. if !exists {
  425. return
  426. }
  427. // 标题
  428. title, exists := tr.Find("a").Attr("title")
  429. if !exists {
  430. return
  431. }
  432. // 扩展名
  433. ext := tr.Find(".label-info").Text()
  434. // 作者信息
  435. authorInfos := tr.Find(".gray")
  436. authorInfo := ""
  437. authorInfos.Each(func(a_i int, a_lb *goquery.Selection) {
  438. authorInfo += a_lb.Text() + ","
  439. })
  440. authorInfoLen := len(authorInfo)
  441. if authorInfoLen > 0 {
  442. authorInfo = authorInfo[0 : authorInfoLen-3]
  443. }
  444. // 语言
  445. lang, exists := tr.Find("img").First().Attr("alt")
  446. if !exists {
  447. lang = ""
  448. }
  449. // 投票
  450. rate, exists := tr.Find(".rating-star").First().Attr("data-original-title")
  451. if !exists {
  452. rate = ""
  453. }
  454. vote, err := decode.GetNumber2Float(rate)
  455. if err != nil {
  456. return
  457. }
  458. // 下载次数统计
  459. downCountNub := 0
  460. downCount := tr.Find("td").Eq(counterIndex).Text()
  461. if strings.Contains(downCount, "万") {
  462. fNumb, err := decode.GetNumber2Float(downCount)
  463. if err != nil {
  464. return
  465. }
  466. downCountNub = int(fNumb * 10000)
  467. } else {
  468. downCountNub, err = decode.GetNumber2int(downCount)
  469. if err != nil {
  470. return
  471. }
  472. }
  473. var subInfo SubInfo
  474. subResult.Title = title
  475. subInfo.Name = title
  476. subInfo.DetailUrl = href
  477. subInfo.Ext = ext
  478. subInfo.AuthorInfo = authorInfo
  479. subInfo.Lang = lang
  480. subInfo.DownloadTimes = downCountNub
  481. subInfo.Score = vote
  482. // 计算优先级
  483. subInfo.Priority = subInfo.Score * float32(subInfo.DownloadTimes)
  484. subResult.SubInfos = append(subResult.SubInfos, subInfo)
  485. })
  486. return subResult, nil
  487. }
  488. // step2 第二级界面,单个字幕详情,需要判断 ZiMuKuDownloadUrlStep2NotFound 这个自定义错误
  489. func (s Supplier) step2(browser *rod.Browser, subInfo *SubInfo) error {
  490. var err error
  491. defer func() {
  492. if err != nil {
  493. notify_center.Notify.Add("zimuku_step2", err.Error())
  494. }
  495. }()
  496. detailUrl := my_util.AddBaseUrl(common.SubZiMuKuRootUrl, subInfo.DetailUrl)
  497. result, page, err := rod_helper.HttpGetFromBrowser(browser, detailUrl, s.tt)
  498. if err != nil {
  499. return err
  500. }
  501. defer func() {
  502. _ = page.Close()
  503. }()
  504. // 找到下载地址
  505. re := regexp.MustCompile(`<a\s+id="down1"\s+href="([^"]*/dld/[\w]+\.html)"`)
  506. matched := re.FindAllStringSubmatch(result, -1)
  507. if matched == nil || len(matched) == 0 || len(matched[0]) == 0 {
  508. s.log.Warnln("Step2,sub download url not found", detailUrl)
  509. return common.ZiMuKuDownloadUrlStep2NotFound
  510. }
  511. if strings.Contains(matched[0][1], "://") {
  512. subInfo.SubDownloadPageUrl = matched[0][1]
  513. } else {
  514. subInfo.SubDownloadPageUrl = fmt.Sprintf("%s%s", common.SubZiMuKuRootUrl, matched[0][1])
  515. }
  516. return nil
  517. }
  518. // step3 第三级界面,具体字幕下载 ZiMuKuDownloadUrlStep3NotFound ZiMuKuDownloadUrlStep3AllFailed
  519. func (s Supplier) step3(browser *rod.Browser, subDownloadPageUrl string) (string, []byte, error) {
  520. var err error
  521. defer func() {
  522. if err != nil {
  523. notify_center.Notify.Add("zimuku_step3", err.Error())
  524. }
  525. }()
  526. subDownloadPageUrl = my_util.AddBaseUrl(common.SubZiMuKuRootUrl, subDownloadPageUrl)
  527. result, page, err := rod_helper.HttpGetFromBrowser(browser, subDownloadPageUrl, s.tt)
  528. if err != nil {
  529. return "", nil, err
  530. }
  531. defer func() {
  532. _ = page.Close()
  533. }()
  534. re := regexp.MustCompile(`<li><a\s+rel="nofollow"\s+href="([^"]*/download/[^"]+)"`)
  535. matched := re.FindAllStringSubmatch(result, -1)
  536. if matched == nil || len(matched) == 0 || len(matched[0]) == 0 {
  537. s.log.Debugln("Step3,sub download url not found", subDownloadPageUrl)
  538. return "", nil, common.ZiMuKuDownloadUrlStep3NotFound
  539. }
  540. fileName := ""
  541. fileByte := []byte{0}
  542. downloadSuccess := false
  543. err = rod.Try(func() {
  544. tmpDir := filepath.Join(global_value.DefTmpFolder(), "downloads")
  545. wait := browser.WaitDownload(tmpDir)
  546. getDownloadFile := func() ([]byte, string, error) {
  547. info := wait()
  548. downloadPath := filepath.Join(tmpDir, info.GUID)
  549. defer func() { _ = os.Remove(downloadPath) }()
  550. b, err := os.ReadFile(downloadPath)
  551. if err != nil {
  552. return nil, "", err
  553. }
  554. return b, info.SuggestedFilename, nil
  555. }
  556. // 初始化页面用于查询元素
  557. element := page.MustElement(btnClickDownload)
  558. // 直接可以下载
  559. element.MustClick()
  560. time.Sleep(time.Second * 2)
  561. fileByte, fileName, err = getDownloadFile()
  562. if err != nil {
  563. return
  564. }
  565. downloadSuccess = true
  566. })
  567. if err != nil {
  568. s.log.Errorln("ZiMuKu step3 DownloadFile", err)
  569. return "", nil, err
  570. }
  571. if downloadSuccess == true {
  572. s.log.Debugln("Step3,DownFile, FileName:", fileName, "DataLen:", len(fileByte))
  573. return fileName, fileByte, nil
  574. } else {
  575. s.log.Debugln("Step3,sub download url not found", subDownloadPageUrl)
  576. return "", nil, common.ZiMuKuDownloadUrlStep3AllFailed
  577. }
  578. }
  579. type SubResult struct {
  580. Title string // 字幕的标题
  581. OtherName string // 影片又名
  582. SubInfos SubInfos // 字幕的列表
  583. }
  584. type SubInfo struct {
  585. Name string // 字幕的名称
  586. Lang string // 语言
  587. AuthorInfo string // 作者
  588. Ext string // 后缀名
  589. Score float32 // 评分
  590. DownloadTimes int // 下载的次数
  591. Priority float32 // 优先级,使用评分和次数乘积而来,类似于 Score 投票
  592. DetailUrl string // 字幕的详情界面,需要再次分析具体的下载地址,地址需要拼接网站的根地址上去
  593. SubDownloadPageUrl string // 字幕的具体的下载页面,会有多个下载可用的链接
  594. DownloadUrl string // 字幕的下载地址
  595. Season int // 第几季,默认-1
  596. Episode int // 第几集,默认-1
  597. }
  598. // SubInfos 实现自定义排序
  599. type SubInfos []SubInfo
  600. func (s SubInfos) Len() int {
  601. return len(s)
  602. }
  603. func (s SubInfos) Less(i, j int) bool {
  604. return s[i].Priority > s[j].Priority
  605. }
  606. func (s SubInfos) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
  607. type SortByPriority struct{ SubInfos }
  608. // Less 根据元素的优先级降序排序
  609. func (s SortByPriority) Less(i, j int) bool {
  610. return s.SubInfos[i].Priority > s.SubInfos[j].Priority
  611. }
  612. const btnClickDownload = "a.btn-danger"