zimuku.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. package zimuku
  2. import (
  3. "fmt"
  4. "github.com/PuerkitoBio/goquery"
  5. "github.com/Tnze/go.num/v2/zh"
  6. common2 "github.com/allanpk716/ChineseSubFinder/internal/common"
  7. "github.com/allanpk716/ChineseSubFinder/internal/pkg"
  8. "github.com/allanpk716/ChineseSubFinder/internal/types"
  9. "github.com/allanpk716/ChineseSubFinder/internal/types/series"
  10. "github.com/allanpk716/ChineseSubFinder/internal/types/supplier"
  11. "github.com/sirupsen/logrus"
  12. "path/filepath"
  13. "regexp"
  14. "sort"
  15. "strings"
  16. )
  17. type Supplier struct {
  18. reqParam types.ReqParam
  19. log *logrus.Logger
  20. topic int
  21. }
  22. func NewSupplier(_reqParam ...types.ReqParam) *Supplier {
  23. sup := Supplier{}
  24. sup.log = pkg.GetLogger()
  25. sup.topic = common2.DownloadSubsPerSite
  26. if len(_reqParam) > 0 {
  27. sup.reqParam = _reqParam[0]
  28. if sup.reqParam.Topic > 0 && sup.reqParam.Topic != sup.topic {
  29. sup.topic = sup.reqParam.Topic
  30. }
  31. }
  32. return &sup
  33. }
  34. func (s Supplier) GetSupplierName() string {
  35. return common2.SubSiteZiMuKu
  36. }
  37. func (s Supplier) GetReqParam() types.ReqParam {
  38. return s.reqParam
  39. }
  40. func (s Supplier) GetSubListFromFile4Movie(filePath string) ([]supplier.SubInfo, error){
  41. return s.getSubListFromMovie(filePath)
  42. }
  43. func (s Supplier) GetSubListFromFile4Series(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error) {
  44. var err error
  45. /*
  46. 去网站搜索的时候,有个比较由意思的逻辑,有些剧集,哪怕只有一季,sonarr 也会给它命名为 Season 1
  47. 但是在 zimuku 搜索的时候,如果你加上 XXX 第一季 就搜索不出来,那么目前比较可行的办法是查询两次
  48. 第一次优先查询 XXX 第一季 ,如果返回的列表是空的,那么再查询 XXX
  49. */
  50. // 这里打算牺牲效率,提高代码的复用度,不然后续得维护一套电影的查询逻辑,一套剧集的查询逻辑
  51. // 比如,其实可以搜索剧集名称,应该可以得到多个季的列表,然后分析再继续
  52. // 现在粗暴点,直接一季搜索一次,跟电影的搜索一样,在首个影片就停止,然后继续往下
  53. AllSeasonSubResult := SubResult{}
  54. for value := range seriesInfo.SeasonDict {
  55. // 第一级界面,找到影片的详情界面
  56. keyword := seriesInfo.Name + " 第" + zh.Uint64(value).String() + "季"
  57. filmDetailPageUrl, err := s.step0(keyword)
  58. if err != nil {
  59. s.log.Errorln(keyword)
  60. // 如果只是搜索不到,则继续换关键词
  61. if err != common2.ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound {
  62. return nil, err
  63. }
  64. keyword := seriesInfo.Name
  65. s.log.Infoln("Retry", keyword)
  66. filmDetailPageUrl, err = s.step0(keyword)
  67. if err != nil {
  68. s.log.Errorln(keyword)
  69. return nil, err
  70. }
  71. }
  72. // 第二级界面,有多少个字幕
  73. subResult, err := s.step1(filmDetailPageUrl)
  74. if err != nil {
  75. s.log.Errorln("step1", filmDetailPageUrl)
  76. return nil, err
  77. }
  78. if AllSeasonSubResult.Title == "" {
  79. AllSeasonSubResult = subResult
  80. } else {
  81. AllSeasonSubResult.SubInfos = append(AllSeasonSubResult.SubInfos, subResult.SubInfos...)
  82. }
  83. }
  84. // 找到最大的优先级的字幕下载
  85. sort.Sort(SortByPriority{AllSeasonSubResult.SubInfos})
  86. // 找到那些 Eps 需要下载字幕的
  87. subInfoNeedDownload := s.whichEpisodeNeedDownloadSub(seriesInfo, AllSeasonSubResult)
  88. // 剩下的部分跟 GetSubListFroKeyword 一样,就是去下载了
  89. outSubInfoList := s.whichSubInfoNeedDownload(subInfoNeedDownload, err)
  90. // 返回前,需要把每一个 Eps 的 Season Episode 信息填充到每个 SubInfo 中
  91. return outSubInfoList, nil
  92. }
  93. func (s Supplier) GetSubListFromFile4Anime(seriesInfo *series.SeriesInfo) ([]supplier.SubInfo, error){
  94. panic("not implemented")
  95. }
  96. func (s Supplier) getSubListFromMovie(fileFPath string) ([]supplier.SubInfo, error) {
  97. /*
  98. 虽然是传入视频文件路径,但是其实需要读取对应的视频文件目录下的
  99. movie.xml 以及 *.nfo,找到 IMDB id
  100. 优先通过 IMDB id 去查找字幕
  101. 如果找不到,再靠文件名提取影片名称去查找
  102. */
  103. // 得到这个视频文件名中的信息
  104. info, _, err := pkg.GetVideoInfoFromFileFullPath(fileFPath)
  105. if err != nil {
  106. return nil, err
  107. }
  108. // 找到这个视频文件,尝试得到 IMDB ID
  109. // 目前测试来看,加入 年 这个关键词去搜索,对 2020 年后的影片有利,因为网站有统一的详细页面了,而之前的,没有,会影响识别
  110. // 所以,year >= 2020 年,则可以多加一个关键词(年)去搜索影片
  111. imdbInfo, err := pkg.GetImdbInfo4Movie(fileFPath)
  112. if err != nil {
  113. // 允许的错误,跳过,继续进行文件名的搜索
  114. s.log.Errorln("model.GetImdbInfo", err)
  115. }
  116. var subInfoList []supplier.SubInfo
  117. if imdbInfo.ImdbId != "" {
  118. // 先用 imdb id 找
  119. subInfoList, err = s.getSubListFromKeyword(imdbInfo.ImdbId)
  120. if err != nil {
  121. // 允许的错误,跳过,继续进行文件名的搜索
  122. s.log.Errorln(s.GetSupplierName(), "keyword:", imdbInfo.ImdbId)
  123. s.log.Errorln("getSubListFromKeyword", "IMDBID can not found sub", fileFPath, err)
  124. }
  125. // 如果有就优先返回
  126. if len(subInfoList) >0 {
  127. return subInfoList, nil
  128. }
  129. }
  130. // 如果没有,那么就用文件名查找
  131. searchKeyword := pkg.VideoNameSearchKeywordMaker(info.Title, imdbInfo.Year)
  132. subInfoList, err = s.getSubListFromKeyword(searchKeyword)
  133. if err != nil {
  134. s.log.Errorln(s.GetSupplierName(), "keyword:", searchKeyword)
  135. return nil, err
  136. }
  137. return subInfoList, nil
  138. }
  139. func (s Supplier) getSubListFromKeyword(keyword string) ([]supplier.SubInfo, error) {
  140. var outSubInfoList []supplier.SubInfo
  141. // 第一级界面,找到影片的详情界面
  142. filmDetailPageUrl, err := s.step0(keyword)
  143. if err != nil {
  144. return nil, err
  145. }
  146. // 第二级界面,有多少个字幕
  147. subResult, err := s.step1(filmDetailPageUrl)
  148. if err != nil {
  149. return nil, err
  150. }
  151. // 第三级界面,单个字幕详情
  152. // 找到最大的优先级的字幕下载
  153. sort.Sort(SortByPriority{subResult.SubInfos})
  154. outSubInfoList = s.whichSubInfoNeedDownload(subResult.SubInfos, err)
  155. return outSubInfoList, nil
  156. }
  157. func (s Supplier) whichEpisodeNeedDownloadSub(seriesInfo *series.SeriesInfo, AllSeasonSubResult SubResult) []SubInfo {
  158. // 字幕很多,考虑效率,需要做成字典
  159. // key SxEx - SubInfos
  160. var allSubDict = make(map[string]SubInfos)
  161. // 全季的字幕列表
  162. var oneSeasonSubDict = make(map[string]SubInfos)
  163. for _, subInfo := range AllSeasonSubResult.SubInfos {
  164. _, season, episode, err := pkg.GetSeasonAndEpisodeFromSubFileName(subInfo.Name)
  165. if err != nil {
  166. s.log.Errorln("whichEpisodeNeedDownloadSub.GetVideoInfoFromFileFullPath", subInfo.Name, err)
  167. continue
  168. }
  169. subInfo.Season = season
  170. subInfo.Episode = episode
  171. epsKey := pkg.GetEpisodeKeyName(season, episode)
  172. _, ok := allSubDict[epsKey]
  173. if ok == false {
  174. // 初始化
  175. allSubDict[epsKey] = SubInfos{}
  176. if season != 0 && episode == 0 {
  177. oneSeasonSubDict[epsKey] = SubInfos{}
  178. }
  179. }
  180. // 添加
  181. allSubDict[epsKey] = append(allSubDict[epsKey], subInfo)
  182. if season != 0 && episode == 0 {
  183. oneSeasonSubDict[epsKey] = append(oneSeasonSubDict[epsKey], subInfo)
  184. }
  185. }
  186. // 本地的视频列表,找到没有字幕的
  187. // 需要进行下载字幕的列表
  188. var subInfoNeedDownload = make([]SubInfo, 0)
  189. // 有那些 Eps 需要下载的,按 SxEx 反回 epsKey
  190. for epsKey, epsInfo := range seriesInfo.NeedDlEpsKeyList {
  191. // 从一堆字幕里面找合适的
  192. value, ok := allSubDict[epsKey]
  193. // 是否有
  194. if ok == true && len(value) > 0 {
  195. value[0].Season = epsInfo.Season
  196. value[0].Episode = epsInfo.Episode
  197. subInfoNeedDownload = append(subInfoNeedDownload, value[0])
  198. } else {
  199. s.log.Infoln("ZiMuKu Not Find Sub can be download", epsInfo.Title, epsInfo.Season, epsInfo.Episode)
  200. }
  201. }
  202. // 全季的字幕列表,也拼进去,后面进行下载
  203. for _, infos := range oneSeasonSubDict {
  204. subInfoNeedDownload = append(subInfoNeedDownload, infos[0])
  205. }
  206. // 返回前,需要把每一个 Eps 的 Season Episode 信息填充到每个 SubInfo 中
  207. return subInfoNeedDownload
  208. }
  209. func (s Supplier) whichSubInfoNeedDownload(subInfos SubInfos, err error) []supplier.SubInfo {
  210. var outSubInfoList = make([]supplier.SubInfo, 0)
  211. for i := range subInfos {
  212. err = s.step2(&subInfos[i])
  213. if err != nil {
  214. s.log.Error(err)
  215. continue
  216. }
  217. }
  218. // TODO 这里需要考虑,可以设置为高级选项,不够就用 unknow 来补充
  219. // 首先过滤出中文的字幕,同时需要满足是支持的字幕
  220. var tmpSubInfo = make([]SubInfo, 0)
  221. for _, subInfo := range subInfos {
  222. tmpLang := pkg.LangConverter(subInfo.Lang)
  223. if pkg.HasChineseLang(tmpLang) == true && pkg.IsSubTypeWanted(subInfo.Ext) == true {
  224. tmpSubInfo = append(tmpSubInfo, subInfo)
  225. }
  226. }
  227. // 看字幕够不够
  228. if len(tmpSubInfo) < s.topic {
  229. for _, subInfo := range subInfos {
  230. if len(tmpSubInfo) >= s.topic {
  231. break
  232. }
  233. tmpLang := pkg.LangConverter(subInfo.Lang)
  234. if pkg.HasChineseLang(tmpLang) == false {
  235. tmpSubInfo = append(tmpSubInfo, subInfo)
  236. }
  237. }
  238. }
  239. // 第四级界面,具体字幕下载
  240. for i, subInfo := range tmpSubInfo {
  241. fileName, data, err := s.step3(subInfo.SubDownloadPageUrl)
  242. if err != nil {
  243. s.log.Error(err)
  244. continue
  245. }
  246. // 默认都是包含中文字幕的,然后具体使用的时候再进行区分
  247. oneSubInfo := supplier.NewSubInfo(s.GetSupplierName(), int64(i), fileName, types.ChineseSimple, pkg.AddBaseUrl(common2.SubZiMuKuRootUrl, subInfo.SubDownloadPageUrl), 0,
  248. 0, filepath.Ext(fileName), data)
  249. oneSubInfo.Season = subInfo.Season
  250. oneSubInfo.Episode = subInfo.Episode
  251. outSubInfoList = append(outSubInfoList, *oneSubInfo)
  252. }
  253. // 返回前,需要把每一个 Eps 的 Season Episode 信息填充到每个 SubInfo 中
  254. return outSubInfoList
  255. }
  256. // step0 先在查询界面找到字幕对应第一个影片的详情界面,需要解决自定义错误 ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound
  257. func (s Supplier) step0(keyword string) (string, error) {
  258. var err error
  259. defer func() {
  260. if err != nil {
  261. pkg.Notify.Add("zimuku_step0", err.Error())
  262. }
  263. }()
  264. httpClient := pkg.NewHttpClient(s.reqParam)
  265. // 第一级界面,有多少个字幕
  266. resp, err := httpClient.R().
  267. SetQueryParams(map[string]string{
  268. "q": keyword,
  269. }).
  270. Get(common2.SubZiMuKuSearchUrl)
  271. if err != nil {
  272. return "", err
  273. }
  274. // 找到对应影片的详情界面
  275. re := regexp.MustCompile(`<p\s+class="tt\s+clearfix"><a\s+href="(/subs/[\w]+\.html)"\s+target="_blank"><b>(.*?)</b></a></p>`)
  276. matched := re.FindAllStringSubmatch(resp.String(), -1)
  277. if len(matched) < 1 {
  278. return "", common2.ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound
  279. }
  280. // 影片的详情界面 url
  281. filmDetailPageUrl := matched[0][1]
  282. return filmDetailPageUrl, nil
  283. }
  284. // step1 分析详情界面,找到有多少个字幕
  285. func (s Supplier) step1(filmDetailPageUrl string) (SubResult, error) {
  286. var err error
  287. defer func() {
  288. if err != nil {
  289. pkg.Notify.Add("zimuku_step1", err.Error())
  290. }
  291. }()
  292. filmDetailPageUrl = pkg.AddBaseUrl(common2.SubZiMuKuRootUrl, filmDetailPageUrl)
  293. httpClient := pkg.NewHttpClient(s.reqParam)
  294. resp, err := httpClient.R().
  295. Get(filmDetailPageUrl)
  296. if err != nil {
  297. return SubResult{}, err
  298. }
  299. doc, err := goquery.NewDocumentFromReader(strings.NewReader(resp.String()))
  300. if err != nil {
  301. return SubResult{}, err
  302. }
  303. var subResult SubResult
  304. subResult.SubInfos = SubInfos{}
  305. counterIndex := 3
  306. // 先找到页面”下载“关键词是第几列,然后下面的下载量才能正确的解析。否则,电影是[3],而在剧集中,因为多了字幕组的筛选,则为[4]
  307. doc.Find("#subtb thead tr th").Each(func(i int, th *goquery.Selection) {
  308. if th.Text() == "下载" {
  309. counterIndex = i
  310. }
  311. })
  312. doc.Find("#subtb tbody tr").Each(func(i int, tr *goquery.Selection) {
  313. // 字幕下载页面地址
  314. href, exists := tr.Find("a").Attr("href")
  315. if !exists {
  316. return
  317. }
  318. // 标题
  319. title, exists := tr.Find("a").Attr("title")
  320. if !exists {
  321. return
  322. }
  323. // 扩展名
  324. ext := tr.Find(".label-info").Text()
  325. // 作者信息
  326. authorInfos := tr.Find(".gray")
  327. authorInfo := ""
  328. authorInfos.Each(func(a_i int, a_lb *goquery.Selection) {
  329. authorInfo += a_lb.Text() + ","
  330. })
  331. authorInfoLen := len(authorInfo)
  332. if authorInfoLen > 0 {
  333. authorInfo = authorInfo[0 : authorInfoLen-3]
  334. }
  335. // 语言
  336. lang, exists := tr.Find("img").First().Attr("alt")
  337. if !exists {
  338. lang = ""
  339. }
  340. // 投票
  341. rate, exists := tr.Find(".rating-star").First().Attr("title")
  342. if !exists {
  343. rate = ""
  344. }
  345. vote, err := pkg.GetNumber2Float(rate)
  346. if err != nil {
  347. return
  348. }
  349. // 下载次数统计
  350. downCountNub := 0
  351. downCount := tr.Find("td").Eq(counterIndex).Text()
  352. if strings.Contains(downCount, "万") {
  353. fNumb, err := pkg.GetNumber2Float(downCount)
  354. if err != nil {
  355. return
  356. }
  357. downCountNub = int(fNumb * 10000)
  358. } else {
  359. downCountNub, err = pkg.GetNumber2int(downCount)
  360. if err != nil {
  361. return
  362. }
  363. }
  364. var subInfo SubInfo
  365. subResult.Title = title
  366. subInfo.Name = title
  367. subInfo.DetailUrl = href
  368. subInfo.Ext = ext
  369. subInfo.AuthorInfo = authorInfo
  370. subInfo.Lang = lang
  371. subInfo.DownloadTimes = downCountNub
  372. subInfo.Score = vote
  373. // 计算优先级
  374. subInfo.Priority = subInfo.Score * float32(subInfo.DownloadTimes)
  375. subResult.SubInfos = append(subResult.SubInfos, subInfo)
  376. })
  377. return subResult, nil
  378. }
  379. // step2 第二级界面,单个字幕详情,需要判断 ZiMuKuDownloadUrlStep2NotFound 这个自定义错误
  380. func (s Supplier) step2(subInfo *SubInfo) error {
  381. var err error
  382. defer func() {
  383. if err != nil {
  384. pkg.Notify.Add("zimuku_step2", err.Error())
  385. }
  386. }()
  387. detailUrl := pkg.AddBaseUrl(common2.SubZiMuKuRootUrl, subInfo.DetailUrl)
  388. httpClient := pkg.NewHttpClient(s.reqParam)
  389. resp, err := httpClient.R().
  390. Get(detailUrl)
  391. if err != nil {
  392. return err
  393. }
  394. // 找到下载地址
  395. re := regexp.MustCompile(`<a\s+id="down1"\s+href="([^"]*/dld/[\w]+\.html)"`)
  396. matched := re.FindAllStringSubmatch(resp.String(), -1)
  397. if matched == nil || len(matched) == 0 || len(matched[0]) == 0 {
  398. s.log.Debug(detailUrl)
  399. return common2.ZiMuKuDownloadUrlStep2NotFound
  400. }
  401. if strings.Contains(matched[0][1], "://") {
  402. subInfo.SubDownloadPageUrl = matched[0][1]
  403. } else {
  404. subInfo.SubDownloadPageUrl = fmt.Sprintf("%s%s", common2.SubZiMuKuRootUrl, matched[0][1])
  405. }
  406. return nil
  407. }
  408. // step3 第三级界面,具体字幕下载 ZiMuKuDownloadUrlStep3NotFound ZiMuKuDownloadUrlStep3AllFailed
  409. func (s Supplier) step3(subDownloadPageUrl string) (string, []byte, error) {
  410. var err error
  411. defer func() {
  412. if err != nil {
  413. pkg.Notify.Add("zimuku_step3", err.Error())
  414. }
  415. }()
  416. subDownloadPageUrl = pkg.AddBaseUrl(common2.SubZiMuKuRootUrl, subDownloadPageUrl)
  417. httpClient := pkg.NewHttpClient(s.reqParam)
  418. resp, err := httpClient.R().
  419. Get(subDownloadPageUrl)
  420. if err != nil {
  421. return "", nil, err
  422. }
  423. re := regexp.MustCompile(`<li><a\s+rel="nofollow"\s+href="([^"]*/download/[^"]+)"`)
  424. matched := re.FindAllStringSubmatch(resp.String(), -1)
  425. if matched == nil || len(matched) == 0 || len(matched[0]) == 0 {
  426. s.log.Debug(subDownloadPageUrl)
  427. return "", nil, common2.ZiMuKuDownloadUrlStep3NotFound
  428. }
  429. var filename string
  430. var data []byte
  431. s.reqParam.Referer = subDownloadPageUrl
  432. for i := 0; i < len(matched); i++ {
  433. data, filename, err = pkg.DownFile(pkg.AddBaseUrl(common2.SubZiMuKuRootUrl, matched[i][1]), s.reqParam)
  434. if err != nil {
  435. s.log.Errorln("ZiMuKu step3 DownloadFile", err)
  436. continue
  437. }
  438. return filename, data, nil
  439. }
  440. s.log.Debug(subDownloadPageUrl)
  441. return "", nil, common2.ZiMuKuDownloadUrlStep3AllFailed
  442. }
  443. type SubResult struct {
  444. Title string // 字幕的标题
  445. OtherName string // 影片又名
  446. SubInfos SubInfos // 字幕的列表
  447. }
  // SubInfo describes one subtitle listed on ZiMuKu, together with the
  // addresses discovered for it while walking the site.
  type SubInfo struct {
  	// Name is the subtitle's name (its title on the film detail page).
  	Name string
  	// Lang is the language label taken from the page.
  	Lang string
  	// AuthorInfo holds the author labels, comma separated.
  	AuthorInfo string
  	// Ext is the file extension label.
  	Ext string
  	// Score is the rating.
  	Score float32
  	// DownloadTimes is the number of downloads.
  	DownloadTimes int
  	// Priority is Score multiplied by DownloadTimes; higher sorts first.
  	Priority float32
  	// DetailUrl is the subtitle's detail page. The real download address still
  	// has to be extracted from it, and the site root must be prepended.
  	DetailUrl string
  	// SubDownloadPageUrl is the download page, which may offer several links.
  	SubDownloadPageUrl string
  	// DownloadUrl is the final download address.
  	DownloadUrl string
  	// Season is the season number.
  	// NOTE(review): the original comment said the default is -1, but nothing in
  	// this file sets that; a fresh SubInfo has 0 here. Confirm.
  	Season int
  	// Episode is the episode number. The same remark about the default applies.
  	Episode int
  }
  462. // SubInfos 实现自定义排序
  463. type SubInfos []SubInfo
  464. func (s SubInfos) Len() int {
  465. return len(s)
  466. }
  467. func (s SubInfos) Less(i, j int) bool {
  468. return s[i].Priority > s[j].Priority
  469. }
  470. func (s SubInfos) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
  471. type SortByPriority struct{ SubInfos }
  472. // Less 根据元素的优先级降序排序
  473. func (s SortByPriority) Less(i, j int) bool {
  474. return s.SubInfos[i].Priority > s.SubInfos[j].Priority
  475. }