|
|
@@ -12,14 +12,24 @@ import (
|
|
|
)
|
|
|
|
|
|
type Supplier struct {
|
|
|
- _reqParam common.ReqParam
|
|
|
+ reqParam common.ReqParam
|
|
|
+ topic int
|
|
|
}
|
|
|
|
|
|
-func NewSupplier() *Supplier {
|
|
|
- return &Supplier{}
|
|
|
+func NewSupplier(_reqParam ... common.ReqParam) *Supplier {
|
|
|
+
|
|
|
+ sup := Supplier{}
|
|
|
+ sup.topic = common.DownloadSubsPerSite
|
|
|
+ if len(_reqParam) > 0 {
|
|
|
+ sup.reqParam = _reqParam[0]
|
|
|
+ if sup.reqParam.Topic > 0 && sup.reqParam.Topic != sup.topic {
|
|
|
+ sup.topic = sup.reqParam.Topic
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return &sup
|
|
|
}
|
|
|
|
|
|
-func (s Supplier) GetSubListFromFile(filePath string, httpProxy string) ([]sub_supplier.SubInfo, error) {
|
|
|
+func (s Supplier) GetSubListFromFile(filePath string) ([]sub_supplier.SubInfo, error) {
|
|
|
|
|
|
/*
|
|
|
虽然是传入视频文件路径,但是其实需要读取对应的视频文件目录下的
|
|
|
@@ -35,21 +45,26 @@ func (s Supplier) GetSubListFromFile(filePath string, httpProxy string) ([]sub_s
|
|
|
// 找到这个视频文件,然后读取它目录下的文件,尝试得到 IMDB ID
|
|
|
fileRootDirPath := filepath.Dir(filePath)
|
|
|
imdbId, err := common.GetImdbId(fileRootDirPath)
|
|
|
- if err != nil {
|
|
|
+ if err != nil && err != common.CanNotFindIMDBID {
|
|
|
return nil, err
|
|
|
}
|
|
|
|
|
|
- // 先用 imdb id 找
|
|
|
- subInfoList, err := s.GetSubListFromKeyword(imdbId, httpProxy)
|
|
|
- if err != nil {
|
|
|
- return nil, err
|
|
|
- }
|
|
|
- // 如果有就优先返回
|
|
|
- if len(subInfoList) >0 {
|
|
|
- return subInfoList, nil
|
|
|
+ var subInfoList []sub_supplier.SubInfo
|
|
|
+
|
|
|
+ if imdbId != "" {
|
|
|
+ // 先用 imdb id 找
|
|
|
+ subInfoList, err = s.GetSubListFromKeyword(imdbId)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ // 如果有就优先返回
|
|
|
+ if len(subInfoList) >0 {
|
|
|
+ return subInfoList, nil
|
|
|
+ }
|
|
|
}
|
|
|
+
|
|
|
// 如果没有,那么就用文件名查找
|
|
|
- subInfoList, err = s.GetSubListFromKeyword(info.Title, httpProxy)
|
|
|
+ subInfoList, err = s.GetSubListFromKeyword(info.Title)
|
|
|
if err != nil {
|
|
|
return nil, err
|
|
|
}
|
|
|
@@ -57,31 +72,223 @@ func (s Supplier) GetSubListFromFile(filePath string, httpProxy string) ([]sub_s
|
|
|
return subInfoList, nil
|
|
|
}
|
|
|
|
|
|
-func (s Supplier) GetSubListFromKeyword(keyword string, httpProxy string) ([]sub_supplier.SubInfo, error) {
|
|
|
+func (s Supplier) GetSubListFromKeyword(keyword string) ([]sub_supplier.SubInfo, error) {
|
|
|
|
|
|
- // 第一级界面,有多少个字幕
|
|
|
- subResult, err := s.Step1(keyword, httpProxy)
|
|
|
+ var outSubInfoList []sub_supplier.SubInfo
|
|
|
+ // 第一级界面,找到影片的详情界面
|
|
|
+ filmDetailPageUrl, err := s.Step0(keyword)
|
|
|
if err != nil {
|
|
|
return nil, err
|
|
|
}
|
|
|
- // 第二级界面,单个字幕详情
|
|
|
- err = s.Step2(&subResult, httpProxy)
|
|
|
+ // 第二级界面,有多少个字幕
|
|
|
+ subResult, err := s.Step1(filmDetailPageUrl)
|
|
|
if err != nil {
|
|
|
return nil, err
|
|
|
}
|
|
|
- // 第三级界面,具体字幕下载
|
|
|
- err = s.Step3(&subResult, httpProxy)
|
|
|
+ // 第三级界面,单个字幕详情
|
|
|
+ // 找到最大的优先级的字幕下载
|
|
|
+ sort.Sort(SortByPriority{subResult.SubInfos})
|
|
|
+ // 移除多出来的字幕
|
|
|
+ if len(subResult.SubInfos) > s.topic {
|
|
|
+ subResult.SubInfos = subResult.SubInfos[:s.topic]
|
|
|
+ }
|
|
|
+
|
|
|
+ for i := range subResult.SubInfos {
|
|
|
+ err = s.Step2(&subResult.SubInfos[i])
|
|
|
+ if err != nil {
|
|
|
+ println(err.Error())
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 第四级界面,具体字幕下载
|
|
|
+ for _, subInfo := range subResult.SubInfos {
|
|
|
+ fileName, data, err := s.Step3(subInfo.SubDownloadPageUrl)
|
|
|
+ if err != nil {
|
|
|
+ println(err.Error())
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ // 默认都是包含中文字幕的,然后具体使用的时候再进行区分
|
|
|
+ outSubInfoList = append(outSubInfoList, *sub_supplier.NewSubInfo(fileName, common.ChineseSimple, common.AddBaseUrl(common.SubZiMuKuRootUrl, subInfo.SubDownloadPageUrl), 0,
|
|
|
+ 0, filepath.Ext(fileName), data))
|
|
|
+ }
|
|
|
+
|
|
|
+ return outSubInfoList, nil
|
|
|
+}
|
|
|
+
|
|
|
+// Step0 先在查询界面找到字幕对应第一个影片的详情界面
|
|
|
+func (s Supplier) Step0(keyword string) (string, error) {
|
|
|
+ httpClient := common.NewHttpClient(s.reqParam)
|
|
|
+ // 第一级界面,有多少个字幕
|
|
|
+ resp, err := httpClient.R().
|
|
|
+ SetQueryParams(map[string]string{
|
|
|
+ "q": keyword,
|
|
|
+ }).
|
|
|
+ Get(common.SubZiMuKuSearchUrl)
|
|
|
if err != nil {
|
|
|
- return nil, err
|
|
|
+ return "", err
|
|
|
}
|
|
|
- // TODO 需要把查询到的信息转换到 []sub_supplier.SubInfo 再输出
|
|
|
- // 注意要做一次排序,根据优先级
|
|
|
- return nil, nil
|
|
|
+ // 找到对应影片的详情界面
|
|
|
+ re := regexp.MustCompile(`<p\s+class="tt\s+clearfix"><a\s+href="(/subs/[\w]+\.html)"\s+target="_blank"><b>(.*?)</b></a></p>`)
|
|
|
+ matched := re.FindAllStringSubmatch(resp.String(), -1)
|
|
|
+ //lists := make([]string, 0)
|
|
|
+ //for _, match := range matched {
|
|
|
+ // // 去重
|
|
|
+ // for _, list := range lists {
|
|
|
+ // if list != match[1] {
|
|
|
+ // lists = append(lists, match[1])
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // lists = append(lists, match[1])
|
|
|
+ //}
|
|
|
+ if len(matched) < 1 {
|
|
|
+ return "", common.ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound
|
|
|
+ }
|
|
|
+ // 影片的详情界面 url
|
|
|
+ filmDetailPageUrl := matched[0][1]
|
|
|
+ return filmDetailPageUrl, nil
|
|
|
}
|
|
|
|
|
|
-// Step1 第一级界面,有多少个字幕
|
|
|
-func (s Supplier) Step1(keyword string, _reqParam ...common.ReqParam) (SubResult, error) {
|
|
|
- httpClient := common.NewHttpClient()
|
|
|
+// Step1 分析详情界面,找到有多少个字幕
|
|
|
+func (s Supplier) Step1(filmDetailPageUrl string) (SubResult, error) {
|
|
|
+ filmDetailPageUrl = common.AddBaseUrl(common.SubZiMuKuRootUrl, filmDetailPageUrl)
|
|
|
+ httpClient := common.NewHttpClient(s.reqParam)
|
|
|
+ resp, err := httpClient.R().
|
|
|
+ Get(filmDetailPageUrl)
|
|
|
+ if err != nil {
|
|
|
+ return SubResult{}, err
|
|
|
+ }
|
|
|
+ doc, err := goquery.NewDocumentFromReader(strings.NewReader(resp.String()))
|
|
|
+ if err != nil {
|
|
|
+ return SubResult{}, err
|
|
|
+ }
|
|
|
+ var subResult SubResult
|
|
|
+ subResult.SubInfos = SubInfos{}
|
|
|
+ doc.Find("#subtb tbody tr").Each(func(i int, tr *goquery.Selection) {
|
|
|
+ href, exists := tr.Find("a").Attr("href")
|
|
|
+ if !exists {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ title, exists := tr.Find("a").Attr("title")
|
|
|
+ if !exists {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ ext := tr.Find(".label-info").Text()
|
|
|
+ authorInfos := tr.Find(".gray")
|
|
|
+ authorInfo := ""
|
|
|
+ authorInfos.Each(func(a_i int, a_lb *goquery.Selection) {
|
|
|
+ authorInfo += a_lb.Text() + ","
|
|
|
+ })
|
|
|
+ authorInfoLen := len(authorInfo)
|
|
|
+ if authorInfoLen > 0 {
|
|
|
+ authorInfo = authorInfo[0 : authorInfoLen-3]
|
|
|
+ }
|
|
|
+
|
|
|
+ lang, exists := tr.Find("img").First().Attr("alt")
|
|
|
+ if !exists {
|
|
|
+ lang = ""
|
|
|
+ }
|
|
|
+ rate, exists := tr.Find(".rating-star").First().Attr("title")
|
|
|
+ if !exists {
|
|
|
+ rate = ""
|
|
|
+ }
|
|
|
+ vote, err := common.GetNumber2Folat(rate)
|
|
|
+ if err != nil {
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ downCountNub := 0
|
|
|
+ downCount := tr.Find("td").Eq(3).Text()
|
|
|
+ if strings.Contains(downCount, "万") {
|
|
|
+ fNumb, err := common.GetNumber2Folat(downCount)
|
|
|
+ if err != nil {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ downCountNub = int(fNumb * 10000)
|
|
|
+ } else {
|
|
|
+ downCountNub, err = common.GetNumber2int(downCount)
|
|
|
+ if err != nil {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ var subInfo SubInfo
|
|
|
+ subResult.Title = title
|
|
|
+ subInfo.DetailUrl = href
|
|
|
+ subInfo.Ext = ext
|
|
|
+ subInfo.AuthorInfo = authorInfo
|
|
|
+ subInfo.Lang = lang
|
|
|
+ subInfo.DownloadTimes = downCountNub
|
|
|
+
|
|
|
+ subInfo.Score = vote
|
|
|
+ // 计算优先级
|
|
|
+ subInfo.Priority = subInfo.Score * float32(subInfo.DownloadTimes)
|
|
|
+
|
|
|
+ subResult.SubInfos = append(subResult.SubInfos, subInfo)
|
|
|
+ })
|
|
|
+ return subResult, nil
|
|
|
+}
|
|
|
+
|
|
|
+// Step2 第二级界面,单个字幕详情
|
|
|
+func (s Supplier) Step2(subInfo *SubInfo) error {
|
|
|
+
|
|
|
+ detailUrl := common.AddBaseUrl(common.SubZiMuKuRootUrl, subInfo.DetailUrl)
|
|
|
+ httpClient := common.NewHttpClient(s.reqParam)
|
|
|
+ resp, err := httpClient.R().
|
|
|
+ Get(detailUrl)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ // 找到下载地址
|
|
|
+ re := regexp.MustCompile(`<a\s+id="down1"\s+href="([^"]*/dld/[\w]+\.html)"`)
|
|
|
+ matched := re.FindAllStringSubmatch(resp.String(), -1)
|
|
|
+ if matched == nil || len(matched) == 0 || len(matched[0]) == 0 {
|
|
|
+ println(detailUrl)
|
|
|
+ return common.ZiMuKuDownloadUrlStep2NotFound
|
|
|
+ }
|
|
|
+ if strings.Contains(matched[0][1], "://") {
|
|
|
+ subInfo.SubDownloadPageUrl = matched[0][1]
|
|
|
+ } else {
|
|
|
+ subInfo.SubDownloadPageUrl = fmt.Sprintf("%s%s", common.SubZiMuKuRootUrl, matched[0][1])
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+// Step3 第三级界面,具体字幕下载
|
|
|
+func (s Supplier) Step3(subDownloadPageUrl string) (string, []byte, error) {
|
|
|
+
|
|
|
+ subDownloadPageUrl = common.AddBaseUrl(common.SubZiMuKuRootUrl, subDownloadPageUrl)
|
|
|
+ httpClient := common.NewHttpClient(s.reqParam)
|
|
|
+ resp, err := httpClient.R().
|
|
|
+ Get(subDownloadPageUrl)
|
|
|
+ if err != nil {
|
|
|
+ return "", nil, err
|
|
|
+ }
|
|
|
+ re := regexp.MustCompile(`<li><a\s+rel="nofollow"\s+href="([^"]*/download/[^"]+)"`)
|
|
|
+ matched := re.FindAllStringSubmatch(resp.String(), -1)
|
|
|
+ if matched == nil || len(matched) == 0 || len(matched[0]) == 0 {
|
|
|
+ println(subDownloadPageUrl)
|
|
|
+ return "", nil, common.ZiMuKuDownloadUrlStep3NotFound
|
|
|
+ }
|
|
|
+ var filename string
|
|
|
+ var data []byte
|
|
|
+ for i := 0; i < len(matched); i++ {
|
|
|
+ data, filename, err = common.DownFile(common.AddBaseUrl(common.SubZiMuKuRootUrl, matched[i][1]), common.ReqParam{
|
|
|
+ HttpProxy: s.reqParam.HttpProxy,
|
|
|
+ Referer: subDownloadPageUrl,
|
|
|
+ })
|
|
|
+ if err != nil {
|
|
|
+ println("ZiMuKu Step3 DownloadFile", err)
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ return filename, data, nil
|
|
|
+ }
|
|
|
+ println(subDownloadPageUrl)
|
|
|
+ return "", nil, common.ZiMuKuDownloadUrlStep3AllFailed
|
|
|
+}
|
|
|
+
|
|
|
+// Step1Discard 第一级界面,有多少个字幕,弃用,直接再搜索出来的结果界面匹配会遇到一个问题,就是 “还有8个字幕,点击查看” 类似此问题
|
|
|
+func (s Supplier) Step1Discard(keyword string) (SubResult, error) {
|
|
|
+ httpClient := common.NewHttpClient(s.reqParam)
|
|
|
// 第一级界面,有多少个字幕
|
|
|
resp, err := httpClient.R().
|
|
|
SetQueryParams(map[string]string{
|
|
|
@@ -152,67 +359,11 @@ func (s Supplier) Step1(keyword string, _reqParam ...common.ReqParam) (SubResult
|
|
|
})
|
|
|
// 这里要判断,一级界面是否OK 了,不行就返回
|
|
|
if subResult.Title == "" || len(subResult.SubInfos) == 0 {
|
|
|
- return SubResult{}, common.ZiMuKuSearchKeyWordStep1NotFound
|
|
|
+ return SubResult{}, nil
|
|
|
}
|
|
|
return subResult, nil
|
|
|
}
|
|
|
|
|
|
-// Step2 第二级界面,单个字幕详情
|
|
|
-func (s Supplier) Step2(subResult *SubResult, httpProxy string) error {
|
|
|
- // 找到最大的优先级的字幕下载
|
|
|
- sort.Sort(SortByPriority{subResult.SubInfos})
|
|
|
- // 排序后的第一个
|
|
|
- httpClient := common.NewHttpClient(httpProxy)
|
|
|
- resp, err := httpClient.R().
|
|
|
- Get(common.SubZiMuKuRootUrl + subResult.SubInfos[0].DetailUrl)
|
|
|
- if err != nil {
|
|
|
- return err
|
|
|
- }
|
|
|
- // 找到下载地址
|
|
|
- re := regexp.MustCompile(`<a\s+id="down1"\s+href="([^"]*/dld/[\w]+\.html)"`)
|
|
|
- matched := re.FindAllStringSubmatch(resp.String(), -1)
|
|
|
- if matched == nil || len(matched) == 0 || len(matched[0]) == 0 {
|
|
|
- return common.ZiMuKuDownloadUrlStep2NotFound
|
|
|
- }
|
|
|
- if strings.Contains(matched[0][1], "://") {
|
|
|
- subResult.SubInfos[0].SubDownloadPageUrl = matched[0][1]
|
|
|
- } else {
|
|
|
- subResult.SubInfos[0].SubDownloadPageUrl = fmt.Sprintf("%s%s", common.SubZiMuKuRootUrl, matched[0][1])
|
|
|
- }
|
|
|
- return nil
|
|
|
-}
|
|
|
-
|
|
|
-// Step3 第三级界面,具体字幕下载
|
|
|
-func (s Supplier) Step3(subResult *SubResult, httpProxy string) error {
|
|
|
-
|
|
|
- subDownloadPageUrl := common.AddBaseUrl(common.SubZiMuKuRootUrl, subResult.SubInfos[0].SubDownloadPageUrl)
|
|
|
- httpClient := common.NewHttpClient(httpProxy)
|
|
|
- resp, err := httpClient.R().
|
|
|
- Get(subDownloadPageUrl)
|
|
|
- if err != nil {
|
|
|
- return err
|
|
|
- }
|
|
|
- re := regexp.MustCompile(`<li><a\s+rel="nofollow"\s+href="([^"]*/download/[^"]+)"`)
|
|
|
- matched := re.FindAllStringSubmatch(resp.String(), -1)
|
|
|
- if matched == nil || len(matched) == 0 || len(matched[0]) == 0 {
|
|
|
- return nil
|
|
|
- }
|
|
|
- var filename string
|
|
|
- var data []byte
|
|
|
- for i := 0; i < len(matched); i++ {
|
|
|
-
|
|
|
- data, filename, err = common.DownFile(common.AddBaseUrl(common.SubZiMuKuRootUrl, matched[i][1]), common.ReqParam{
|
|
|
- HttpProxy: httpProxy,
|
|
|
- Referer: subDownloadPageUrl,
|
|
|
- })
|
|
|
- if err != nil {
|
|
|
- println("ZiMuKu Step3 DownloadFile", err)
|
|
|
- continue
|
|
|
- }
|
|
|
- }
|
|
|
- return nil
|
|
|
-}
|
|
|
-
|
|
|
type SubResult struct {
|
|
|
Title string // 字幕的标题
|
|
|
OtherName string // 影片又名
|
|
|
@@ -220,9 +371,12 @@ type SubResult struct {
|
|
|
}
|
|
|
|
|
|
type SubInfo struct {
|
|
|
+ Lang string // 语言
|
|
|
+ AuthorInfo string // 作者
|
|
|
+ Ext string // 后缀名
|
|
|
Score float32 // 评分
|
|
|
DownloadTimes int // 下载的次数
|
|
|
- Priority float32 // 优先级,使用评分和次数乘积而来
|
|
|
+ Priority float32 // 优先级,使用评分和次数乘积而来,类似于 Vote 投票
|
|
|
DetailUrl string // 字幕的详情界面,需要再次分析具体的下载地址,地址需要拼接网站的根地址上去
|
|
|
SubDownloadPageUrl string // 字幕的具体的下载页面,会有多个下载可用的链接
|
|
|
DownloadUrl string // 字幕的下载地址
|