Browse Source

完成基本的 subhd 重构,下载还有问题

Signed-off-by: allan716 <[email protected]>
allan716 4 years ago
parent
commit
e2a735f618

+ 1 - 1
common/decode.go

@@ -105,7 +105,7 @@ func GetVideoInfo(videoFileName string) (*PTN.TorrentInfo, error) {
 	return parse, nil
 }
 
-func GetNumber2Folat(input string) (float32, error) {
+func GetNumber2Float(input string) (float32, error) {
 	compile := regexp.MustCompile(regGetNumber)
 	params := compile.FindStringSubmatch(input)
 	if len(params) == 0 {

+ 6 - 0
common/selferr.go

@@ -14,4 +14,10 @@ var(
 	ZiMuKuDownloadUrlStep2NotFound = errors.New("zimuku download url step2 not found")
 	ZiMuKuDownloadUrlStep3NotFound = errors.New("zimuku download url step3 not found")
 	ZiMuKuDownloadUrlStep3AllFailed = errors.New("zimuku download url step3 all failed")
+
+	SubHDStep0HrefIsNull = errors.New("subhd step0 href is Null")
+	SubHDStep2SidIsNull = errors.New("subhd step2 sid is null")
+	SubHDStep2DTokenIsNull = errors.New("subhd step2 dToken is null")
+	SubHDStep2ResultIsNullOrNotTrue = errors.New("subhd step2 result is null or not true")
+	SubHDStep2PostResultGetUrlNotFound= errors.New("subhd step2 post result get url not found")
 )

+ 4 - 0
common/urls.go

@@ -3,6 +3,10 @@ package common
 const (
+	// Endpoints used by the XunLei and Shooter subtitle suppliers.
 	SubXunLeiRootUrl  = "http://sub.xmp.sandai.net:8000/subxl/%s.json"
 	SubShooterRootUrl = "https://www.shooter.cn/api/subapi.php"
+
+	// ZiMuKu site root and the search endpoint built on top of it.
 	SubZiMuKuRootUrl  = "http://zmk.pw"
 	SubZiMuKuSearchUrl  = SubZiMuKuRootUrl + "/search"
+
+	// SubHD site root and its search URL template; %s receives the escaped
+	// search keyword.
+	SubSubHDRootUrl  = "https://subhd.tv"
+	SubSubHDSearchUrl  = SubSubHDRootUrl + "/search/%s"
 )

+ 1 - 1
common/util.go

@@ -48,7 +48,7 @@ func NewHttpClient(_reqParam ...ReqParam) *resty.Client {
 	return httpClient
 }
 
-// DownFile 从指定的 Url 下载文件
+// DownFile 从指定的 url 下载文件
 func DownFile(urlStr string, _reqParam ...ReqParam) ([]byte, string, error)  {
 	var reqParam ReqParam
 	if len(_reqParam) > 0 {

+ 283 - 0
sub_supplier/subhd/subhd.go

@@ -0,0 +1,283 @@
+package subhd
+
+import (
+	"fmt"
+	"github.com/PuerkitoBio/goquery"
+	"github.com/allanpk716/ChineseSubFinder/common"
+	"github.com/allanpk716/ChineseSubFinder/sub_supplier"
+	"net/url"
+	"path/filepath"
+	"regexp"
+	"strings"
+)
+
+// Supplier looks up and downloads subtitles from the SubHD site.
+type Supplier struct {
+	reqParam common.ReqParam // request options forwarded to common.NewHttpClient and common.DownFile
+	topic int // row limit that Step1 compares against the number of collected list items
+}
+
+// NewSupplier builds a SubHD Supplier.
+//
+// Only the first element of _reqParam is used. The per-site limit starts at
+// common.DownloadSubsPerSite and is replaced by the parameter's Topic when
+// that value is positive.
+func NewSupplier(_reqParam ...common.ReqParam) *Supplier {
+	supplier := &Supplier{topic: common.DownloadSubsPerSite}
+	if len(_reqParam) == 0 {
+		return supplier
+	}
+
+	supplier.reqParam = _reqParam[0]
+	if requested := supplier.reqParam.Topic; requested > 0 {
+		supplier.topic = requested
+	}
+	return supplier
+}
+
+func (s Supplier) GetSubListFromFile(filePath string) ([]sub_supplier.SubInfo, error) {
+	/*
+		虽然是传入视频文件路径,但是其实需要读取对应的视频文件目录下的
+		movie.xml 以及 *.nfo,找到 IMDB id
+		优先通过 IMDB id 去查找字幕
+		如果找不到,再靠文件名提取影片名称去查找
+	*/
+	// 得到这个视频文件名中的信息
+	info, err := common.GetVideoInfo(filePath)
+	if err != nil {
+		return nil, err
+	}
+	// 找到这个视频文件,然后读取它目录下的文件,尝试得到 IMDB ID
+	fileRootDirPath := filepath.Dir(filePath)
+	imdbId, err := common.GetImdbId(fileRootDirPath)
+	if err != nil && err != common.CanNotFindIMDBID {
+		return nil, err
+	}
+
+	var subInfoList []sub_supplier.SubInfo
+
+	if imdbId != "" {
+		// 先用 imdb id 找
+		subInfoList, err = s.GetSubListFromKeyword(imdbId)
+		if err != nil {
+			return nil, err
+		}
+		// 如果有就优先返回
+		if len(subInfoList) >0 {
+			return subInfoList, nil
+		}
+	}
+
+	// 如果没有,那么就用文件名查找
+	subInfoList, err = s.GetSubListFromKeyword(info.Title)
+	if err != nil {
+		return nil, err
+	}
+
+	return subInfoList, nil
+}
+
+// GetSubListFromKeyword searches SubHD for keyword and downloads every
+// subtitle listed on the first matching detail page.
+//
+// It chains Step0 (search), Step1 (list) and Step2 (download). The first error
+// from any step is returned and no partial result is kept.
+func (s Supplier) GetSubListFromKeyword(keyword string) ([]sub_supplier.SubInfo, error) {
+
+	detailPath, err := s.Step0(keyword)
+	if err != nil {
+		return nil, err
+	}
+	candidates, err := s.Step1(detailPath)
+	if err != nil {
+		return nil, err
+	}
+
+	var found []sub_supplier.SubInfo
+	for idx := range candidates {
+		pageUrl := candidates[idx].Url
+		content, err := s.Step2(pageUrl)
+		if err != nil {
+			return nil, err
+		}
+
+		// Language, Vote and Offset are fixed values here; they are not read
+		// from the page.
+		found = append(found, sub_supplier.SubInfo{
+			Name:     content.Filename,
+			Ext:      content.Ext,
+			Language: common.ChineseSimple,
+			Vote:     0,
+			FileUrl:  common.AddBaseUrl(common.SubSubHDRootUrl, pageUrl),
+			Offset:   0,
+			Data:     content.Data,
+		})
+	}
+
+	return found, nil
+}
+
+// Step0 searches SubHD for keyword and returns the relative path ("/d/...")
+// of the first detail-page link found in the result page.
+//
+// It returns common.SubHDStep0HrefIsNull when the page contains no such link,
+// and passes through any error from the HTTP request.
+func (s Supplier) Step0(keyword string) (string, error) {
+
+	// The keyword becomes a path segment of the search URL, so it needs path
+	// escaping. Query escaping would turn spaces into "+", which is only
+	// meaningful inside a query string.
+	result, err := s.httpGet(fmt.Sprintf(common.SubSubHDSearchUrl, url.PathEscape(keyword)))
+	if err != nil {
+		return "", err
+	}
+	re := regexp.MustCompile(`<a\shref="(/d/[\w]+)"><img`)
+	// Only the first hit is used, so there is no need to collect every match.
+	matched := re.FindStringSubmatch(result)
+	if len(matched) < 2 {
+		return "", common.SubHDStep0HrefIsNull
+	}
+	return matched[1], nil
+}
+// Step1 fetches a film's detail page and returns the subtitle rows listed on
+// it, at most s.topic of them.
+//
+// detailPageUrl may be relative; it is resolved against the SubHD root. Rows
+// without a download link, or whose download count cannot be parsed, are
+// skipped. Errors from the HTTP request or from HTML parsing are returned.
+func (s Supplier) Step1(detailPageUrl string) ([]HdListItem, error) {
+	detailPageUrl = common.AddBaseUrl(common.SubSubHDRootUrl, detailPageUrl)
+	result, err := s.httpGet(detailPageUrl)
+	if err != nil {
+		return nil, err
+	}
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(result))
+	if err != nil {
+		return nil, err
+	}
+	var lists []HdListItem
+	doc.Find(".table-sm tr").EachWithBreak(func(i int, tr *goquery.Selection) bool {
+		// Stop as soon as the limit is reached, so exactly s.topic rows are
+		// kept (checking "> topic" after the work let one extra row through).
+		if len(lists) >= s.topic {
+			return false
+		}
+
+		links := tr.Find("a.text-dark")
+		if links.Length() == 0 {
+			return true
+		}
+		downUrl, exists := links.Eq(0).Attr("href")
+		if !exists {
+			return true
+		}
+		// NOTE(review): Text() concatenates the text of every matched anchor,
+		// not only the first one - confirm this is the intended title.
+		title := strings.TrimSpace(links.Text())
+
+		downCount, err := common.GetNumber2int(tr.Find("td.p-3").Eq(1).Text())
+		if err != nil {
+			return true
+		}
+
+		// Join the tag labels with "," and no trailing separator. Trimming a
+		// fixed 3 bytes after appending a 1-byte separator cut into the last
+		// label and panicked on strings shorter than 3 bytes.
+		var tags []string
+		tr.Find(".text-secondary span").Each(func(_ int, span *goquery.Selection) {
+			tags = append(tags, span.Text())
+		})
+
+		lists = append(lists, HdListItem{
+			Url:        downUrl,
+			BaseUrl:    common.SubSubHDRootUrl,
+			Title:      title,
+			Ext:        strings.Join(tags, ","),
+			AuthorInfo: links.Eq(2).Text(),
+			Rate:       "", // no rating is extracted from the row
+			DownCount:  downCount,
+		})
+
+		return true
+	})
+
+	return lists, nil
+}
+// Step2 下载字幕
+func (s Supplier) Step2(subDownloadPageUrl string) (*HdContent, error) {
+	subDownloadPageUrl = common.AddBaseUrl(common.SubSubHDRootUrl, subDownloadPageUrl)
+	result, err := s.httpGet(subDownloadPageUrl)
+	if err != nil {
+		return nil, err
+	}
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(result))
+	if err != nil {
+		return nil, err
+	}
+	// 是否有腾讯的防水墙
+	matchList := doc.Find("#TencentCaptcha")
+	if len(matchList.Nodes) < 1 {
+		println("qiang")
+	}
+	//matchList = doc.Find("#down")
+	//if len(matchList.Nodes) < 1 {
+	//	println("not found down")
+	//}
+	postData := make(map[string]string)
+	sid, exists := matchList.Attr("sid")
+	if !exists {
+		return nil, common.SubHDStep2SidIsNull
+	}
+	postData["sub_id"] = sid
+	dToken, exists := matchList.Attr("dtoken1")
+	if !exists {
+		return nil, common.SubHDStep2DTokenIsNull
+	}
+	postData["dtoken1"] = dToken
+	url2 := fmt.Sprintf("%s%s", common.SubSubHDRootUrl, "/ajax/down_ajax")
+	result, err = s.httpPost(url2, postData, subDownloadPageUrl)
+	if err != nil {
+		return nil, err
+	}
+	if result == "" || strings.Contains(result, "true") == false {
+		return nil, common.SubHDStep2ResultIsNullOrNotTrue
+	}
+	reg := regexp.MustCompile(`"url":"([^"]+)"`)
+	arr := reg.FindStringSubmatch(result)
+	if len(arr) == 0 {
+		return nil, common.SubHDStep2PostResultGetUrlNotFound
+	}
+	downUrl := arr[1]
+	downUrl = strings.ReplaceAll(downUrl, "\\", "")
+	var filename = filepath.Base(downUrl)
+	var data []byte
+	data, filename, err = common.DownFile(downUrl, s.reqParam)
+	if err != nil {
+		return nil, err
+	}
+	return &HdContent{
+		Filename: filename,
+		Ext:      strings.ToLower(filepath.Ext(filename)),
+		Data:     data,
+	}, nil
+}
+
+func (s Supplier) httpGet(url string) (string, error) {
+	s.reqParam.Referer = url
+	httpClient := common.NewHttpClient(s.reqParam)
+	resp, err := httpClient.R().Get(url)
+	if err != nil {
+		return "", err
+	}
+	//搜索验证 点击继续搜索
+	if strings.Contains(resp.String(), "搜索验证") {
+		println("搜索验证 reload", url)
+		return s.httpGet(url)
+	}
+	return resp.String(), nil
+}
+
+// httpPost sends postData as form fields to url with the given Referer and
+// returns the response body as a string.
+func (s Supplier) httpPost(url string, postData map[string]string, referer string) (string, error) {
+
+	// s is a value receiver: the Referer change stays local to this call.
+	s.reqParam.Referer = referer
+	request := common.NewHttpClient(s.reqParam).R().SetFormData(postData)
+
+	response, err := request.Post(url)
+	if err != nil {
+		return "", err
+	}
+	return response.String(), nil
+}
+
+// HdListItem is one subtitle row scraped from a SubHD detail page by Step1.
+type HdListItem struct {
+	Url        string `json:"url"` // href of the row's first "a.text-dark" link
+	BaseUrl    string `json:"baseUrl"` // set to common.SubSubHDRootUrl
+	Title      string `json:"title"` // trimmed text of the row's "a.text-dark" links
+	Ext        string `json:"ext"` // text of the row's ".text-secondary span" labels
+	AuthorInfo string `json:"authorInfo"` // text of the third "a.text-dark" link
+	Lang       string `json:"lang"` // not populated by Step1
+	Rate       string `json:"rate"` // always empty as filled by Step1
+	DownCount  int    `json:"downCount"` // parsed from the second "td.p-3" cell
+}
+
+// HdContent is a downloaded subtitle file as returned by Step2.
+type HdContent struct {
+	Filename string `json:"filename"` // file name reported by common.DownFile
+	Ext      string `json:"ext"` // lower-cased extension of Filename, including the dot
+	Data     []byte `json:"data"` // bytes returned by common.DownFile
+}

+ 23 - 0
sub_supplier/subhd/subhd_test.go

@@ -0,0 +1,23 @@
+package subhd
+
+import (
+	"github.com/allanpk716/ChineseSubFinder/common"
+	"testing"
+)
+
+// TestSupplier_GetSubListFromFile runs a live lookup for a video path on the
+// author's machine and prints what comes back. It makes real requests through
+// the Supplier and asserts only that no error is returned.
+func TestSupplier_GetSubListFromFile(t *testing.T) {
+
+	videoPath := "X:\\电影\\消失爱人 (2016)\\消失爱人 (2016) 720p AAC.rmvb"
+	//videoPath := "X:\\电影\\机动战士Z高达:星之继承者 (2005)\\机动战士Z高达:星之继承者 (2005) 1080p TrueHD.mkv"
+	//videoPath := "X:\\连续剧\\The Bad Batch\\Season 1\\The Bad Batch - S01E01 - Aftermath WEBDL-1080p.mkv"
+	supplier := NewSupplier(common.ReqParam{Topic: 3})
+	subs, err := supplier.GetSubListFromFile(videoPath)
+	if err != nil {
+		t.Error(err)
+	}
+	println(subs)
+
+	for idx := range subs {
+		sub := subs[idx]
+		println(idx, sub.Name, sub.Ext, sub.Language.String(), sub.Vote, sub.FileUrl, len(sub.Data))
+	}
+}

+ 6 - 7
sub_supplier/zimuku/zimuku.go

@@ -191,7 +191,7 @@ func (s Supplier) Step1(filmDetailPageUrl string) (SubResult, error) {
 		if !exists {
 			rate = ""
 		}
-		vote, err := common.GetNumber2Folat(rate)
+		vote, err := common.GetNumber2Float(rate)
 		if err != nil {
 			return
 		}
@@ -199,7 +199,7 @@ func (s Supplier) Step1(filmDetailPageUrl string) (SubResult, error) {
 		downCountNub := 0
 		downCount := tr.Find("td").Eq(3).Text()
 		if strings.Contains(downCount, "万") {
-			fNumb, err := common.GetNumber2Folat(downCount)
+			fNumb, err := common.GetNumber2Float(downCount)
 			if err != nil {
 				return
 			}
@@ -271,11 +271,10 @@ func (s Supplier) Step3(subDownloadPageUrl string) (string, []byte, error) {
 	}
 	var filename string
 	var data []byte
+
+	s.reqParam.Referer = subDownloadPageUrl
 	for i := 0; i < len(matched); i++ {
-		data, filename, err = common.DownFile(common.AddBaseUrl(common.SubZiMuKuRootUrl, matched[i][1]), common.ReqParam{
-			HttpProxy: s.reqParam.HttpProxy,
-			Referer:   subDownloadPageUrl,
-		})
+		data, filename, err = common.DownFile(common.AddBaseUrl(common.SubZiMuKuRootUrl, matched[i][1]), s.reqParam)
 		if err != nil {
 			println("ZiMuKu Step3 DownloadFile", err)
 			continue
@@ -333,7 +332,7 @@ func (s Supplier) Step1Discard(keyword string) (SubResult, error) {
 						if ok == false {
 							return
 						}
-						number, err := common.GetNumber2Folat(vote)
+						number, err := common.GetNumber2Float(vote)
 						if err != nil {
 							return
 						}