Browse Source

重构,完成 zimuku api

Signed-off-by: allan716 <[email protected]>
allan716 4 years ago
parent
commit
ede5ff0cec

+ 3 - 1
common/common.go

@@ -2,4 +2,6 @@ package common
 
 import "time"
 
-const HTMLTimeOut = 20 * time.Second
+const HTMLTimeOut = 20 * time.Second	// HttpClient timeout
+
+const DownloadSubsPerSite = 1 // download one subtitle from each site

+ 112 - 0
common/lang.go

@@ -0,0 +1,112 @@
+package common
+
+import "strings"
+
+// LangConverter maps a free-form language description onto a Language value.
+// The xunlei service, for example, reports: unknown language,
+// simplified & English, traditional & English, simplified, traditional, English.
+//
+// The simplified-Chinese marker is checked first, then the traditional one.
+// Inside either family an English, Japanese or Korean marker (tested in that
+// order) selects the bilingual variant. Without a Chinese marker the same
+// order selects a single foreign language; Unknow is returned when no marker
+// is present at all.
+func LangConverter(subLang string) Language {
+	has := func(marker string) bool {
+		return strings.Contains(subLang, marker)
+	}
+
+	switch {
+	case has(MatchLangChs):
+		// Simplified Chinese takes precedence over everything else.
+		switch {
+		case has(MatchLangEn):
+			return ChineseSimpleEnglish
+		case has(MatchLangJp):
+			return ChineseSimpleJapanese
+		case has(MatchLangKr):
+			return ChineseSimpleKorean
+		}
+		return ChineseSimple
+	case has(MatchLangCht):
+		// Traditional Chinese comes second.
+		switch {
+		case has(MatchLangEn):
+			return ChineseTraditionalEnglish
+		case has(MatchLangJp):
+			return ChineseTraditionalJapanese
+		case has(MatchLangKr):
+			return ChineseTraditionalKorean
+		}
+		return ChineseTraditional
+	case has(MatchLangEn):
+		return English
+	case has(MatchLangJp):
+		return Japanese
+	case has(MatchLangKr):
+		return Korean
+	}
+	// No known marker found.
+	return Unknow
+}
+
+// Language is the subtitle language type. Note that searches still target
+// Chinese subtitles by default; a downloaded subtitle may simply carry other
+// languages alongside.
+type Language int
+const (
+	Unknow	Language = iota				// unknown language
+	ChineseSimple    					// Simplified Chinese
+	ChineseTraditional					// Traditional Chinese
+	ChineseSimpleEnglish				// Simplified Chinese + English bilingual subtitle
+	ChineseTraditionalEnglish			// Traditional Chinese + English bilingual subtitle
+	English								// English
+	Japanese							// Japanese
+	ChineseSimpleJapanese				// Simplified Chinese + Japanese bilingual subtitle
+	ChineseTraditionalJapanese			// Traditional Chinese + Japanese bilingual subtitle
+	Korean								// Korean
+	ChineseSimpleKorean					// Simplified Chinese + Korean bilingual subtitle
+	ChineseTraditionalKorean			// Traditional Chinese + Korean bilingual subtitle
+)
+
+// Chinese language labels. The single-character markers are searched for by
+// LangConverter; every label is returned by Language.String.
+const (
+	MathLangChnUnknow = "未知语言" // "unknown language"; fallback label of Language.String
+	MatchLangChs      = "简" // Simplified Chinese
+	MatchLangCht      = "繁" // Traditional Chinese
+	MatchLangChsEn    = "简英" // Simplified Chinese + English
+	MatchLangChtEn    = "繁英" // Traditional Chinese + English
+	MatchLangEn       = "英" // English
+	MatchLangJp       = "日" // Japanese
+	MatchLangChsJp    = "简日" // Simplified Chinese + Japanese
+	MatchLangChtJp    = "繁日" // Traditional Chinese + Japanese
+	MatchLangKr       = "韩" // Korean
+	MatchLangChsKr    = "简韩" // Simplified Chinese + Korean
+	MatchLangChtKr    = "繁韩" // Traditional Chinese + Korean
+)
+
+// String returns the Chinese label of the language. Unknow, and any value
+// that has no entry in the table, yields MathLangChnUnknow.
+func (l Language) String() string {
+	labels := map[Language]string{
+		ChineseSimple:              MatchLangChs,
+		ChineseTraditional:         MatchLangCht,
+		ChineseSimpleEnglish:       MatchLangChsEn,
+		ChineseTraditionalEnglish:  MatchLangChtEn,
+		English:                    MatchLangEn,
+		Japanese:                   MatchLangJp,
+		ChineseSimpleJapanese:      MatchLangChsJp,
+		ChineseTraditionalJapanese: MatchLangChtJp,
+		Korean:                     MatchLangKr,
+		ChineseSimpleKorean:        MatchLangChsKr,
+		ChineseTraditionalKorean:   MatchLangChtKr,
+	}
+	if label, found := labels[l]; found {
+		return label
+	}
+	return MathLangChnUnknow
+}

+ 3 - 0
common/selferr.go

@@ -9,6 +9,9 @@ var(
 	VideoFileIsTooSmall    = errors.New("video file is too small")
 	ShooterFileHashIsEmpty = errors.New("filehash is empty")
 
+	ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound = errors.New("zimuku search keyword step0 not found, detail page url")
 	ZiMuKuSearchKeyWordStep1NotFound = errors.New("zimuku search keyword step1 not found")
 	ZiMuKuDownloadUrlStep2NotFound = errors.New("zimuku download url step2 not found")
+	ZiMuKuDownloadUrlStep3NotFound = errors.New("zimuku download url step3 not found")
+	ZiMuKuDownloadUrlStep3AllFailed = errors.New("zimuku download url step3 all failed")
 )

+ 11 - 21
common/util.go

@@ -1,10 +1,8 @@
 package common
 
 import (
-	"compress/gzip"
 	"fmt"
 	"github.com/go-resty/resty/v2"
-	"io/ioutil"
 	"net/http"
 	"regexp"
 	"strings"
@@ -12,7 +10,8 @@ import (
 
 // NewHttpClient 新建一个 resty 的对象
 func NewHttpClient(_reqParam ...ReqParam) *resty.Client {
-	const defUserAgent = "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50"
+	//const defUserAgent = "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50"
+	const defUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41"
 
 	var reqParam ReqParam
 	var HttpProxy, UserAgent, Referer string
@@ -60,19 +59,8 @@ func DownFile(urlStr string, _reqParam ...ReqParam) ([]byte, string, error)  {
 	if err != nil {
 		return nil, "", err
 	}
-	body := resp.RawResponse.Body
-	if resp.RawResponse.Header.Get("Content-Encoding") == "gzip" {
-		body, err = gzip.NewReader(body)
-		if err != nil {
-			return nil, "", err
-		}
-	}
-	data, err := ioutil.ReadAll(body)
-	if err != nil {
-		return nil, "", err
-	}
 	filename := GetFileName(resp.RawResponse)
-	return data, filename, nil
+	return resp.Body(), filename, nil
 }
 
 // GetFileName 获取下载文件的文件名
@@ -98,10 +86,12 @@ func AddBaseUrl(baseUrl, url string) string {
 	return fmt.Sprintf("%s%s", baseUrl, url)
 }
 
+// ReqParam holds the optional parameters a caller may pass in.
 type ReqParam struct {
-	HttpProxy string
-	UserAgent string
-	Referer   string
-	MediaType string
-	Charset   string
-}
+	HttpProxy string		// HttpClient related
+	UserAgent string		// HttpClient related
+	Referer   string		// HttpClient related
+	MediaType string		// HttpClient related
+	Charset   string		// HttpClient related
+	Topic	  int			// when searching, return only the top N results
+}

+ 1 - 0
go.mod

@@ -4,6 +4,7 @@ go 1.15
 
 require (
 	github.com/PuerkitoBio/goquery v1.6.1
+	github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394 // indirect
 	github.com/beevik/etree v1.1.0
 	github.com/go-resty/resty/v2 v2.6.0
 	github.com/middelink/go-parse-torrent-name v0.0.0-20190301154245-3ff4efacd4c4

+ 2 - 0
go.sum

@@ -2,6 +2,8 @@ github.com/PuerkitoBio/goquery v1.6.1 h1:FgjbQZKl5HTmcn4sKBgvx8vv63nhyhIpv7lJpFG
 github.com/PuerkitoBio/goquery v1.6.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
 github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
 github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
+github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394 h1:OYA+5W64v3OgClL+IrOD63t4i/RW7RqrAVl9LTZ9UqQ=
+github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394/go.mod h1:Q8n74mJTIgjX4RBBcHnJ05h//6/k6foqmgE45jTQtxg=
 github.com/beevik/etree v1.1.0 h1:T0xke/WvNtMoCqgzPhkX2r4rjY3GDZFi+FjpRZY2Jbs=
 github.com/beevik/etree v1.1.0/go.mod h1:r8Aw8JqVegEf0w2fDnATrX9VpkMcyFeM0FhwO62wh+A=
 github.com/go-resty/resty/v2 v2.6.0 h1:joIR5PNLM2EFqqESUjCMGXrWmXNHEU9CEiK813oKYS4=

+ 2 - 2
sub_supplier/iSupplier.go

@@ -2,7 +2,7 @@ package sub_supplier
 
+// iSupplier lists the two lookup methods that the shooter, xunlei and zimuku
+// Supplier types in this change provide.
 type iSupplier interface {
 
-	GetSubListFromFile(filePath string, httpProxy string) ([]SubInfo, error)
+	// GetSubListFromFile looks up subtitles for the video file at filePath.
+	GetSubListFromFile(filePath string) ([]SubInfo, error)
 
-	GetSubListFromKeyword(keyword string, httpProxy string) ([]SubInfo, error)
+	// GetSubListFromKeyword looks up subtitles matching keyword.
+	GetSubListFromKeyword(keyword string) ([]SubInfo, error)
 }

+ 28 - 7
sub_supplier/shooter/shooter.go

@@ -12,15 +12,26 @@ import (
 )
 
+// Supplier is the shooter package's subtitle supplier.
 type Supplier struct {
-
+	reqParam common.ReqParam	// request parameters handed to common.NewHttpClient
+	topic int					// stop collecting once this many subtitles have been gathered
 }
 
-func NewSupplier() *Supplier {
-	return &Supplier{}
+// NewSupplier creates a Supplier. Only the first optional ReqParam is used;
+// its Topic, when positive, replaces the default limit
+// common.DownloadSubsPerSite.
+func NewSupplier(_reqParam ... common.ReqParam) *Supplier {
+
+	sup := Supplier{}
+	sup.topic = common.DownloadSubsPerSite
+	if len(_reqParam) > 0 {
+		sup.reqParam = _reqParam[0]
+		if sup.reqParam.Topic > 0 && sup.reqParam.Topic != sup.topic {
+			sup.topic = sup.reqParam.Topic
+		}
+	}
+	return &sup
 }
 
-func (s Supplier) GetSubListFromFile(filePath string, httpProxy string) ([]sub_supplier.SubInfo, error) {
+func (s Supplier) GetSubListFromFile(filePath string) ([]sub_supplier.SubInfo, error) {
 
+	// 可以提供的字幕查询 eng或者chn
 	const qLan = "Chn"
 	var outSubInfoList []sub_supplier.SubInfo
 	var jsonList []SublistShooter
@@ -35,7 +46,7 @@ func (s Supplier) GetSubListFromFile(filePath string, httpProxy string) ([]sub_s
 
 	fileName := filepath.Base(filePath)
 
-	httpClient := common.NewHttpClient(httpProxy)
+	httpClient := common.NewHttpClient(s.reqParam)
 
 	_, err = httpClient.R().
 		SetFormData(map[string]string{
@@ -55,13 +66,23 @@ func (s Supplier) GetSubListFromFile(filePath string, httpProxy string) ([]sub_s
 			if strings.Contains(file.Ext, ".") == false {
 				subExt = "." + subExt
 			}
-			outSubInfoList = append(outSubInfoList, *sub_supplier.NewSubInfo(fileName, qLan, "", file.Link, 0, shooter.Delay, subExt))
+
+			data, _, err := common.DownFile(file.Link)
+			if err != nil {
+				println(err.Error())
+				continue
+			}
+			outSubInfoList = append(outSubInfoList, *sub_supplier.NewSubInfo(fileName, common.ChineseSimple, file.Link, 0, shooter.Delay, subExt, data))
+			// 如果够了那么多个字幕就返回
+			if len(outSubInfoList) >= s.topic {
+				return outSubInfoList, nil
+			}
 		}
 	}
 	return outSubInfoList, nil
 }
 
-func (s Supplier) GetSubListFromKeyword(keyword string, httpProxy string) ([]sub_supplier.SubInfo, error) {
+// GetSubListFromKeyword is not implemented for this supplier; it always panics.
+func (s Supplier) GetSubListFromKeyword(keyword string) ([]sub_supplier.SubInfo, error) {
 	panic("not implemented")
 }
 

+ 7 - 6
sub_supplier/shooter/shooter_test.go

@@ -1,21 +1,22 @@
 package shooter
 
 import (
+	"github.com/allanpk716/ChineseSubFinder/common"
 	"testing"
 )
 
+// TestNewSupplier asks a Supplier configured with Topic 3 for the subtitles of
+// a local sample video path and prints every returned entry.
 func TestNewSupplier(t *testing.T) {
-	//movie1 := "X:\\电影\\消失爱人 (2016)\\消失爱人 (2016) 720p AAC.rmvb"
-	//movie2 := "X:\\电影\\机动战士Z高达:星之继承者 (2005)\\机动战士Z高达:星之继承者 (2005) 1080p TrueHD.mkv"
-	standard1 := "X:\\连续剧\\The Bad Batch\\Season 1\\The Bad Batch - S01E01 - Aftermath WEBDL-1080p.mkv"
-	shooter := NewSupplier()
-	outList, err := shooter.GetSubListFromFile(standard1, "")
+	movie1 := "X:\\电影\\消失爱人 (2016)\\消失爱人 (2016) 720p AAC.rmvb"
+	//movie1 := "X:\\电影\\机动战士Z高达:星之继承者 (2005)\\机动战士Z高达:星之继承者 (2005) 1080p TrueHD.mkv"
+	//movie1 := "X:\\连续剧\\The Bad Batch\\Season 1\\The Bad Batch - S01E01 - Aftermath WEBDL-1080p.mkv"
+	shooter := NewSupplier(common.ReqParam{Topic: 3})
+	outList, err := shooter.GetSubListFromFile(movie1)
 	if err != nil {
 		t.Error(err)
 	}
 	println(outList)
 
 	for i, sublist := range outList {
-		println(i, sublist.Language, sublist.Rate, sublist.Vote, sublist.FileUrl)
+		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Vote, sublist.FileUrl, len(sublist.Data))
 	}
 }

+ 13 - 10
sub_supplier/sublist.go

@@ -1,15 +1,18 @@
 package sub_supplier
 
+import "github.com/allanpk716/ChineseSubFinder/common"
+
+// SubInfo describes one subtitle returned by a supplier.
 type SubInfo struct {
-	Name 		string `json:"name"`
-	Language 	string `json:"language"`
-	Rate 		string `json:"rate"`
-	FileUrl     string `json:"file-url"`
-	Vote    	int64  `json:"vote"`
-	Offset  	int64  `json:"offset"`
-	Ext			string `json:"ext"`		// 字幕文件的后缀名带点,有可能是直接能用的字幕文件,也可能是压缩包
+	Name 		string `json:"name"`		// subtitle name; loosely defined: preferably the video's name, otherwise the name of the subtitle as downloaded from the site
+	Language 	common.Language `json:"language"`	// language of the subtitle
+	FileUrl     string `json:"file-url"`	// download URL of the subtitle file
+	Vote    	int64  `json:"vote"`		// votes
+	Offset  	int64  `json:"offset"`		// subtitle offset
+	Ext			string `json:"ext"`			// file extension including the dot; may be a directly usable subtitle file or an archive
+	Data		[]byte	`json:"data"`		// raw bytes of the subtitle file
+}
+
+// NewSubInfo builds a SubInfo from the given field values and returns a
+// pointer to it.
+func NewSubInfo(name string, language common.Language, fileUrl string, vote int64, offset int64, ext string, data []byte) *SubInfo {
+	return &SubInfo{Name: name, Language: language, FileUrl: fileUrl, Vote: vote, Offset: offset, Ext: ext, Data: data}
 }
 
-func NewSubInfo(name string, language string, rate string, fileUrl string, vote int64, offset int64, ext string) *SubInfo {
-	return &SubInfo{Name: name, Language: language, Rate: rate, FileUrl: fileUrl, Vote: vote, Offset: offset, Ext: ext}
-}

+ 41 - 17
sub_supplier/xunlei/xunlei.go

@@ -2,7 +2,6 @@ package xunlei
 
 import (
 	"crypto/sha1"
-	"encoding/json"
 	"fmt"
 	"github.com/allanpk716/ChineseSubFinder/common"
 	"github.com/allanpk716/ChineseSubFinder/sub_supplier"
@@ -11,43 +10,66 @@ import (
 	"path/filepath"
 )
 
-
-
+// Supplier is the xunlei package's subtitle supplier.
 type Supplier struct {
-
+	reqParam common.ReqParam	// request parameters handed to common.NewHttpClient
+	topic int					// stop collecting once this many subtitles have been gathered
 }
 
-func NewSupplier() *Supplier {
-	return &Supplier{}
+// NewSupplier creates a Supplier. Only the first optional ReqParam is used;
+// its Topic, when positive, replaces the default limit
+// common.DownloadSubsPerSite.
+func NewSupplier(_reqParam ... common.ReqParam) *Supplier {
+
+	sup := Supplier{}
+	sup.topic = common.DownloadSubsPerSite
+	if len(_reqParam) > 0 {
+		sup.reqParam = _reqParam[0]
+		if sup.reqParam.Topic > 0 && sup.reqParam.Topic != sup.topic {
+			sup.topic = sup.reqParam.Topic
+		}
+	}
+	return &sup
 }
 
-func (s Supplier) GetSubListFromFile(filePath string, httpProxy string) ([]sub_supplier.SubInfo, error) {
+// GetSubListFromFile looks up subtitles for the video at filePath.
+//
+// It derives a cid from the file via getCid, requests
+// common.SubXunLeiRootUrl formatted with that cid, and lets the HTTP client
+// decode the response into jsonList. Every entry with a non-empty Scid is
+// downloaded; failed downloads are printed and skipped. The function returns
+// as soon as s.topic subtitles have been collected.
+//
+// It returns common.XunLeiCIdIsEmpty when no cid could be obtained, or the
+// error of the list request.
+func (s Supplier) GetSubListFromFile(filePath string) ([]sub_supplier.SubInfo, error) {
+
+	// NOTE(review): the error returned by getCid is never inspected; a failure
+	// only surfaces as an empty cid below. Confirm this is intended.
 	cid, err := s.getCid(filePath)
 	var jsonList SublistSliceXunLei
 	var outSubList []sub_supplier.SubInfo
 	if len(cid) == 0 {
 		return outSubList, common.XunLeiCIdIsEmpty
 	}
-	httpClient := common.NewHttpClient(httpProxy)
-	resp, err := httpClient.R().Get(fmt.Sprintf(common.SubXunLeiRootUrl, cid))
-	if err != nil {
-		return outSubList, err
-	}
-	// 解析
-	err = json.Unmarshal([]byte(resp.String()), &jsonList)
+	httpClient := common.NewHttpClient(s.reqParam)
+	_, err = httpClient.R().
+		SetResult(&jsonList).
+		Get(fmt.Sprintf(common.SubXunLeiRootUrl, cid))
 	if err != nil {
 		return outSubList, err
 	}
 	// skip the empty entries
 	for _, v := range jsonList.Sublist {
 		if len(v.Scid) > 0 {
-			outSubList = append(outSubList, *sub_supplier.NewSubInfo(v.Sname, v.Language, v.Rate, v.Surl, v.Svote, v.Roffset, filepath.Ext(v.Surl)))
+
+			// Forward the supplier's request parameters so the download uses
+			// the same proxy settings as the list request above.
+			data, filename, err := common.DownFile(v.Surl, s.reqParam)
+			if err != nil {
+				println(err.Error())
+				continue
+			}
+			// Prefer the extension of the file name reported by the download;
+			// fall back to the one found in the URL.
+			ext := ""
+			if filename == "" {
+				ext = filepath.Ext(v.Surl)
+			} else {
+				ext = filepath.Ext(filename)
+			}
+			tmpLang := common.LangConverter(v.Language)
+			outSubList = append(outSubList, *sub_supplier.NewSubInfo(v.Sname, tmpLang, v.Surl, v.Svote, v.Roffset, ext, data))
+			// return as soon as enough subtitles have been collected
+			if len(outSubList) >= s.topic {
+				return outSubList, nil
+			}
 		}
 	}
 	return outSubList, nil
 }
 
-func (s Supplier) GetSubListFromKeyword(keyword string, httpProxy string) ([]sub_supplier.SubInfo, error) {
+// GetSubListFromKeyword is not implemented for this supplier; it always panics.
+func (s Supplier) GetSubListFromKeyword(keyword string) ([]sub_supplier.SubInfo, error) {
 	panic("not implemented")
 }
 
@@ -100,4 +122,6 @@ type SublistXunLei struct {
 
 type SublistSliceXunLei struct {
 	Sublist []SublistXunLei
-}
+}
+
+// LangUnknow is the label "未知语言" ("unknown language").
+// NOTE(review): not referenced in the visible part of this change — confirm it is still needed.
+const LangUnknow = "未知语言"

+ 6 - 4
sub_supplier/xunlei/xunlei_test.go

@@ -5,17 +5,19 @@ import (
 )
 
+// TestGetList asks a default Supplier for the subtitles of a local sample
+// video path and prints every returned entry.
 func TestGetList(t *testing.T) {
+	movie1 := "X:\\电影\\龙猫 (1988)\\龙猫 (1988) 1080p DTS.mkv"
+	//movie1 := "X:\\电影\\消失爱人 (2016)\\消失爱人 (2016) 720p AAC.rmvb"
 	//movie1:= "X:\\电影\\Spiral From the Book of Saw (2021)\\Spiral From the Book of Saw (2021) WEBDL-1080p.mkv"
-	//movie2 := "X:\\电影\\机动战士Z高达:星之继承者 (2005)\\机动战士Z高达:星之继承者 (2005) 1080p TrueHD.mkv"
-	standard1 := "X:\\连续剧\\The Bad Batch\\Season 1\\The Bad Batch - S01E01 - Aftermath WEBDL-1080p.mkv"
+	//movie1 := "X:\\电影\\机动战士Z高达:星之继承者 (2005)\\机动战士Z高达:星之继承者 (2005) 1080p TrueHD.mkv"
+	//movie1 := "X:\\连续剧\\The Bad Batch\\Season 1\\The Bad Batch - S01E01 - Aftermath WEBDL-1080p.mkv"
 	xunlie := NewSupplier()
-	outList, err := xunlie.GetSubListFromFile(standard1, "")
+	outList, err := xunlie.GetSubListFromFile(movie1)
 	if err != nil {
 		t.Error(err)
 	}
 	println(outList)
 
 	for i, sublist := range outList {
-		println(i, sublist.Language, sublist.Rate, sublist.Vote, sublist.FileUrl)
+		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Vote, len(sublist.Data))
 	}
 }

+ 240 - 86
sub_supplier/zimuku/zimuku.go

@@ -12,14 +12,24 @@ import (
 )
 
+// Supplier is the zimuku package's subtitle supplier.
 type Supplier struct {
-	_reqParam common.ReqParam
+	reqParam common.ReqParam	// request parameters handed to common.NewHttpClient
+	topic int					// at most this many subtitles are kept after sorting by priority
 }
 
-func NewSupplier() *Supplier {
-	return &Supplier{}
+// NewSupplier creates a Supplier. Only the first optional ReqParam is used;
+// its Topic, when positive, replaces the default limit
+// common.DownloadSubsPerSite.
+func NewSupplier(_reqParam ... common.ReqParam) *Supplier {
+
+	sup := Supplier{}
+	sup.topic = common.DownloadSubsPerSite
+	if len(_reqParam) > 0 {
+		sup.reqParam = _reqParam[0]
+		if sup.reqParam.Topic > 0 && sup.reqParam.Topic != sup.topic {
+			sup.topic = sup.reqParam.Topic
+		}
+	}
+	return &sup
 }
 
-func (s Supplier) GetSubListFromFile(filePath string, httpProxy string) ([]sub_supplier.SubInfo, error) {
+func (s Supplier) GetSubListFromFile(filePath string) ([]sub_supplier.SubInfo, error) {
 
 	/*
 		虽然是传入视频文件路径,但是其实需要读取对应的视频文件目录下的
@@ -35,21 +45,26 @@ func (s Supplier) GetSubListFromFile(filePath string, httpProxy string) ([]sub_s
 	// 找到这个视频文件,然后读取它目录下的文件,尝试得到 IMDB ID
 	fileRootDirPath := filepath.Dir(filePath)
 	imdbId, err := common.GetImdbId(fileRootDirPath)
-	if err != nil {
+	if err != nil && err != common.CanNotFindIMDBID {
 		return nil, err
 	}
 
-	// 先用 imdb id 找
-	subInfoList, err := s.GetSubListFromKeyword(imdbId, httpProxy)
-	if err != nil {
-		return nil, err
-	}
-	// 如果有就优先返回
-	if len(subInfoList) >0 {
-		return subInfoList, nil
+	var subInfoList []sub_supplier.SubInfo
+
+	if imdbId != "" {
+		// 先用 imdb id 找
+		subInfoList, err = s.GetSubListFromKeyword(imdbId)
+		if err != nil {
+			return nil, err
+		}
+		// 如果有就优先返回
+		if len(subInfoList) >0 {
+			return subInfoList, nil
+		}
 	}
+
 	// 如果没有,那么就用文件名查找
-	subInfoList, err = s.GetSubListFromKeyword(info.Title, httpProxy)
+	subInfoList, err = s.GetSubListFromKeyword(info.Title)
 	if err != nil {
 		return nil, err
 	}
@@ -57,31 +72,223 @@ func (s Supplier) GetSubListFromFile(filePath string, httpProxy string) ([]sub_s
 	return subInfoList, nil
 }
 
-func (s Supplier) GetSubListFromKeyword(keyword string, httpProxy string) ([]sub_supplier.SubInfo, error) {
+// GetSubListFromKeyword searches zimuku for keyword and downloads the
+// matching subtitles.
+//
+// The lookup walks four page levels: Step0 finds the film's detail page,
+// Step1 lists the subtitles on it, Step2 resolves each subtitle's download
+// page and Step3 downloads the file. Only the first s.topic subtitles after
+// sorting with SortByPriority are processed. Failures of Step2 or Step3 for a
+// single subtitle are printed and that subtitle is skipped; failures of Step0
+// or Step1 are returned.
+func (s Supplier) GetSubListFromKeyword(keyword string) ([]sub_supplier.SubInfo, error) {
 
-	// 第一级界面,有多少个字幕
-	subResult, err := s.Step1(keyword, httpProxy)
+	var outSubInfoList []sub_supplier.SubInfo
+	// Level 1: find the film's detail page.
+	filmDetailPageUrl, err := s.Step0(keyword)
 	if err != nil {
 		return nil, err
 	}
-	// 第二级界面,单个字幕详情
-	err = s.Step2(&subResult, httpProxy)
+	// Level 2: list the subtitles that are available.
+	subResult, err := s.Step1(filmDetailPageUrl)
 	if err != nil {
 		return nil, err
 	}
-	// 第三级界面,具体字幕下载
-	err = s.Step3(&subResult, httpProxy)
+	// Level 3: the detail page of each subtitle.
+	// Order by priority (as defined by SortByPriority) so the preferred
+	// subtitles come first.
+	sort.Sort(SortByPriority{subResult.SubInfos})
+	// Drop the surplus subtitles.
+	if len(subResult.SubInfos) > s.topic {
+		subResult.SubInfos = subResult.SubInfos[:s.topic]
+	}
+
+	for i := range subResult.SubInfos {
+		err = s.Step2(&subResult.SubInfos[i])
+		if err != nil {
+			println(err.Error())
+			continue
+		}
+	}
+	// Level 4: the actual subtitle download.
+	for _, subInfo := range subResult.SubInfos {
+		// A failed Step2 leaves SubDownloadPageUrl empty; skip that entry
+		// instead of handing Step3 an empty URL.
+		if subInfo.SubDownloadPageUrl == "" {
+			continue
+		}
+		fileName, data, err := s.Step3(subInfo.SubDownloadPageUrl)
+		if err != nil {
+			println(err.Error())
+			continue
+		}
+		// Assume by default that every result contains Chinese subtitles; the
+		// exact language is distinguished later, when the subtitle is used.
+		outSubInfoList = append(outSubInfoList, *sub_supplier.NewSubInfo(fileName, common.ChineseSimple, common.AddBaseUrl(common.SubZiMuKuRootUrl, subInfo.SubDownloadPageUrl), 0,
+			0, filepath.Ext(fileName), data))
+	}
+
+	return outSubInfoList, nil
+}
+
+// Step0 queries the search page with keyword and returns the detail page URL
+// of the first film found in the response. It returns
+// common.ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound when the response
+// contains no detail page link.
+func (s Supplier) Step0(keyword string) (string, error) {
+	httpClient := common.NewHttpClient(s.reqParam)
+	// Search page: send the keyword as the "q" query parameter.
+	resp, err := httpClient.R().
+		SetQueryParams(map[string]string{
+			"q": keyword,
+		}).
+		Get(common.SubZiMuKuSearchUrl)
 	if err != nil {
-		return nil, err
+		return "", err
 	}
-	// TODO 需要把查询到的信息转换到 []sub_supplier.SubInfo 再输出
-	// 注意要做一次排序,根据优先级
-	return nil, nil
+	// Locate the detail pages of the matching films.
+	re := regexp.MustCompile(`<p\s+class="tt\s+clearfix"><a\s+href="(/subs/[\w]+\.html)"\s+target="_blank"><b>(.*?)</b></a></p>`)
+	matched := re.FindAllStringSubmatch(resp.String(), -1)
+	//lists := make([]string, 0)
+	//for _, match := range matched {
+	//	// deduplicate
+	//	for _, list := range lists {
+	//		if list != match[1] {
+	//			lists = append(lists, match[1])
+	//		}
+	//	}
+	//	lists = append(lists, match[1])
+	//}
+	if len(matched) < 1 {
+		return "", common.ZiMuKuSearchKeyWordStep0DetailPageUrlNotFound
+	}
+	// URL of the film's detail page (first match, first capture group).
+	filmDetailPageUrl := matched[0][1]
+	return filmDetailPageUrl, nil
 }
 
-// Step1 第一级界面,有多少个字幕
-func (s Supplier) Step1(keyword string, _reqParam ...common.ReqParam) (SubResult, error) {
-	httpClient := common.NewHttpClient()
+// Step1 fetches the film detail page and collects the subtitles listed in the
+// rows of its "#subtb" table.
+//
+// A row is skipped when its link lacks an href or title attribute, or when
+// its rating or download count cannot be converted to a number. The request
+// error or the HTML parse error is returned as is.
+func (s Supplier) Step1(filmDetailPageUrl string) (SubResult, error) {
+	filmDetailPageUrl = common.AddBaseUrl(common.SubZiMuKuRootUrl, filmDetailPageUrl)
+	resp, err := common.NewHttpClient(s.reqParam).R().Get(filmDetailPageUrl)
+	if err != nil {
+		return SubResult{}, err
+	}
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(resp.String()))
+	if err != nil {
+		return SubResult{}, err
+	}
+
+	subResult := SubResult{SubInfos: SubInfos{}}
+	doc.Find("#subtb tbody tr").Each(func(_ int, tr *goquery.Selection) {
+		anchor := tr.Find("a")
+		href, hasHref := anchor.Attr("href")
+		if !hasHref {
+			return
+		}
+		title, hasTitle := anchor.Attr("title")
+		if !hasTitle {
+			return
+		}
+		ext := tr.Find(".label-info").Text()
+
+		// Join all ".gray" labels; each one is followed by a full-width comma.
+		authorInfo := ""
+		tr.Find(".gray").Each(func(_ int, label *goquery.Selection) {
+			authorInfo += label.Text() + ","
+		})
+		if n := len(authorInfo); n > 0 {
+			// Cut the trailing separator: three bytes, the UTF-8 size of the
+			// full-width comma appended above.
+			authorInfo = authorInfo[:n-3]
+		}
+
+		// Attr yields "" for a missing attribute, which is the wanted default.
+		lang, _ := tr.Find("img").First().Attr("alt")
+		rate, _ := tr.Find(".rating-star").First().Attr("title")
+		vote, convErr := common.GetNumber2Folat(rate)
+		if convErr != nil {
+			return
+		}
+
+		// The count is either a plain integer or a decimal followed by "万" (x10000).
+		var downloads int
+		countText := tr.Find("td").Eq(3).Text()
+		if strings.Contains(countText, "万") {
+			scaled, convErr := common.GetNumber2Folat(countText)
+			if convErr != nil {
+				return
+			}
+			downloads = int(scaled * 10000)
+		} else {
+			plain, convErr := common.GetNumber2int(countText)
+			if convErr != nil {
+				return
+			}
+			downloads = plain
+		}
+
+		subResult.Title = title
+		subResult.SubInfos = append(subResult.SubInfos, SubInfo{
+			DetailUrl:     href,
+			Ext:           ext,
+			AuthorInfo:    authorInfo,
+			Lang:          lang,
+			DownloadTimes: downloads,
+			Score:         vote,
+			// priority = score x download count
+			Priority: vote * float32(downloads),
+		})
+	})
+	return subResult, nil
+}
+
+// Step2 fetches the detail page of a single subtitle and stores the URL of
+// its download page in subInfo.SubDownloadPageUrl. A link that carries no
+// scheme is prefixed with common.SubZiMuKuRootUrl.
+//
+// It returns the request error, or common.ZiMuKuDownloadUrlStep2NotFound
+// (after printing the requested URL) when the page has no download link.
+func (s Supplier) Step2(subInfo *SubInfo) error {
+
+	detailUrl := common.AddBaseUrl(common.SubZiMuKuRootUrl, subInfo.DetailUrl)
+	resp, err := common.NewHttpClient(s.reqParam).R().Get(detailUrl)
+	if err != nil {
+		return err
+	}
+
+	// Only the first download link on the page is of interest.
+	re := regexp.MustCompile(`<a\s+id="down1"\s+href="([^"]*/dld/[\w]+\.html)"`)
+	firstHit := re.FindStringSubmatch(resp.String())
+	if len(firstHit) == 0 {
+		println(detailUrl)
+		return common.ZiMuKuDownloadUrlStep2NotFound
+	}
+
+	downloadPageUrl := firstHit[1]
+	if !strings.Contains(downloadPageUrl, "://") {
+		downloadPageUrl = fmt.Sprintf("%s%s", common.SubZiMuKuRootUrl, downloadPageUrl)
+	}
+	subInfo.SubDownloadPageUrl = downloadPageUrl
+	return nil
+}
+
+// Step3 第三级界面,具体字幕下载
+func (s Supplier) Step3(subDownloadPageUrl string) (string, []byte, error) {
+
+	subDownloadPageUrl = common.AddBaseUrl(common.SubZiMuKuRootUrl, subDownloadPageUrl)
+	httpClient := common.NewHttpClient(s.reqParam)
+	resp, err := httpClient.R().
+		Get(subDownloadPageUrl)
+	if err != nil {
+		return "", nil, err
+	}
+	re := regexp.MustCompile(`<li><a\s+rel="nofollow"\s+href="([^"]*/download/[^"]+)"`)
+	matched := re.FindAllStringSubmatch(resp.String(), -1)
+	if matched == nil || len(matched) == 0 || len(matched[0]) == 0 {
+		println(subDownloadPageUrl)
+		return "", nil, common.ZiMuKuDownloadUrlStep3NotFound
+	}
+	var filename string
+	var data []byte
+	for i := 0; i < len(matched); i++ {
+		data, filename, err = common.DownFile(common.AddBaseUrl(common.SubZiMuKuRootUrl, matched[i][1]), common.ReqParam{
+			HttpProxy: s.reqParam.HttpProxy,
+			Referer:   subDownloadPageUrl,
+		})
+		if err != nil {
+			println("ZiMuKu Step3 DownloadFile", err)
+			continue
+		}
+		return filename, data, nil
+	}
+	println(subDownloadPageUrl)
+	return "", nil, common.ZiMuKuDownloadUrlStep3AllFailed
+}
+
+// Step1Discard 第一级界面,有多少个字幕,弃用,直接再搜索出来的结果界面匹配会遇到一个问题,就是 “还有8个字幕,点击查看” 类似此问题
+func (s Supplier) Step1Discard(keyword string) (SubResult, error) {
+	httpClient := common.NewHttpClient(s.reqParam)
 	// 第一级界面,有多少个字幕
 	resp, err := httpClient.R().
 		SetQueryParams(map[string]string{
@@ -152,67 +359,11 @@ func (s Supplier) Step1(keyword string, _reqParam ...common.ReqParam) (SubResult
 	})
 	// 这里要判断,一级界面是否OK 了,不行就返回
 	if subResult.Title == "" || len(subResult.SubInfos) == 0 {
-		return SubResult{}, common.ZiMuKuSearchKeyWordStep1NotFound
+		return SubResult{}, nil
 	}
 	return subResult, nil
 }
 
-// Step2 第二级界面,单个字幕详情
-func (s Supplier) Step2(subResult *SubResult, httpProxy string) error {
-	// 找到最大的优先级的字幕下载
-	sort.Sort(SortByPriority{subResult.SubInfos})
-	// 排序后的第一个
-	httpClient := common.NewHttpClient(httpProxy)
-	resp, err := httpClient.R().
-		Get(common.SubZiMuKuRootUrl + subResult.SubInfos[0].DetailUrl)
-	if err != nil {
-		return err
-	}
-	// 找到下载地址
-	re := regexp.MustCompile(`<a\s+id="down1"\s+href="([^"]*/dld/[\w]+\.html)"`)
-	matched := re.FindAllStringSubmatch(resp.String(), -1)
-	if matched == nil || len(matched) == 0 || len(matched[0]) == 0 {
-		return common.ZiMuKuDownloadUrlStep2NotFound
-	}
-	if strings.Contains(matched[0][1], "://") {
-		subResult.SubInfos[0].SubDownloadPageUrl = matched[0][1]
-	} else {
-		subResult.SubInfos[0].SubDownloadPageUrl = fmt.Sprintf("%s%s", common.SubZiMuKuRootUrl, matched[0][1])
-	}
-	return nil
-}
-
-// Step3 第三级界面,具体字幕下载
-func (s Supplier) Step3(subResult *SubResult, httpProxy string) error {
-
-	subDownloadPageUrl := common.AddBaseUrl(common.SubZiMuKuRootUrl, subResult.SubInfos[0].SubDownloadPageUrl)
-	httpClient := common.NewHttpClient(httpProxy)
-	resp, err := httpClient.R().
-		Get(subDownloadPageUrl)
-	if err != nil {
-		return err
-	}
-	re := regexp.MustCompile(`<li><a\s+rel="nofollow"\s+href="([^"]*/download/[^"]+)"`)
-	matched := re.FindAllStringSubmatch(resp.String(), -1)
-	if matched == nil || len(matched) == 0 || len(matched[0]) == 0 {
-		return nil
-	}
-	var filename string
-	var data []byte
-	for i := 0; i < len(matched); i++ {
-
-		data, filename, err = common.DownFile(common.AddBaseUrl(common.SubZiMuKuRootUrl, matched[i][1]), common.ReqParam{
-			HttpProxy: httpProxy,
-			Referer:   subDownloadPageUrl,
-		})
-		if err != nil {
-			println("ZiMuKu Step3 DownloadFile", err)
-			continue
-		}
-	}
-	return nil
-}
-
 type SubResult struct {
 	Title string			// 字幕的标题
 	OtherName string		// 影片又名
@@ -220,9 +371,12 @@ type SubResult struct {
 }
 
 type SubInfo struct {
+	Lang				string	// 语言
+	AuthorInfo			string	// 作者
+	Ext					string	// 后缀名
 	Score				float32	// 评分
 	DownloadTimes 		int		// 下载的次数
-	Priority			float32	// 优先级,使用评分和次数乘积而来
+	Priority			float32	// 优先级,使用评分和次数乘积而来,类似于 Vote 投票
 	DetailUrl			string	// 字幕的详情界面,需要再次分析具体的下载地址,地址需要拼接网站的根地址上去
 	SubDownloadPageUrl 	string	// 字幕的具体的下载页面,会有多个下载可用的链接
 	DownloadUrl			string	// 字幕的下载地址

+ 22 - 4
sub_supplier/zimuku/zimuku_test.go

@@ -9,12 +9,30 @@ func TestSupplier_GetSubListFromKeyword(t *testing.T) {
 	//imdbId1 := "tt3228774"
 	videoName := "黑白魔女库伊拉"
 	s := NewSupplier()
-	subList, err := s.GetSubListFromKeyword(videoName, "")
+	outList, err := s.GetSubListFromKeyword(videoName)
 	if err != nil {
 		t.Error(err)
 	}
-
-	for _, info := range subList {
-		println(info.Name)
+	println(outList)
+	for i, sublist := range outList {
+		println(i, sublist.Name, sublist.Ext, sublist.Language.String(), sublist.Vote, len(sublist.Data))
 	}
 }
+
+// TestSupplier_GetSubListFromFile asks a default Supplier for the subtitles
+// of a local sample video path and prints every returned entry.
+func TestSupplier_GetSubListFromFile(t *testing.T) {
+	//movie1 := "X:\\电影\\龙猫 (1988)\\龙猫 (1988) 1080p DTS.mkv"
+	movie1 := "X:\\电影\\消失爱人 (2016)\\消失爱人 (2016) 720p AAC.rmvb"
+	//movie1:= "X:\\电影\\Spiral From the Book of Saw (2021)\\Spiral From the Book of Saw (2021) WEBDL-1080p.mkv"
+	//movie1 := "X:\\电影\\机动战士Z高达:星之继承者 (2005)\\机动战士Z高达:星之继承者 (2005) 1080p TrueHD.mkv"
+	//movie1 := "X:\\连续剧\\The Bad Batch\\Season 1\\The Bad Batch - S01E01 - Aftermath WEBDL-1080p.mkv"
+
+	outList, err := NewSupplier().GetSubListFromFile(movie1)
+	if err != nil {
+		t.Error(err)
+	}
+	println(outList)
+	for idx := 0; idx < len(outList); idx++ {
+		entry := outList[idx]
+		println(idx, entry.Name, entry.Ext, entry.Language.String(), entry.Vote, len(entry.Data))
+	}
+}