Browse Source

查询电影的时候,如果电影的年份>=2020,那么把年份加入搜索,提高查询的正确率

Signed-off-by: 716 <[email protected]>
716 4 years ago
parent
commit
3c0678aeca
4 changed files with 101 additions and 71 deletions
  1. 49 25
      model/decode.go
  2. 18 37
      model/decode_test.go
  3. 17 4
      sub_supplier/subhd/subhd.go
  4. 17 5
      sub_supplier/zimuku/zimuku.go

+ 49 - 25
model/decode.go

@@ -13,40 +13,64 @@ import (
 	"strings"
 )
 
-func getImdbMovieXml(movieFilePath string) (string, error) {
+func getImdbAndYearMovieXml(movieFilePath string) (string, string, error) {
 	doc := etree.NewDocument()
 	if err := doc.ReadFromFile(movieFilePath); err != nil {
-		return "", err
+		return "", "", err
 	}
+	imdbId := ""
 	for _, t := range doc.FindElements("//IMDB") {
-		return t.Text(), nil
+		imdbId = t.Text()
+		break
 	}
-
-	return "", common.CanNotFindIMDBID
+	year := ""
+	for _, t := range doc.FindElements("//ProductionYear") {
+		year = t.Text()
+		break
+	}
+	if imdbId != "" {
+		return imdbId, year, nil
+	}
+	return "", "", common.CanNotFindIMDBID
 }
 
-func getImdbNfo(nfoFilePath string) (string, error) {
+func getImdbAndYearNfo(nfoFilePath string) (string, string, error) {
 	doc := etree.NewDocument()
-	if err := doc.ReadFromFile(nfoFilePath); err != nil {
-		return "", err
-	}
-	for _, t := range doc.FindElements("//uniqueid[@type='Imdb']") {
-		return t.Text(), nil
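+	// Catch: the attribute value matched below may appear in lowercase, uppercase, or capitalized form.
+	// Lowercasing the whole file before parsing the XML was considered, but etree crashes on the content of some kinds of files once lowercased.
+	// So the blunt approach is used here: query each casing in turn.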
+	err := doc.ReadFromFile(nfoFilePath)
+	if err != nil {
+		return "", "", err
 	}
+	imdbId := ""
 	for _, t := range doc.FindElements("//uniqueid[@type='imdb']") {
-		return t.Text(), nil
+		imdbId = t.Text()
+		break
+	}
+	for _, t := range doc.FindElements("//uniqueid[@type='Imdb']") {
+		imdbId = t.Text()
+		break
 	}
 	for _, t := range doc.FindElements("//uniqueid[@type='IMDB']") {
-		return t.Text(), nil
+		imdbId = t.Text()
+		break
 	}
-
-	return "", common.CanNotFindIMDBID
+	year := ""
+	for _, t := range doc.FindElements("./movie/year") {
+		year = t.Text()
+		break
+	}
+	if imdbId != "" {
+		return imdbId, year, nil
+	}
+	return "",  "", common.CanNotFindIMDBID
 }
 
-func GetImdbId(dirPth string) (string ,error) {
+func GetImdbIdAndYear(dirPth string) (string, string, error) {
 	dir, err := ioutil.ReadDir(dirPth)
 	if err != nil {
-		return "", err
+		return "", "", err
 	}
 	pathSep := string(os.PathSeparator)
 	// Look for movie.xml first: Radarr saves it next to the movies it downloads (it can be enabled under Metadata, Emby option).
@@ -71,28 +95,28 @@ func GetImdbId(dirPth string) (string ,error) {
 	}
 	// 根据找到的开始解析
 	if movieFilePath == "" && nfoFilePath == "" {
-		return "", common.NoMetadataFile
+		return "", "", common.NoMetadataFile
 	}
 
 	if movieFilePath != "" {
-		outId, err := getImdbMovieXml(movieFilePath)
+		outId, outYear, err := getImdbAndYearMovieXml(movieFilePath)
 		if err != nil {
-			GetLogger().Errorln("getImdbMovieXml error, move on:", err)
+			GetLogger().Errorln("getImdbAndYearMovieXml error, move on:", err)
 		} else {
-			return outId, nil
+			return outId, outYear, nil
 		}
 	}
 
 	if nfoFilePath != "" {
-		outId, err := getImdbNfo(nfoFilePath)
+		outId, outYear, err := getImdbAndYearNfo(nfoFilePath)
 		if err != nil {
-			return "", err
+			return "","", err
 		} else {
-			return outId, nil
+			return outId, outYear, nil
 		}
 	}
 
-	return "", common.CanNotFindIMDBID
+	return "", "", common.CanNotFindIMDBID
 }
 
 //GetVideoInfo 从文件名推断视频文件的信息

+ 18 - 37
model/decode_test.go

@@ -4,64 +4,45 @@ import (
 	"testing"
 )
 
-func TestGet_IMDB_Id(t *testing.T) {
-	type args struct {
-		dirPth string
-	}
-	tests := []struct {
-		name    string
-		args    args
-		want    string
-		wantErr bool
-	}{
-		{name: "have", args: args{dirPth: "x:\\电影\\Army of the Dead (2021)"}, want: "tt0993840", wantErr: false},
-		{name: "want error", args: args{dirPth: "x:\\电影\\"}, want: "", wantErr: true},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got, err := GetImdbId(tt.args.dirPth)
-			if (err != nil) != tt.wantErr {
-				t.Errorf("GetImdbId() error = %v, wantErr %v", err, tt.wantErr)
-				return
-			}
-			if got != tt.want {
-				t.Errorf("GetImdbId() got = %v, want %v", got, tt.want)
-			}
-		})
-	}
-}
-
 func Test_GetIMDB_ID(t *testing.T)  {
 
 	serPath := "X:\\连续剧\\The Bad Batch"
-	id, err := GetImdbId(serPath)
+	id, year, err := GetImdbIdAndYear(serPath)
 	if err != nil {
 		t.Fatal(err)
 	}
-	println(id)
+	println(id, year)
 }
 
 func Test_get_IMDB_movie_xml(t *testing.T) {
-    want := "tt0993840"
+    wantid := "tt0993840"
+    wantyear:= "2021"
 	dirPth := "x:\\电影\\Army of the Dead (2021)\\movie.xml"
-	got, err := getImdbMovieXml(dirPth)
+	id, year, err := getImdbAndYearMovieXml(dirPth)
 	if err != nil {
 		t.Error(err)
 	}
-	if got != want {
-		t.Errorf("Test_get_IMDB_movie_xml() got = %v, want %v", got, want)
+	if id != wantid {
+		t.Errorf("Test_get_IMDB_movie_xml() got = %v, want %v", id, wantid)
+	}
+	if year != wantyear {
+		t.Errorf("Test_get_IMDB_movie_xml() got = %v, want %v", year, wantyear)
 	}
 }
 
 func Test_get_IMDB_nfo(t *testing.T) {
-	want := "tt0993840"
+	wantid := "tt0993840"
+	wantyear:= "2021"
 	dirPth := "X:\\电影\\Army of the Dead (2021)\\Army of the Dead (2021) WEBDL-1080p.nfo"
-	got, err := getImdbNfo(dirPth)
+	id, year, err := getImdbAndYearNfo(dirPth)
 	if err != nil {
 		t.Error(err)
 	}
-	if got != want {
-		t.Errorf("Test_get_IMDB_movie_xml() got = %v, want %v", got, want)
+	if id != wantid {
+		t.Errorf("Test_get_IMDB_movie_xml() id = %v, wantid %v", id, wantid)
+	}
+	if year != wantyear {
+		t.Errorf("Test_get_IMDB_movie_xml() year = %v, wantyear %v", id, wantyear)
 	}
 }
 

+ 17 - 4
sub_supplier/subhd/subhd.go

@@ -18,6 +18,7 @@ import (
 	"path"
 	"path/filepath"
 	"regexp"
+	"strconv"
 	"strings"
 	"time"
 )
@@ -73,20 +74,32 @@ func (s Supplier) GetSubListFromFile(filePath string) ([]common.SupplierSubInfo,
 	}
 	// 找到这个视频文件,然后读取它目录下的文件,尝试得到 IMDB ID
 	fileRootDirPath := filepath.Dir(filePath)
-	imdbId, err := model.GetImdbId(fileRootDirPath)
+	// 目前测试来看,加入 年 这个关键词去搜索,对 2020 年后的影片有利,因为网站有统一的详细页面了,而之前的,没有,会影响识别
+	// 所以,year >= 2020 年,则可以多加一个关键词(年)去搜索影片
+	imdbId, year, err := model.GetImdbIdAndYear(fileRootDirPath)
 	if err != nil {
 		// 允许的错误,跳过,继续进行文件名的搜索
-		s.log.Error(err)
+		s.log.Errorln("model.GetImdbIdAndYear", err)
+	}
+	iYear, err := strconv.Atoi(year)
+	if err != nil {
+		// 允许的错误
+		s.log.Errorln("GetImdbIdAndYear", "year to int",err)
+		iYear = 0
 	}
 
 	var subInfoList []common.SupplierSubInfo
 
 	if imdbId != "" {
+		searchKeyword := imdbId
+		if iYear >= 2020 {
+			searchKeyword = searchKeyword + year
+		}
 		// 先用 imdb id 找
-		subInfoList, err = s.GetSubListFromKeyword(imdbId)
+		subInfoList, err = s.GetSubListFromKeyword(searchKeyword)
 		if err != nil {
 			// 允许的错误,跳过,继续进行文件名的搜索
-			s.log.Error(err)
+			s.log.Errorln("GetSubListFromKeyword", "IMDBID can not found sub", filePath, err)
 		}
 		// 如果有就优先返回
 		if len(subInfoList) >0 {

+ 17 - 5
sub_supplier/zimuku/zimuku.go

@@ -9,6 +9,7 @@ import (
 	"path/filepath"
 	"regexp"
 	"sort"
+	"strconv"
 	"strings"
 )
 
@@ -63,20 +64,31 @@ func (s Supplier) GetSubListFromFile(filePath string) ([]common.SupplierSubInfo,
 	}
 	// 找到这个视频文件,然后读取它目录下的文件,尝试得到 IMDB ID
 	fileRootDirPath := filepath.Dir(filePath)
-	imdbId, err := model.GetImdbId(fileRootDirPath)
+	// 目前测试来看,加入 年 这个关键词去搜索,对 2020 年后的影片有利,因为网站有统一的详细页面了,而之前的,没有,会影响识别
+	// 所以,year >= 2020 年,则可以多加一个关键词(年)去搜索影片
+	imdbId, year, err := model.GetImdbIdAndYear(fileRootDirPath)
 	if err != nil {
 		// 允许的错误,跳过,继续进行文件名的搜索
-		s.log.Error(err)
+		s.log.Errorln("model.GetImdbIdAndYear", err)
+	}
+	iYear, err := strconv.Atoi(year)
+	if err != nil {
+		// 允许的错误
+		s.log.Errorln("GetImdbIdAndYear", "year to int",err)
+		iYear = 0
 	}
-
 	var subInfoList []common.SupplierSubInfo
 
 	if imdbId != "" {
+		searchKeyword := imdbId
+		if iYear >= 2020 {
+			searchKeyword = searchKeyword + year
+		}
 		// 先用 imdb id 找
-		subInfoList, err = s.GetSubListFromKeyword(imdbId)
+		subInfoList, err = s.GetSubListFromKeyword(searchKeyword)
 		if err != nil {
 			// 允许的错误,跳过,继续进行文件名的搜索
-			s.log.Error("GetSubListFromKeyword", "IMDBID can not found sub", filePath, err)
+			s.log.Errorln("GetSubListFromKeyword", "IMDBID can not found sub", filePath, err)
 		}
 		// 如果有就优先返回
 		if len(subInfoList) >0 {