瀏覽代碼

调整 sub fixer 的测试用例,输出更多信息

Signed-off-by: allan716 <[email protected]>
allan716 4 年之前
父節點
當前提交
e278576af9
共有 4 個文件被更改,包括 132 次插入和 34 次删除
  1. 1 0
      .gitignore
  2. 19 1
      internal/pkg/sub_timeline_fixer/fixer.go
  3. 93 33
      internal/pkg/sub_timeline_fixer/fixer_test.go
  4. 19 0
      internal/pkg/util.go

+ 1 - 0
.gitignore

@@ -50,3 +50,4 @@
 /internal/logic/sub_timeline_fixer/tmpSubFix
 /internal/logic/sub_timeline_fixer/SubFixCache
 /internal/logic/sub_timeline_fixer/Logs
+/internal/pkg/sub_timeline_fixer/Logs

+ 19 - 1
internal/pkg/sub_timeline_fixer/fixer.go

@@ -3,6 +3,7 @@ package sub_timeline_fixer
 import (
 	"fmt"
 	"github.com/allanpk716/ChineseSubFinder/internal/common"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
 	"github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
 	"github.com/go-echarts/go-echarts/v2/opts"
@@ -46,8 +47,9 @@ func StopWordCounter(inString string, per int) []string {
 }
 
 // GetOffsetTime 暂时只支持英文的基准字幕,源字幕必须是双语中英字幕
-func GetOffsetTime(infoBase, infoSrc *subparser.FileInfo, staticLineFileSavePath string) (float64, error) {
+func GetOffsetTime(infoBase, infoSrc *subparser.FileInfo, staticLineFileSavePath string, debugInfoFileSavePath string) (float64, error) {
 
+	var debugInfos = make([]string, 0)
 	// 构建基准语料库,目前阶段只需要考虑是 En 的就行了
 	var baseCorpus = make([]string, 0)
 	for _, oneDialogueEx := range infoBase.DialoguesEx {
@@ -130,10 +132,15 @@ func GetOffsetTime(infoBase, infoSrc *subparser.FileInfo, staticLineFileSavePath
 	matchIndexLineCount := len(matchIndexList) * maxCompareDialogue
 	perMatch := float64(matchIndexLineCount) / float64(len(infoSrc.DialoguesEx))
 	if perMatch < 0.1 {
+		debugInfos = append(debugInfos, "Sequence match 5 dialogues (< 10%), Skip",
+			fmt.Sprintf(" %f", perMatch), infoSrc.Name)
+
 		log_helper.GetLogger().Debugln("Sequence match 5 dialogues (< 10%), Skip",
 			fmt.Sprintf("%f", perMatch), infoSrc.Name)
 		return 0, nil
 	} else {
+		debugInfos = append(debugInfos, "Sequence match 5 dialogues:",
+			fmt.Sprintf(" %f", perMatch), infoSrc.Name)
 		log_helper.GetLogger().Debugln("Sequence match 5 dialogues:",
 			fmt.Sprintf("%f", perMatch), infoSrc.Name)
 	}
@@ -192,11 +199,15 @@ func GetOffsetTime(infoBase, infoSrc *subparser.FileInfo, staticLineFileSavePath
 
 			xAxis = append(xAxis, fmt.Sprintf("%d_%d", mIndex, i))
 
+			debugInfos = append(debugInfos, "bs "+infoBase.DialoguesEx[tmpBaseIndex].StartTime+" <-> "+infoBase.DialoguesEx[tmpBaseIndex].EndTime)
+			debugInfos = append(debugInfos, "sc "+infoSrc.DialoguesEx[tmpSrcIndex].StartTime+" <-> "+infoSrc.DialoguesEx[tmpSrcIndex].EndTime)
+			debugInfos = append(debugInfos, "StartDiffTime: "+fmt.Sprintf("%f", TimeDiffStart.Seconds()))
 			//println(fmt.Sprintf("Diff Start-End: %s - %s Base[%d] %s-%s '%s' <--> Src[%d] %s-%s '%s'",
 			//	TimeDiffStart, TimeDiffEnd,
 			//	tmpBaseIndex, infoBase.DialoguesEx[tmpBaseIndex].StartTime, infoBase.DialoguesEx[tmpBaseIndex].EndTime, infoBase.DialoguesEx[tmpBaseIndex].EnLine,
 			//	tmpSrcIndex, infoSrc.DialoguesEx[tmpSrcIndex].StartTime, infoSrc.DialoguesEx[tmpSrcIndex].EndTime, infoSrc.DialoguesEx[tmpSrcIndex].EnLine))
 		}
+		debugInfos = append(debugInfos, "---------------------------------------------")
 		//println("---------------------------------------------")
 	}
 
@@ -254,6 +265,13 @@ func GetOffsetTime(infoBase, infoSrc *subparser.FileInfo, staticLineFileSavePath
 			return 0, err
 		}
 	}
+	// 输出调试的匹配时间轴信息的列表
+	if debugInfoFileSavePath != "" {
+		err = pkg.WriteStrings2File(debugInfoFileSavePath, debugInfos)
+		if err != nil {
+			return 0, err
+		}
+	}
 
 	return newMean, nil
 }

+ 93 - 33
internal/pkg/sub_timeline_fixer/fixer_test.go

@@ -5,6 +5,7 @@ import (
 	"github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/ass"
 	"github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/srt"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_helper"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub"
 	"github.com/james-bowman/nlp"
 	"github.com/james-bowman/nlp/measures/pairwise"
@@ -47,43 +48,100 @@ func TestGetOffsetTime(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	enSubFile := path.Join(testRootDir, "R&M S05E01 - English.srt")
-	ch_enSubFile := path.Join(testRootDir, "R&M S05E01 - 简英.srt")
-
-	//enSubFile := path.Join(testRootDir, "R&M S05E10 - English.ass")
-	//ch_enSubFile := path.Join(testRootDir, "R&M S05E10 - 简英.ass")
-	//ch_enSubFile := path.Join(testRootDir, "R&M S05E10 - 简英-shooter.ass")
-
-	//enSubFile := path.Join(testRootDir, "基地 S01E03 - English.ass")
-	//ch_enSubFile := path.Join(testRootDir, "基地 S01E03 - 简英.ass")
-
 	subParserHub := sub_parser_hub.NewSubParserHub(ass.NewParser(), srt.NewParser())
-	bFind, infoBase, err := subParserHub.DetermineFileTypeFromFile(enSubFile)
-	if err != nil {
-		t.Fatal(err)
-	}
-	if bFind == false {
-		t.Fatal("sub not match")
-	}
-	bFind, infoSrc, err := subParserHub.DetermineFileTypeFromFile(ch_enSubFile)
-	if err != nil {
-		t.Fatal(err)
-	}
-	if bFind == false {
-		t.Fatal("sub not match")
-	}
 
-	time, err := GetOffsetTime(infoBase, infoSrc, "")
-	if err != nil {
-		t.Fatal(err)
+	type args struct {
+		enSubFile              string
+		ch_enSubFile           string
+		staticLineFileSavePath string
 	}
-
-	_, err = FixSubTimeline(infoSrc, time, "Foundation (2021) - S01E03 - The Mathematician’s Ghost WEBDL-1080p.chinese(简英,fix).ass")
-	if err != nil {
-		t.Fatal(err)
+	tests := []struct {
+		name    string
+		args    args
+		want    float64
+		wantErr bool
+	}{
+		{name: "R&M S05E01", args: args{enSubFile: path.Join(testRootDir, "R&M S05E01 - English.srt"),
+			ch_enSubFile:           path.Join(testRootDir, "R&M S05E01 - 简英.srt"),
+			staticLineFileSavePath: "bar.html"}, want: -6.42981818181818, wantErr: false},
+		{name: "R&M S05E10", args: args{enSubFile: path.Join(testRootDir, "R&M S05E10 - English.ass"),
+			ch_enSubFile:           path.Join(testRootDir, "R&M S05E10 - 简英.ass"),
+			staticLineFileSavePath: "bar.html"}, want: -6.335985401459854, wantErr: false},
+		{name: "R&M S05E10-shooter", args: args{enSubFile: path.Join(testRootDir, "R&M S05E10 - English.ass"),
+			ch_enSubFile:           path.Join(testRootDir, "R&M S05E10 - 简英-shooter.ass"),
+			staticLineFileSavePath: "bar.html"}, want: -6.335985401459854, wantErr: false},
+		{name: "基地 S01E03", args: args{enSubFile: path.Join(testRootDir, "基地 S01E03 - English.ass"),
+			ch_enSubFile:           path.Join(testRootDir, "基地 S01E03 - 简英.ass"),
+			staticLineFileSavePath: "bar.html"}, want: -32.09061538461539, wantErr: false},
+
+		{name: "Dan Brown's The Lost Symbol - S01E01", args: args{
+			enSubFile:              path.Join(testRootDir, tmpSubDataFolderName, "Dan Brown's The Lost Symbol - S01E01 - As Above, So Below WEBDL-720p", "Dan Brown's The Lost Symbol - S01E01 - As Above, So Below WEBDL-720p.chinese(inside).ass"),
+			ch_enSubFile:           path.Join(testRootDir, tmpSubDataFolderName, "Dan Brown's The Lost Symbol - S01E01 - As Above, So Below WEBDL-720p", "Dan Brown's The Lost Symbol - S01E01 - As Above, So Below WEBDL-720p.chinese(简英,shooter).ass"),
+			staticLineFileSavePath: "bar.html"},
+			want: 1.3217821782178225, wantErr: false},
+		{name: "Dan Brown's The Lost Symbol - S01E02", args: args{
+			enSubFile:              path.Join(testRootDir, tmpSubDataFolderName, "Dan Brown's The Lost Symbol - S01E02 - The Araf WEBDL-1080p", "Dan Brown's The Lost Symbol - S01E02 - The Araf WEBDL-1080p.chinese(inside).ass"),
+			ch_enSubFile:           path.Join(testRootDir, tmpSubDataFolderName, "Dan Brown's The Lost Symbol - S01E02 - The Araf WEBDL-1080p", "Dan Brown's The Lost Symbol - S01E02 - The Araf WEBDL-1080p.chinese(简英,subhd).ass"),
+			staticLineFileSavePath: "bar.html"},
+			want: -0.5253383458646617, wantErr: false},
+		{name: "Dan Brown's The Lost Symbol - S01E03", args: args{
+			enSubFile:              path.Join(testRootDir, tmpSubDataFolderName, "Dan Brown's The Lost Symbol - S01E03 - Murmuration WEBDL-1080p", "Dan Brown's The Lost Symbol - S01E03 - Murmuration WEBDL-1080p.chinese(inside).ass"),
+			ch_enSubFile:           path.Join(testRootDir, tmpSubDataFolderName, "Dan Brown's The Lost Symbol - S01E03 - Murmuration WEBDL-1080p", "Dan Brown's The Lost Symbol - S01E03 - Murmuration WEBDL-1080p.chinese(简英,shooter).ass"),
+			staticLineFileSavePath: "bar.html"},
+			want: -0.505656, wantErr: false},
+		{name: "Dan Brown's The Lost Symbol - S01E03", args: args{
+			enSubFile:              path.Join(testRootDir, tmpSubDataFolderName, "Dan Brown's The Lost Symbol - S01E03 - Murmuration WEBDL-1080p", "Dan Brown's The Lost Symbol - S01E03 - Murmuration WEBDL-1080p.chinese(inside).ass"),
+			ch_enSubFile:           path.Join(testRootDir, tmpSubDataFolderName, "Dan Brown's The Lost Symbol - S01E03 - Murmuration WEBDL-1080p", "Dan Brown's The Lost Symbol - S01E03 - Murmuration WEBDL-1080p.chinese(繁英,xunlei).ass"),
+			staticLineFileSavePath: "bar.html"},
+			want: -0.505656, wantErr: false},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+
+			bFind, infoBase, err := subParserHub.DetermineFileTypeFromFile(tt.args.enSubFile)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if bFind == false {
+				t.Fatal("sub not match")
+			}
+			/*
+				这里发现一个梗,内置的英文字幕导出的时候,有可能需要合并多个 Dialogue,见
+				internal/pkg/sub_helper/sub_helper.go 中 MergeMultiDialogue4EngSubtitle 的实现
+			*/
+			sub_helper.MergeMultiDialogue4EngSubtitle(infoBase)
+
+			bFind, infoSrc, err := subParserHub.DetermineFileTypeFromFile(tt.args.ch_enSubFile)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if bFind == false {
+				t.Fatal("sub not match")
+			}
+			/*
+				这里发现一个梗,内置的英文字幕导出的时候,有可能需要合并多个 Dialogue,见
+				internal/pkg/sub_helper/sub_helper.go 中 MergeMultiDialogue4EngSubtitle 的实现
+			*/
+			sub_helper.MergeMultiDialogue4EngSubtitle(infoSrc)
+
+			got, err := GetOffsetTime(infoBase, infoSrc, tt.args.ch_enSubFile+"-bar.html", tt.args.ch_enSubFile+".log")
+			if (err != nil) != tt.wantErr {
+				t.Errorf("GetOffsetTime() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+
+			// 在一个正负范围内都可以接受
+			if got > tt.want-0.1 && got < tt.want+0.1 {
+
+			} else {
+				t.Errorf("GetOffsetTime() got = %v, want %v", got, tt.want)
+			}
+			//if got != tt.want {
+			//	t.Errorf("GetOffsetTime() got = %v, want %v", got, tt.want)
+			//}
+			println(fmt.Sprintf("GetOffsetTime: %fs", got))
+		})
 	}
-
-	println(fmt.Sprintf("GetOffsetTime: %fs", time))
 }
 
 func TestTFIDF(t *testing.T) {
@@ -137,3 +195,5 @@ func TestTFIDF(t *testing.T) {
 	fmt.Printf("Matched '%s'", testCorpus[matched])
 	// Output: Matched 'The quick brown fox jumped over the lazy dog'
 }
+
+const tmpSubDataFolderName = "SubFixCache"

+ 19 - 0
internal/pkg/util.go

@@ -424,3 +424,22 @@ func OSCheck() bool {
 // FixWindowPathBackSlash returns path with every occurrence of the
 // platform's path separator (filepath.Separator) replaced by a forward slash.
 func FixWindowPathBackSlash(path string) string {
 	osSeparator := string(filepath.Separator)
 	return strings.ReplaceAll(path, osSeparator, "/")
 }
+
+func WriteStrings2File(desfilePath string, strings []string) error {
+	dstFile, err := os.Create(desfilePath)
+	if err != nil {
+		return err
+	}
+	defer func() {
+		_ = dstFile.Close()
+	}()
+	allString := ""
+	for _, s := range strings {
+		allString += s + "\r\n"
+	}
+	_, err = dstFile.WriteString(allString)
+	if err != nil {
+		return err
+	}
+	return nil
+}