| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449 | 
							- package sub_timeline_fixer
 
- import (
 
- 	"fmt"
 
- 	"github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/ass"
 
- 	"github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/srt"
 
- 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
 
- 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_helper"
 
- 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub"
 
- 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/vad"
 
- 	"github.com/allanpk716/ChineseSubFinder/internal/types/sub_timeline_fiexer"
 
- 	"github.com/james-bowman/nlp"
 
- 	"github.com/james-bowman/nlp/measures/pairwise"
 
- 	"gonum.org/v1/gonum/mat"
 
- 	"path/filepath"
 
- 	"strings"
 
- 	"testing"
 
- )
 
- func TestStopWordCounter(t *testing.T) {
 
- 	testDataPath := "../../../TestData/FixTimeline"
 
- 	testRootDir, err := my_util.CopyTestData(testDataPath)
 
- 	if err != nil {
 
- 		t.Fatal(err)
 
- 	}
 
- 	subParserHub := sub_parser_hub.NewSubParserHub(ass.NewParser(), srt.NewParser())
 
- 	bFind, info, err := subParserHub.DetermineFileTypeFromFile(filepath.Join(testRootDir, "R&M S05E10 - English.srt"))
 
- 	if err != nil {
 
- 		t.Fatal(err)
 
- 	}
 
- 	if bFind == false {
 
- 		t.Fatal("not match sub types")
 
- 	}
 
- 	allString := strings.Join(info.OtherLines, " ")
 
- 	s := SubTimelineFixer{}
 
- 	stopWords := s.StopWordCounter(strings.ToLower(allString), 5)
 
- 	print(len(stopWords))
 
- 	println(info.Name)
 
- }
 
- func TestGetOffsetTimeV1(t *testing.T) {
 
- 	testDataPath := "../../../TestData/FixTimeline"
 
- 	testRootDir, err := my_util.CopyTestData(testDataPath)
 
- 	if err != nil {
 
- 		t.Fatal(err)
 
- 	}
 
- 	testRootDirYes := filepath.Join(testRootDir, "yes")
 
- 	testRootDirNo := filepath.Join(testRootDir, "no")
 
- 	subParserHub := sub_parser_hub.NewSubParserHub(ass.NewParser(), srt.NewParser())
 
- 	type args struct {
 
- 		enSubFile              string
 
- 		ch_enSubFile           string
 
- 		staticLineFileSavePath string
 
- 	}
 
- 	tests := []struct {
 
- 		name    string
 
- 		args    args
 
- 		want    float64
 
- 		wantErr bool
 
- 	}{
 
- 		/*
 
- 			这里有几个比较理想的字幕时间轴校正的示例
 
- 		*/
 
- 		{name: "R&M S05E01", args: args{enSubFile: filepath.Join(testRootDirYes, "R&M S05E01 - English.srt"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirYes, "R&M S05E01 - 简英.srt"),
 
- 			staticLineFileSavePath: "bar.html"}, want: -6.42981818181818, wantErr: false},
 
- 		{name: "R&M S05E10", args: args{enSubFile: filepath.Join(testRootDirYes, "R&M S05E10 - English.ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirYes, "R&M S05E10 - 简英.ass"),
 
- 			staticLineFileSavePath: "bar.html"}, want: -6.335985401459854, wantErr: false},
 
- 		{name: "基地 S01E03", args: args{enSubFile: filepath.Join(testRootDirYes, "基地 S01E03 - English.ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirYes, "基地 S01E03 - 简英.ass"),
 
- 			staticLineFileSavePath: "bar.html"}, want: -32.09061538461539, wantErr: false},
 
- 		/*
 
- 			WTF,这部剧集
 
- 			Dan Brown's The Lost Symbol
 
- 			内置的英文字幕时间轴是歪的,所以修正完了就错了
 
- 		*/
 
- 		{name: "Dan Brown's The Lost Symbol - S01E01", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Dan Brown's The Lost Symbol - S01E01.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Dan Brown's The Lost Symbol - S01E01.chinese(简英,shooter).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 1.3217821782178225, wantErr: false},
 
- 		{name: "Dan Brown's The Lost Symbol - S01E02", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Dan Brown's The Lost Symbol - S01E02.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Dan Brown's The Lost Symbol - S01E02.chinese(简英,subhd).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: -0.5253383458646617, wantErr: false},
 
- 		{name: "Dan Brown's The Lost Symbol - S01E03", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Dan Brown's The Lost Symbol - S01E03.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Dan Brown's The Lost Symbol - S01E03.chinese(繁英,xunlei).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: -0.505656, wantErr: false},
 
- 		{name: "Dan Brown's The Lost Symbol - S01E04", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Dan Brown's The Lost Symbol - S01E04.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Dan Brown's The Lost Symbol - S01E04.chinese(简英,zimuku).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: -0.633415, wantErr: false},
 
- 		/*
 
- 			只有一个是字幕下载了一个错误的,其他的无需修正
 
- 		*/
 
- 		{name: "Don't Breathe 2 (2021) - shooter-srt", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Don't Breathe 2 (2021).chinese(inside).srt"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Don't Breathe 2 (2021).chinese(简英,shooter).srt"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		{name: "Don't Breathe 2 (2021) - subhd-srt error matched sub", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Don't Breathe 2 (2021).chinese(inside).srt"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Don't Breathe 2 (2021).chinese(简英,subhd).srt"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		{name: "Don't Breathe 2 (2021) - xunlei-ass", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Don't Breathe 2 (2021).chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Don't Breathe 2 (2021).chinese(简英,xunlei).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		{name: "Don't Breathe 2 (2021) - zimuku-ass", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Don't Breathe 2 (2021).chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Don't Breathe 2 (2021).chinese(简英,zimuku).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		/*
 
- 			基地
 
- 		*/
 
- 		{name: "Foundation (2021) - S01E01", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Foundation (2021) - S01E01.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Foundation (2021) - S01E01.chinese(简英,zimuku).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		{name: "Foundation (2021) - S01E02", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirYes, "Foundation (2021) - S01E02.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirYes, "Foundation (2021) - S01E02.chinese(简英,subhd).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: -30.624840, wantErr: false},
 
- 		{name: "Foundation (2021) - S01E03", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirYes, "Foundation (2021) - S01E03.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirYes, "Foundation (2021) - S01E03.chinese(简英,subhd).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: -32.085037037037054, wantErr: false},
 
- 		{name: "Foundation (2021) - S01E04", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirYes, "Foundation (2021) - S01E04.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirYes, "Foundation (2021) - S01E04.chinese(简英,subhd).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: -36.885074, wantErr: false},
 
- 		{name: "Foundation (2021) - S01E04", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Foundation (2021) - S01E04.chinese(inside).srt"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Foundation (2021) - S01E04.chinese(繁英,shooter).srt"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		/*
 
- 			The Card Counter
 
- 		*/
 
- 		{name: "The Card Counter", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "The Card Counter (2021).chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "The Card Counter (2021).chinese(简英,xunlei).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		{name: "The Card Counter", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "The Card Counter (2021).chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "The Card Counter (2021).chinese(简英,shooter).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0.224844, wantErr: false},
 
- 		/*
 
- 			Kingdom Ashin of the North
 
- 		*/
 
- 		{name: "Kingdom Ashin of the North - error matched sub", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Kingdom Ashin of the North (2021).chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Kingdom Ashin of the North (2021).chinese(简英,subhd).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		/*
 
- 			Only Murders in the Building
 
- 		*/
 
- 		{name: "Only Murders in the Building - S01E06", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Only Murders in the Building - S01E06.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Only Murders in the Building - S01E06.chinese(简英,subhd).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		{name: "Only Murders in the Building - S01E08", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Only Murders in the Building - S01E08.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Only Murders in the Building - S01E08.chinese(简英,subhd).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		/*
 
- 			Ted Lasso
 
- 		*/
 
- 		{name: "Ted Lasso - S02E09", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Ted Lasso - S02E09.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Ted Lasso - S02E09.chinese(简英,subhd).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		{name: "Ted Lasso - S02E09", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Ted Lasso - S02E09.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Ted Lasso - S02E09.chinese(简英,zimuku).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		{name: "Ted Lasso - S02E10", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Ted Lasso - S02E10.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Ted Lasso - S02E10.chinese(简英,subhd).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		{name: "Ted Lasso - S02E10", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Ted Lasso - S02E10.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Ted Lasso - S02E10.chinese(简英,zimuku).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		{name: "Ted Lasso - S02E10", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Ted Lasso - S02E10.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Ted Lasso - S02E10.chinese(简英,shooter).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		{name: "Ted Lasso - S02E11", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Ted Lasso - S02E11.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Ted Lasso - S02E11.chinese(简英,subhd).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		{name: "Ted Lasso - S02E11", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Ted Lasso - S02E11.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Ted Lasso - S02E11.chinese(简英,zimuku).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		{name: "Ted Lasso - S02E12", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Ted Lasso - S02E12.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Ted Lasso - S02E12.chinese(简英,subhd).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		{name: "Ted Lasso - S02E12", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "Ted Lasso - S02E12.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "Ted Lasso - S02E12.chinese(简英,shooter).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		/*
 
- 			The Protégé
 
- 		*/
 
- 		{name: "The Protégé", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "The Protégé (2021).chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "The Protégé (2021).chinese(简英,zimuku).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		{name: "The Protégé", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "The Protégé (2021).chinese(inside).srt"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "The Protégé (2021).chinese(简英,shooter).srt"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		/*
 
- 			The Witcher Nightmare of the Wolf
 
- 		*/
 
- 		{name: "The Witcher Nightmare of the Wolf", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "The Witcher Nightmare of the Wolf.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "The Witcher Nightmare of the Wolf.chinese(简英,zimuku).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		/*
 
- 			What If…!
 
- 		*/
 
- 		{name: "What If…! - S01E07", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "What If…! - S01E07.chinese(inside).ass"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "What If…! - S01E07.chinese(简英,subhd).ass"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 		{name: "What If…! - S01E09", args: args{
 
- 			enSubFile:              filepath.Join(testRootDirNo, "What If…! - S01E09.chinese(inside).srt"),
 
- 			ch_enSubFile:           filepath.Join(testRootDirNo, "What If…! - S01E09.chinese(简英,shooter).srt"),
 
- 			staticLineFileSavePath: "bar.html"},
 
- 			want: 0, wantErr: false},
 
- 	}
 
- 	s := NewSubTimelineFixer(sub_timeline_fiexer.SubTimelineFixerConfig{
 
- 		MaxCompareDialogue: 3,
 
- 		MaxStartTimeDiffSD: 0.1,
 
- 		MinMatchedPercent:  0.1,
 
- 		MinOffset:          0.1,
 
- 	})
 
- 	for _, tt := range tests {
 
- 		t.Run(tt.name, func(t *testing.T) {
 
- 			bFind, infoBase, err := subParserHub.DetermineFileTypeFromFile(tt.args.enSubFile)
 
- 			if err != nil {
 
- 				t.Fatal(err)
 
- 			}
 
- 			if bFind == false {
 
- 				t.Fatal("sub not match")
 
- 			}
 
- 			/*
 
- 				这里发现一个梗,内置的英文字幕导出的时候,有可能需要合并多个 Dialogue,见
 
- 				internal/pkg/sub_helper/sub_helper.go 中 MergeMultiDialogue4EngSubtitle 的实现
 
- 			*/
 
- 			sub_helper.MergeMultiDialogue4EngSubtitle(infoBase)
 
- 			bFind, infoSrc, err := subParserHub.DetermineFileTypeFromFile(tt.args.ch_enSubFile)
 
- 			if err != nil {
 
- 				t.Fatal(err)
 
- 			}
 
- 			if bFind == false {
 
- 				t.Fatal("sub not match")
 
- 			}
 
- 			/*
 
- 				这里发现一个梗,内置的英文字幕导出的时候,有可能需要合并多个 Dialogue,见
 
- 				internal/pkg/sub_helper/sub_helper.go 中 MergeMultiDialogue4EngSubtitle 的实现
 
- 			*/
 
- 			sub_helper.MergeMultiDialogue4EngSubtitle(infoSrc)
 
- 			bok, got, sd, err := s.GetOffsetTimeV1(infoBase, infoSrc, tt.args.ch_enSubFile+"-bar.html", tt.args.ch_enSubFile+".log")
 
- 			if (err != nil) != tt.wantErr {
 
- 				t.Errorf("GetOffsetTimeV1() error = %v, wantErr %v", err, tt.wantErr)
 
- 				return
 
- 			}
 
- 			// 在一个正负范围内都可以接受
 
- 			if got > tt.want-0.1 && got < tt.want+0.1 {
 
- 			} else {
 
- 				t.Errorf("GetOffsetTimeV1() got = %v, want %v", got, tt.want)
 
- 			}
 
- 			//if got != tt.want {
 
- 			//	t.Errorf("GetOffsetTimeV1() got = %v, want %v", got, tt.want)
 
- 			//}
 
- 			if bok == true && got != 0 {
 
- 				_, err = s.FixSubTimeline(infoSrc, got, tt.args.ch_enSubFile+FixMask+infoBase.Ext)
 
- 				if err != nil {
 
- 					t.Fatal(err)
 
- 				}
 
- 			}
 
- 			println(fmt.Sprintf("GetOffsetTimeV1: %fs SD:%f", got, sd))
 
- 		})
 
- 	}
 
- }
 
- func TestTFIDF(t *testing.T) {
 
- 	testCorpus := []string{
 
- 		"The quick brown fox jumped over the lazy dog",
 
- 		"hey diddle diddle, the cat and the fiddle",
 
- 		"the cow jumped over the moon",
 
- 		"the little dog laughed to see such fun",
 
- 		"and the dish ran away with the spoon",
 
- 	}
 
- 	query := "the brown fox ran around the dog"
 
- 	vectoriser := nlp.NewCountVectoriser(StopWords...)
 
- 	transformer := nlp.NewTfidfTransformer()
 
- 	// set k (the number of dimensions following truncation) to 4
 
- 	reducer := nlp.NewTruncatedSVD(4)
 
- 	lsiPipeline := nlp.NewPipeline(vectoriser, transformer, reducer)
 
- 	// Transform the corpus into an LSI fitting the model to the documents in the process
 
- 	lsi, err := lsiPipeline.FitTransform(testCorpus...)
 
- 	if err != nil {
 
- 		fmt.Printf("Failed to process documents because %v", err)
 
- 		return
 
- 	}
 
- 	// run the query through the same pipeline that was fitted to the corpus and
 
- 	// to project it into the same dimensional space
 
- 	queryVector, err := lsiPipeline.Transform(query)
 
- 	if err != nil {
 
- 		fmt.Printf("Failed to process documents because %v", err)
 
- 		return
 
- 	}
 
- 	// iterate over document feature vectors (columns) in the LSI matrix and compare
 
- 	// with the query vector for similarity.  Similarity is determined by the difference
 
- 	// between the angles of the vectors known as the cosine similarity
 
- 	highestSimilarity := -1.0
 
- 	var matched int
 
- 	_, docs := lsi.Dims()
 
- 	for i := 0; i < docs; i++ {
 
- 		similarity := pairwise.CosineSimilarity(queryVector.(mat.ColViewer).ColView(0), lsi.(mat.ColViewer).ColView(i))
 
- 		if similarity > highestSimilarity {
 
- 			matched = i
 
- 			highestSimilarity = similarity
 
- 		}
 
- 	}
 
- 	fmt.Printf("Matched '%s'", testCorpus[matched])
 
- 	// Output: Matched 'The quick brown fox jumped over the lazy dog'
 
- }
 
- func TestSubTimelineFixer_GetOffsetTimeV2(t *testing.T) {
 
- 	subParserHub := sub_parser_hub.NewSubParserHub(ass.NewParser(), srt.NewParser())
 
- 	type fields struct {
 
- 		fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig
 
- 	}
 
- 	type args struct {
 
- 		audioInfo              vad.AudioInfo
 
- 		subFilePath            string
 
- 		staticLineFileSavePath string
 
- 		debugInfoFileSavePath  string
 
- 	}
 
- 	tests := []struct {
 
- 		name    string
 
- 		fields  fields
 
- 		args    args
 
- 		want    bool
 
- 		want1   float64
 
- 		want2   float64
 
- 		wantErr bool
 
- 	}{
 
- 		{name: "Rick and Morty - S05E10", args: args{audioInfo: vad.AudioInfo{FileFullPath: "C:\\Tmp\\Rick and Morty - S05E10\\英_1.pcm"}, subFilePath: "C:\\Tmp\\Rick and Morty - S05E10\\英_2.ass"}},
 
- 	}
 
- 	for _, tt := range tests {
 
- 		t.Run(tt.name, func(t *testing.T) {
 
- 			s := &SubTimelineFixer{
 
- 				fixerConfig: tt.fields.fixerConfig,
 
- 			}
 
- 			bok, fileInfo, err := subParserHub.DetermineFileTypeFromFile(tt.args.subFilePath)
 
- 			if err != nil {
 
- 				t.Fatal(err)
 
- 			}
 
- 			if bok == false {
 
- 				t.Fatal("DetermineFileTypeFromFile == false")
 
- 			}
 
- 			/*
 
- 				这里发现一个梗,内置的英文字幕导出的时候,有可能需要合并多个 Dialogue,见
 
- 				internal/pkg/sub_helper/sub_helper.go 中 MergeMultiDialogue4EngSubtitle 的实现
 
- 			*/
 
- 			sub_helper.MergeMultiDialogue4EngSubtitle(fileInfo)
 
- 			got, got1, got2, err := s.GetOffsetTimeV2(tt.args.audioInfo, fileInfo, tt.args.staticLineFileSavePath, tt.args.debugInfoFileSavePath)
 
- 			if (err != nil) != tt.wantErr {
 
- 				t.Errorf("GetOffsetTimeV2() error = %v, wantErr %v", err, tt.wantErr)
 
- 				return
 
- 			}
 
- 			if got != tt.want {
 
- 				t.Errorf("GetOffsetTimeV2() got = %v, want %v", got, tt.want)
 
- 			}
 
- 			if got1 != tt.want1 {
 
- 				t.Errorf("GetOffsetTimeV2() got1 = %v, want %v", got1, tt.want1)
 
- 			}
 
- 			if got2 != tt.want2 {
 
- 				t.Errorf("GetOffsetTimeV2() got2 = %v, want %v", got2, tt.want2)
 
- 			}
 
- 		})
 
- 	}
 
- }
 
 
  |