瀏覽代碼

保存进度

Signed-off-by: allan716 <[email protected]>
allan716 3 年之前
父節點
當前提交
48bce46db0

+ 1 - 0
go.mod

@@ -78,6 +78,7 @@ require (
 	github.com/hashicorp/errwrap v1.0.0 // indirect
 	github.com/hashicorp/go-multierror v1.1.0 // indirect
 	github.com/hashicorp/hcl v1.0.0 // indirect
+	github.com/huandu/go-clone v1.3.0 // indirect
 	github.com/jinzhu/inflection v1.0.0 // indirect
 	github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect
 	github.com/klauspost/pgzip v1.2.4 // indirect

+ 3 - 0
go.sum

@@ -244,6 +244,9 @@ github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0m
 github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I=
 github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc=
 github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
+github.com/huandu/go-assert v1.1.5/go.mod h1:yOLvuqZwmcHIC5rIzrBhT7D3Q9c3GFnd0JrPVhn/06U=
+github.com/huandu/go-clone v1.3.0 h1:gZ0HVFnzdal9t6p12QAoeuRW1Q8tp8gLCRUvLbj0hY0=
+github.com/huandu/go-clone v1.3.0/go.mod h1:bPJ9bAG8fjyAEBRFt6toaGUZcGFGL3f6g5u6yW+9W14=
 github.com/hudl/fargo v1.3.0/go.mod h1:y3CKSmjA+wD2gak7sUSXTAoopbhU08POFhmITJgmKTg=
 github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
 github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=

+ 49 - 2
internal/pkg/gss/gss.go

@@ -32,8 +32,55 @@ var (
 // >>> print (c,d)
 // (1.9999959837979107, 2.0000050911830893)
 // '''
-func Gss(f func(float64) float64, a, b, tol float64, logger *log.Logger) (float64, float64) {
-	return gss(f, a, b, tol, nan, nan, nan, nan, nan, logger)
+//
+// Gss runs an iterative golden-section search for a minimum of fWrapped on
+// [a, b] (the bounds are swapped when given in reverse order) and returns the
+// final bracketing interval.
+//
+// fWrapped receives the probe point and a flag that is true only for the
+// evaluations made during the last iteration, so the callback can record its
+// final results. When the initial interval is already no wider than tol, or
+// tol is not a positive number, the interval is returned unchanged and
+// fWrapped is never called. logger may be nil, in which case nothing is logged.
+func Gss(fWrapped func(float64, bool) float64, a, b, tol float64, logger *log.Logger) (float64, float64) {
+	if a > b {
+		a, b = b, a
+	}
+	h := b - a
+	if h <= tol {
+		return a, b
+	}
+	// A non-positive or NaN tolerance can never be met: the iteration count
+	// below would come from converting ±Inf/NaN to int, which Go leaves
+	// implementation-dependent. Hand the bracket back untouched instead.
+	if tol <= 0 || math.IsNaN(tol) {
+		return a, b
+	}
+
+	// Every step shrinks the bracket by invphi, so n steps take it from h down to tol.
+	n := int(math.Ceil(math.Log(tol/h) / math.Log(invphi)))
+	if logger != nil {
+		logger.Printf("About to perform %d iterations of golden section search to find the best framerate", n)
+	}
+
+	c := a + invphi2*h
+	d := a + invphi*h
+	// With n == 1 the loop below never runs, so these two probes are the last ones.
+	yc := fWrapped(c, n == 1)
+	yd := fWrapped(d, n == 1)
+
+	for i := 0; i < n-1; i++ {
+		if logger != nil {
+			logger.Printf("%d\t%9.6g\t%9.6g\n", i, a, b)
+		}
+
+		if yc < yd {
+			// Keep [a, d]; the old c becomes the new d, so only c is re-evaluated.
+			b = d
+			d = c
+			yd = yc
+			h = invphi * h
+			c = a + invphi2*h
+			yc = fWrapped(c, i == n-2)
+		} else {
+			// Keep [c, b]; the old d becomes the new c, so only d is re-evaluated.
+			a = c
+			c = d
+			yc = yd
+			h = invphi * h
+			d = a + invphi*h
+			yd = fWrapped(d, i == n-2)
+		}
+	}
+
+	if yc < yd {
+		return a, d
+	}
+	return c, b
 }
 func gss(f func(float64) float64, a, b, tol, h, c, d, fc, fd float64, logger *log.Logger) (float64, float64) {
 	if a > b {

+ 1 - 1
internal/pkg/gss/gss_test.go

@@ -7,7 +7,7 @@ import (
 )
 
 func TestGss(t *testing.T) {
-	f := func(x float64) float64 {
+	f := func(x float64, b bool) float64 {
 		tmp := x - 2
 		return tmp * tmp
 	}

+ 2 - 0
internal/pkg/sub_helper/sub_helper.go

@@ -481,6 +481,8 @@ func GetVADInfoFeatureFromSubNew(fileInfo *subparser.FileInfo, SkipFrontAndEndPe
 	if err != nil {
 		return nil, err
 	}
+	// 如果想要从 0 时间点开始算,那么 subStartTimeFloor 这个值就需要重置到0
+	subStartTimeFloor = 0
 	subFullSecondTimeFloor := subEndTimeFloor - subStartTimeFloor
 	// 根据这个时长就能够得到一个完整的 VAD List,然后再通过每一句对白进行 VAD 值的调整即可,这样就能够保证
 	// 相同的一个字幕因为使用 ffmpeg 导出 srt 和 ass 后的,可能存在总体时间轴不一致的问题

+ 34 - 3
internal/pkg/sub_timeline_fixer/fft_aligner.go

@@ -13,15 +13,25 @@ import (
 	复现 https://github.com/smacke/ffsubsync 的 FFTAligner 算法
 */
 type FFTAligner struct {
+	maxOffsetSamples int // offset limit in samples used by eliminateExtremeOffsetsFromSolutions; 0 disables the masking
 }
 
-func NewFFTAligner() *FFTAligner {
-	return &FFTAligner{}
+// NewFFTAligner builds an FFTAligner whose offset limit, in samples, is the
+// absolute value of maxOffsetSeconds * sampleRate. A product of 0 leaves the
+// limit at 0.
+func NewFFTAligner(maxOffsetSeconds, sampleRate int) *FFTAligner {
+
+	maxOffsetSamples := maxOffsetSeconds * sampleRate
+	if maxOffsetSamples < 0 {
+		// The limit is stored as a magnitude, so a negative product is flipped.
+		maxOffsetSamples = -maxOffsetSamples
+	}
+	return &FFTAligner{
+		maxOffsetSamples: maxOffsetSamples,
+	}
 }
 
 // Fit returns the best offset and its score. The offset still has to be
 // converted by the caller according to the actual step size (for example,
 // 1 step = 10 ms). The input slices may only contain values such as 1 and -1,
 // so normalization must be done by the caller beforehand.
 func (f FFTAligner) Fit(refFloats, subFloats []float64) (int, float64) {
-	return f.computeArgmax(f.fit(refFloats, subFloats), subFloats)
+
+	convolve := f.fit(refFloats, subFloats)
+	// Pass the result through eliminateExtremeOffsetsFromSolutions before picking the maximum.
+	return f.computeArgmax(f.eliminateExtremeOffsetsFromSolutions(convolve, subFloats), subFloats)
 }
 
 // fit returns the convolve slice.
@@ -75,6 +85,27 @@ func (f FFTAligner) fit(refFloats, subFloats []float64) []float64 {
 	return convolve
 }
 
+// eliminateExtremeOffsetsFromSolutions returns a copy of convolve in which the
+// entries outside the allowed offset window are replaced by NaN: everything
+// before the index of offset -maxOffsetSamples and everything from the index of
+// offset +maxOffsetSamples onward. An offset maps to the index
+// len(convolve) - 1 + offset - len(subSting); indexes falling outside the slice
+// are clamped so that short inputs cannot cause an out-of-range panic.
+//
+// When maxOffsetSamples is 0 no masking is done and convolve itself is
+// returned. The caller's slice is never modified.
+func (f FFTAligner) eliminateExtremeOffsetsFromSolutions(convolve, subSting []float64) []float64 {
+	if f.maxOffsetSamples == 0 {
+		return convolve
+	}
+
+	// Work on a real copy: a plain slice assignment would alias the caller's backing array.
+	masked := make([]float64, len(convolve))
+	copy(masked, convolve)
+
+	// offsetToIndex maps an offset to its index in masked, clamped to [0, len(masked)].
+	offsetToIndex := func(offset int) int {
+		idx := len(masked) - 1 + offset - len(subSting)
+		if idx < 0 {
+			return 0
+		}
+		if idx > len(masked) {
+			return len(masked)
+		}
+		return idx
+	}
+	lo := offsetToIndex(-f.maxOffsetSamples)
+	hi := offsetToIndex(f.maxOffsetSamples)
+	for i := 0; i < lo; i++ {
+		masked[i] = math.NaN()
+	}
+	for i := hi; i < len(masked); i++ {
+		masked[i] = math.NaN()
+	}
+
+	return masked
+}
+
 // computeArgmax finds the optimal offset; the result still has to be converted according to the actual step size (for example, 1 step = 10 ms).
 func (f FFTAligner) computeArgmax(convolve, subFloats []float64) (int, float64) {
 

+ 15 - 15
internal/pkg/sub_timeline_fixer/fft_aligner_test.go

@@ -11,30 +11,30 @@ func TestFFTAligner_Fit(t *testing.T) {
 		subFloats []float64
 	}
 	tests := []struct {
-		name  string
-		args  args
-		want  int
-		want1 float64
+		name      string
+		args      args
+		wantIndex int
+		wantScore float64
 	}{
-		{name: "2-5", args: args{
-			refFloats: []float64{1, 1, 1, 1, -1, -1, 1},
-			subFloats: []float64{1, 1, -1, -1, 1},
-		}, want: 2, want1: 5},
+		{name: "3-4", args: args{
+			refFloats: []float64{1, 1, 1, 1, 1, -1, 1},
+			subFloats: []float64{1, 1, -1, 1},
+		}, wantIndex: 3, wantScore: 4},
 		{name: "3-5", args: args{
 			refFloats: []float64{0, 1, 1, 1, 1, -1, -1, 1},
 			subFloats: []float64{1, 1, -1, -1, 1},
-		}, want: 3, want1: 5},
+		}, wantIndex: 3, wantScore: 5},
 	}
 	const tol = 1e-10
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			f := NewFFTAligner()
-			got, got1 := f.Fit(tt.args.refFloats, tt.args.subFloats)
-			if got != tt.want {
-				t.Errorf("Fit() got = %v, want %v", got, tt.want)
+			f := NewFFTAligner(2, 2)
+			index, score := f.Fit(tt.args.refFloats, tt.args.subFloats)
+			if index != tt.wantIndex {
+				t.Errorf("Fit() wantIndex = %v, want %v", index, tt.wantIndex)
 			}
-			if scalar.EqualWithinAbsOrRel(got1, tt.want1, tol, tol) == false {
-				t.Errorf("Fit() got1 = %v, want %v", got1, tt.want1)
+			if scalar.EqualWithinAbsOrRel(score, tt.wantScore, tol, tol) == false {
+				t.Errorf("Fit() wantScore = %v, want %v", score, tt.wantScore)
 			}
 		})
 	}

+ 2 - 2
internal/pkg/sub_timeline_fixer/fixer.go

@@ -693,7 +693,7 @@ func (s *SubTimelineFixer) slidingWindowProcessorV2(windowInfo *WindowInfo) (*Ma
 		// -------------------------------------------------
 		// 开始匹配
 		// 这里的对白单元,当前的 Base 进行对比,详细示例见图解。Step 2 中橙色的区域
-		fffAligner := NewFFTAligner()
+		fffAligner := NewFFTAligner(DefaultMaxOffsetSeconds, SampleRate)
 		var bok = false
 		var nowBaseStartTime = 0.0
 		var offsetIndex = 0
@@ -924,7 +924,7 @@ func (s *SubTimelineFixer) GetOffsetTimeV3(infoBase, infoSrc, orgFix *subparser.
 		然后 < 20 的就继承 20 的偏移,> 80 的就继承 80 的偏移即可
 		那么现在就需要从对白中开始遍历
 	*/
-	fffAligner := NewFFTAligner()
+	fffAligner := NewFFTAligner(DefaultMaxOffsetSeconds, SampleRate)
 	err2, done := s.caleOne(0.1, srcUnitNew, fffAligner, baseUnitNew)
 	if done {
 		return err2

+ 72 - 20
internal/pkg/sub_timeline_fixer/pipeline.go

@@ -1,8 +1,12 @@
 package sub_timeline_fixer
 
 import (
+	"fmt"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/gss"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_helper"
 	"github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
+	"github.com/huandu/go-clone"
+	"sort"
 )
 
 type Pipeline struct {
@@ -15,26 +19,65 @@ func NewPipeline() *Pipeline {
 	}
 }
 
-func (p Pipeline) Fit_gss(infoBase, infoSrc subparser.FileInfo) error {
-
-	/*
-		ffsubsync 的 pipeline 有这三个步骤
-		1. parse			解析字幕
-		2. scale			根据帧数比率调整时间轴
-		3. speech_extract	从字幕转换为 VAD 的语音检测信息
-	*/
-	opt_func := func(framerateRatio float64) float64 {
-		nowInfoSrc := infoSrc
-		err := nowInfoSrc.ChangeDialoguesFilterExTimeByFramerateRatio(framerateRatio)
+// FitGSS sorts the DialoguesFilter lists of infoBase and infoSrc in place,
+// extracts the VAD data of infoBase once, and then, for every ratio returned by
+// getFramerateRatios2Try, scales a clone of infoSrc by that ratio, extracts its
+// VAD data and runs gss.Gss over [MinFramerateRatio, MaxFramerateRatio] with an
+// objective that returns the negated FFTAligner score.
+//
+// It returns the error of either VAD extraction, otherwise nil.
+//
+// NOTE(review): the collected pipeResults and the interval returned by gss.Gss
+// are not used or returned here; this looks like unfinished work - confirm.
+func (p Pipeline) FitGSS(infoBase, infoSrc *subparser.FileInfo) error {
+
+	pipeResults := make([]PipeResult, 0)
+	// Sort the dialogues of both subtitles (subparser.OneDialogueByStartTime ordering).
+	sort.Sort(subparser.OneDialogueByStartTime(infoBase.DialoguesFilter))
+	sort.Sort(subparser.OneDialogueByStartTime(infoSrc.DialoguesFilter))
+	// Extract the VAD information of the base subtitle.
+	baseUnitNew, err := sub_helper.GetVADInfoFeatureFromSubNew(infoBase, 0)
+	if err != nil {
+		return err
+	}
+	fffAligner := NewFFTAligner(DefaultMaxOffsetSeconds, SampleRate)
+
+	framerateRatios := p.getFramerateRatios2Try()
+	for _, framerateRatio := range framerateRatios {
+
+		/*
+			The ffsubsync pipeline has these three steps:
+			1. parse			parse the subtitle
+			2. scale			adjust the timeline by the frame-rate ratio
+			3. speech_extract	convert the subtitle into VAD speech-detection information
+		*/
+		// Passed in from outside.
+		// 1. parse			parse the subtitle
+		tmpInfoSrc := clone.Clone(infoSrc).(*subparser.FileInfo)
+		// 2. scale			adjust the timeline by the frame-rate ratio
+		err := tmpInfoSrc.ChangeDialoguesFilterExTimeByFramerateRatio(framerateRatio)
 		if err != nil {
 			// Restore: the scaling error is only printed and a fresh, unscaled clone of infoSrc is used instead.
 			println("ChangeDialoguesFilterExTimeByFramerateRatio", err)
-			nowInfoSrc = infoSrc
+			tmpInfoSrc = clone.Clone(infoSrc).(*subparser.FileInfo)
+		}
+		tmpSrcInfoUnit, err := sub_helper.GetVADInfoFeatureFromSubNew(tmpInfoSrc, 0)
+		if err != nil {
+			return err
 		}
-		// 然后进行 base 与 src 匹配计算,将每一次变动 framerateRatio 计算得到的 偏移值和分数进行记录
 
+		// NOTE(review): the closure parameter framerateRatio shadows the loop variable and is
+		// only printed and stored in PipeResult.ScaleFactor. tmpSrcInfoUnit was built from the
+		// loop's ratio above, so every probe issued by gss.Gss is scored against the same data.
+		optFunc := func(framerateRatio float64, isLastIter bool) float64 {
+
+			// 3. speech_extract	convert the subtitle into VAD speech-detection information
+			// Then match base against src, recording the offset and score obtained for each framerateRatio change.
+			bestOffset, score := fffAligner.Fit(baseUnitNew.GetVADFloatSlice(), tmpSrcInfoUnit.GetVADFloatSlice())
+			println(fmt.Sprintf("got score %.0f (offset %d) for ratio %.3f", score, bestOffset, framerateRatio))
+			// Store the result in the outer slice, but only for probes flagged as the last iteration.
+			if isLastIter == true {
+				pipeResult := PipeResult{
+					Score:       score,
+					BestOffset:  bestOffset,
+					ScaleFactor: framerateRatio,
+				}
+				pipeResults = append(pipeResults, pipeResult)
+			}
+			// gss.Gss searches for a minimum, so the score is negated.
+			return -score
+		}
+
+		gss.Gss(optFunc, MinFramerateRatio, MaxFramerateRatio, 1e-4, nil)
 	}
-	gss.Gss(opt_func, MIN_FRAMERATE_RATIO, MAX_FRAMERATE_RATIO, 1e-4, nil)
+
+	return nil
 }
 
 func (p *Pipeline) getFramerateRatios2Try() []float64 {
@@ -42,14 +85,23 @@ func (p *Pipeline) getFramerateRatios2Try() []float64 {
 	if len(p.framerateRatios) > 0 {
 		return p.framerateRatios
 	}
-	p.framerateRatios = append(p.framerateRatios, FRAMERATE_RATIOS...)
-	for i := 0; i < len(FRAMERATE_RATIOS); i++ {
-		p.framerateRatios = append(p.framerateRatios, 1.0/FRAMERATE_RATIOS[i])
+	p.framerateRatios = append(p.framerateRatios, 1.0)
+	p.framerateRatios = append(p.framerateRatios, FramerateRatios...)
+	for i := 0; i < len(FramerateRatios); i++ {
+		p.framerateRatios = append(p.framerateRatios, 1.0/FramerateRatios[i])
 	}
 	return p.framerateRatios
 }
 
-var FRAMERATE_RATIOS = []float64{24. / 23.976, 25. / 23.976, 25. / 24.}
+// FramerateRatios holds the base frame-rate ratios; getFramerateRatios2Try
+// combines them with 1.0 and with their reciprocals.
+var FramerateRatios = []float64{24. / 23.976, 25. / 23.976, 25. / 24.}
+
+// MinFramerateRatio is the lower bound of the interval FitGSS passes to gss.Gss.
+const MinFramerateRatio = 0.9
+
+// MaxFramerateRatio is the upper bound of the interval FitGSS passes to gss.Gss.
+const MaxFramerateRatio = 1.1
+
+// DefaultMaxOffsetSeconds is the maxOffsetSeconds argument given to NewFFTAligner.
+const DefaultMaxOffsetSeconds = 60
+
+// SampleRate is the sampleRate argument given to NewFFTAligner, which multiplies
+// it with the offset limit in seconds (presumably samples per second - confirm).
+const SampleRate = 100
 
-const MIN_FRAMERATE_RATIO = 0.9
-const MAX_FRAMERATE_RATIO = 1.1
+// PipeResult is one record appended by the objective function in FitGSS.
+type PipeResult struct {
+	Score       float64 // score returned by FFTAligner.Fit
+	BestOffset  int     // offset returned by FFTAligner.Fit
+	ScaleFactor float64 // ratio value the objective function was called with
+}

+ 74 - 0
internal/pkg/sub_timeline_fixer/pipeline_test.go

@@ -2,6 +2,9 @@ package sub_timeline_fixer
 
 import (
 	"fmt"
+	"github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/ass"
+	"github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/srt"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub"
 	"testing"
 )
 
@@ -12,3 +15,74 @@ func TestPipeline_getFramerateRatios2Try(t *testing.T) {
 		println(i, fmt.Sprintf("%v", value))
 	}
 }
+
+// TestPipeline_FitGSS parses a base and a source subtitle for each case and
+// checks only whether FitGSS returns an error as expected.
+//
+// The fixtures are absolute paths on the author's machine, so the test fails
+// wherever those files are absent. The want and orgFixSubFile fields are not
+// used by any assertion yet.
+func TestPipeline_FitGSS(t *testing.T) {
+	subParserHub := sub_parser_hub.NewSubParserHub(ass.NewParser(), srt.NewParser())
+
+	type args struct {
+		baseSubFile   string
+		orgFixSubFile string
+		srcSubFile    string
+	}
+	tests := []struct {
+		name    string
+		args    args
+		want    float64
+		wantErr bool
+	}{
+		{name: "BL S01E03", args: args{
+			baseSubFile:   "C:\\Tmp\\BL - S01E03\\英_2.ass",
+			orgFixSubFile: "C:\\Tmp\\BL - S01E03\\org-fix.ass",
+			srcSubFile:    "C:\\Tmp\\BL - S01E03\\org.ass",
+		}, want: -4.1, wantErr: false},
+		{name: "Rick and Morty - S05E10", args: args{
+			baseSubFile:   "C:\\Tmp\\Rick and Morty - S05E10\\英_2.ass",
+			orgFixSubFile: "C:\\Tmp\\Rick and Morty - S05E10\\org-fix.ass",
+			srcSubFile:    "C:\\Tmp\\Rick and Morty - S05E10\\org.ass",
+		}, want: -4.1, wantErr: false},
+		{name: "Foundation - S01E09", args: args{
+			baseSubFile:   "C:\\Tmp\\Foundation - S01E09\\英_2.ass",
+			orgFixSubFile: "C:\\Tmp\\Foundation - S01E09\\org-fix.ass",
+			srcSubFile:    "C:\\Tmp\\Foundation - S01E09\\org.ass",
+		}, want: -4.1, wantErr: false},
+		{name: "mix", args: args{
+			baseSubFile: "C:\\Tmp\\Rick and Morty - S05E10\\英_2.ass",
+			srcSubFile:  "C:\\Tmp\\BL - S01E03\\org.ass",
+		}, want: -4.1, wantErr: false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+
+			bFind, infoBase, err := subParserHub.DetermineFileTypeFromFile(tt.args.baseSubFile)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if !bFind {
+				t.Fatal("sub not match")
+			}
+
+			bFind, infoSrc, err := subParserHub.DetermineFileTypeFromFile(tt.args.srcSubFile)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if !bFind {
+				t.Fatal("sub not match")
+			}
+
+			//bFind, orgFix, err := subParserHub.DetermineFileTypeFromFile(tt.args.orgFixSubFile)
+			//if err != nil {
+			//	t.Fatal(err)
+			//}
+			//if bFind == false {
+			//	t.Fatal("sub not match")
+			//}
+			// ---------------------------------------------------------------------------------------
+			err = NewPipeline().FitGSS(infoBase, infoSrc)
+			if (err != nil) != tt.wantErr {
+				// Name the function actually under test in the failure message.
+				t.Errorf("FitGSS() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+		})
+	}
+}

+ 9 - 6
internal/types/subparser/fileinfo.go

@@ -57,9 +57,9 @@ func (f FileInfo) GetDialogueExContent(index int) string {
 func (f *FileInfo) ChangeDialoguesFilterExTimeByFramerateRatio(framerateRatio float64) error {
 
 	timeFormat := f.GetTimeFormat()
-	for i := 0; i < len(f.DialoguesFilterEx); i++ {
+	for i := 0; i < len(f.DialoguesFilter); i++ {
 
-		oneDialogue := f.DialoguesFilterEx[i]
+		oneDialogue := f.DialoguesFilter[i]
 		timeStart, err := my_util.ParseTime(oneDialogue.StartTime)
 		if err != nil {
 			return err
@@ -68,11 +68,14 @@ func (f *FileInfo) ChangeDialoguesFilterExTimeByFramerateRatio(framerateRatio fl
 		if err != nil {
 			return err
 		}
-		scaleTimeStart := timeStart.Add(time.Duration(my_util.Time2SecondNumber(timeStart) * framerateRatio * math.Pow10(9)))
-		scaleTimeEnd := timeEnd.Add(time.Duration(my_util.Time2SecondNumber(timeEnd) * framerateRatio * math.Pow10(9)))
+		timeStartNumber := my_util.Time2SecondNumber(timeStart)
+		timeEndNumber := my_util.Time2SecondNumber(timeEnd)
 
-		my_util.Time2SubTimeString(scaleTimeStart, timeFormat)
-		my_util.Time2SubTimeString(scaleTimeEnd, timeFormat)
+		scaleTimeStart := time.Time{}.Add(time.Duration(timeStartNumber * framerateRatio * math.Pow10(9)))
+		scaleTimeEnd := time.Time{}.Add(time.Duration(timeEndNumber * framerateRatio * math.Pow10(9)))
+
+		f.DialoguesFilter[i].StartTime = my_util.Time2SubTimeString(scaleTimeStart, timeFormat)
+		f.DialoguesFilter[i].EndTime = my_util.Time2SubTimeString(scaleTimeEnd, timeFormat)
 	}
 
 	return nil