4 gadi atpakaļ · 32559cc4ba
--- a/internal/pkg/frechet/frechet.go
+++ b/internal/pkg/frechet/frechet.go
@@ -0,0 +1,56 @@
 
				+package frechet
			
 
				+
			
 
				+import "math"
			
 
				+
			
 
				+/*
			
 
				+	https://github.com/artpar/frechet
			
 
				+*/
			
 
				+
			
 
				// Point is a 2-D sample used to represent a curve as an ordered list of
				// coordinates.
				type Point struct {
					X float64
					Y float64
				}

				// euclideanDistance returns the straight-line distance between p1 and p2.
				func euclideanDistance(p1 Point, p2 Point) float64 {
					dx := p2.X - p1.X
					dy := p2.Y - p1.Y
					return math.Sqrt(dx*dx + dy*dy)
				}

				// min returns the smallest of x, y and z.
				func min(x float64, y float64, z float64) float64 {
					if x < y {
						return math.Min(x, z)
					}

					return math.Min(y, z)
				}

				// Frechet returns a brute-force distance between the sampled curves c1 and c2.
				//
				// For every point of one curve it finds the nearest point of the other curve
				// and returns the largest of those nearest-neighbour distances, taken in both
				// directions. Point order is not considered, so the value is the symmetric
				// Hausdorff distance between the two point sets rather than a true
				// dynamic-programming Fréchet distance.
				//
				// Two empty curves yield 0. If exactly one curve is empty there is no nearest
				// neighbour to measure against and the result is +Inf.
				func Frechet(c1 []Point, c2 []Point) float64 {
					// nearestToC2[j] tracks the distance from c2[j] to its closest point in c1.
					// Starting from +Inf (instead of a finite sentinel) keeps very large
					// distances from being clamped.
					nearestToC2 := make([]float64, len(c2))
					for j := range nearestToC2 {
						nearestToC2[j] = math.Inf(1)
					}

					result := 0.0
					for i := range c1 {
						nearestToC1 := math.Inf(1)
						for j := range c2 {
							// Each pairwise distance is computed once and feeds both directions.
							d := euclideanDistance(c1[i], c2[j])
							nearestToC1 = math.Min(nearestToC1, d)
							nearestToC2[j] = math.Min(nearestToC2[j], d)
						}
						result = math.Max(result, nearestToC1)
					}

					for _, d := range nearestToC2 {
						result = math.Max(result, d)
					}

					return result
				}
			
--- a/internal/pkg/frechet/frechet_test.go
+++ b/internal/pkg/frechet/frechet_test.go
@@ -0,0 +1,33 @@
 
				+package frechet
			
 
				+
			
 
				+import (
			
 
				+	"testing"
			
 
				+)
			
 
				+
			
 
				+func TestFrechet(t *testing.T) {
			
 
				+	curve1 := []Point{Point{X: 0, Y: 0}, Point{X: 1, Y: 1}, Point{X: 2, Y: 2}}
			
 
				+	curve2 := []Point{Point{X: 0, Y: 1}, Point{X: 1, Y: 2}, Point{X: 2, Y: 3}}
			
 
				+
			
 
				+	dist := Frechet(curve1, curve2)
			
 
				+	if dist != 1.0 {
			
 
				+		t.Fatalf("%v != 1.0", dist)
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+func BenchmarkFrechet(b *testing.B) {
			
 
				+	curve1 := []Point{}
			
 
				+	curve2 := []Point{}
			
 
				+
			
 
				+	for i := 0; i < 1000; i++ {
			
 
				+		curve1 = append(curve1, Point{X: float64(i), Y: float64(i)})
			
 
				+		curve2 = append(curve2, Point{X: float64(i), Y: float64(i + 1)})
			
 
				+	}
			
 
				+
			
 
				+	b.ResetTimer()
			
 
				+	for i := 0; i < b.N; i++ {
			
 
				+		dist := Frechet(curve1, curve2)
			
 
				+		if dist != 1.0 {
			
 
				+			b.Fatalf("%v != 1.0", dist)
			
 
				+		}
			
 
				+	}
			
 
				+}
			
--- a/internal/pkg/sub_helper/key_features.go
+++ b/internal/pkg/sub_helper/key_features.go
@@ -0,0 +1,38 @@
 
				+package sub_helper
			
 
				+
			
 
				+// KeyFeatures 钥匙的组合特征
			
 
				+type KeyFeatures struct {
			
 
				+	Big    Feature // 大锯齿
			
 
				+	Middle Feature // 中锯齿
			
 
				+	Small  Feature // 小锯齿
			
 
				+}
			
 
				+
			
 
				+func NewKeyFeatures(big, middle, small Feature) *KeyFeatures {
			
 
				+	return &KeyFeatures{
			
 
				+		big, middle, small,
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				// Feature describes one kind of tooth on a "key": an open interval that a
				// measured gap must fall into, and how many such gaps are required.
				type Feature struct {
					BigThanTime  float64 // exclusive lower bound; -1 disables the lower-bound check
					LessThanTime float64 // exclusive upper bound; -1 disables the upper-bound check
					LeastCount   int     // minimum number of matching gaps required
					NowCount     int     // running counter of matching gaps
				}

				// NewFeature creates a Feature with its counter at zero. Passing -1 for
				// BigThanTime or LessThanTime skips the corresponding bound check in Match.
				func NewFeature(BigThanTime, LessThanTime float64, LeastCount int) Feature {
					return Feature{
						BigThanTime:  BigThanTime,
						LessThanTime: LessThanTime,
						LeastCount:   LeastCount,
						NowCount:     0,
					}
				}

				// Match reports whether interval lies strictly between BigThanTime and
				// LessThanTime. A bound set to -1 is treated as "no limit" on that side, as
				// documented on NewFeature.
				func (f Feature) Match(interval float64) bool {
					// skipBound is the sentinel that switches a bound check off.
					const skipBound = -1

					aboveLower := f.BigThanTime == skipBound || interval > f.BigThanTime
					belowUpper := f.LessThanTime == skipBound || interval < f.LessThanTime
					return aboveLower && belowUpper
				}
			
--- a/internal/pkg/sub_helper/sub_helper.go
+++ b/internal/pkg/sub_helper/sub_helper.go
@@ -370,9 +370,9 @@ func MergeMultiDialogue4EngSubtitle(inSubParser *subparser.FileInfo) {
 
				 }
			
 
				 
			
 
				 // GetVADINfoFromSub 跟下面的 GetVADINfoFromSubNeedOffsetTimeWillInsert 函数功能一致
			
 
				-func GetVADINfoFromSub(infoSrc *subparser.FileInfo, FrontAndEndPer float64, SubUnitMaxCount int, insert bool) ([]SubUnit, error) {
			
 
				+func GetVADINfoFromSub(infoSrc *subparser.FileInfo, FrontAndEndPer float64, SubUnitMaxCount int, insert bool, kf *KeyFeatures) ([]SubUnit, error) {
			
 
				 
			
 
				-	return GetVADINfoFromSubNeedOffsetTimeWillInsert(infoSrc, FrontAndEndPer, SubUnitMaxCount, 0, insert)
			
 
				+	return GetVADINfoFromSubNeedOffsetTimeWillInsert(infoSrc, FrontAndEndPer, SubUnitMaxCount, 0, insert, kf)
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -383,11 +383,12 @@ func GetVADINfoFromSub(infoSrc *subparser.FileInfo, FrontAndEndPer float64, SubU
 
				 	3. 可能还有一个需求，默认的模式是每五句话一个单元，还有一种模式是每一句话向后找到连续的四句话组成一个单元，允许重叠
			
 
				 		目前看到的情况是前者的抽样率太低，需要使用后者的逻辑
			
 
				 */
			
 
				-func GetVADINfoFromSubNeedOffsetTimeWillInsert(infoSrc *subparser.FileInfo, FrontAndEndPer float64, SubUnitMaxCount int, offsetTime float64, insert bool) ([]SubUnit, error) {
			
 
				+func GetVADINfoFromSubNeedOffsetTimeWillInsert(infoSrc *subparser.FileInfo, FrontAndEndPer float64, SubUnitMaxCount int, offsetTime float64, insert bool, kf *KeyFeatures) ([]SubUnit, error) {
			
 
				 	if SubUnitMaxCount < 0 {
			
 
				 		SubUnitMaxCount = 0
			
 
				 	}
			
 
				 	srcSubUnitList := make([]SubUnit, 0)
			
 
				+	srcSubDialogueList := make([]subparser.OneDialogueEx, 0)
			
 
				 	srcOneSubUnit := NewSubUnit()
			
 
				 
			
 
				 	// srcDuration
			
@@ -432,13 +433,36 @@ func GetVADINfoFromSubNeedOffsetTimeWillInsert(infoSrc *subparser.FileInfo, Fron
 
				 			} else {
			
 
				 				srcOneSubUnit.Add(oneDialogueExTimeStart, oneDialogueExTimeEnd)
			
 
				 			}
			
 
				+			// 这一个单元的 Dialogue 需要合并起来，才能判断是否符合“钥匙”的要求
			
 
				+			srcSubDialogueList = append(srcSubDialogueList, infoSrc.DialoguesEx[i])
			
 
				+
			
 
				 		} else {
			
 
				+			// 筹够那么多句话了，需要判断一次是否符合“钥匙”的要求
			
 
				+			tmpNowMatchKey := IsMatchKey(srcSubDialogueList, kf)
			
 
				+			srcOneSubUnit.IsMatchKey = tmpNowMatchKey
			
 
				+			// 用完清空
			
 
				+			srcSubDialogueList = make([]subparser.OneDialogueEx, 0)
			
 
				+			// 将拼凑起来的对话组成一个单元进行存储起来
			
 
				 			srcSubUnitList = append(srcSubUnitList, *srcOneSubUnit)
			
 
				+			// 然后重置
			
 
				 			srcOneSubUnit = NewSubUnit()
			
 
				 			// TODO 这里决定了插入数据的密度，有待测试
			
 
				-			//i = i - SubUnitMaxCount + SubUnitMaxCount/5
			
 
				-			//i = i - SubUnitMaxCount + SubUnitMaxCount/2
			
 
				-			i = i - SubUnitMaxCount
			
 
				+			//i = i - SubUnitMaxCount
			
 
				+			if kf == nil {
			
 
				+				// 走原始的逻辑 i 的赋值逻辑跟之前一样，需要每一次进一步，也就是有重叠的部分出现
			
 
				+				//i = i - SubUnitMaxCount + SubUnitMaxCount/5
			
 
				+				//i = i - SubUnitMaxCount + SubUnitMaxCount/2
			
 
				+				i = i - SubUnitMaxCount
			
 
				+			} else {
			
 
				+				if tmpNowMatchKey == false {
			
 
				+					// 走原始的逻辑 i 的赋值逻辑跟之前一样，需要每一次进一步，也就是有重叠的部分出现
			
 
				+					i = i - SubUnitMaxCount
			
 
				+				} else {
			
 
				+					// 判断了“钥匙”特征，且通过了
			
 
				+					// i 需要跳过当前已经覆盖的段
			
 
				+					i = i - SubUnitMaxCount + SubUnitMaxCount/2
			
 
				+				}
			
 
				+			}
			
 
				 		}
			
 
				 	}
			
 
				 	if srcOneSubUnit.GetDialogueCount() > 0 {
			
@@ -447,3 +471,51 @@ func GetVADINfoFromSubNeedOffsetTimeWillInsert(infoSrc *subparser.FileInfo, Fron
 
				 
			
 
				 	return srcSubUnitList, nil
			
 
				 }
			
 
				+
			
 
				+// IsMatchKey 是否符合“钥匙”的标准
			
 
				+func IsMatchKey(srcSubDialogueList []subparser.OneDialogueEx, kf *KeyFeatures) bool {
			
 
				+
			
 
				+	if kf == nil {
			
 
				+		return false
			
 
				+	}
			
 
				+	/*
			
 
				+		这里是设置主要依赖的还是数据源，源必须有足够的对白（暂定 50 句），才可能找到这么多信息
			
 
				+		这里需要匹配的“钥匙”特征，先简单实现为 (这三个需要不交叉时间段)
			
 
				+			1. 大坑（大于 10s 的对白间隔）至少 1 个
			
 
				+			2. 中坑（大于 2 且小于 5s 的对白间隔）至少 3 个
			
 
				+			3. 小坑（大于 1 且小于 2s 的对白间隔）至少 5 个
			
 
				+	*/
			
 
				+	dialogueIntervals := make([]float64, 0)
			
 
				+	tmpFileInfo := subparser.FileInfo{}
			
 
				+	// 现在需要进行凹坑的识别，一共由多少个，间隔多少
			
 
				+	for i := 0; i < len(srcSubDialogueList)-1; i++ {
			
 
				+		startTime, err := tmpFileInfo.ParseTime(srcSubDialogueList[i+1].StartTime)
			
 
				+		if err != nil {
			
 
				+			return false
			
 
				+		}
			
 
				+		endTime, err := tmpFileInfo.ParseTime(srcSubDialogueList[i].EndTime)
			
 
				+		if err != nil {
			
 
				+			return false
			
 
				+		}
			
 
				+		// 对话间的时间间隔
			
 
				+		dialogueIntervals = append(dialogueIntervals, my_util.Time2SecendNumber(startTime)-my_util.Time2SecendNumber(endTime))
			
 
				+	}
			
 
				+	// big
			
 
				+	for _, value := range dialogueIntervals {
			
 
				+		if kf.Big.Match(value) == true {
			
 
				+			kf.Big.NowCount++
			
 
				+		}
			
 
				+		if kf.Middle.Match(value) == true {
			
 
				+			kf.Middle.NowCount++
			
 
				+		}
			
 
				+		if kf.Small.Match(value) == true {
			
 
				+			kf.Small.NowCount++
			
 
				+		}
			
 
				+	}
			
 
				+	// 统计到的要 >= 目标的个数
			
 
				+	if kf.Big.NowCount < kf.Big.LeastCount || kf.Middle.NowCount < kf.Middle.LeastCount || kf.Small.NowCount < kf.Small.LeastCount {
			
 
				+		return false
			
 
				+	}
			
 
				+
			
 
				+	return true
			
 
				+}
			
--- a/internal/pkg/sub_helper/sub_unit.go
+++ b/internal/pkg/sub_helper/sub_unit.go
@@ -3,6 +3,7 @@ package sub_helper
 
				 import (
			
 
				 	"bufio"
			
 
				 	"fmt"
			
 
				+	"github.com/allanpk716/ChineseSubFinder/internal/pkg/frechet"
			
 
				 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
			
 
				 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/vad"
			
 
				 	"math"
			
@@ -19,6 +20,7 @@ type SubUnit struct {
 
				 	firstAdd        bool
			
 
				 	outVADBytes     []byte
			
 
				 	outVADFloats    []float64
			
 
				+	IsMatchKey      bool // 是否符合“钥匙”的要求
			
 
				 }
			
 
				 
			
 
				 func NewSubUnit() *SubUnit {
			
@@ -227,7 +229,7 @@ func (s SubUnit) GetFFMPEGCutRangeString(expandTimeRange float64) (string, strin
 
				 	}
			
 
				 
			
 
				 	return fmt.Sprintf("%d:%d:%d.%d", tmpStartTime.Hour(), tmpStartTime.Minute(), tmpStartTime.Second(), tmpStartTime.Nanosecond()/1000/1000),
			
 
				-		fmt.Sprintf("%f", s.GetTimelineRange()+expandTimeRange),
			
 
				+		fmt.Sprintf("%f", s.GetTimelineRange()+2*expandTimeRange),
			
 
				 		tmpStartTime,
			
 
				 		s.GetTimelineRange() + expandTimeRange
			
 
				 }
			
@@ -297,21 +299,50 @@ func (s SubUnit) Save2Txt(outFileFPath string) error {
 
				 	return nil
			
 
				 }
			
 
				 
			
 
				-// IsMatchKey 是否符合“钥匙”的标准
			
 
				-// features 是至少多少个“凹坑”
			
 
				-func (s SubUnit) IsMatchKey(features int) bool {
			
 
				-	nowCount := 0
			
 
				-	for _, value := range s.GetVADByteSlice() {
			
 
				-		if value == 0 {
			
 
				-			nowCount++
			
 
				-		}
			
 
				-	}
			
 
				+// GetStartVADList 获取起始时间的 VAD List
			
 
				+func (s SubUnit) GetStartVADList() []vad.VADInfo {
			
 
				 
			
 
				-	if nowCount >= features {
			
 
				-		return true
			
 
				+	outVADList := make([]vad.VADInfo, len(s.VADList))
			
 
				+	for _, value := range s.VADList {
			
 
				+		outVADList = append(outVADList, value)
			
 
				 	}
			
 
				+	return outVADList
			
 
				+}
			
 
				 
			
 
				-	return false
			
 
				+// GetFrechetPoint 获取 Frechet 曲线相似度的数据结构 List，whichOne = 0 所有，whichOne = 1 只有 Start 的点
			
 
				+func (s SubUnit) GetFrechetPoint(whichOne int) []frechet.Point {
			
 
				+
			
 
				+	outPoint := make([]frechet.Point, 0)
			
 
				+	if whichOne == 0 {
			
 
				+		// 所有点
			
 
				+		for _, info := range s.VADList {
			
 
				+			nowX := 0.0
			
 
				+			if info.Active == true {
			
 
				+				nowX = 1.0
			
 
				+			}
			
 
				+			nowY := info.Time.Seconds()
			
 
				+			outPoint = append(outPoint, frechet.Point{
			
 
				+				X: nowX,
			
 
				+				Y: nowY,
			
 
				+			})
			
 
				+		}
			
 
				+		return outPoint
			
 
				+
			
 
				+	} else {
			
 
				+		// 只有 Start 点
			
 
				+		for _, info := range s.GetStartVADList() {
			
 
				+			nowX := 0.0
			
 
				+			if info.Active == true {
			
 
				+				nowX = 1.0
			
 
				+			}
			
 
				+			nowY := info.Time.Seconds()
			
 
				+			outPoint = append(outPoint, frechet.Point{
			
 
				+				X: nowX,
			
 
				+				Y: nowY,
			
 
				+			})
			
 
				+		}
			
 
				+		return outPoint
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 const perWindows = float64(vad.FrameDuration) / 1000
			
--- a/internal/pkg/sub_timeline_fixer/fixer.go
+++ b/internal/pkg/sub_timeline_fixer/fixer.go
@@ -368,7 +368,7 @@ func (s *SubTimelineFixer) GetOffsetTimeV1(infoBase, infoSrc *subparser.FileInfo
 
				 // GetOffsetTimeV2 使用内置的字幕校正外置的字幕时间轴
			
 
				 func (s *SubTimelineFixer) GetOffsetTimeV2(infoBase, infoSrc *subparser.FileInfo, staticLineFileSavePath string, debugInfoFileSavePath string) (bool, float64, float64, error) {
			
 
				 
			
 
				-	//infoBaseSubUnitList, err := sub_helper.GetVADINfoFromSub(infoBase, 0, 10000)
			
 
				+	//infoBaseSubUnitList, err := sub_helper.GetVADINfoFromSub(infoBase, 0, 10000, bInsert)
			
 
				 	//if err != nil {
			
 
				 	//	return false, 0, 0, err
			
 
				 	//}
			
@@ -378,7 +378,7 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(infoBase, infoSrc *subparser.FileInfo
 
				 	//	return false, 0, 0, err
			
 
				 	//}
			
 
				 	//
			
 
				-	//infoSrcSubUnitList, err := sub_helper.GetVADINfoFromSub(infoSrc, 0, 10000)
			
 
				+	//infoSrcSubUnitList, err := sub_helper.GetVADINfoFromSub(infoSrc, 0, 10000, bInsert)
			
 
				 	//if err != nil {
			
 
				 	//	return false, 0, 0, err
			
 
				 	//}
			
@@ -389,24 +389,19 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(infoBase, infoSrc *subparser.FileInfo
 
				 	//}
			
 
				 
			
 
				 	// 需要拆分成多个 unit
			
 
				-	srcSubUnitList, err := sub_helper.GetVADINfoFromSub(infoSrc, FrontAndEndPer, SubUnitMaxCount, bInsert)
			
 
				+	srcSubUnitList, err := sub_helper.GetVADINfoFromSub(infoSrc, FrontAndEndPer, SubUnitMaxCount, bInsert, kf)
			
 
				 	if err != nil {
			
 
				 		return false, 0, 0, err
			
 
				 	}
			
 
				 	// 时间轴差值数组
			
 
				 	var tmpCorrelationStartDiffTime = make([]float64, 0)
			
 
				 	var CorrelationStartDiffTimeList = make(stat.Float64Slice, 0)
			
 
				+
			
 
				 	// 调试功能，开始针对对白单元进行匹配
			
 
				 	for _, srcSubUnit := range srcSubUnitList {
			
 
				 
			
 
				-		// 这里得到的若干份的 srcSubUnit 未必的合适的，需要挑选， 1、0 占比比较明显的，或者说是比较像“钥匙”的 Unit 来继续
			
 
				-		features := int(float64(len(srcSubUnit.VADList)) * KeyPer)
			
 
				-		println("-------------------")
			
 
				-		if srcSubUnit.IsMatchKey(features) == false {
			
 
				-			println("features:", features, "skip")
			
 
				+		if srcSubUnit.IsMatchKey == false {
			
 
				 			continue
			
 
				-		} else {
			
 
				-			println("features:", features)
			
 
				 		}
			
 
				 		// 得到当前这个单元推算出来需要提取的字幕时间轴范围，这个是 Base Sub 使用的提取段
			
 
				 		startTimeBaseString, subBaseLength, startTimeBaseTime, _ := srcSubUnit.GetFFMPEGCutRangeString(ExpandTimeRange)
			
@@ -425,16 +420,12 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(infoBase, infoSrc *subparser.FileInfo
 
				 			return false, 0, 0, errors.New("DetermineFileTypeFromFile == false")
			
 
				 		}
			
 
				 
			
 
				-		nowTmpBaseSubUnitList, err := sub_helper.GetVADINfoFromSub(nowTmpSubBaseFileInfo, 0, 10000, bInsert)
			
 
				+		nowTmpBaseSubUnitList, err := sub_helper.GetVADINfoFromSub(nowTmpSubBaseFileInfo, 0, 10000, bInsert, nil)
			
 
				 		if err != nil {
			
 
				 			return false, 0, 0, err
			
 
				 		}
			
 
				 		nowTmpBaseSubVADUnit := nowTmpBaseSubUnitList[0]
			
 
				 
			
 
				-		//nowTmpBaseSubVADUnit, b, f, f2, err2 := s.debugV2(err, nowTmpSubBaseFileInfo, srcSubUnit, errString, infoSrc, nowTmpSubBaseFPath)
			
 
				-		//if err2 != nil {
			
 
				-		//	return b, f, f2, err2
			
 
				-		//}
			
 
				 		// -------------------------------------------------
			
 
				 		// 开始匹配
			
 
				 		correlationTM := treemap.NewWith(utils.Float64Comparator)
			
@@ -452,24 +443,35 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(infoBase, infoSrc *subparser.FileInfo
 
				 			compareBase := nowTmpBaseSubVADUnit.GetVADFloatSlice()[windowStartIndex:windowEndIndex]
			
 
				 			correlation := calculate_curve_correlation.CalculateCurveCorrelation(compareSrc, compareBase, len(srcSubUnit.VADList))
			
 
				 			correlationTM.Put(correlation, i)
			
 
				+			//println(fmt.Sprintf("%v %v", i, correlation))
			
 
				 		}
			
 
				 		// 找到最大的数值和索引
			
 
				 		tmpMaxCorrelation, tmpMaxIndex := correlationTM.Max() // tmpMaxCorrelation
			
 
				 		if tmpMaxCorrelation == nil || tmpMaxIndex == nil {
			
 
				 			continue
			
 
				 		}
			
 
				+
			
 
				 		// CalculateCurveCorrelation 计算出来的最优解
			
 
				 		bok, nowCorrelationBaseIndexTime := nowTmpBaseSubVADUnit.GetIndexTimeNumber(tmpMaxIndex.(int), true)
			
 
				 		if bok == false {
			
 
				 			continue
			
 
				 		}
			
 
				+		// 相似度，1 为完全匹配
			
 
				 		if tmpMaxCorrelation.(float64) <= MinCorrelation {
			
 
				 			continue
			
 
				 		}
			
 
				-
			
 
				 		nowSrcRealTime := srcSubUnit.GetStartTimeNumber(true)
			
 
				 		// 时间差值
			
 
				 		TimeDiffStartCorrelation := nowCorrelationBaseIndexTime + my_util.Time2SecendNumber(startTimeBaseTime) - nowSrcRealTime
			
 
				+		// 挑匹配时间非常合适的段落出来，这个时间需要针对调试的文件进行调整
			
 
				+		//if TimeDiffStartCorrelation < -6.5 || TimeDiffStartCorrelation > -6.0 {
			
 
				+		//	continue
			
 
				+		//}
			
 
				+		// 输出调试文件
			
 
				+		//b, f, f2, err2 := s.debugV2(err, nowTmpSubBaseFileInfo, srcSubUnit, errString, infoSrc, nowTmpSubBaseFPath)
			
 
				+		//if err2 != nil {
			
 
				+		//	return b, f, f2, err2
			
 
				+		//}
			
 
				 
			
 
				 		println(fmt.Sprintf("Correlation Index:%v Corre: %v DiffTime %v", tmpMaxIndex, tmpMaxCorrelation, TimeDiffStartCorrelation))
			
 
				 		println("-------------------")
			
@@ -479,19 +481,18 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(infoBase, infoSrc *subparser.FileInfo
 
				 	}
			
 
				 
			
 
				 	outCorrelationFixResult := s.calcMeanAndSD(CorrelationStartDiffTimeList, tmpCorrelationStartDiffTime)
			
 
				-
			
 
				-	println(fmt.Sprintf("Old Mean: %v SD: %v Per: %v", outCorrelationFixResult.OldMean, outCorrelationFixResult.OldSD, outCorrelationFixResult.Per))
			
 
				-	println(fmt.Sprintf("New Mean: %v SD: %v Per: %v", outCorrelationFixResult.NewMean, outCorrelationFixResult.NewSD, outCorrelationFixResult.Per))
			
 
				+	println(fmt.Sprintf("Correlation Old Mean: %v SD: %v Per: %v", outCorrelationFixResult.OldMean, outCorrelationFixResult.OldSD, outCorrelationFixResult.Per))
			
 
				+	println(fmt.Sprintf("Correlation New Mean: %v SD: %v Per: %v", outCorrelationFixResult.NewMean, outCorrelationFixResult.NewSD, outCorrelationFixResult.Per))
			
 
				 
			
 
				 	return true, outCorrelationFixResult.NewMean, outCorrelationFixResult.NewSD, nil
			
 
				 }
			
 
				 
			
 
				 // debugV2 V2 版本的调试信息输出
			
 
				-func (s *SubTimelineFixer) debugV2(err error, nowTmpSubBaseFileInfo *subparser.FileInfo, srcSubUnit sub_helper.SubUnit, errString string, infoSrc *subparser.FileInfo, nowTmpSubBaseFPath string) (sub_helper.SubUnit, bool, float64, float64, error) {
			
 
				+func (s *SubTimelineFixer) debugV2(err error, nowTmpSubBaseFileInfo *subparser.FileInfo, srcSubUnit sub_helper.SubUnit, errString string, infoSrc *subparser.FileInfo, nowTmpSubBaseFPath string) (bool, float64, float64, error) {
			
 
				 	// 这里比较特殊，因为读取的字幕文件是单独切割出来的，所以默认是有偏移的们需要使用不同的函数，把偏移算进去
			
 
				-	nowTmpBaseSubUnitList, err := sub_helper.GetVADINfoFromSub(nowTmpSubBaseFileInfo, 0, 10000, bInsert)
			
 
				+	nowTmpBaseSubUnitList, err := sub_helper.GetVADINfoFromSub(nowTmpSubBaseFileInfo, 0, 10000, bInsert, nil)
			
 
				 	if err != nil {
			
 
				-		return sub_helper.SubUnit{}, false, 0, 0, err
			
 
				+		return false, 0, 0, err
			
 
				 	}
			
 
				 	nowTmpBaseSubVADUnit := nowTmpBaseSubUnitList[0]
			
 
				 
			
@@ -500,7 +501,7 @@ func (s *SubTimelineFixer) debugV2(err error, nowTmpSubBaseFileInfo *subparser.F
 
				 	nowTmpSubSrcFPath, errString, err := s.ffmpegHelper.ExportSubArgsByTimeRange(infoSrc.FileFullPath, "src", startTimeSrcString, subSrcLength)
			
 
				 	if err != nil {
			
 
				 		log_helper.GetLogger().Errorln("ExportSubArgsByTimeRange src", errString, err)
			
 
				-		return sub_helper.SubUnit{}, false, 0, 0, err
			
 
				+		return false, 0, 0, err
			
 
				 	}
			
 
				 
			
 
				 	var nowBaseSubTimeLineData = make([]opts.LineData, 0)
			
@@ -530,7 +531,7 @@ func (s *SubTimelineFixer) debugV2(err error, nowTmpSubBaseFileInfo *subparser.F
 
				 	}
			
 
				 	err = SaveStaticLineV2("Sub src", srcSubVADStaticLineFullPath, nowSrcSubXAxis, nowSrcSubTimeLineData)
			
 
				 	if err != nil {
			
 
				-		return sub_helper.SubUnit{}, false, 0, 0, err
			
 
				+		return false, 0, 0, err
			
 
				 	}
			
 
				 	// -------------------------------------------------
			
 
				 	// base 导出中间文件缓存
			
@@ -541,9 +542,9 @@ func (s *SubTimelineFixer) debugV2(err error, nowTmpSubBaseFileInfo *subparser.F
 
				 	}
			
 
				 	err = SaveStaticLineV2("Sub base", baseSubVADStaticLineFullPath, nowBaseSubXAxis, nowBaseSubTimeLineData)
			
 
				 	if err != nil {
			
 
				-		return sub_helper.SubUnit{}, false, 0, 0, err
			
 
				+		return false, 0, 0, err
			
 
				 	}
			
 
				-	return nowTmpBaseSubVADUnit, false, 0, 0, nil
			
 
				+	return false, 0, 0, nil
			
 
				 }
			
 
				 
			
 
				 func (s *SubTimelineFixer) calcMeanAndSD(startDiffTimeList stat.Float64Slice, tmpStartDiffTime []float64) FixResult {
			
@@ -554,6 +555,16 @@ func (s *SubTimelineFixer) calcMeanAndSD(startDiffTimeList stat.Float64Slice, tm
 
				 	newSd := -1.0
			
 
				 	per := 1.0
			
 
				 
			
 
				+	if len(tmpStartDiffTime) < 3 {
			
 
				+		return FixResult{
			
 
				+			oldMean,
			
 
				+			oldSd,
			
 
				+			newMean,
			
 
				+			newSd,
			
 
				+			per,
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	// 如果 SD 较大的时候才需要剔除
			
 
				 	if oldSd > 0.1 {
			
 
				 		var outliersMap = make(map[float64]int, 0)
			
@@ -634,7 +645,7 @@ func (s *SubTimelineFixer) GetOffsetTimeV3(audioInfo vad.AudioInfo, infoSrc *sub
 
				 		1. 抽取字幕的时间片段的时候，暂定，前 15% 和后 15% 要避开，前奏、主题曲、结尾曲
			
 
				 		2. 将整个字幕，抽取连续 5 句对话为一个单元，提取时间片段信息
			
 
				 	*/
			
 
				-	subUnitList, err := sub_helper.GetVADINfoFromSub(infoSrc, FrontAndEndPer, SubUnitMaxCount, bInsert)
			
 
				+	subUnitList, err := sub_helper.GetVADINfoFromSub(infoSrc, FrontAndEndPer, SubUnitMaxCount, bInsert, nil)
			
 
				 	if err != nil {
			
 
				 		return false, 0, 0, err
			
 
				 	}
			
@@ -720,9 +731,16 @@ func (s *SubTimelineFixer) GetOffsetTimeV3(audioInfo vad.AudioInfo, infoSrc *sub
 
				 
			
 
				 const FixMask = "-fix"
			
 
				 const bInsert = true        // 是否插入点
			
 
				+const whichOne = 0          // 所有，whichOne = 1 只有 Start 的点
			
 
				 const FrontAndEndPer = 0.15 // 前百分之 15 和后百分之 15 都不进行识别
			
 
				-const SubUnitMaxCount = 10  // 一个 Sub单元有五句对白
			
 
				+const SubUnitMaxCount = 50  // 一个 Sub单元有五句对白
			
 
				 const ExpandTimeRange = 50  // 从字幕的时间轴片段需要向前和向后多匹配一部分的音频，这里定义的就是这个 range 以分钟为单位， 正负 60 秒
			
 
				 const KeyPer = 0.1          // 钥匙凹坑的占比
			
 
				-const MinCorrelation = 0.85 // 最低的匹配度
			
 
				+const MinCorrelation = 0.50 // 最低的匹配度
			
 
				 const DTW_Radius = 1000     // DTW 半径
			
 
				+
			
 
				// kf holds the "key" feature thresholds that GetOffsetTimeV2 passes to
				// sub_helper.GetVADINfoFromSub for the source subtitle. The arguments to
				// NewKeyFeatures are the big, middle and small features, in that order, and
				// each NewFeature call takes (BigThanTime, LessThanTime, LeastCount).
				// NOTE(review): the bounds are presumably dialogue gaps in seconds — confirm.
				var kf = sub_helper.NewKeyFeatures(
					// Big: gaps above 10.0; 999999 serves as a practically unbounded upper limit.
					sub_helper.NewFeature(10.0, 999999, 2),
					// Middle: gaps between 3.0 and 10.0, at least 5 of them.
					sub_helper.NewFeature(3.0, 10.0, 5),
					// Small: gaps between 1.0 and 3.0, at least 5 of them.
					sub_helper.NewFeature(1.0, 3.0, 5),
				)