Преглед изворног кода

此路不通,需要重写逻辑,考虑直接以一句对白开始作为分段时间轴匹配起始

Signed-off-by: 716 <[email protected]>
716 пре 3 године
родитељ
комит
70112560cd

+ 54 - 23
internal/pkg/sub_timeline_fixer/fix_result.go

@@ -8,37 +8,45 @@ import (
 )
 
 type FixResult struct {
-	StartVADIndex int
-	EndVADIndex   int
-	OldMean       float64
-	OldSD         float64
-	NewMean       float64
-	NewSD         float64
-	Per           float64   // 占比
-	OP            OverParts // 越接处信息
+	StartVADIndex    int
+	EndVADIndex      int
+	OldMean          float64
+	OldSD            float64
+	NewMean          float64
+	NewSD            float64
+	Per              float64           // 占比
+	IsOverParts      bool              // 是否有越接处
+	MatchWindowInfos []MatchWindowInfo // 需要从 MatchInfo 的 IndexMatchWindowInfoMap 中按顺序提取
 }
 
 func (f FixResult) InRange(baseTimeDouble, timeStartDouble float64) (bool, float64) {
 
-	startVad2Second := f.StartVADIndex / 100
-	endVad2Second := f.EndVADIndex / 100
-
-	if baseTimeDouble+float64(startVad2Second) <= timeStartDouble &&
-		timeStartDouble <= baseTimeDouble+float64(endVad2Second) {
+	startVad2Second := float64(f.StartVADIndex) / 100.0
+	// 如果有越接处,因为是滑动窗体的原因,所以这个里的结束 index 并不是 FixResult 的,应该是具体的一个 MatchWindowInfo 的 EndIndex
+	endVad2Second := float64(f.EndVADIndex) / 100.0
+	if f.IsOverParts == true {
+		endVad2Second = float64(f.MatchWindowInfos[0].EndVADIndex) / 100.0
+	}
+	if baseTimeDouble+startVad2Second <= timeStartDouble &&
+		timeStartDouble <= baseTimeDouble+endVad2Second {
 		// 在当前的范围内
-		if f.OP.Has == true {
+		if f.IsOverParts == true {
 			// 这里需要特殊处理,因为这个越接处,还需要二分
-			if timeStartDouble <= baseTimeDouble+float64(startVad2Second)+f.OP.XLen/100 {
-				return true, f.OP.XMean
-			} else {
-				return true, f.OP.YMean
+			for i := 0; i < len(f.MatchWindowInfos); i++ {
+				b, newMean := f.chooseWhichWindow2Process(i, baseTimeDouble, timeStartDouble, startVad2Second)
+				if b == true {
+					return b, newMean
+				}
 			}
+
+			return true, f.NewMean
+
 		} else {
 			// 无需特殊处理
 			return true, f.NewMean
 
 		}
-	} else if timeStartDouble < baseTimeDouble+float64(startVad2Second) {
+	} else if timeStartDouble < baseTimeDouble+startVad2Second {
 		// 小于当前的范围
 		return true, f.NewMean
 	} else {
@@ -47,11 +55,34 @@ func (f FixResult) InRange(baseTimeDouble, timeStartDouble float64) (bool, float
 	}
 }
 
+func (f FixResult) chooseWhichWindow2Process(index int, baseTimeDouble float64, timeStartDouble float64, startVad2Second float64) (bool, float64) {
+
+	if f.MatchWindowInfos[index].OP.XLen <= 0 ||
+		f.MatchWindowInfos[index].OP.YLen <= 0 {
+
+		return false, 0
+	}
+
+	if timeStartDouble <= baseTimeDouble+startVad2Second+f.MatchWindowInfos[index].OP.XLen/100 {
+		return true, f.MatchWindowInfos[index].OP.XMean
+	} else {
+		return true, f.MatchWindowInfos[index].OP.YMean
+	}
+}
+
 // MatchInfo 匹配的信息
 type MatchInfo struct {
-	StartDiffTimeList   []float64
-	StartDiffTimeMap    *treemap.Map
-	StartDiffTimeListEx stat.Float64Slice
+	IndexMatchWindowInfoMap map[int]MatchWindowInfo // 匹配列表的顺序列表
+	StartDiffTimeList       []float64
+	StartDiffTimeMap        *treemap.Map
+	StartDiffTimeListEx     stat.Float64Slice
+}
+
+type MatchWindowInfo struct {
+	TimeDiffStartCorrelation float64 // 对白开始的时间偏移
+	StartVADIndex            int
+	EndVADIndex              int
+	OP                       OverParts // 越接处信息
 }
 
 // WindowInfo 滑动窗体信息
@@ -68,6 +99,7 @@ type WindowInfo struct {
 
 // InputData 修复函数传入多线程的数据结构
 type InputData struct {
+	Index            int                // 为了让并发处理的数据能够按顺序重新排序
 	BaseUnit         sub_helper.SubUnit // 基准 VAD
 	BaseAudioVADList []float64          // 基准 VAD
 	SrcUnit          sub_helper.SubUnit // 需要匹配的 VAD
@@ -86,7 +118,6 @@ type SubVADBlockInfo struct {
 	OverParts 总长度 D = XLen + YLen
 */
 type OverParts struct {
-	Has   bool    // 是否有越接处
 	XLen  float64 // 分段处长度
 	YLen  float64 // 分段处长度
 	XMean float64 // X 段的 Mean 值

+ 59 - 28
internal/pkg/sub_timeline_fixer/fixer.go

@@ -470,7 +470,7 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(baseUnit, srcUnit *sub_helper.SubUnit
 	// 滑动窗口的长度
 	srcWindowLen := int(float64(srcVADLen) * s.FixerConfig.V2_WindowMatchPer)
 	// 划分为 4 个区域,每一个部分的长度
-	const parts = 20
+	const parts = 10
 	perPartLen := srcVADLen / parts
 	matchedInfos := make([]MatchInfo, 0)
 
@@ -517,6 +517,7 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(baseUnit, srcUnit *sub_helper.SubUnit
 
 	fixedResults := make([]FixResult, 0)
 	sdLessCount := 0
+	// 这里的是 matchedInfos 是顺序的
 	for index, matchInfo := range matchedInfos {
 
 		log_helper.GetLogger().Infoln(index, "------------------------------------")
@@ -541,6 +542,19 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(baseUnit, srcUnit *sub_helper.SubUnit
 	if perLess < 0.7 {
 		return false, nil, nil
 	}
+
+	// matchedInfos 与 fixedResults 是对等的关系,fixedResults 中是计算过 Mean 的值,而 matchedInfos 有原始的值
+	for i, info := range matchedInfos {
+		for j := 0; j < len(info.IndexMatchWindowInfoMap); j++ {
+
+			value, bFound := info.IndexMatchWindowInfoMap[j]
+			if bFound == false {
+				continue
+			}
+
+			fixedResults[i].MatchWindowInfos = append(fixedResults[i].MatchWindowInfos, value)
+		}
+	}
 	/*
 		如果 outCorrelationFixResult 的 SD > 0.1,那么大概率这个时间轴的值匹配的有问题,需要向左或者向右找一个值进行继承
 		-4 0.001
@@ -560,11 +574,10 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(baseUnit, srcUnit *sub_helper.SubUnit
 	for index, fixedResult := range fixedResults {
 		// SD 大于 0.1 或者是 当前的 NewMean 与上一个点的 NewMean 差值大于 0.3
 		if fixedResult.NewSD >= 0.1 || (index > 1 && math.Abs(fixedResult.NewMean-fixedResults[index-1].NewMean) > 0.3) {
-			bok, newMean, newSD, op := s.fixOnePart(index, fixedResults, perPartLen)
+			bok, newMean, newSD := s.fixOnePart(index, fixedResults)
 			if bok == true {
 				fixedResults[index].NewMean = newMean
 				fixedResults[index].NewSD = newSD
-				fixedResults[index].OP = op
 			}
 		}
 	}
@@ -573,8 +586,8 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(baseUnit, srcUnit *sub_helper.SubUnit
 }
 
 // fixOnePart 轻微地跳动可以根据左或者右去微调
-func (s SubTimelineFixer) fixOnePart(startIndex int, fixedResults []FixResult, perPartLen int) (bool, float64, float64, OverParts) {
-	op := OverParts{}
+func (s SubTimelineFixer) fixOnePart(startIndex int, fixedResults []FixResult) (bool, float64, float64) {
+
 	/*
 		找到这样情况的进行修正
 	*/
@@ -584,7 +597,7 @@ func (s SubTimelineFixer) fixOnePart(startIndex int, fixedResults []FixResult, p
 		// 如果左边的这个值,与当前值超过了 0.3 的绝对差值,那么是不适合的,就需要往右找
 		if math.Abs(fixedResults[startIndex-1].NewMean-fixedResults[startIndex].NewMean) < 0.3 {
 			// 差值在接受的范围内,那么就使用这个左边的值去校正当前的值
-			return true, fixedResults[startIndex-1].NewMean, fixedResults[startIndex-1].NewSD, OverParts{}
+			return true, fixedResults[startIndex-1].NewMean, fixedResults[startIndex-1].NewSD
 		}
 	}
 
@@ -596,7 +609,7 @@ func (s SubTimelineFixer) fixOnePart(startIndex int, fixedResults []FixResult, p
 			-146.85	243.83
 		*/
 		if startIndex-1 >= 0 {
-			return true, fixedResults[startIndex-1].NewMean, fixedResults[startIndex-1].NewSD, OverParts{}
+			return true, fixedResults[startIndex-1].NewMean, fixedResults[startIndex-1].NewSD
 		}
 	} else {
 		// SD 不是很大,可能就是正常的字幕分段的时间轴偏移的 越接处 !
@@ -620,7 +633,7 @@ func (s SubTimelineFixer) fixOnePart(startIndex int, fixedResults []FixResult, p
 		} else if startIndex-2 >= 0 {
 			left3Mean = float64(fixedResults[startIndex-1].NewMean+fixedResults[startIndex-2].NewMean) / 2.0
 		} else {
-			return false, 0, 0, OverParts{}
+			return false, 0, 0
 		}
 		// 向右,三个或者三个位置
 		if startIndex+3 >= 0 {
@@ -628,22 +641,30 @@ func (s SubTimelineFixer) fixOnePart(startIndex int, fixedResults []FixResult, p
 		} else if startIndex+2 >= 0 {
 			right3Mean = float64(fixedResults[startIndex+1].NewMean+fixedResults[startIndex+2].NewMean) / 2.0
 		} else {
-			return false, 0, 0, OverParts{}
+			return false, 0, 0
 		}
-		// xLen 计算公式见推到公式截图
-		xLen := (fixedResults[startIndex].NewMean*float64(perPartLen) - right3Mean*float64(perPartLen)) / (left3Mean - right3Mean)
-		yLen := float64(perPartLen) - xLen
+		// 将这个匹配的段中的子分段的时间轴偏移都进行一次计算,推算出到底是怎么样的配比可以得到这样的偏移结论
+		for i, info := range fixedResults[startIndex].MatchWindowInfos {
+
+			perPartLen := info.EndVADIndex - info.StartVADIndex
+			op := OverParts{}
+			// xLen 计算公式见推到公式截图
+			xLen := (info.TimeDiffStartCorrelation*float64(perPartLen) - right3Mean*float64(perPartLen)) / (left3Mean - right3Mean)
+			yLen := float64(perPartLen) - xLen
 
-		op.Has = true
-		op.XLen = xLen
-		op.YLen = yLen
-		op.XMean = left3Mean
-		op.YMean = right3Mean
+			op.XLen = xLen
+			op.YLen = yLen
+			op.XMean = left3Mean
+			op.YMean = right3Mean
+
+			fixedResults[startIndex].IsOverParts = true
+			fixedResults[startIndex].MatchWindowInfos[i].OP = op
+		}
 
-		return true, fixedResults[startIndex+1].NewMean, fixedResults[startIndex+1].NewSD, op
+		return true, fixedResults[startIndex+1].NewMean, fixedResults[startIndex+1].NewSD
 	}
 
-	return false, 0, 0, OverParts{}
+	return false, 0, 0
 }
 
 // slidingWindowProcessor 滑动窗口计算时间轴偏移
@@ -662,9 +683,10 @@ func (s *SubTimelineFixer) slidingWindowProcessor(windowInfo *WindowInfo) (*Matc
 	}
 	// -------------------------------------------------
 	outMatchInfo := MatchInfo{
-		StartDiffTimeList:   make([]float64, 0),
-		StartDiffTimeMap:    treemap.NewWith(utils.Float64Comparator),
-		StartDiffTimeListEx: make(stat.Float64Slice, 0),
+		IndexMatchWindowInfoMap: make(map[int]MatchWindowInfo, 0),
+		StartDiffTimeList:       make([]float64, 0),
+		StartDiffTimeMap:        treemap.NewWith(utils.Float64Comparator),
+		StartDiffTimeListEx:     make(stat.Float64Slice, 0),
 	}
 	fixFunc := func(i interface{}) error {
 		inData := i.(InputData)
@@ -676,13 +698,14 @@ func (s *SubTimelineFixer) slidingWindowProcessor(windowInfo *WindowInfo) (*Matc
 		var nowBaseStartTime = 0.0
 		var offsetIndex = 0
 		var score = 0.0
+		srcMaxLen := 0
 		// 图解,参考 Step 3
 		if bUseSubOrAudioAsBase == false {
 			// 使用 音频 来进行匹配
 			// 去掉头和尾,具体百分之多少,见 V2_FrontAndEndPerBase
 			audioCutLen := int(float64(len(inData.BaseAudioVADList)) * s.FixerConfig.V2_FrontAndEndPerBase)
 
-			srcMaxLen := windowInfo.SrcWindowLen + inData.OffsetIndex
+			srcMaxLen = windowInfo.SrcWindowLen + inData.OffsetIndex
 			if srcMaxLen >= len(inData.SrcUnit.GetVADFloatSlice()) {
 				srcMaxLen = len(inData.SrcUnit.GetVADFloatSlice()) - 1
 			}
@@ -697,7 +720,7 @@ func (s *SubTimelineFixer) slidingWindowProcessor(windowInfo *WindowInfo) (*Matc
 		} else {
 			// 使用 字幕 来进行匹配
 
-			srcMaxLen := inData.OffsetIndex + windowInfo.SrcWindowLen
+			srcMaxLen = inData.OffsetIndex + windowInfo.SrcWindowLen
 			if srcMaxLen >= len(inData.SrcUnit.GetVADFloatSlice()) {
 				srcMaxLen = len(inData.SrcUnit.GetVADFloatSlice()) - 1
 			}
@@ -723,6 +746,10 @@ func (s *SubTimelineFixer) slidingWindowProcessor(windowInfo *WindowInfo) (*Matc
 			"score:", fmt.Sprintf("%v", score))
 
 		mutexFixV2.Lock()
+		// 这里的未必的顺序的,所以才有 IndexMatchWindowInfoMap 的存在的意义
+		outMatchInfo.IndexMatchWindowInfoMap[inData.Index] = MatchWindowInfo{TimeDiffStartCorrelation: TimeDiffStartCorrelation,
+			StartVADIndex: inData.OffsetIndex,
+			EndVADIndex:   srcMaxLen}
 		outMatchInfo.StartDiffTimeList = append(outMatchInfo.StartDiffTimeList, TimeDiffStartCorrelation)
 		outMatchInfo.StartDiffTimeListEx = append(outMatchInfo.StartDiffTimeListEx, TimeDiffStartCorrelation)
 		outMatchInfo.StartDiffTimeMap.Put(score, windowInfo.MatchedTimes)
@@ -770,15 +797,16 @@ func (s *SubTimelineFixer) slidingWindowProcessor(windowInfo *WindowInfo) (*Matc
 	defer antPool.Release()
 	// -------------------------------------------------
 	wg := sync.WaitGroup{}
+	index := 0
 	for i := windowInfo.SrcSlideStartIndex; i < windowInfo.SrcSlideStartIndex+windowInfo.SrcSlideLen-1; {
 		wg.Add(1)
 
 		if bUseSubOrAudioAsBase == true {
 			// 使用字幕
-			err = antPool.Invoke(InputData{BaseUnit: *windowInfo.BaseUnit, SrcUnit: *windowInfo.SrcUnit, OffsetIndex: i, Wg: &wg})
+			err = antPool.Invoke(InputData{Index: index, BaseUnit: *windowInfo.BaseUnit, SrcUnit: *windowInfo.SrcUnit, OffsetIndex: i, Wg: &wg})
 		} else {
 			// 使用音频
-			err = antPool.Invoke(InputData{BaseAudioVADList: windowInfo.BaseAudioFloatList, SrcUnit: *windowInfo.SrcUnit, OffsetIndex: i, Wg: &wg})
+			err = antPool.Invoke(InputData{Index: index, BaseAudioVADList: windowInfo.BaseAudioFloatList, SrcUnit: *windowInfo.SrcUnit, OffsetIndex: i, Wg: &wg})
 		}
 
 		if err != nil {
@@ -786,6 +814,7 @@ func (s *SubTimelineFixer) slidingWindowProcessor(windowInfo *WindowInfo) (*Matc
 		}
 
 		i += windowInfo.OneStep
+		index++
 	}
 	wg.Wait()
 
@@ -809,7 +838,8 @@ func (s *SubTimelineFixer) calcMeanAndSD(startDiffTimeList stat.Float64Slice, tm
 			oldMean,
 			oldSd,
 			per,
-			OverParts{},
+			false,
+			make([]MatchWindowInfo, 0),
 		}
 	}
 
@@ -858,7 +888,8 @@ func (s *SubTimelineFixer) calcMeanAndSD(startDiffTimeList stat.Float64Slice, tm
 		newMean,
 		newSd,
 		per,
-		OverParts{},
+		false,
+		make([]MatchWindowInfo, 0),
 	}
 }
 

+ 6 - 3
internal/pkg/sub_timeline_fixer/fixer_test.go

@@ -815,8 +815,11 @@ func TestGetOffsetTimeV2_MoreTest(t *testing.T) {
 				t.Fatal("GetOffsetTimeV2 return false")
 			}
 
-			debug_view.SaveDebugChart(*baseUnitNew, tt.name+" -- baseUnitNew", "baseUnitNew")
-			debug_view.SaveDebugChart(*srcUnitNew, tt.name+" -- srcUnitNew", "srcUnitNew")
+			//debug_view.SaveDebugChart(*baseUnitNew, tt.name+" -- baseUnitNew", "baseUnitNew")
+			//debug_view.SaveDebugChart(*srcUnitNew, tt.name+" -- srcUnitNew", "srcUnitNew")
+
+			println("baseUnitNew:", fmt.Sprintf("%f", baseUnitNew.GetStartTimeNumber(true)))
+			println("srcUnitNew:", fmt.Sprintf("%f", srcUnitNew.GetStartTimeNumber(true)))
 
 			_, err = timelineFixer.FixSubTimelineByFixResults(infoSrc, srcUnitNew, fixedResults, tt.args.srcSubFile+FixMask+infoBase.Ext)
 			if err != nil {
@@ -839,7 +842,7 @@ var timelineFixer = NewSubTimelineFixer(sub_timeline_fiexer.SubTimelineFixerConf
 	V2_FrontAndEndPerSrc:        0.2,
 	V2_WindowMatchPer:           0.2,
 	V2_CompareParts:             3,
-	V2_FixThreads:               2,
+	V2_FixThreads:               1,
 	V2_MaxStartTimeDiffSD:       0.1,
 	V2_MinOffset:                0.2,
 })