Browse Source

fixer.go V2 版本修正函数,除了自身字幕校正还遇到问题,其他调试完成。

Signed-off-by: allan716 <[email protected]>
allan716 4 years ago
parent
commit
81f22dd38f

+ 34 - 34
internal/pkg/sub_helper/sub_helper.go

@@ -462,12 +462,9 @@ func GetVADInfoFeatureFromSubNeedOffsetTimeWillInsert(fileInfo *subparser.FileIn
 /*
 	GetVADInfoFeatureFromSubNew 将 Sub 文件转换为 VAD List 信息
 */
-func GetVADInfoFeatureFromSubNew(fileInfo *subparser.FileInfo, SkipFrontAndEndPer float64, pieces int) ([]SubUnit, error) {
-	// 至少分为一份
-	if pieces <= 0 {
-		pieces = 1
-	}
-	outSubUnits := make([]SubUnit, 0)
+func GetVADInfoFeatureFromSubNew(fileInfo *subparser.FileInfo, SkipFrontAndEndPer float64) (*SubUnit, error) {
+
+	outSubUnits := NewSubUnit()
 	if len(fileInfo.DialoguesEx) <= 0 {
 		return nil, errors.New("GetVADInfoFeatureFromSubNew fileInfo Dialogue Length is 0")
 	}
@@ -492,8 +489,8 @@ func GetVADInfoFeatureFromSubNew(fileInfo *subparser.FileInfo, SkipFrontAndEndPe
 	}
 	// 计算出需要截取的片段,起始和结束
 	skipLen := int(float64(vadLen) * SkipFrontAndEndPer)
-	skipStartIndex := int(subStartTimeFloor10ms) + skipLen
-	skipEndIndex := skipStartIndex + (vadLen - 2*skipLen)
+	skipStartIndex := skipLen
+	skipEndIndex := vadLen - skipLen
 	// 现在需要从 fileInfo 的每一句对白也就对应一段连续的 VAD active = true 来进行改写,记得向下取整
 	for index, dialogueEx := range fileInfo.DialoguesEx {
 
@@ -518,10 +515,15 @@ func GetVADInfoFeatureFromSubNew(fileInfo *subparser.FileInfo, SkipFrontAndEndPe
 		// 得到一句对白的时长
 		changeVADStartIndex := int(oneDialogueStartTimeFloor * 100)
 		changeVADEndIndex := int(oneDialogueEndTimeFloor * 100)
-		// 跳过整体的前后百分比
-		if changeVADStartIndex < skipStartIndex || changeVADEndIndex > skipEndIndex {
+		// 不能超过 最后一句话的时长
+		if changeVADStartIndex > int(subEndTimeFloor*100) {
+			continue
+		}
+		// 也不能比起始的第一句话时间轴更低
+		if changeVADStartIndex < int(subStartTimeFloor10ms) {
 			continue
 		}
+
 		// 如果上一个对白的最后一个 OffsetIndex 链接着当前这一句的索引的 VAD 信息 active 是 true 就设置为 false
 		lastDialogueEndIndex := changeVADStartIndex - int(subStartTimeFloor10ms) - 1
 		if lastDialogueEndIndex >= 0 {
@@ -530,33 +532,31 @@ func GetVADInfoFeatureFromSubNew(fileInfo *subparser.FileInfo, SkipFrontAndEndPe
 			}
 		}
 		// 调整之前做好的整体 VAD 的信息,符合 VAD active = true
-		for i := changeVADStartIndex - int(subStartTimeFloor10ms); i < changeVADEndIndex-int(subStartTimeFloor10ms); i++ {
+		changerStartIndex := changeVADStartIndex - int(subStartTimeFloor10ms)
+		if changerStartIndex < 0 {
+			continue
+		}
+		changerEndIndex := changeVADEndIndex - int(subStartTimeFloor10ms) - 1
+		if changerEndIndex < 0 {
+			continue
+		}
+		for i := changerStartIndex; i < changerEndIndex; i++ {
 			subVADs[i].Active = true
 		}
 	}
-	// 整体的 VAD 信息构建完了,现在需要进行切割,分成多份
-	// 需要根据去头去尾,调整整体的总长度再进行多分的拆分
-	afterCutVADLen := vadLen - 2*skipLen
-	onePartLen := afterCutVADLen / pieces
-	// 余下的不要了,暂定
-	//yu := vadLen % pieces
-	for i := 0; i < pieces; i++ {
-		tmpSubUnit := NewSubUnit()
-		// 截取出来当前这一段
-		tmpVADList := subVADs[skipStartIndex+i*onePartLen-int(subStartTimeFloor10ms) : skipStartIndex+i*onePartLen+onePartLen-int(subStartTimeFloor10ms)]
-		tmpSubUnit.VADList = tmpVADList
-
-		tmpStartTime := time.Time{}
-		tmpStartTime = tmpStartTime.Add(tmpVADList[0].Time)
-		tmpEndTime := time.Time{}
-		tmpEndTime = tmpEndTime.Add(tmpVADList[len(tmpVADList)-1].Time)
-
-		tmpSubUnit.SetBaseTime(tmpStartTime)
-		tmpSubUnit.SetOffsetStartTime(tmpStartTime)
-		tmpSubUnit.SetOffsetEndTime(tmpEndTime)
-
-		outSubUnits = append(outSubUnits, *tmpSubUnit)
-	}
+
+	// 截取出来当前这一段
+	tmpVADList := subVADs[skipStartIndex:skipEndIndex]
+	outSubUnits.VADList = tmpVADList
+
+	tmpStartTime := time.Time{}
+	tmpStartTime = tmpStartTime.Add(tmpVADList[0].Time)
+	tmpEndTime := time.Time{}
+	tmpEndTime = tmpEndTime.Add(tmpVADList[len(tmpVADList)-1].Time)
+
+	outSubUnits.SetBaseTime(tmpStartTime)
+	outSubUnits.SetOffsetStartTime(tmpStartTime)
+	outSubUnits.SetOffsetEndTime(tmpEndTime)
 
 	return outSubUnits, nil
 }

+ 2 - 7
internal/pkg/sub_helper/sub_helper_test.go

@@ -53,16 +53,14 @@ func TestGetVADInfosFromSub(t *testing.T) {
 			len(infoBase.DialoguesEx), len(infoSrc.DialoguesEx)))
 	}
 
-	baseSubUnits, err := GetVADInfoFeatureFromSubNew(infoBase, FrontAndEndPerBase, 1)
+	baseSubUnit, err := GetVADInfoFeatureFromSubNew(infoBase, FrontAndEndPerBase)
 	if err != nil {
 		t.Fatal(err)
 	}
-	baseSubUnit := baseSubUnits[0]
-	srcSubUnits, err := GetVADInfoFeatureFromSubNew(infoSrc, FrontAndEndPerBase, 1)
+	srcSubUnit, err := GetVADInfoFeatureFromSubNew(infoSrc, FrontAndEndPerBase)
 	if err != nil {
 		t.Fatal(err)
 	}
-	srcSubUnit := srcSubUnits[0]
 	if len(baseSubUnit.VADList) != len(srcSubUnit.VADList) {
 		t.Fatal(fmt.Sprintf("info Base And Src Parse Error, infoBase.VADList Len = %v, infoSrc.VADList Len = %v",
 			len(baseSubUnit.VADList), len(srcSubUnit.VADList)))
@@ -73,9 +71,6 @@ func TestGetVADInfosFromSub(t *testing.T) {
 			println(fmt.Sprintf("base src VADList i=%v, not the same", i))
 		}
 	}
-
-	println(len(baseSubUnits))
-	println(len(srcSubUnits))
 }
 
 const FrontAndEndPerBase = 0

+ 2 - 2
internal/pkg/sub_timeline_fixer/fixer.go

@@ -523,7 +523,7 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(baseUnit, srcUnit *sub_helper.SubUnit
 	}
 	wg.Wait()
 	// 这里可能遇到匹配的时候没有能够执行够 CompareParts 次,有可能是负数跳过或者时间转换失败导致,前者为主(可能是这两个就是一个东西的时候,或者说没有时间轴偏移的时候)
-	if insertIndex < CompareParts {
+	if insertIndex < CompareParts/2 {
 		return false, 0, 0, nil
 	}
 	outCorrelationFixResult := s.calcMeanAndSD(tmpStartDiffTimeListEx, tmpStartDiffTimeList)
@@ -605,7 +605,7 @@ const SubOneUnitProcessTimeOut = 60 * 5 * time.Second // 字幕时间轴校正
 
 // 字幕匹配字幕 GetVADInfoFeatureFromSubNew
 const FrontAndEndPerBase = 0.15 // 前百分之 15 和后百分之 15 都不进行识别
-const FrontAndEndPerSrc = 0.20  // 前百分之 20 和后百分之 20 都不进行识别
+const FrontAndEndPerSrc = 0.0   // Src 不再跳过前后部分(原先为前百分之 20 和后百分之 20 都不进行识别)
 const MatchPer = 0.7
 const CompareParts = 5
 const FixThreads = 3 // 字幕校正的并发线程

+ 8 - 11
internal/pkg/sub_timeline_fixer/fixer_test.go

@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/ass"
 	"github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/srt"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg/debug_view"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_helper"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub"
@@ -608,11 +609,10 @@ func TestGetOffsetTimeV2_BaseSub(t *testing.T) {
 			//	t.Fatal(err)
 			//}
 			//baseUnitOld := baseUnitListOld[0]
-			baseUnitListNew, err := sub_helper.GetVADInfoFeatureFromSubNew(infoBase, FrontAndEndPerBase, 1)
+			baseUnitNew, err := sub_helper.GetVADInfoFeatureFromSubNew(infoBase, FrontAndEndPerBase)
 			if err != nil {
 				t.Fatal(err)
 			}
-			baseUnitNew := baseUnitListNew[0]
 			// ---------------------------------------------------------------------------------------
 			// Src,截取的部分要小于 Base 的部分
 			//srcUnitListOld, err := sub_helper.GetVADInfoFeatureFromSub(infoSrc, FrontAndEndPerSrc, 100000, true)
@@ -620,14 +620,13 @@ func TestGetOffsetTimeV2_BaseSub(t *testing.T) {
 			//	t.Fatal(err)
 			//}
 			//srcUnitOld := srcUnitListOld[0]
-			srcUnitListNew, err := sub_helper.GetVADInfoFeatureFromSubNew(infoSrc, FrontAndEndPerSrc, 1)
+			srcUnitNew, err := sub_helper.GetVADInfoFeatureFromSubNew(infoSrc, FrontAndEndPerSrc)
 			if err != nil {
 				t.Fatal(err)
 			}
-			srcUnitNew := srcUnitListNew[0]
 			// ---------------------------------------------------------------------------------------
 			//bok, got, sd, err := timelineFixer.GetOffsetTimeV2(&baseUnitOld, &srcUnitOld, nil, 0)
-			bok, got, sd, err := timelineFixer.GetOffsetTimeV2(&baseUnitNew, &srcUnitNew, nil)
+			bok, got, sd, err := timelineFixer.GetOffsetTimeV2(baseUnitNew, srcUnitNew, nil)
 			if (err != nil) != tt.wantErr {
 				t.Errorf("GetOffsetTimeV1() error = %v, wantErr %v", err, tt.wantErr)
 				return
@@ -763,12 +762,10 @@ func TestGetOffsetTimeV2_BaseAudio(t *testing.T) {
 			*/
 			//sub_helper.MergeMultiDialogue4EngSubtitle(infoSrc)
 			// Src,截取的部分要小于 Base 的部分
-			srcUnitNewList, err := sub_helper.GetVADInfoFeatureFromSubNew(infoSrc, FrontAndEndPerSrc, 1)
+			srcUnitNew, err := sub_helper.GetVADInfoFeatureFromSubNew(infoSrc, FrontAndEndPerSrc)
 			if err != nil {
 				t.Fatal(err)
 			}
-			srcUnitNew := srcUnitNewList[0]
-
 			audioVADInfos, err := vad.GetVADInfoFromAudio(vad.AudioInfo{
 				FileFullPath: tt.args.audioInfo.FileFullPath,
 				SampleRate:   16000,
@@ -779,14 +776,14 @@ func TestGetOffsetTimeV2_BaseAudio(t *testing.T) {
 			}
 
 			println("-------New--------")
-			got, got1, sd, err := s.GetOffsetTimeV2(nil, &srcUnitNew, audioVADInfos)
+			got, got1, sd, err := s.GetOffsetTimeV2(nil, srcUnitNew, audioVADInfos)
 			if (err != nil) != tt.wantErr {
 				t.Errorf("GetOffsetTimeV3() error = %v, wantErr %v", err, tt.wantErr)
 				return
 			}
 
-			//debug_view.SaveDebugChartBase(audioVADInfos, "audioVADInfos", "audioVADInfos")
-			//debug_view.SaveDebugChart(srcUnitNew, "srcUnitNew", "srcUnitNew")
+			debug_view.SaveDebugChartBase(audioVADInfos, "audioVADInfos", "audioVADInfos")
+			debug_view.SaveDebugChart(*srcUnitNew, "srcUnitNew", "srcUnitNew")
 			if got != tt.want {
 				t.Errorf("GetOffsetTimeV3() got = %v, want %v", got, tt.want)
 			}