Ver código fonte

正在调试 V2 时间轴修正

Signed-off-by: allan716 <[email protected]>
allan716 4 anos atrás
pai
commit
d37a58b464

+ 37 - 5
internal/pkg/ffmpeg_helper/ffmpeg_helper.go

@@ -130,8 +130,8 @@ func (f *FFMPEGHelper) GetAudioInfo(audioFileFullPath string) (bool, float64, er
 	return true, duration, nil
 }
 
-// ExportAudioArgsByTimeRange 根据输入的时间轴导出音频分段信息 "0:1:27" "28.2"
-func (f *FFMPEGHelper) ExportAudioArgsByTimeRange(audioFullPath string, startTimeString, timeLength string) (string, string, error) {
+// ExportAudioAndSubArgsByTimeRange 根据输入的时间轴导出音频和字幕的分段信息,起始时间形如 "0:1:27",时长形如 "28.2"
+func (f *FFMPEGHelper) ExportAudioAndSubArgsByTimeRange(audioFullPath, subFullPath string, startTimeString, timeLength string) (string, string, string, error) {
 
 	outStartTimeString := strings.ReplaceAll(startTimeString, ":", "-")
 	outStartTimeString = strings.ReplaceAll(outStartTimeString, ".", "#")
@@ -141,22 +141,37 @@ func (f *FFMPEGHelper) ExportAudioArgsByTimeRange(audioFullPath string, startTim
 	frontName := strings.ReplaceAll(filepath.Base(audioFullPath), filepath.Ext(audioFullPath), "")
 
 	outAudioName := frontName + "_" + outStartTimeString + "_" + outTimeLength + filepath.Ext(audioFullPath)
+	outSubName := frontName + "_" + outStartTimeString + "_" + outTimeLength + common.SubExtSRT
 
 	var outAudioFullPath = filepath.Join(filepath.Dir(audioFullPath), outAudioName)
+	var outSubFullPath = filepath.Join(filepath.Dir(audioFullPath), outSubName)
 
+	// 导出音频
 	if my_util.IsFile(outAudioFullPath) == true {
 		err := os.Remove(outAudioFullPath)
 		if err != nil {
-			return "", "", err
+			return "", "", "", err
 		}
 	}
 	args := f.getAudioExportArgsByTimeRange(audioFullPath, startTimeString, timeLength, outAudioFullPath)
 	execFFMPEG, err := f.execFFMPEG(args)
 	if err != nil {
-		return "", execFFMPEG, err
+		return "", "", execFFMPEG, err
+	}
+	// 导出字幕
+	if my_util.IsFile(outSubFullPath) == true {
+		err := os.Remove(outSubFullPath)
+		if err != nil {
+			return "", "", "", err
+		}
+	}
+	args = f.getSubExportArgsByTimeRange(subFullPath, startTimeString, timeLength, outSubFullPath)
+	execFFMPEG, err = f.execFFMPEG(args)
+	if err != nil {
+		return "", "", execFFMPEG, err
 	}
 
-	return outAudioFullPath, "", nil
+	return outAudioFullPath, outSubFullPath, "", nil
 }
 
 // parseJsonString2GetFFProbeInfo 使用 ffprobe 获取视频的 stream 信息,从中解析出字幕和音频的索引
@@ -372,6 +387,23 @@ func (f *FFMPEGHelper) getAudioExportArgsByTimeRange(audioFullPath string, start
 	return audioArgs
 }
 
+func (f *FFMPEGHelper) getSubExportArgsByTimeRange(subFullPath string, startTimeString, timeLength, outSubFullPath string) []string {
+
+	/*
+		ffmpeg.exe -i aa.srt -ss 00:1:27 -t 28 bb.srt
+	*/
+	var subArgs = make([]string, 0)
+	subArgs = append(subArgs, "-i")
+	subArgs = append(subArgs, subFullPath)
+	subArgs = append(subArgs, "-ss")
+	subArgs = append(subArgs, startTimeString)
+	subArgs = append(subArgs, "-t")
+	subArgs = append(subArgs, timeLength)
+	subArgs = append(subArgs, outSubFullPath)
+
+	return subArgs
+}
+
 // addSubMapArg 构建字幕的导出参数
 func (f *FFMPEGHelper) addSubMapArg(subArgs *[]string, index int, subSaveFullPath string) {
 	*subArgs = append(*subArgs, "-map")

+ 2 - 1
internal/pkg/ffmpeg_helper/ffmpeg_helper_test.go

@@ -75,12 +75,13 @@ func Test_parseJsonString2GetFFMPEGInfo(t *testing.T) {
 func TestFFMPEGHelper_ExportAudioArgsByTimeRange(t *testing.T) {
 
 	audioFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_1.pcm"
+	subFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_2.srt"
 	startTimeString := "0:1:27"
 	timeLeng := "28.2"
 
 	f := NewFFMPEGHelper()
 
-	_, timeRange, err := f.ExportAudioArgsByTimeRange(audioFullPath, startTimeString, timeLeng)
+	_, _, timeRange, err := f.ExportAudioAndSubArgsByTimeRange(audioFullPath, subFullPath, startTimeString, timeLeng)
 	if err != nil {
 		println(timeRange)
 		t.Fatal(err)

+ 2 - 1
internal/pkg/my_util/util.go

@@ -10,6 +10,7 @@ import (
 	"github.com/go-resty/resty/v2"
 	"io"
 	"io/ioutil"
+	"math"
 	"net/http"
 	"os"
 	"os/exec"
@@ -457,5 +458,5 @@ func Time2SecendNumber(inTime time.Time) float64 {
 }
 
 func Time2Duration(inTime time.Time) time.Duration {
-	return time.Duration(Time2SecendNumber(inTime))
+	return time.Duration(Time2SecendNumber(inTime) * math.Pow10(9))
 }

+ 44 - 8
internal/pkg/sub_timeline_fixer/fixer.go

@@ -14,6 +14,7 @@ import (
 	"github.com/mndrix/tukey"
 	"gonum.org/v1/gonum/mat"
 	"os"
+	"path/filepath"
 	"strings"
 	"time"
 )
@@ -319,7 +320,7 @@ func (s *SubTimelineFixer) GetOffsetTimeV1(infoBase, infoSrc *subparser.FileInfo
 	// 不为空的时候,生成调试文件
 	if staticLineFileSavePath != "" {
 		//staticLineFileSavePath = "bar.html"
-		err = SaveStaticLine(staticLineFileSavePath, infoBase.Name, infoSrc.Name,
+		err = SaveStaticLineV1(staticLineFileSavePath, infoBase.Name, infoSrc.Name,
 			per, oldMean, oldSd, newMean, newSd, xAxis,
 			startDiffTimeLineData, endDiffTimeLineData)
 		if err != nil {
@@ -415,11 +416,12 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(audioInfo vad.AudioInfo, infoSrc *sub
 
 	// 开始针对对白单元进行匹配
 	for _, subUnit := range subUnitList {
-		startTimeString, subLeng := subUnit.GetFFMPEGCutRange(ExpandTimeRange)
 
-		outAudioFPath, errString, err := s.ffmpegHelper.ExportAudioArgsByTimeRange(audioInfo.FileFullPath, startTimeString, subLeng)
+		startTimeString, subLength := subUnit.GetFFMPEGCutRange(ExpandTimeRange)
+
+		outAudioFPath, _, errString, err := s.ffmpegHelper.ExportAudioAndSubArgsByTimeRange(audioInfo.FileFullPath, infoSrc.FileFullPath, startTimeString, subLength)
 		if err != nil {
-			log_helper.GetLogger().Errorln("ExportAudioArgsByTimeRange", errString, err)
+			log_helper.GetLogger().Errorln("ExportAudioAndSubArgsByTimeRange", errString, err)
 			return false, 0, 0, err
 		}
 
@@ -432,13 +434,47 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(audioInfo vad.AudioInfo, infoSrc *sub
 			return false, 0, 0, err
 		}
 
-		println(len(audioVADInfos))
+		var subTimeLineData = make([]opts.LineData, 0)
+		var subxAxis = make([]string, 0)
+		var audioTimeLineData = make([]opts.LineData, 0)
+		var audioxAxis = make([]string, 0)
+
+		for _, vadInfo := range subUnit.VADList {
+
+			subTimeLineData = append(subTimeLineData, opts.LineData{Value: vadInfo.Active})
+			baseTime := subUnit.GetBaseTimeNumber()
+			subxAxis = append(subxAxis, fmt.Sprintf("%f", vadInfo.Time.Seconds()-baseTime))
+		}
+
+		outDir := filepath.Dir(outAudioFPath)
+		outBaseName := filepath.Base(outAudioFPath)
+		outBaseNameWithOutExt := strings.ReplaceAll(outBaseName, filepath.Ext(outBaseName), "")
+
+		subVADStaticLineFullPath := filepath.Join(outDir, outBaseNameWithOutExt+"_sub.html")
+
+		err = SaveStaticLineV2("Sub", subVADStaticLineFullPath, subxAxis, subTimeLineData)
+		if err != nil {
+			return false, 0, 0, err
+		}
+
+		for _, vadInfo := range audioVADInfos {
+
+			audioTimeLineData = append(audioTimeLineData, opts.LineData{Value: vadInfo.Active})
+			audioxAxis = append(audioxAxis, fmt.Sprintf("%f", vadInfo.Time.Seconds()))
+		}
+
+		audioVADStaticLineFullPath := filepath.Join(outDir, outBaseNameWithOutExt+"_audio.html")
+
+		err = SaveStaticLineV2("Audio", audioVADStaticLineFullPath, audioxAxis, audioTimeLineData)
+		if err != nil {
+			return false, 0, 0, err
+		}
 	}
 
 	return false, -1, -1, nil
 }
 
 const FixMask = "-fix"
-const FrontAndEndPer = 0.15
-const SubUnitMaxCount = 5
-const ExpandTimeRange = 1 // 从字幕的时间轴片段需要向前和向后多匹配一部分的音频,这里定义的就是这个 range 以分钟为单位, 正负 1 分钟
+const FrontAndEndPer = 0.15 // 前百分之 15 和后百分之 15 都不进行识别
+const SubUnitMaxCount = 10  // 一个 Sub 单元有十句对白
+const ExpandTimeRange = 0   // 从字幕的时间轴片段需要向前和向后多匹配一部分的音频,这里定义的就是这个 range,以分钟为单位;当前为 0,即不做扩展(取 1 则为正负 1 分钟)

+ 27 - 1
internal/pkg/sub_timeline_fixer/static_line.go

@@ -7,7 +7,7 @@ import (
 	"os"
 )
 
-func SaveStaticLine(saveFPath string, infoBaseName, infoSrcName string,
+func SaveStaticLineV1(saveFPath string, infoBaseName, infoSrcName string,
 	per, oldMean, OldSd, NewMean, NewSd float64, xAxis []string,
 	startDiffTimeLineData, endDiffTimeLineData []opts.LineData) error {
 	// 1.New 一个条形图对象
@@ -37,3 +37,29 @@ func SaveStaticLine(saveFPath string, infoBaseName, infoSrcName string,
 
 	return nil
 }
+
+func SaveStaticLineV2(name, saveFPath string, xAxis []string, timeLineData []opts.LineData) error {
+
+	// 1.New 一个折线图对象
+	bar := charts.NewLine()
+	// 2.设置 标题 和 子标题
+	bar.SetGlobalOptions(charts.WithTitleOpts(opts.Title{
+		Title: name + " VAD",
+	}))
+	// 3.设置 数据组
+	bar.SetXAxis(xAxis).
+		AddSeries(name+" VAD", timeLineData)
+	// 4.绘图 生成html
+	outfile, err := os.Create(saveFPath)
+	defer func() {
+		_ = outfile.Close()
+	}()
+	if err != nil {
+		return err
+	}
+	err = bar.Render(outfile)
+	if err != nil {
+		return err
+	}
+	return nil
+}

+ 34 - 19
internal/pkg/sub_timeline_fixer/sub_unit.go

@@ -9,30 +9,35 @@ import (
 )
 
 type SubUnit struct {
-	StartTime time.Time
-	EndTime   time.Time
-	vadList   []vad.VADInfo
+	baseTime  time.Time // 这个是基础的时间,后续需要减去这个,不然与导出的片段字幕去对比会有一个起始时间的偏差
+	StartTime time.Time // 这个时间会减去 baseTime 再存储
+	EndTime   time.Time // 这个时间会减去 baseTime 再存储
+	VADList   []vad.VADInfo
 	subCount  int
+	firstAdd  bool
 }
 
 func NewSubUnit() *SubUnit {
 	return &SubUnit{
-		vadList:  make([]vad.VADInfo, 0),
+		VADList:  make([]vad.VADInfo, 0),
 		subCount: 0,
+		firstAdd: false,
 	}
 }
 
 // Add 添加一句对白进来
 func (s *SubUnit) Add(oneSubStartTime, oneSubEndTime time.Time) {
 
-	if s.GetStartTimeNumber() == 0 {
-		s.StartTime = oneSubStartTime
+	if s.firstAdd == false {
+		s.baseTime = oneSubStartTime
+		s.StartTime = oneSubStartTime.Add(-my_util.Time2Duration(s.baseTime))
+		s.firstAdd = true
 	}
-	s.EndTime = oneSubEndTime
+	s.EndTime = oneSubEndTime.Add(-my_util.Time2Duration(s.baseTime))
 	// 每一句对白的开始就认为 VAD active 是 1,直到结束,才是 0
-	s.vadList = append(s.vadList, *vad.NewVADInfoBase(true, time.Duration(s.GetStartTimeNumber()*math.Pow10(9))))
+	s.VADList = append(s.VADList, *vad.NewVADInfoBase(true, time.Duration(s.GetStartTimeNumber()*math.Pow10(9))))
 
-	s.vadList = append(s.vadList, *vad.NewVADInfoBase(false, time.Duration(s.GetEndTimeNumber()*math.Pow10(9))))
+	s.VADList = append(s.VADList, *vad.NewVADInfoBase(false, time.Duration(s.GetEndTimeNumber()*math.Pow10(9))))
 
 	s.subCount++
 }
@@ -42,24 +47,30 @@ func (s *SubUnit) AddAndInsert(oneSubStartTime, oneSubEndTime time.Time) {
 
 	perWindows := float64(vad.FrameDuration) / 1000
 	// 不是第一次添加,那么就需要把两句对白中间间隔的 active == false 的插入,插入间隙
-	if len(s.vadList) > 0 {
-		needAddRange := my_util.Time2SecendNumber(oneSubStartTime) - s.GetEndTimeNumber()
+	if len(s.VADList) > 0 {
+		dd := my_util.Time2Duration(s.baseTime)
+		tmpSubStartTime := oneSubStartTime.Add(-dd)
+		needAddRange := my_util.Time2SecendNumber(tmpSubStartTime) - s.GetEndTimeNumber()
 		for i := 0.0; i < needAddRange; {
 
-			s.vadList = append(s.vadList, *vad.NewVADInfoBase(false, time.Duration((s.GetEndTimeNumber()+i)*math.Pow10(9))))
+			s.VADList = append(s.VADList, *vad.NewVADInfoBase(false, time.Duration((s.GetEndTimeNumber()+i)*math.Pow10(9))))
 			i += perWindows
 		}
 	}
 
-	if s.GetStartTimeNumber() == 0 {
-		s.StartTime = oneSubStartTime
+	if s.firstAdd == false {
+		s.baseTime = oneSubStartTime
+		dd := my_util.Time2Duration(s.baseTime)
+		s.StartTime = oneSubStartTime.Add(-dd)
+		s.firstAdd = true
 	}
-	s.EndTime = oneSubEndTime
+
+	s.EndTime = oneSubEndTime.Add(-my_util.Time2Duration(s.baseTime))
 
 	needAddRange := my_util.Time2SecendNumber(oneSubEndTime) - my_util.Time2SecendNumber(oneSubStartTime)
 	for i := 0.0; i < needAddRange; {
 
-		s.vadList = append(s.vadList, *vad.NewVADInfoBase(true, time.Duration((s.GetStartTimeNumber()+i)*math.Pow10(9))))
+		s.VADList = append(s.VADList, *vad.NewVADInfoBase(true, time.Duration((s.GetStartTimeNumber()+i)*math.Pow10(9))))
 		i += perWindows
 	}
 
@@ -73,12 +84,12 @@ func (s SubUnit) GetDialogueCount() int {
 
 // GetStartTimeNumber 获取这个单元的起始时间,单位是秒
 func (s SubUnit) GetStartTimeNumber() float64 {
-	return my_util.Time2SecendNumber(s.StartTime)
+	return my_util.Time2SecendNumber(s.StartTime.Add(my_util.Time2Duration(s.baseTime)))
 }
 
 // GetEndTimeNumber 获取这个单元的结束时间,单位是秒
 func (s SubUnit) GetEndTimeNumber() float64 {
-	return my_util.Time2SecendNumber(s.EndTime)
+	return my_util.Time2SecendNumber(s.EndTime.Add(my_util.Time2Duration(s.baseTime)))
 }
 
 // GetTimelineRange 开始到结束的时间长度,单位是秒
@@ -86,6 +97,10 @@ func (s SubUnit) GetTimelineRange() float64 {
 	return s.GetEndTimeNumber() - s.GetStartTimeNumber()
 }
 
+func (s SubUnit) GetBaseTimeNumber() float64 {
+	return my_util.Time2SecendNumber(s.baseTime)
+}
+
 // GetFFMPEGCutRange 这里会生成导出 FFMPEG 的参数字段,起始时间和结束的时间长度
 func (s SubUnit) GetFFMPEGCutRange(expandTimeRange int) (string, string) {
 
@@ -93,7 +108,7 @@ func (s SubUnit) GetFFMPEGCutRange(expandTimeRange int) (string, string) {
 	if s.GetStartTimeNumber()-float64(expandTimeRange)*60 < 0 {
 		tmpStartTime = time.Time{}
 	} else {
-		tmpStartTime = s.StartTime.Add(time.Duration(expandTimeRange) * time.Minute)
+		tmpStartTime = s.StartTime.Add(time.Duration(expandTimeRange) * time.Minute).Add(my_util.Time2Duration(s.baseTime))
 	}
 
 	return fmt.Sprintf("%d:%d:%d.%d", tmpStartTime.Hour(), tmpStartTime.Minute(), tmpStartTime.Second(), tmpStartTime.Nanosecond()/1000/1000),

+ 1 - 1
internal/pkg/vad/vad_info.go

@@ -29,7 +29,7 @@ func NewVADInfoBase(active bool, nowTime time.Duration) *VADInfo {
 
 const (
 	// Mode vad mode,VAD 的模式
-	Mode = 2
+	Mode = 1
 	// FrameDuration frame duration,分析的时间窗口
 	FrameDuration = 10
 )