Explorar o código

正在做 vad 时间轴校正功能

Signed-off-by: allan716 <[email protected]>
allan716 hai 4 anos
pai
achega
2fb55431f2

+ 2 - 0
internal/pkg/ffmpeg_helper/audio_info.go

@@ -14,6 +14,7 @@ type AudioInfo struct {
 	startTime string
 	language  string
 	FullPath  string
+	Duration  float64
 }
 
 func NewAudioInfo(index int, codecName, codecType, timeBase, startTime, language string) *AudioInfo {
@@ -24,6 +25,7 @@ func NewAudioInfo(index int, codecName, codecType, timeBase, startTime, language
 		timeBase:  timeBase,
 		startTime: startTime,
 		language:  language,
+		Duration:  0,
 	}
 }
 

+ 42 - 3
internal/pkg/ffmpeg_helper/ffmpeg_helper.go

@@ -2,6 +2,7 @@ package ffmpeg_helper
 
 import (
 	"bytes"
+	"errors"
 	"fmt"
 	"github.com/allanpk716/ChineseSubFinder/internal/common"
 	"github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/ass"
@@ -49,7 +50,7 @@ func (f *FFMPEGHelper) GetFFMPEGInfo(videoFileFullPath string) (bool, *FFMPEGInf
 		return false, nil, err
 	}
 	// 解析得到的字符串反馈
-	bok, ffMPEGInfo := f.parseJsonString2GetFFMPEGInfo(videoFileFullPath, buf.String())
+	bok, ffMPEGInfo := f.parseJsonString2GetFFProbeInfo(videoFileFullPath, buf.String())
 	if bok == false {
 		return false, nil, nil
 	}
@@ -102,6 +103,33 @@ func (f *FFMPEGHelper) GetFFMPEGInfo(videoFileFullPath string) (bool, *FFMPEGInf
 	return bok, ffMPEGInfo, nil
 }
 
+// GetAudioInfo runs ffprobe on a raw PCM file (read as s16le, 1 channel,
+// 16000 Hz) and returns the value of "format.duration" from its JSON report,
+// as extracted by parseJsonString2GetAudioInfo.
+//
+// The bool result is true only when the duration was obtained; whenever it is
+// false the returned error is non-nil.
+func (f *FFMPEGHelper) GetAudioInfo(audioFileFullPath string) (bool, float64, error) {
+
+	const args = "-v error -show_format -show_streams -print_format json -f s16le -ac 1 -ar 16000"
+	cmdArgs := append(strings.Fields(args), audioFileFullPath)
+	cmd := exec.Command("ffprobe", cmdArgs...)
+	// Keep the JSON report (stdout) apart from diagnostics (stderr) so that an
+	// ffprobe message can never end up inside the text handed to the parser.
+	var stdout, stderr bytes.Buffer
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+	// Run is Start followed by Wait.
+	if err := cmd.Run(); err != nil {
+		return false, 0, fmt.Errorf("ffprobe %q: %w: %s", audioFileFullPath, err, strings.TrimSpace(stderr.String()))
+	}
+
+	bok, duration := f.parseJsonString2GetAudioInfo(stdout.String())
+	if !bok {
+		return false, 0, errors.New("ffprobe get " + audioFileFullPath + " duration error")
+	}
+
+	return true, duration, nil
+}
+
 // ExportAudioArgsByTimeRange 根据输入的时间轴导出音频分段信息
 func (f *FFMPEGHelper) ExportAudioArgsByTimeRange(audioFullPath string, startTimeString, timeLeng, outAudioFullPath string) (string, error) {
 
@@ -120,8 +148,8 @@ func (f *FFMPEGHelper) ExportAudioArgsByTimeRange(audioFullPath string, startTim
 	return "", nil
 }
 
-// parseJsonString2GetFFMPEGInfo 使用 ffprobe 获取视频的 stream 信息,从中解析出字幕和音频的索引
-func (f *FFMPEGHelper) parseJsonString2GetFFMPEGInfo(videoFileFullPath, inputFFProbeString string) (bool, *FFMPEGInfo) {
+// parseJsonString2GetFFProbeInfo 使用 ffprobe 获取视频的 stream 信息,从中解析出字幕和音频的索引
+func (f *FFMPEGHelper) parseJsonString2GetFFProbeInfo(videoFileFullPath, inputFFProbeString string) (bool, *FFMPEGInfo) {
 
 	streamsValue := gjson.Get(inputFFProbeString, "streams.#")
 	if streamsValue.Exists() == false {
@@ -206,6 +234,16 @@ func (f *FFMPEGHelper) parseJsonString2GetFFMPEGInfo(videoFileFullPath, inputFFP
 	return true, ffmpegInfo
 }
 
+// parseJsonString2GetAudioInfo reads "format.duration" out of an ffprobe JSON
+// report (used to get the length of a pcm audio file). The bool is false when
+// that field does not exist in the input.
+func (f *FFMPEGHelper) parseJsonString2GetAudioInfo(inputFFProbeString string) (bool, float64) {
+	if result := gjson.Get(inputFFProbeString, "format.duration"); result.Exists() {
+		return true, result.Float()
+	}
+	return false, 0
+}
+
 // exportAudioAndSubtitles 导出音频和字幕文件
 func (f *FFMPEGHelper) exportAudioAndSubtitles(subArgs, audioArgs []string) (string, error) {
 
@@ -289,6 +327,7 @@ func (f *FFMPEGHelper) getAudioExportArgsByTimeRange(audioFullPath string, start
 	/*
 		ffmpeg.exe -ar 16000 -ac 1 -f s16le -i aa.pcm -ss 00:1:27 -t 28 -acodec pcm_s16le -f s16le -ac 1 -ar 16000 bb.pcm
 
+		ffmpeg.exe -i aa.srt -ss 00:1:27 -t 28 bb.srt
 	*/
 
 	var audioArgs = make([]string, 0)

+ 17 - 4
internal/pkg/ffmpeg_helper/ffmpeg_helper_test.go

@@ -60,13 +60,13 @@ func Test_parseJsonString2GetFFMPEGInfo(t *testing.T) {
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			got, got1 := f.parseJsonString2GetFFMPEGInfo(tt.args.videoFileFullPath, tt.args.input)
+			got, got1 := f.parseJsonString2GetFFProbeInfo(tt.args.videoFileFullPath, tt.args.input)
 			if got != tt.want {
-				t.Errorf("parseJsonString2GetFFMPEGInfo() got = %v, want %v", got, tt.want)
+				t.Errorf("parseJsonString2GetFFProbeInfo() got = %v, want %v", got, tt.want)
 			}
 
 			if len(got1.AudioInfoList) != tt.audios || len(got1.SubtitleInfoList) != tt.subs {
-				t.Fatal("parseJsonString2GetFFMPEGInfo result List < 1")
+				t.Fatal("parseJsonString2GetFFProbeInfo result List < 1")
 			}
 		})
 	}
@@ -76,7 +76,7 @@ func TestFFMPEGHelper_ExportAudioArgsByTimeRange(t *testing.T) {
 
 	audioFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_1.pcm"
 	startTimeString := "0:1:27"
-	timeLeng := "28"
+	timeLeng := "28.2"
 	outAudioFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_1_cut.pcm"
 
 	f := NewFFMPEGHelper()
@@ -87,3 +87,16 @@ func TestFFMPEGHelper_ExportAudioArgsByTimeRange(t *testing.T) {
 		t.Fatal(err)
 	}
 }
+
+// TestFFMPEGHelper_GetAudioInfo probes a pcm file at a fixed local path and
+// logs the duration that GetAudioInfo reports for it.
+func TestFFMPEGHelper_GetAudioInfo(t *testing.T) {
+
+	audioFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_1.pcm"
+
+	f := NewFFMPEGHelper()
+	bok, duration, err := f.GetAudioInfo(audioFullPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Checked separately: t.Fatal(err) with a nil err would print only "<nil>".
+	if !bok {
+		t.Fatal("GetAudioInfo() returned false without an error")
+	}
+
+	t.Logf("duration = %v", duration)
+}

+ 52 - 17
internal/pkg/sub_timeline_fixer/fixer.go

@@ -2,8 +2,8 @@ package sub_timeline_fixer
 
 import (
 	"fmt"
-	"github.com/allanpk716/ChineseSubFinder/internal/common"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg/ffmpeg_helper"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/vad"
 	"github.com/allanpk716/ChineseSubFinder/internal/types/sub_timeline_fiexer"
@@ -19,12 +19,14 @@ import (
 )
 
+// SubTimelineFixer bundles the fixer configuration with the ffmpeg helper its
+// methods use.
 type SubTimelineFixer struct {
-	fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig
+	// fixerConfig is the configuration handed to NewSubTimelineFixer.
+	fixerConfig  sub_timeline_fiexer.SubTimelineFixerConfig
+	// ffmpegHelper is what GetOffsetTimeV2 calls GetAudioInfo on.
+	ffmpegHelper *ffmpeg_helper.FFMPEGHelper
 }
 
+// NewSubTimelineFixer returns a SubTimelineFixer that holds fixerConfig and a
+// helper freshly created by ffmpeg_helper.NewFFMPEGHelper.
 func NewSubTimelineFixer(fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig) *SubTimelineFixer {
 	return &SubTimelineFixer{
-		fixerConfig: fixerConfig,
+		fixerConfig:  fixerConfig,
+		ffmpegHelper: ffmpeg_helper.NewFFMPEGHelper(),
 	}
 }
 
@@ -67,12 +69,7 @@ func (s *SubTimelineFixer) FixSubTimeline(infoSrc *subparser.FileInfo, inOffsetT
 	*/
 	// 偏移时间
 	offsetTime := time.Duration(inOffsetTime*1000) * time.Millisecond
-	timeFormat := ""
-	if infoSrc.Ext == common.SubExtASS || infoSrc.Ext == common.SubExtSSA {
-		timeFormat = common.TimeFormatAss
-	} else {
-		timeFormat = common.TimeFormatSrt
-	}
+	timeFormat := infoSrc.GetTimeFormat()
 	fixContent := infoSrc.Content
 	for _, srcOneDialogue := range infoSrc.Dialogues {
 
@@ -213,12 +210,7 @@ func (s *SubTimelineFixer) GetOffsetTimeV1(infoBase, infoSrc *subparser.FileInfo
 		srcIndex++
 	}
 
-	timeFormat := ""
-	if infoBase.Ext == common.SubExtASS || infoBase.Ext == common.SubExtSSA {
-		timeFormat = common.TimeFormatAss
-	} else {
-		timeFormat = common.TimeFormatSrt
-	}
+	timeFormat := infoBase.GetTimeFormat()
 
 	var startDiffTimeLineData = make([]opts.LineData, 0)
 	var endDiffTimeLineData = make([]opts.LineData, 0)
@@ -370,9 +362,52 @@ func (s *SubTimelineFixer) GetOffsetTimeV1(infoBase, infoSrc *subparser.FileInfo
 }
 
 // GetOffsetTimeV2 使用 VAD 检测语音是否有人声,输出连续的点标记,再通过 SimHash 进行匹配,找到最佳的偏移时间
-func (s *SubTimelineFixer) GetOffsetTimeV2(audioInfo vad.AudioInfo, infoSrc *subparser.FileInfo, staticLineFileSavePath string, debugInfoFileSavePath string) error {
+func (s *SubTimelineFixer) GetOffsetTimeV2(audioInfo vad.AudioInfo, infoSrc *subparser.FileInfo, staticLineFileSavePath string, debugInfoFileSavePath string) (bool, float64, float64, error) {
+
+	/*
+		分割字幕成若干段,然后得到若干段的时间轴,将这些段从字幕文字转换成 VADInfo
+		从上面若干段时间轴,把音频给分割成多段
+		然后使用 simhash 的进行比较,输出分析的曲线图等信息
+	*/
+
+	bok, duration, err := s.ffmpegHelper.GetAudioInfo(audioInfo.FileFullPath)
+	if err != nil || bok == false {
+		return false, 0, 0, err
+	}
+
+	/*
+		这里的字幕要求是完整的一个字幕
+		1. 抽取字幕的时间片段的时候,暂定,前 15% 和后 15% 要避开,前奏、主题曲、结尾曲
+		2. 将整个字幕,抽取连续 5 句对话为一个单元,提取时间片段信息
+	*/
+
+	timeFormat := infoSrc.GetTimeFormat()
+	for _, oneDialogueEx := range infoSrc.DialoguesEx {
+
+		oneDialogueExTimeStart, err := time.Parse(timeFormat, oneDialogueEx.StartTime)
+		if err != nil {
+			return false, 0, 0, err
+		}
+		oneDialogueExTimeEnd, err := time.Parse(timeFormat, oneDialogueEx.EndTime)
+		if err != nil {
+			return false, 0, 0, err
+		}
+
+		oneStart := pkg.Time2Number(oneDialogueExTimeStart)
+		oneEnd := pkg.Time2Number(oneDialogueExTimeEnd)
+
+		if duration*0.15 > oneStart || duration*(1.0-0.15) < oneStart {
+			continue
+		}
+		if oneDialogueEx.ChLine == "" {
+			continue
+		}
+
+		//baseCorpus = append(baseCorpus, oneDialogueEx.EnLine)
+		//baseDialogueFilterMap[len(baseCorpus)-1] = index
+	}
 
-	return nil
+	return false, -1, -1, nil
 }
 
 const FixMask = "-fix"

+ 58 - 1
internal/pkg/sub_timeline_fixer/fixer_test.go

@@ -7,6 +7,7 @@ import (
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_helper"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg/vad"
 	"github.com/allanpk716/ChineseSubFinder/internal/types/sub_timeline_fiexer"
 	"github.com/james-bowman/nlp"
 	"github.com/james-bowman/nlp/measures/pairwise"
@@ -43,7 +44,7 @@ func TestStopWordCounter(t *testing.T) {
 	println(info.Name)
 }
 
-func TestGetOffsetTime(t *testing.T) {
+func TestGetOffsetTimeV1(t *testing.T) {
 	testDataPath := "../../../TestData/FixTimeline"
 	testRootDir, err := pkg.CopyTestData(testDataPath)
 	if err != nil {
@@ -385,3 +386,59 @@ func TestTFIDF(t *testing.T) {
 	fmt.Printf("Matched '%s'", testCorpus[matched])
 	// Output: Matched 'The quick brown fox jumped over the lazy dog'
 }
+
+// TestSubTimelineFixer_GetOffsetTimeV2 parses a subtitle file from a fixed
+// local path and runs GetOffsetTimeV2 on it together with a pcm audio file.
+func TestSubTimelineFixer_GetOffsetTimeV2(t *testing.T) {
+
+	subParserHub := sub_parser_hub.NewSubParserHub(ass.NewParser(), srt.NewParser())
+
+	type fields struct {
+		fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig
+	}
+	type args struct {
+		audioInfo              vad.AudioInfo
+		subFilePath            string
+		staticLineFileSavePath string
+		debugInfoFileSavePath  string
+	}
+	tests := []struct {
+		name    string
+		fields  fields
+		args    args
+		want    bool
+		want1   float64
+		want2   float64
+		wantErr bool
+	}{
+		// GetOffsetTimeV2 currently ends with the fixed result (false, -1, -1, nil),
+		// so that is what a successful run is expected to return.
+		{name: "Rick and Morty - S05E10", args: args{audioInfo: vad.AudioInfo{FileFullPath: "C:\\Tmp\\Rick and Morty - S05E10\\英_1.pcm"}, subFilePath: "C:\\Tmp\\Rick and Morty - S05E10\\英_2.ass"}, want: false, want1: -1, want2: -1},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Build through the constructor so ffmpegHelper is not left nil.
+			s := NewSubTimelineFixer(tt.fields.fixerConfig)
+
+			bok, fileInfo, err := subParserHub.DetermineFileTypeFromFile(tt.args.subFilePath)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if bok == false {
+				t.Fatal("DetermineFileTypeFromFile == false")
+			}
+
+			got, got1, got2, err := s.GetOffsetTimeV2(tt.args.audioInfo, fileInfo, tt.args.staticLineFileSavePath, tt.args.debugInfoFileSavePath)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("GetOffsetTimeV2() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if got != tt.want {
+				t.Errorf("GetOffsetTimeV2() got = %v, want %v", got, tt.want)
+			}
+			if got1 != tt.want1 {
+				t.Errorf("GetOffsetTimeV2() got1 = %v, want %v", got1, tt.want1)
+			}
+			if got2 != tt.want2 {
+				t.Errorf("GetOffsetTimeV2() got2 = %v, want %v", got2, tt.want2)
+			}
+		})
+	}
+}

+ 42 - 0
internal/pkg/sub_timeline_fixer/sub_unit.go

@@ -0,0 +1,42 @@
+package sub_timeline_fixer
+
+import (
+	"fmt"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg/vad"
+	"time"
+)
+
+// SubUnit collects the subtitle lines passed to Add into one time span.
+type SubUnit struct {
+	// StartTime is set by Add while the unit has no start yet.
+	StartTime time.Time
+	// EndTime is overwritten by every call to Add.
+	EndTime   time.Time
+	// vadList is created empty by NewSubUnit.
+	vadList   []vad.VADInfo
+}
+
+// NewSubUnit returns a SubUnit with unset times and an empty, non-nil VAD list.
+func NewSubUnit() *SubUnit {
+	unit := new(SubUnit)
+	unit.vadList = make([]vad.VADInfo, 0)
+	return unit
+}
+
+// Add extends the unit with one subtitle line. The first line added fixes
+// StartTime; every line moves EndTime to its own end.
+func (s *SubUnit) Add(oneSubStartTime, oneSubEndTime time.Time) {
+
+	// IsZero instead of "GetStartTimeNumber() == 0": a clock value of 0 also
+	// describes a line that really starts at 00:00:00, which would then be
+	// overwritten by the next Add. A time produced by time.Parse from a
+	// clock-only layout lies in year 0 and is therefore not the zero Time.
+	if s.StartTime.IsZero() {
+		s.StartTime = oneSubStartTime
+	}
+	s.EndTime = oneSubEndTime
+}
+
+// GetStartTimeNumber returns StartTime converted by pkg.Time2Number.
+func (s SubUnit) GetStartTimeNumber() float64 {
+	startNumber := pkg.Time2Number(s.StartTime)
+	return startNumber
+}
+
+// GetEndTimeNumber returns EndTime converted by pkg.Time2Number.
+func (s SubUnit) GetEndTimeNumber() float64 {
+	endNumber := pkg.Time2Number(s.EndTime)
+	return endNumber
+}
+
+// GetFFMPEGCutRange returns two strings: the start position of the unit as
+// "H:M:S.mmm" and the length of the unit (end number minus start number)
+// formatted with %f.
+// NOTE(review): presumably these feed ffmpeg's -ss and -t options — confirm
+// against the caller.
+func (s SubUnit) GetFFMPEGCutRange() (string, string) {
+	// Keep the milliseconds of the start. The length is computed from
+	// fractional values, so a start truncated to whole seconds would shift
+	// the whole cut earlier by the dropped fraction.
+	startMillis := s.StartTime.Nanosecond() / int(time.Millisecond)
+	start := fmt.Sprintf("%d:%d:%d.%03d", s.StartTime.Hour(), s.StartTime.Minute(), s.StartTime.Second(), startMillis)
+	length := fmt.Sprintf("%f", s.GetEndTimeNumber()-s.GetStartTimeNumber())
+	return start, length
+}

+ 11 - 0
internal/pkg/util.go

@@ -18,6 +18,7 @@ import (
 	"runtime"
 	"strconv"
 	"strings"
+	"time"
 )
 
 // NewHttpClient 新建一个 resty 的对象
@@ -444,3 +445,13 @@ func WriteStrings2File(desfilePath string, strings []string) error {
 	}
 	return nil
 }
+
+// Time2Number converts the clock part of inTime (hour, minute, second and
+// nanosecond) into a number of seconds. The date part is not used.
+func Time2Number(inTime time.Time) float64 {
+	wholeSeconds := inTime.Hour()*60*60 + inTime.Minute()*60 + inTime.Second()
+	fraction := float64(inTime.Nanosecond()) / 1000 / 1000 / 1000
+	return float64(wholeSeconds) + fraction
+}

+ 3 - 2
internal/pkg/vad/vad_helper.go

@@ -86,10 +86,11 @@ func GetVADInfoFromAudio(audioInfo AudioInfo) ([]VADInfo, error) {
 }
 
 // GetVADInfoFromSubtitle 分析字幕文件(暂时考虑的是外置的字幕),得到 VAD 分析信息,看样子是不支持并发的,只能单线程使用
-func GetVADInfoFromSubtitle(subFileInfo *subparser.FileInfo) ([]VADInfo, error) {
+func GetVADInfoFromSubtitle(subFileInfo *subparser.FileInfo, startTime, endIndex int) ([]VADInfo, error) {
 
 	var vadInfos = make([]VADInfo, 0)
-
+	timeFormat := subFileInfo.GetTimeFormat()
+	println(timeFormat)
 	for _, oneDialogueEx := range subFileInfo.DialoguesEx {
 
 		// 考虑的是外置字幕,所以就应该是有中文的

+ 1 - 1
internal/pkg/vad/vad_info.go

@@ -39,7 +39,7 @@ func GetTimeRange(inVADInfos []VADInfo, starttime, timeRange int) []VADInfo {
 	return outVADInfos
 }
 
-// InsertVADInfo 得到的是 VAD 状态变换的节点,中间缺失了连续的 VAD 点信息,使用本函数可以进行插值
+// InsertVADInfo 整个函数待定,未必会实现。得到的是 VAD 状态变换的节点,中间缺失了连续的 VAD 点信息,使用本函数可以进行插值
 func InsertVADInfo(inVADInfos []VADInfo, duration int) []VADInfo {
 
 	var outVADInfos = make([]VADInfo, 0)

+ 9 - 0
internal/types/subparser/fileinfo.go

@@ -1,6 +1,7 @@
 package subparser
 
 import (
+	"github.com/allanpk716/ChineseSubFinder/internal/common"
 	"github.com/allanpk716/ChineseSubFinder/internal/types/language"
 )
 
@@ -18,6 +19,14 @@ type FileInfo struct {
 	OtherLines    []string            // 抽取出所有的第二语言对话,可能是英文、韩文、日文
 }
 
+// GetTimeFormat returns common.TimeFormatAss when Ext is the ASS or SSA
+// extension, and common.TimeFormatSrt for every other extension.
+func (f FileInfo) GetTimeFormat() string {
+	isAssFamily := f.Ext == common.SubExtASS || f.Ext == common.SubExtSSA
+	if !isAssFamily {
+		return common.TimeFormatSrt
+	}
+	return common.TimeFormatAss
+}
+
 // OneDialogue 一句对话
 type OneDialogue struct {
 	StartTime string   // 开始时间