|
@@ -1,8 +1,12 @@
|
|
|
package sub_timeline_fixer
|
|
|
|
|
|
import (
|
|
|
+ "fmt"
|
|
|
"github.com/allanpk716/ChineseSubFinder/internal/pkg/gss"
|
|
|
+ "github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_helper"
|
|
|
"github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
|
|
|
+ "github.com/huandu/go-clone"
|
|
|
+ "sort"
|
|
|
)
|
|
|
|
|
|
type Pipeline struct {
|
|
@@ -15,26 +19,65 @@ func NewPipeline() *Pipeline {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-func (p Pipeline) Fit_gss(infoBase, infoSrc subparser.FileInfo) error {
|
|
|
-
|
|
|
- /*
|
|
|
- ffsubsync 的 pipeline 有这三个步骤
|
|
|
- 1. parse 解析字幕
|
|
|
- 2. scale 根据帧数比率调整时间轴
|
|
|
- 3. speech_extract 从字幕转换为 VAD 的语音检测信息
|
|
|
- */
|
|
|
- opt_func := func(framerateRatio float64) float64 {
|
|
|
- nowInfoSrc := infoSrc
|
|
|
- err := nowInfoSrc.ChangeDialoguesFilterExTimeByFramerateRatio(framerateRatio)
|
|
|
+func (p Pipeline) FitGSS(infoBase, infoSrc *subparser.FileInfo) error {
|
|
|
+
|
|
|
+ pipeResults := make([]PipeResult, 0)
|
|
|
+ // 排序
|
|
|
+ sort.Sort(subparser.OneDialogueByStartTime(infoBase.DialoguesFilter))
|
|
|
+ sort.Sort(subparser.OneDialogueByStartTime(infoSrc.DialoguesFilter))
|
|
|
+ // 解析出 VAD 信息
|
|
|
+ baseUnitNew, err := sub_helper.GetVADInfoFeatureFromSubNew(infoBase, 0)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ fffAligner := NewFFTAligner(DefaultMaxOffsetSeconds, SampleRate)
|
|
|
+
|
|
|
+ framerateRatios := p.getFramerateRatios2Try()
|
|
|
+ for _, framerateRatio := range framerateRatios {
|
|
|
+
|
|
|
+ /*
|
|
|
+ ffsubsync 的 pipeline 有这三个步骤
|
|
|
+ 1. parse 解析字幕
|
|
|
+ 2. scale 根据帧数比率调整时间轴
|
|
|
+ 3. speech_extract 从字幕转换为 VAD 的语音检测信息
|
|
|
+ */
|
|
|
+ // 外部传入
|
|
|
+ // 1. parse 解析字幕
|
|
|
+ tmpInfoSrc := clone.Clone(infoSrc).(*subparser.FileInfo)
|
|
|
+ // 2. scale 根据帧数比率调整时间轴
|
|
|
+ err := tmpInfoSrc.ChangeDialoguesFilterExTimeByFramerateRatio(framerateRatio)
|
|
|
if err != nil {
|
|
|
// 还原
|
|
|
println("ChangeDialoguesFilterExTimeByFramerateRatio", err)
|
|
|
- nowInfoSrc = infoSrc
|
|
|
+ tmpInfoSrc = clone.Clone(infoSrc).(*subparser.FileInfo)
|
|
|
+ }
|
|
|
+ tmpSrcInfoUnit, err := sub_helper.GetVADInfoFeatureFromSubNew(tmpInfoSrc, 0)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
}
|
|
|
- // 然后进行 base 与 src 匹配计算,将每一次变动 framerateRatio 计算得到的 偏移值和分数进行记录
|
|
|
|
|
|
+ optFunc := func(framerateRatio float64, isLastIter bool) float64 {
|
|
|
+
|
|
|
+ // 3. speech_extract 从字幕转换为 VAD 的语音检测信息
|
|
|
+ // 然后进行 base 与 src 匹配计算,将每一次变动 framerateRatio 计算得到的 偏移值和分数进行记录
|
|
|
+ bestOffset, score := fffAligner.Fit(baseUnitNew.GetVADFloatSlice(), tmpSrcInfoUnit.GetVADFloatSlice())
|
|
|
+ println(fmt.Sprintf("got score %.0f (offset %d) for ratio %.3f", score, bestOffset, framerateRatio))
|
|
|
+ // 放到外部的存储中
|
|
|
+ if isLastIter == true {
|
|
|
+ pipeResult := PipeResult{
|
|
|
+ Score: score,
|
|
|
+ BestOffset: bestOffset,
|
|
|
+ ScaleFactor: framerateRatio,
|
|
|
+ }
|
|
|
+ pipeResults = append(pipeResults, pipeResult)
|
|
|
+ }
|
|
|
+ return -score
|
|
|
+ }
|
|
|
+
|
|
|
+ gss.Gss(optFunc, MinFramerateRatio, MaxFramerateRatio, 1e-4, nil)
|
|
|
}
|
|
|
- gss.Gss(opt_func, MIN_FRAMERATE_RATIO, MAX_FRAMERATE_RATIO, 1e-4, nil)
|
|
|
+
|
|
|
+ return nil
|
|
|
}
|
|
|
|
|
|
func (p *Pipeline) getFramerateRatios2Try() []float64 {
|
|
@@ -42,14 +85,23 @@ func (p *Pipeline) getFramerateRatios2Try() []float64 {
|
|
|
if len(p.framerateRatios) > 0 {
|
|
|
return p.framerateRatios
|
|
|
}
|
|
|
- p.framerateRatios = append(p.framerateRatios, FRAMERATE_RATIOS...)
|
|
|
- for i := 0; i < len(FRAMERATE_RATIOS); i++ {
|
|
|
- p.framerateRatios = append(p.framerateRatios, 1.0/FRAMERATE_RATIOS[i])
|
|
|
+ p.framerateRatios = append(p.framerateRatios, 1.0)
|
|
|
+ p.framerateRatios = append(p.framerateRatios, FramerateRatios...)
|
|
|
+ for i := 0; i < len(FramerateRatios); i++ {
|
|
|
+ p.framerateRatios = append(p.framerateRatios, 1.0/FramerateRatios[i])
|
|
|
}
|
|
|
return p.framerateRatios
|
|
|
}
|
|
|
|
|
|
-var FRAMERATE_RATIOS = []float64{24. / 23.976, 25. / 23.976, 25. / 24.}
|
|
|
+var FramerateRatios = []float64{24. / 23.976, 25. / 23.976, 25. / 24.}
|
|
|
+
|
|
|
+const MinFramerateRatio = 0.9
|
|
|
+const MaxFramerateRatio = 1.1
|
|
|
+const DefaultMaxOffsetSeconds = 60
|
|
|
+const SampleRate = 100
|
|
|
|
|
|
-const MIN_FRAMERATE_RATIO = 0.9
|
|
|
-const MAX_FRAMERATE_RATIO = 1.1
|
|
|
+type PipeResult struct {
|
|
|
+ Score float64
|
|
|
+ BestOffset int
|
|
|
+ ScaleFactor float64
|
|
|
+}
|