// pipeline.go (3.1 KB)

  package sub_timeline_fixer

  import (
  	"errors"
  	"fmt"
  	"sort"

  	"github.com/allanpk716/ChineseSubFinder/internal/pkg/gss"
  	"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_helper"
  	"github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
  	"github.com/huandu/go-clone"
  )
  // Pipeline drives the subtitle timeline fitting steps and caches the list of
  // framerate ratios that should be tried.
  type Pipeline struct {
  	// framerateRatios is filled lazily by getFramerateRatios2Try; an empty
  	// slice means the list has not been built yet.
  	framerateRatios []float64
  }
  13. func NewPipeline() *Pipeline {
  14. return &Pipeline{
  15. framerateRatios: make([]float64, 0),
  16. }
  17. }
  18. func (p Pipeline) FitGSS(infoBase, infoSrc *subparser.FileInfo) error {
  19. pipeResults := make([]PipeResult, 0)
  20. // 排序
  21. sort.Sort(subparser.OneDialogueByStartTime(infoBase.DialoguesFilter))
  22. sort.Sort(subparser.OneDialogueByStartTime(infoSrc.DialoguesFilter))
  23. // 解析处 VAD 信息
  24. baseUnitNew, err := sub_helper.GetVADInfoFeatureFromSubNew(infoBase, 0)
  25. if err != nil {
  26. return err
  27. }
  28. fffAligner := NewFFTAligner(DefaultMaxOffsetSeconds, SampleRate)
  29. framerateRatios := p.getFramerateRatios2Try()
  30. for _, framerateRatio := range framerateRatios {
  31. /*
  32. ffsubsync 的 pipeline 有这三个步骤
  33. 1. parse 解析字幕
  34. 2. scale 根据帧数比率调整时间轴
  35. 3. speech_extract 从字幕转换为 VAD 的语音检测信息
  36. */
  37. // 外部传入
  38. // 1. parse 解析字幕
  39. tmpInfoSrc := clone.Clone(infoSrc).(*subparser.FileInfo)
  40. // 2. scale 根据帧数比率调整时间轴
  41. err := tmpInfoSrc.ChangeDialoguesFilterExTimeByFramerateRatio(framerateRatio)
  42. if err != nil {
  43. // 还原
  44. println("ChangeDialoguesFilterExTimeByFramerateRatio", err)
  45. tmpInfoSrc = clone.Clone(infoSrc).(*subparser.FileInfo)
  46. }
  47. tmpSrcInfoUnit, err := sub_helper.GetVADInfoFeatureFromSubNew(tmpInfoSrc, 0)
  48. if err != nil {
  49. return err
  50. }
  51. optFunc := func(framerateRatio float64, isLastIter bool) float64 {
  52. // 3. speech_extract 从字幕转换为 VAD 的语音检测信息
  53. // 然后进行 base 与 src 匹配计算,将每一次变动 framerateRatio 计算得到的 偏移值和分数进行记录
  54. bestOffset, score := fffAligner.Fit(baseUnitNew.GetVADFloatSlice(), tmpSrcInfoUnit.GetVADFloatSlice())
  55. println(fmt.Sprintf("got score %.0f (offset %d) for ratio %.3f", score, bestOffset, framerateRatio))
  56. // 放到外部的存储中
  57. if isLastIter == true {
  58. pipeResult := PipeResult{
  59. Score: score,
  60. BestOffset: bestOffset,
  61. ScaleFactor: framerateRatio,
  62. }
  63. pipeResults = append(pipeResults, pipeResult)
  64. }
  65. return -score
  66. }
  67. gss.Gss(optFunc, MinFramerateRatio, MaxFramerateRatio, 1e-4, nil)
  68. }
  69. return nil
  70. }
  71. func (p *Pipeline) getFramerateRatios2Try() []float64 {
  72. if len(p.framerateRatios) > 0 {
  73. return p.framerateRatios
  74. }
  75. p.framerateRatios = append(p.framerateRatios, 1.0)
  76. p.framerateRatios = append(p.framerateRatios, FramerateRatios...)
  77. for i := 0; i < len(FramerateRatios); i++ {
  78. p.framerateRatios = append(p.framerateRatios, 1.0/FramerateRatios[i])
  79. }
  80. return p.framerateRatios
  81. }
  // FramerateRatios lists the framerate conversion ratios that
  // getFramerateRatios2Try tries, together with their reciprocals.
  var FramerateRatios = []float64{24. / 23.976, 25. / 23.976, 25. / 24.}

  const (
  	// MinFramerateRatio is the lower bound passed to gss.Gss in FitGSS.
  	MinFramerateRatio = 0.9
  	// MaxFramerateRatio is the upper bound passed to gss.Gss in FitGSS.
  	MaxFramerateRatio = 1.1
  	// DefaultMaxOffsetSeconds is the first argument FitGSS passes to
  	// NewFFTAligner.
  	DefaultMaxOffsetSeconds = 60
  	// SampleRate is the second argument FitGSS passes to NewFFTAligner;
  	// presumably samples per second (confirm against NewFFTAligner).
  	SampleRate = 100
  )
  // PipeResult records the outcome of one alignment attempt made by FitGSS.
  type PipeResult struct {
  	// Score is the score returned by the FFT aligner's Fit call.
  	Score float64
  	// BestOffset is the offset returned by the same Fit call; presumably
  	// measured in samples (confirm against the aligner).
  	BestOffset int
  	// ScaleFactor is the framerate ratio the result was recorded for.
  	ScaleFactor float64
  }