pipeline.go 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. package sub_timeline_fixer
  2. import (
  3. "fmt"
  4. "github.com/allanpk716/ChineseSubFinder/internal/pkg/gss"
  5. "github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
  6. "github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_helper"
  7. "github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
  8. "github.com/huandu/go-clone"
  9. )
  // Pipeline drives the subtitle timeline-fitting procedure implemented by Fit.
  type Pipeline struct {
  	// framerateRatios caches the candidate frame-rate ratios assembled by
  	// getFramerateRatios2Try so they are only built once per Pipeline.
  	framerateRatios []float64
  }
  13. func NewPipeline() *Pipeline {
  14. return &Pipeline{
  15. framerateRatios: make([]float64, 0),
  16. }
  17. }
  18. func (p Pipeline) Fit(infoBase, infoSrc *subparser.FileInfo, useGSS bool) error {
  19. pipeResults := make([]PipeResult, 0)
  20. // 排序
  21. infoBase.SortDialogues()
  22. infoSrc.SortDialogues()
  23. println(fmt.Sprintf("%f", my_util.Time2SecondNumber(infoBase.GetStartTime())))
  24. println(fmt.Sprintf("%f", my_util.Time2SecondNumber(infoBase.GetEndTime())))
  25. // 解析处 VAD 信息
  26. baseUnitNew, err := sub_helper.GetVADInfoFeatureFromSubNew(infoBase, 0)
  27. if err != nil {
  28. return err
  29. }
  30. /*
  31. 这里复现 ffsubsync 的思路
  32. 1. 首先由 getFramerateRatios2Try 得到多个帧数比率的数值,理论上有以下 7 个值:
  33. 将 frameRateRatio = 1.0 插入到 framerateRatios 这个队列的首位
  34. [0] 1.0
  35. [1] 1.001001001001001
  36. [2] 1.0427093760427095
  37. [3] 1.0416666666666667
  38. [4] 0.9989999999999999
  39. [5] 0.9590399999999999
  40. [6] 0.96
  41. 得到一个 framerateRatios 列表
  42. 2. 计算 base 字幕的 num_frames,以及 frameRateRatio = 1.0 时 src 字幕的 num_frames
  43. 推断 frame ratio 比率是多少,得到一个,inferred_framerate_ratio_from_length = base / src
  44. 把这个值插入到 framerateRatios 的尾部也就是第八个元素
  45. 3. 使用上述的 framerateRatios 作为传入参数,开始 FFT 模块的 fit 计算,得到(分数、偏移)信息,选择分数最大的作为匹配的结论
  46. */
  47. // 1.
  48. framerateRatios := make([]float64, 0)
  49. framerateRatios = p.getFramerateRatios2Try()
  50. // 2.
  51. inferredFramerateRatioFromLength := float64(infoBase.GetNumFrames()) / float64(infoSrc.GetNumFrames())
  52. framerateRatios = append(framerateRatios, inferredFramerateRatioFromLength)
  53. // 3.
  54. fffAligner := NewFFTAligner(DefaultMaxOffsetSeconds, SampleRate)
  55. for _, framerateRatio := range framerateRatios {
  56. /*
  57. ffsubsync 的 pipeline 有这三个步骤
  58. 1. parse 解析字幕
  59. 2. scale 根据帧数比率调整时间轴
  60. 3. speech_extract 从字幕转换为 VAD 的语音检测信息
  61. */
  62. // 外部传入
  63. // 1. parse 解析字幕
  64. tmpInfoSrc := clone.Clone(infoSrc).(*subparser.FileInfo)
  65. // 2. scale 根据帧数比率调整时间轴
  66. err = tmpInfoSrc.ChangeDialoguesTimeByFramerateRatio(framerateRatio)
  67. if err != nil {
  68. // 还原
  69. println("ChangeDialoguesTimeByFramerateRatio", err)
  70. tmpInfoSrc = clone.Clone(infoSrc).(*subparser.FileInfo)
  71. }
  72. // 3. speech_extract 从字幕转换为 VAD 的语音检测信息
  73. tmpSrcInfoUnit, err := sub_helper.GetVADInfoFeatureFromSubNew(tmpInfoSrc, 0)
  74. if err != nil {
  75. return err
  76. }
  77. // 不是用 GSS
  78. bestOffset, score := fffAligner.Fit(baseUnitNew.GetVADFloatSlice(), tmpSrcInfoUnit.GetVADFloatSlice())
  79. pipeResult := PipeResult{
  80. Score: score,
  81. BestOffset: bestOffset,
  82. ScaleFactor: framerateRatio,
  83. }
  84. pipeResults = append(pipeResults, pipeResult)
  85. }
  86. if useGSS == true {
  87. // 最后一个才需要额外使用 GSS
  88. // 使用 GSS
  89. optFunc := func(framerateRatio float64, isLastIter bool) float64 {
  90. // 1. parse 解析字幕
  91. tmpInfoSrc := clone.Clone(infoSrc).(*subparser.FileInfo)
  92. // 2. scale 根据帧数比率调整时间轴
  93. err = tmpInfoSrc.ChangeDialoguesTimeByFramerateRatio(framerateRatio)
  94. if err != nil {
  95. // 还原
  96. println("ChangeDialoguesTimeByFramerateRatio", err)
  97. tmpInfoSrc = clone.Clone(infoSrc).(*subparser.FileInfo)
  98. }
  99. // 3. speech_extract 从字幕转换为 VAD 的语音检测信息
  100. tmpSrcInfoUnit, err := sub_helper.GetVADInfoFeatureFromSubNew(tmpInfoSrc, 0)
  101. if err != nil {
  102. return 0
  103. }
  104. // 然后进行 base 与 src 匹配计算,将每一次变动 framerateRatio 计算得到的 偏移值和分数进行记录
  105. bestOffset, score := fffAligner.Fit(baseUnitNew.GetVADFloatSlice(), tmpSrcInfoUnit.GetVADFloatSlice())
  106. println(fmt.Sprintf("got score %.0f (offset %d) for ratio %.3f", score, bestOffset, framerateRatio))
  107. // 放到外部的存储中
  108. if isLastIter == true {
  109. pipeResult := PipeResult{
  110. Score: score,
  111. BestOffset: bestOffset,
  112. ScaleFactor: framerateRatio,
  113. }
  114. pipeResults = append(pipeResults, pipeResult)
  115. }
  116. return -score
  117. }
  118. gss.Gss(optFunc, MinFramerateRatio, MaxFramerateRatio, 1e-4, nil)
  119. }
  120. return nil
  121. }
  122. func (p *Pipeline) getFramerateRatios2Try() []float64 {
  123. if len(p.framerateRatios) > 0 {
  124. return p.framerateRatios
  125. }
  126. p.framerateRatios = append(p.framerateRatios, 1.0)
  127. p.framerateRatios = append(p.framerateRatios, FramerateRatios...)
  128. for i := 0; i < len(FramerateRatios); i++ {
  129. p.framerateRatios = append(p.framerateRatios, 1.0/FramerateRatios[i])
  130. }
  131. return p.framerateRatios
  132. }
  // FramerateRatios holds the base frame-rate ratios tried by the pipeline
  // (24/23.976, 25/23.976 and 25/24). getFramerateRatios2Try adds 1.0 and the
  // reciprocal of each entry.
  var FramerateRatios = []float64{
  	24.0 / 23.976,
  	25.0 / 23.976,
  	25.0 / 24.0,
  }

  const (
  	// MinFramerateRatio is the lower bound handed to gss.Gss by Fit.
  	MinFramerateRatio = 0.9
  	// MaxFramerateRatio is the upper bound handed to gss.Gss by Fit.
  	MaxFramerateRatio = 1.1
  	// DefaultMaxOffsetSeconds is the first argument Fit passes to NewFFTAligner.
  	DefaultMaxOffsetSeconds = 60
  	// SampleRate is the second argument Fit passes to NewFFTAligner.
  	SampleRate = 100
  )
  // PipeResult records the outcome of matching the source subtitle against the
  // base subtitle for one candidate frame-rate ratio.
  type PipeResult struct {
  	// Score is the score reported by the FFT aligner for this candidate.
  	Score float64
  	// BestOffset is the offset reported by the FFT aligner for this candidate.
  	BestOffset int
  	// ScaleFactor is the frame-rate ratio that was evaluated.
  	ScaleFactor float64
  }