fft_aligner.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. package sub_timeline_fixer
  2. import (
  3. "github.com/emirpasic/gods/maps/treemap"
  4. "github.com/emirpasic/gods/utils"
  5. "gonum.org/v1/gonum/cmplxs"
  6. "gonum.org/v1/gonum/dsp/fourier"
  7. "gonum.org/v1/gonum/floats"
  8. "math"
  9. )
  10. /*
  11. 复现 https://github.com/smacke/ffsubsync 的 FFTAligner 算法
  12. */
  13. type FFTAligner struct {
  14. maxOffsetSamples int
  15. }
  16. func NewFFTAligner(maxOffsetSeconds, sampleRate int) *FFTAligner {
  17. maxOffsetSamples := maxOffsetSeconds * sampleRate
  18. if maxOffsetSamples < 0 {
  19. maxOffsetSamples = -maxOffsetSamples
  20. }
  21. return &FFTAligner{
  22. maxOffsetSamples: maxOffsetSamples,
  23. }
  24. }
  25. // Fit 给出最佳的偏移,还需要根据实际情况进行转换(比如,1 步 是 10 ms),输入的数组只能是 1 -1 这样的值,需要在外部做好归一化
  26. func (f FFTAligner) Fit(refFloats, subFloats []float64) (int, float64) {
  27. convolve := f.fit(refFloats, subFloats)
  28. return f.computeArgmax(f.eliminateExtremeOffsetsFromSolutions(convolve, subFloats), subFloats)
  29. }
  30. // fit 返回 convolve
  31. func (f FFTAligner) fit(refFloats, subFloats []float64) []float64 {
  32. // 计算出一维矩阵的长度
  33. totalBits := math.Log2(float64(len(refFloats)) + float64(len(subFloats)))
  34. totalLength := int(math.Pow(2, math.Ceil(totalBits)))
  35. // 需要补零的个数
  36. extraZeros := totalLength - len(refFloats) - len(subFloats)
  37. // 2 的倍数长度
  38. power2Len := extraZeros + len(refFloats) + len(subFloats)
  39. // ----------------------------------------------------------
  40. // 对于 sub 需要在前面补零
  41. power2Sub := make([]float64, power2Len)
  42. fillUpZeroLen4Sub := power2Len - len(subFloats)
  43. for i := 0; i < fillUpZeroLen4Sub; i++ {
  44. power2Sub[i] = 0
  45. }
  46. for i := 0; i < len(subFloats); i++ {
  47. power2Sub[fillUpZeroLen4Sub+i] = subFloats[i]
  48. }
  49. // 可选择的 FFT 实现 "github.com/brettbuddin/fourier"
  50. //subFT := fourier.Forward()
  51. // 先初始化一个 fft 共用实例
  52. fftIns := fourier.NewFFT(len(power2Sub))
  53. fftIns.Reset(len(power2Sub))
  54. subFT := fftIns.Coefficients(nil, power2Sub)
  55. // ----------------------------------------------------------
  56. // 对于 ref 需要在后面补零
  57. power2Ref := make([]float64, power2Len)
  58. for i := 0; i < len(refFloats); i++ {
  59. power2Ref[i] = refFloats[i]
  60. }
  61. for i := 0; i < power2Len-len(refFloats); i++ {
  62. power2Ref[len(refFloats)+i] = 0
  63. }
  64. // 反转 power2Ref 0, 1,1,0,0 -> 0,0,1,1,0
  65. for i, j := 0, len(power2Ref)-1; i < j; i, j = i+1, j-1 {
  66. power2Ref[i], power2Ref[j] = power2Ref[j], power2Ref[i]
  67. }
  68. fftIns.Reset(len(power2Ref))
  69. refFT := fftIns.Coefficients(nil, power2Ref)
  70. // ----------------------------------------------------------
  71. // 先计算 subFT * refFT,结果放置在 refFT
  72. cmplxs.Mul(refFT, subFT)
  73. // 然后执行 numpy 的 ifft 操作
  74. convolve := fftIns.Sequence(nil, refFT)
  75. floats.Scale(1/float64(len(power2Ref)), convolve)
  76. return convolve
  77. }
  78. func (f FFTAligner) eliminateExtremeOffsetsFromSolutions(convolve, subSting []float64) []float64 {
  79. if f.maxOffsetSamples == 0 {
  80. return convolve
  81. }
  82. convolveCopy := convolve
  83. offsetFun := func(offset int) int {
  84. return len(convolveCopy) - 1 + offset - len(subSting)
  85. }
  86. s1 := offsetFun(-f.maxOffsetSamples)
  87. s2 := offsetFun(f.maxOffsetSamples)
  88. for i := 0; i < s1; i++ {
  89. convolveCopy[i] = math.NaN()
  90. }
  91. for i := s2; i < len(convolveCopy); i++ {
  92. convolveCopy[i] = math.NaN()
  93. }
  94. return convolveCopy
  95. }
  96. // computeArgmax 找对最优偏移,还需要根据实际情况进行转换(比如,1 步 是 10 ms)
  97. func (f FFTAligner) computeArgmax(convolve, subFloats []float64) (int, float64) {
  98. convolveTM := treemap.NewWith(utils.Float64Comparator)
  99. for i, value := range convolve {
  100. convolveTM.Put(value, i)
  101. }
  102. bestScore, bestIndex := convolveTM.Max()
  103. bestOffset := len(convolve) - 1 - bestIndex.(int) - len(subFloats)
  104. return bestOffset, bestScore.(float64)
  105. }