123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121 |
- package sub_timeline_fixer
- import (
- "github.com/emirpasic/gods/maps/treemap"
- "github.com/emirpasic/gods/utils"
- "gonum.org/v1/gonum/cmplxs"
- "gonum.org/v1/gonum/dsp/fourier"
- "gonum.org/v1/gonum/floats"
- "math"
- )
- /*
- 复现 https://github.com/smacke/ffsubsync 的 FFTAligner 算法
- */
- type FFTAligner struct {
- maxOffsetSamples int
- }
- func NewFFTAligner(maxOffsetSeconds, sampleRate int) *FFTAligner {
- maxOffsetSamples := maxOffsetSeconds * sampleRate
- if maxOffsetSamples < 0 {
- maxOffsetSamples = -maxOffsetSamples
- }
- return &FFTAligner{
- maxOffsetSamples: maxOffsetSamples,
- }
- }
- // Fit 给出最佳的偏移,还需要根据实际情况进行转换(比如,1 步 是 10 ms),输入的数组只能是 1 -1 这样的值,需要在外部做好归一化
- func (f FFTAligner) Fit(refFloats, subFloats []float64) (int, float64) {
- convolve := f.fit(refFloats, subFloats)
- return f.computeArgmax(f.eliminateExtremeOffsetsFromSolutions(convolve, subFloats), subFloats)
- }
- // fit 返回 convolve
- func (f FFTAligner) fit(refFloats, subFloats []float64) []float64 {
- // 计算出一维矩阵的长度
- totalBits := math.Log2(float64(len(refFloats)) + float64(len(subFloats)))
- totalLength := int(math.Pow(2, math.Ceil(totalBits)))
- // 需要补零的个数
- extraZeros := totalLength - len(refFloats) - len(subFloats)
- // 2 的倍数长度
- power2Len := extraZeros + len(refFloats) + len(subFloats)
- // ----------------------------------------------------------
- // 对于 sub 需要在前面补零
- power2Sub := make([]float64, power2Len)
- fillUpZeroLen4Sub := power2Len - len(subFloats)
- for i := 0; i < fillUpZeroLen4Sub; i++ {
- power2Sub[i] = 0
- }
- for i := 0; i < len(subFloats); i++ {
- power2Sub[fillUpZeroLen4Sub+i] = subFloats[i]
- }
- // 可选择的 FFT 实现 "github.com/brettbuddin/fourier"
- //subFT := fourier.Forward()
- // 先初始化一个 fft 共用实例
- fftIns := fourier.NewFFT(len(power2Sub))
- fftIns.Reset(len(power2Sub))
- subFT := fftIns.Coefficients(nil, power2Sub)
- // ----------------------------------------------------------
- // 对于 ref 需要在后面补零
- power2Ref := make([]float64, power2Len)
- for i := 0; i < len(refFloats); i++ {
- power2Ref[i] = refFloats[i]
- }
- for i := 0; i < power2Len-len(refFloats); i++ {
- power2Ref[len(refFloats)+i] = 0
- }
- // 反转 power2Ref 0, 1,1,0,0 -> 0,0,1,1,0
- for i, j := 0, len(power2Ref)-1; i < j; i, j = i+1, j-1 {
- power2Ref[i], power2Ref[j] = power2Ref[j], power2Ref[i]
- }
- fftIns.Reset(len(power2Ref))
- refFT := fftIns.Coefficients(nil, power2Ref)
- // ----------------------------------------------------------
- // 先计算 subFT * refFT,结果放置在 refFT
- cmplxs.Mul(refFT, subFT)
- // 然后执行 numpy 的 ifft 操作
- convolve := fftIns.Sequence(nil, refFT)
- floats.Scale(1/float64(len(power2Ref)), convolve)
- return convolve
- }
- func (f FFTAligner) eliminateExtremeOffsetsFromSolutions(convolve, subSting []float64) []float64 {
- if f.maxOffsetSamples == 0 {
- return convolve
- }
- convolveCopy := convolve
- offsetFun := func(offset int) int {
- return len(convolveCopy) - 1 + offset - len(subSting)
- }
- s1 := offsetFun(-f.maxOffsetSamples)
- s2 := offsetFun(f.maxOffsetSamples)
- for i := 0; i < s1; i++ {
- convolveCopy[i] = math.NaN()
- }
- for i := s2; i < len(convolveCopy); i++ {
- convolveCopy[i] = math.NaN()
- }
- return convolveCopy
- }
- // computeArgmax 找对最优偏移,还需要根据实际情况进行转换(比如,1 步 是 10 ms)
- func (f FFTAligner) computeArgmax(convolve, subFloats []float64) (int, float64) {
- convolveTM := treemap.NewWith(utils.Float64Comparator)
- for i, value := range convolve {
- convolveTM.Put(value, i)
- }
- bestScore, bestIndex := convolveTM.Max()
- bestOffset := len(convolve) - 1 - bestIndex.(int) - len(subFloats)
- return bestOffset, bestScore.(float64)
- }
|