// vad_helper.go 2.6 KB

  1. package vad
  2. import (
  3. "bufio"
  4. "errors"
  5. "fmt"
  6. webRTCVAD "github.com/baabaaox/go-webrtcvad"
  7. "io"
  8. "os"
  9. "time"
  10. )
  11. // GetVADInfoFromAudio 分析音频文件,得到 VAD 分析信息,看样子是不支持并发的,只能单线程使用
  12. // 无需使用插值的函数
  13. func GetVADInfoFromAudio(audioInfo AudioInfo, insert bool) ([]VADInfo, error) {
  14. var (
  15. frameIndex = 0
  16. frameSize = audioInfo.SampleRate / 1000 * FrameDuration
  17. frameBuffer = make([]byte, audioInfo.SampleRate/1000*FrameDuration*audioInfo.BitDepth/8)
  18. frameActive = false
  19. vadInfos = make([]VADInfo, 0)
  20. )
  21. audioFile, err := os.Open(audioInfo.FileFullPath)
  22. if err != nil {
  23. return nil, err
  24. }
  25. defer audioFile.Close()
  26. reader := bufio.NewReader(audioFile)
  27. vadInst := webRTCVAD.Create()
  28. defer webRTCVAD.Free(vadInst)
  29. err = webRTCVAD.Init(vadInst)
  30. if err != nil {
  31. return nil, err
  32. }
  33. if err != nil {
  34. return nil, err
  35. }
  36. err = webRTCVAD.SetMode(vadInst, Mode)
  37. if err != nil {
  38. return nil, err
  39. }
  40. if ok := webRTCVAD.ValidRateAndFrameLength(audioInfo.SampleRate, frameSize); !ok {
  41. return nil, errors.New(fmt.Sprintf("invalid rate or frame length, %v", audioInfo.FileFullPath))
  42. }
  43. var offset int
  44. report := func() {
  45. t := time.Duration(offset) * time.Second / time.Duration(audioInfo.SampleRate) / 2
  46. //log.Printf("Frame: %v, offset: %v, Active: %v, t = %v", frameIndex, offset, frameActive, t)
  47. vadInfos = append(vadInfos, *NewVADInfo(
  48. frameIndex,
  49. offset,
  50. frameActive,
  51. t,
  52. ))
  53. }
  54. for {
  55. _, err = io.ReadFull(reader, frameBuffer)
  56. if err == io.EOF || err == io.ErrUnexpectedEOF {
  57. break
  58. }
  59. tmpFrameActive, err := webRTCVAD.Process(vadInst, audioInfo.SampleRate, frameBuffer, frameSize)
  60. if err != nil {
  61. return nil, err
  62. }
  63. if tmpFrameActive != frameActive || offset == 0 {
  64. frameActive = tmpFrameActive
  65. if insert == false {
  66. report()
  67. }
  68. }
  69. if insert == true {
  70. report()
  71. }
  72. offset += len(frameBuffer)
  73. frameIndex++
  74. }
  75. report()
  76. return vadInfos, nil
  77. }
  78. // GetFloatSlice 返回 1 -1 归一化的数组
  79. func GetFloatSlice(inVADs []VADInfo) []float64 {
  80. outVADFloats := make([]float64, len(inVADs))
  81. for i, vad := range inVADs {
  82. if vad.Active == true {
  83. outVADFloats[i] = 1
  84. } else {
  85. outVADFloats[i] = -1
  86. }
  87. }
  88. return outVADFloats
  89. }
  90. // GetAudioIndex2Time 从 Audio 的 OffsetIndex 推算出它所在的时间,返回 float64 的秒
  91. func GetAudioIndex2Time(index int) float64 {
  92. return float64(index*FrameDuration) / 1000.0
  93. }
  // VAD configuration constants.
  const (
  	// FrameDuration is the frame duration, i.e. the time window analysed per frame.
  	FrameDuration = 10
  	// Mode is the VAD mode; valid modes are 0-3.
  	Mode = 2
  )