vad_helper.go 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. package vad
  2. import (
  3. "bufio"
  4. "errors"
  5. "fmt"
  6. webRTCVAD "github.com/baabaaox/go-webrtcvad"
  7. "io"
  8. "os"
  9. "time"
  10. )
  11. // GetVADInfoFromAudio 分析音频文件,得到 VAD 分析信息,看样子是不支持并发的,只能单线程使用
  12. // 无需使用插值的函数
  13. func GetVADInfoFromAudio(audioInfo AudioInfo, insert bool) ([]VADInfo, error) {
  14. var (
  15. frameIndex = 0
  16. frameSize = audioInfo.SampleRate / 1000 * FrameDuration
  17. frameBuffer = make([]byte, audioInfo.SampleRate/1000*FrameDuration*audioInfo.BitDepth/8)
  18. frameActive = false
  19. vadInfos = make([]VADInfo, 0)
  20. )
  21. audioFile, err := os.Open(audioInfo.FileFullPath)
  22. if err != nil {
  23. return nil, err
  24. }
  25. defer audioFile.Close()
  26. reader := bufio.NewReader(audioFile)
  27. vadInst := webRTCVAD.Create()
  28. defer webRTCVAD.Free(vadInst)
  29. err = webRTCVAD.Init(vadInst)
  30. if err != nil {
  31. return nil, err
  32. }
  33. if err != nil {
  34. return nil, err
  35. }
  36. err = webRTCVAD.SetMode(vadInst, Mode)
  37. if err != nil {
  38. return nil, err
  39. }
  40. if ok := webRTCVAD.ValidRateAndFrameLength(audioInfo.SampleRate, frameSize); !ok {
  41. return nil, errors.New(fmt.Sprintf("invalid rate or frame length, %v", audioInfo.FileFullPath))
  42. }
  43. var offset int
  44. report := func() {
  45. t := time.Duration(offset) * time.Second / time.Duration(audioInfo.SampleRate) / 2
  46. //log.Printf("Frame: %v, offset: %v, Active: %v, t = %v", frameIndex, offset, frameActive, t)
  47. vadInfos = append(vadInfos, *NewVADInfo(
  48. frameIndex,
  49. offset,
  50. frameActive,
  51. t,
  52. ))
  53. }
  54. for {
  55. _, err = io.ReadFull(reader, frameBuffer)
  56. if err == io.EOF || err == io.ErrUnexpectedEOF {
  57. break
  58. }
  59. tmpFrameActive, err := webRTCVAD.Process(vadInst, audioInfo.SampleRate, frameBuffer, frameSize)
  60. if err != nil {
  61. return nil, err
  62. }
  63. if tmpFrameActive != frameActive || offset == 0 {
  64. frameActive = tmpFrameActive
  65. if insert == false {
  66. report()
  67. }
  68. }
  69. if insert == true {
  70. report()
  71. }
  72. offset += len(frameBuffer)
  73. frameIndex++
  74. }
  75. report()
  76. return vadInfos, nil
  77. }
  78. // GetFloatSlice 返回 1 -1 归一化的数组
  79. func GetFloatSlice(inVADs []VADInfo, skipFrontAndEndPerBase float64) []float64 {
  80. skipLen := int(float64(len(inVADs)) * skipFrontAndEndPerBase)
  81. skipStartIndex := skipLen
  82. skipEndIndex := len(inVADs) - skipLen
  83. if skipStartIndex <= 0 {
  84. skipStartIndex = 0
  85. skipEndIndex = len(inVADs)
  86. }
  87. outVADFloats := make([]float64, len(inVADs))
  88. for i, vad := range inVADs {
  89. if vad.Active == true {
  90. outVADFloats[i] = 1
  91. } else {
  92. outVADFloats[i] = -1
  93. }
  94. }
  95. return outVADFloats[skipStartIndex:skipEndIndex]
  96. }
  // WriteVADSlice2File creates (or truncates) the file at fileFullPath and writes
// every value of vadSliceFloat to it, one value per line, formatted with %v.
//
// Output goes through a buffered writer so that each value does not cost its
// own write call. Errors from creating, writing, flushing or closing the file
// are returned.
func WriteVADSlice2File(vadSliceFloat []float64, fileFullPath string) error {
	f, err := os.Create(fileFullPath)
	if err != nil {
		return err
	}

	w := bufio.NewWriter(f)
	for _, v := range vadSliceFloat {
		if _, err = fmt.Fprintf(w, "%v\n", v); err != nil {
			// The write error is the one worth reporting; the close error is
			// deliberately dropped.
			_ = f.Close()
			return err
		}
	}
	if err = w.Flush(); err != nil {
		_ = f.Close()
		return err
	}

	// Close can surface a deferred write failure, so its error is returned.
	return f.Close()
}
  111. // GetAudioIndex2Time 从 Audio 的 OffsetIndex 推算出它所在的时间,返回 float64 的秒
  112. func GetAudioIndex2Time(index int) float64 {
  113. return float64(index*FrameDuration) / 1000.0
  114. }
  // Tuning constants for voice-activity detection.
const (
	// FrameDuration is the length of the analysis window, in milliseconds.
	FrameDuration = 10
	// Mode is the VAD mode handed to the detector; valid values are 0-3.
	Mode = 3
)