vad_info.go 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. package vad
  2. import (
  3. "bufio"
  4. "errors"
  5. "fmt"
  6. webRTCVAD "github.com/baabaaox/go-webrtcvad"
  7. "io"
  8. "os"
  9. "time"
  10. )
  11. // GetVADInfo 分析音频文件,得到 VAD 分析信息,看样子是不支持并发的,只能单线程使用
  12. func GetVADInfo(audioInfo AudioInfo) ([]VADInfo, error) {
  13. var (
  14. frameIndex = 0
  15. frameSize = audioInfo.SampleRate / 1000 * FrameDuration
  16. frameBuffer = make([]byte, audioInfo.SampleRate/1000*FrameDuration*audioInfo.BitDepth/8)
  17. frameActive = false
  18. vadInfos = make([]VADInfo, 0)
  19. )
  20. audioFile, err := os.Open(audioInfo.FileFullPath)
  21. if err != nil {
  22. return nil, err
  23. }
  24. defer audioFile.Close()
  25. reader := bufio.NewReader(audioFile)
  26. vadInst := webRTCVAD.Create()
  27. defer webRTCVAD.Free(vadInst)
  28. err = webRTCVAD.Init(vadInst)
  29. if err != nil {
  30. return nil, err
  31. }
  32. if err != nil {
  33. return nil, err
  34. }
  35. err = webRTCVAD.SetMode(vadInst, Mode)
  36. if err != nil {
  37. return nil, err
  38. }
  39. if ok := webRTCVAD.ValidRateAndFrameLength(audioInfo.SampleRate, frameSize); !ok {
  40. return nil, errors.New(fmt.Sprintf("invalid rate or frame length, %v", audioInfo.FileFullPath))
  41. }
  42. var offset int
  43. report := func() {
  44. t := time.Duration(offset) * time.Second / time.Duration(audioInfo.SampleRate) / 2
  45. //log.Printf("Frame: %v, offset: %v, Active: %v, t = %v", frameIndex, offset, frameActive, t)
  46. vadInfos = append(vadInfos, VADInfo{
  47. Active: frameActive,
  48. Time: t,
  49. })
  50. }
  51. for {
  52. _, err = io.ReadFull(reader, frameBuffer)
  53. if err == io.EOF || err == io.ErrUnexpectedEOF {
  54. break
  55. }
  56. tmpFrameActive, err := webRTCVAD.Process(vadInst, audioInfo.SampleRate, frameBuffer, frameSize)
  57. if err != nil {
  58. return nil, err
  59. }
  60. if tmpFrameActive != frameActive || offset == 0 {
  61. frameActive = tmpFrameActive
  62. report()
  63. }
  64. offset += len(frameBuffer)
  65. frameIndex++
  66. }
  67. report()
  68. return vadInfos, nil
  69. }
  70. type VADInfo struct {
  71. Frame int // 第几帧
  72. Offset int // 音频的偏移
  73. Active bool // 当前帧(时间窗口)是否检测到语音
  74. Time time.Duration // 时间点
  75. }
  76. const (
  77. // Mode vad mode,VAD 的模式
  78. Mode = 2
  79. // FrameDuration frame duration,分析的时间窗口
  80. FrameDuration = 10
  81. )