vad_helper.go 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. package vad
  2. import (
  3. "bufio"
  4. "errors"
  5. "fmt"
  6. "github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
  7. webRTCVAD "github.com/baabaaox/go-webrtcvad"
  8. "io"
  9. "os"
  10. "time"
  11. )
  12. // GetVADInfoFromAudio 分析音频文件,得到 VAD 分析信息,看样子是不支持并发的,只能单线程使用
  13. func GetVADInfoFromAudio(audioInfo AudioInfo) ([]VADInfo, error) {
  14. var (
  15. frameIndex = 0
  16. frameSize = audioInfo.SampleRate / 1000 * FrameDuration
  17. frameBuffer = make([]byte, audioInfo.SampleRate/1000*FrameDuration*audioInfo.BitDepth/8)
  18. frameActive = false
  19. vadInfos = make([]VADInfo, 0)
  20. )
  21. audioFile, err := os.Open(audioInfo.FileFullPath)
  22. if err != nil {
  23. return nil, err
  24. }
  25. defer audioFile.Close()
  26. reader := bufio.NewReader(audioFile)
  27. vadInst := webRTCVAD.Create()
  28. defer webRTCVAD.Free(vadInst)
  29. err = webRTCVAD.Init(vadInst)
  30. if err != nil {
  31. return nil, err
  32. }
  33. if err != nil {
  34. return nil, err
  35. }
  36. err = webRTCVAD.SetMode(vadInst, Mode)
  37. if err != nil {
  38. return nil, err
  39. }
  40. if ok := webRTCVAD.ValidRateAndFrameLength(audioInfo.SampleRate, frameSize); !ok {
  41. return nil, errors.New(fmt.Sprintf("invalid rate or frame length, %v", audioInfo.FileFullPath))
  42. }
  43. var offset int
  44. report := func() {
  45. t := time.Duration(offset) * time.Second / time.Duration(audioInfo.SampleRate) / 2
  46. //log.Printf("Frame: %v, offset: %v, Active: %v, t = %v", frameIndex, offset, frameActive, t)
  47. vadInfos = append(vadInfos, VADInfo{
  48. Frame: frameIndex,
  49. Offset: offset,
  50. Active: frameActive,
  51. Time: t,
  52. })
  53. }
  54. for {
  55. _, err = io.ReadFull(reader, frameBuffer)
  56. if err == io.EOF || err == io.ErrUnexpectedEOF {
  57. break
  58. }
  59. tmpFrameActive, err := webRTCVAD.Process(vadInst, audioInfo.SampleRate, frameBuffer, frameSize)
  60. if err != nil {
  61. return nil, err
  62. }
  63. if tmpFrameActive != frameActive || offset == 0 {
  64. frameActive = tmpFrameActive
  65. }
  66. report()
  67. offset += len(frameBuffer)
  68. frameIndex++
  69. }
  70. report()
  71. return vadInfos, nil
  72. }
  73. // GetVADInfoFromSubtitle 分析字幕文件(暂时考虑的是外置的字幕),得到 VAD 分析信息,看样子是不支持并发的,只能单线程使用
  74. func GetVADInfoFromSubtitle(subFileInfo *subparser.FileInfo) ([]VADInfo, error) {
  75. var vadInfos = make([]VADInfo, 0)
  76. // 考虑的是外置字幕,所以就应该是有中文的
  77. for _, oneDialogueEx := range subFileInfo.DialoguesEx {
  78. if oneDialogueEx.ChLine == "" {
  79. continue
  80. }
  81. }
  82. return vadInfos, nil
  83. }