fixer.go 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572
  1. package sub_timeline_fixer
  2. import (
  3. "errors"
  4. "fmt"
  5. "github.com/allanpk716/ChineseSubFinder/internal/pkg/ffmpeg_helper"
  6. "github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
  7. "github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
  8. "github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_helper"
  9. "github.com/allanpk716/ChineseSubFinder/internal/pkg/vad"
  10. "github.com/allanpk716/ChineseSubFinder/internal/types/sub_timeline_fiexer"
  11. "github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
  12. "github.com/brettbuddin/fourier"
  13. "github.com/go-echarts/go-echarts/v2/opts"
  14. "github.com/grd/stat"
  15. "github.com/james-bowman/nlp/measures/pairwise"
  16. "github.com/mndrix/tukey"
  17. "gonum.org/v1/gonum/mat"
  18. "os"
  19. "path/filepath"
  20. "strings"
  21. "time"
  22. )
  23. type SubTimelineFixer struct {
  24. fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig
  25. ffmpegHelper *ffmpeg_helper.FFMPEGHelper
  26. }
  27. func NewSubTimelineFixer(fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig) *SubTimelineFixer {
  28. return &SubTimelineFixer{
  29. fixerConfig: fixerConfig,
  30. ffmpegHelper: ffmpeg_helper.NewFFMPEGHelper(),
  31. }
  32. }
  33. // StopWordCounter 停止词统计
  34. func (s *SubTimelineFixer) StopWordCounter(inString string, per int) []string {
  35. statisticTimes := make(map[string]int)
  36. wordsLength := strings.Fields(inString)
  37. for counts, word := range wordsLength {
  38. // 判断key是否存在,这个word是字符串,这个counts是统计的word的次数。
  39. word, ok := statisticTimes[word]
  40. if ok {
  41. word = word
  42. statisticTimes[wordsLength[counts]] = statisticTimes[wordsLength[counts]] + 1
  43. } else {
  44. statisticTimes[wordsLength[counts]] = 1
  45. }
  46. }
  47. stopWords := make([]string, 0)
  48. mapByValue := sortMapByValue(statisticTimes)
  49. breakIndex := len(mapByValue) * per / 100
  50. for index, wordInfo := range mapByValue {
  51. if index > breakIndex {
  52. break
  53. }
  54. stopWords = append(stopWords, wordInfo.Name)
  55. }
  56. return stopWords
  57. }
  58. // FixSubTimeline 校正时间轴
  59. func (s *SubTimelineFixer) FixSubTimeline(infoSrc *subparser.FileInfo, inOffsetTime float64, desSaveSubFileFullPath string) (string, error) {
  60. /*
  61. 从解析的实例中,正常来说是可以匹配出所有的 Dialogue 对话的 Start 和 End time 的信息
  62. 然后找到对应的字幕的文件,进行文件内容的替换来做时间轴的校正
  63. */
  64. // 偏移时间
  65. offsetTime := time.Duration(inOffsetTime*1000) * time.Millisecond
  66. timeFormat := infoSrc.GetTimeFormat()
  67. fixContent := infoSrc.Content
  68. for _, srcOneDialogue := range infoSrc.Dialogues {
  69. timeStart, err := time.Parse(timeFormat, srcOneDialogue.StartTime)
  70. if err != nil {
  71. return "", err
  72. }
  73. timeEnd, err := time.Parse(timeFormat, srcOneDialogue.EndTime)
  74. if err != nil {
  75. return "", err
  76. }
  77. fixTimeStart := timeStart.Add(offsetTime)
  78. fixTimeEnd := timeEnd.Add(offsetTime)
  79. fixContent = strings.ReplaceAll(fixContent, srcOneDialogue.StartTime, fixTimeStart.Format(timeFormat))
  80. fixContent = strings.ReplaceAll(fixContent, srcOneDialogue.EndTime, fixTimeEnd.Format(timeFormat))
  81. }
  82. dstFile, err := os.Create(desSaveSubFileFullPath)
  83. if err != nil {
  84. return "", err
  85. }
  86. defer func() {
  87. _ = dstFile.Close()
  88. }()
  89. _, err = dstFile.WriteString(fixContent)
  90. if err != nil {
  91. return "", err
  92. }
  93. return fixContent, nil
  94. }
  95. /*
  96. 对于 V1 版本的字幕时间轴校正来说,是有特殊的前置要求的
  97. 1. 视频要有英文字幕
  98. 2. 外置的字幕必须是中文的双语字幕(简英、繁英)
  99. */
  100. // GetOffsetTimeV1 暂时只支持英文的基准字幕,源字幕必须是双语中英字幕
  101. func (s *SubTimelineFixer) GetOffsetTimeV1(infoBase, infoSrc *subparser.FileInfo, staticLineFileSavePath string, debugInfoFileSavePath string) (bool, float64, float64, error) {
  102. var debugInfos = make([]string, 0)
  103. // 构建基准语料库,目前阶段只需要考虑是 En 的就行了
  104. var baseCorpus = make([]string, 0)
  105. var baseDialogueFilterMap = make(map[int]int, 0)
  106. /*
  107. 这里原来的写法是所有的 base 的都放进去匹配,这样会带来一些不必要的对白
  108. 需要剔除空白。那么就需要建立一个转换的字典
  109. */
  110. for index, oneDialogueEx := range infoBase.DialoguesEx {
  111. if oneDialogueEx.EnLine == "" {
  112. continue
  113. }
  114. baseCorpus = append(baseCorpus, oneDialogueEx.EnLine)
  115. baseDialogueFilterMap[len(baseCorpus)-1] = index
  116. }
  117. // 初始化
  118. pipLine, tfidf, err := NewTFIDF(baseCorpus)
  119. if err != nil {
  120. return false, 0, 0, err
  121. }
  122. /*
  123. 确认两个字幕间的偏移,暂定的方案是两边都连续匹配上 5 个索引,再抽取一个对话的时间进行修正计算
  124. */
  125. maxCompareDialogue := s.fixerConfig.MaxCompareDialogue
  126. // 基线的长度
  127. _, docsLength := tfidf.Dims()
  128. var matchIndexList = make([]MatchIndex, 0)
  129. sc := NewSubCompare(maxCompareDialogue)
  130. // 开始比较相似度,默认认为是 Ch_en 就行了
  131. for srcIndex := 0; srcIndex < len(infoSrc.DialoguesEx); {
  132. srcOneDialogueEx := infoSrc.DialoguesEx[srcIndex]
  133. // 这里只考虑 英文 的语言
  134. if srcOneDialogueEx.EnLine == "" {
  135. srcIndex++
  136. continue
  137. }
  138. // run the query through the same pipeline that was fitted to the corpus and
  139. // to project it into the same dimensional space
  140. queryVector, err := pipLine.Transform(srcOneDialogueEx.EnLine)
  141. if err != nil {
  142. return false, 0, 0, err
  143. }
  144. // iterate over document feature vectors (columns) in the LSI matrix and compare
  145. // with the query vector for similarity. Similarity is determined by the difference
  146. // between the angles of the vectors known as the cosine similarity
  147. highestSimilarity := -1.0
  148. // 匹配上的基准的索引
  149. var baseIndex int
  150. // 这里理论上需要把所有的基线遍历一次,但是,一般来说,两个字幕不可能差距在 50 行
  151. // 这样的好处是有助于提高搜索的性能
  152. // 那么就以当前的 src 的位置,向前、向后各 50 来遍历
  153. nowMaxScanLength := srcIndex + 50
  154. nowMinScanLength := srcIndex - 50
  155. if nowMinScanLength < 0 {
  156. nowMinScanLength = 0
  157. }
  158. if nowMaxScanLength > docsLength {
  159. nowMaxScanLength = docsLength
  160. }
  161. for i := nowMinScanLength; i < nowMaxScanLength; i++ {
  162. similarity := pairwise.CosineSimilarity(queryVector.(mat.ColViewer).ColView(0), tfidf.(mat.ColViewer).ColView(i))
  163. if similarity > highestSimilarity {
  164. baseIndex = i
  165. highestSimilarity = similarity
  166. }
  167. }
  168. startBaseIndex, startSrcIndex := sc.GetStartIndex()
  169. if sc.Add(baseIndex, srcIndex) == false {
  170. sc.Clear()
  171. srcIndex = startSrcIndex + 1
  172. continue
  173. //sc.Add(baseIndex, srcIndex)
  174. }
  175. if sc.Check() == false {
  176. srcIndex++
  177. continue
  178. } else {
  179. sc.Clear()
  180. }
  181. matchIndexList = append(matchIndexList, MatchIndex{
  182. BaseNowIndex: startBaseIndex,
  183. //BaseNowIndex: baseDialogueFilterMap[startBaseIndex],
  184. SrcNowIndex: startSrcIndex,
  185. Similarity: highestSimilarity,
  186. })
  187. //println(fmt.Sprintf("Similarity: %f Base[%d] %s-%s '%s' <--> Src[%d] %s-%s '%s'",
  188. // highestSimilarity,
  189. // baseIndex, infoBase.DialoguesEx[baseIndex].relativelyStartTime, infoBase.DialoguesEx[baseIndex].relativelyEndTime, baseCorpus[baseIndex],
  190. // srcIndex, srcOneDialogueEx.relativelyStartTime, srcOneDialogueEx.relativelyEndTime, srcOneDialogueEx.EnLine))
  191. srcIndex++
  192. }
  193. timeFormat := infoBase.GetTimeFormat()
  194. var startDiffTimeLineData = make([]opts.LineData, 0)
  195. var endDiffTimeLineData = make([]opts.LineData, 0)
  196. var tmpStartDiffTime = make([]float64, 0)
  197. var tmpEndDiffTime = make([]float64, 0)
  198. var startDiffTimeList = make(stat.Float64Slice, 0)
  199. var endDiffTimeList = make(stat.Float64Slice, 0)
  200. var xAxis = make([]string, 0)
  201. // 上面找出了连续匹配 maxCompareDialogue:N 次的字幕语句块
  202. // 求出平均时间偏移
  203. for mIndex, matchIndexItem := range matchIndexList {
  204. for i := 0; i < maxCompareDialogue; i++ {
  205. // 这里会统计连续的这 5 句话的时间差
  206. //tmpBaseIndex := matchIndexItem.BaseNowIndex + i
  207. tmpBaseIndex := baseDialogueFilterMap[matchIndexItem.BaseNowIndex+i]
  208. tmpSrcIndex := matchIndexItem.SrcNowIndex + i
  209. baseTimeStart, err := time.Parse(timeFormat, infoBase.DialoguesEx[tmpBaseIndex].StartTime)
  210. if err != nil {
  211. return false, 0, 0, err
  212. }
  213. baseTimeEnd, err := time.Parse(timeFormat, infoBase.DialoguesEx[tmpBaseIndex].EndTime)
  214. if err != nil {
  215. return false, 0, 0, err
  216. }
  217. srtTimeStart, err := time.Parse(timeFormat, infoSrc.DialoguesEx[tmpSrcIndex].StartTime)
  218. if err != nil {
  219. return false, 0, 0, err
  220. }
  221. srtTimeEnd, err := time.Parse(timeFormat, infoSrc.DialoguesEx[tmpSrcIndex].EndTime)
  222. if err != nil {
  223. return false, 0, 0, err
  224. }
  225. TimeDiffStart := baseTimeStart.Sub(srtTimeStart)
  226. TimeDiffEnd := baseTimeEnd.Sub(srtTimeEnd)
  227. startDiffTimeLineData = append(startDiffTimeLineData, opts.LineData{Value: TimeDiffStart.Seconds()})
  228. endDiffTimeLineData = append(endDiffTimeLineData, opts.LineData{Value: TimeDiffEnd.Seconds()})
  229. tmpStartDiffTime = append(tmpStartDiffTime, TimeDiffStart.Seconds())
  230. tmpEndDiffTime = append(tmpEndDiffTime, TimeDiffEnd.Seconds())
  231. startDiffTimeList = append(startDiffTimeList, TimeDiffStart.Seconds())
  232. endDiffTimeList = append(endDiffTimeList, TimeDiffEnd.Seconds())
  233. xAxis = append(xAxis, fmt.Sprintf("%d_%d", mIndex, i))
  234. debugInfos = append(debugInfos, "bs "+infoBase.DialoguesEx[tmpBaseIndex].StartTime+" <-> "+infoBase.DialoguesEx[tmpBaseIndex].EndTime)
  235. debugInfos = append(debugInfos, "sc "+infoSrc.DialoguesEx[tmpSrcIndex].StartTime+" <-> "+infoSrc.DialoguesEx[tmpSrcIndex].EndTime)
  236. debugInfos = append(debugInfos, "StartDiffTime: "+fmt.Sprintf("%f", TimeDiffStart.Seconds()))
  237. //println(fmt.Sprintf("Diff Start-End: %s - %s Base[%d] %s-%s '%s' <--> Src[%d] %s-%s '%s'",
  238. // TimeDiffStart, TimeDiffEnd,
  239. // tmpBaseIndex, infoBase.DialoguesEx[tmpBaseIndex].relativelyStartTime, infoBase.DialoguesEx[tmpBaseIndex].relativelyEndTime, infoBase.DialoguesEx[tmpBaseIndex].EnLine,
  240. // tmpSrcIndex, infoSrc.DialoguesEx[tmpSrcIndex].relativelyStartTime, infoSrc.DialoguesEx[tmpSrcIndex].relativelyEndTime, infoSrc.DialoguesEx[tmpSrcIndex].EnLine))
  241. }
  242. debugInfos = append(debugInfos, "---------------------------------------------")
  243. //println("---------------------------------------------")
  244. }
  245. oldMean := stat.Mean(startDiffTimeList)
  246. oldSd := stat.Sd(startDiffTimeList)
  247. newMean := -1.0
  248. newSd := -1.0
  249. per := 1.0
  250. // 如果 SD 较大的时候才需要剔除
  251. if oldSd > 0.1 {
  252. var outliersMap = make(map[float64]int, 0)
  253. outliers, _, _ := tukey.Outliers(0.3, tmpStartDiffTime)
  254. for _, outlier := range outliers {
  255. outliersMap[outlier] = 0
  256. }
  257. var newStartDiffTimeList = make([]float64, 0)
  258. for _, f := range tmpStartDiffTime {
  259. _, ok := outliersMap[f]
  260. if ok == true {
  261. continue
  262. }
  263. newStartDiffTimeList = append(newStartDiffTimeList, f)
  264. }
  265. orgLen := startDiffTimeList.Len()
  266. startDiffTimeList = make(stat.Float64Slice, 0)
  267. for _, f := range newStartDiffTimeList {
  268. startDiffTimeList = append(startDiffTimeList, f)
  269. }
  270. newLen := startDiffTimeList.Len()
  271. per = float64(newLen) / float64(orgLen)
  272. newMean = stat.Mean(startDiffTimeList)
  273. newSd = stat.Sd(startDiffTimeList)
  274. }
  275. if newMean == -1.0 {
  276. newMean = oldMean
  277. }
  278. if newSd == -1.0 {
  279. newSd = oldSd
  280. }
  281. // 不为空的时候,生成调试文件
  282. if staticLineFileSavePath != "" {
  283. //staticLineFileSavePath = "bar.html"
  284. err = SaveStaticLineV1(staticLineFileSavePath, infoBase.Name, infoSrc.Name,
  285. per, oldMean, oldSd, newMean, newSd, xAxis,
  286. startDiffTimeLineData, endDiffTimeLineData)
  287. if err != nil {
  288. return false, 0, 0, err
  289. }
  290. }
  291. // 跳过的逻辑是 mean 是 0 ,那么现在如果判断有问题,缓存的调试文件继续生成,然后强制返回 0 来跳过后续的逻辑
  292. // 这里需要考虑,找到的连续 5 句话匹配的有多少句,占比整体所有的 Dialogue 是多少,太低也需要跳过
  293. matchIndexLineCount := len(matchIndexList) * maxCompareDialogue
  294. //perMatch := float64(matchIndexLineCount) / float64(len(infoSrc.DialoguesEx))
  295. perMatch := float64(matchIndexLineCount) / float64(len(baseCorpus))
  296. if perMatch < s.fixerConfig.MinMatchedPercent {
  297. tmpContent := infoSrc.Name + fmt.Sprintf(" Sequence match %d dialogues (< %f%%), Skip,", s.fixerConfig.MaxCompareDialogue, s.fixerConfig.MinMatchedPercent*100) + fmt.Sprintf(" %f%% ", perMatch*100)
  298. debugInfos = append(debugInfos, tmpContent)
  299. log_helper.GetLogger().Infoln(tmpContent)
  300. } else {
  301. tmpContent := infoSrc.Name + fmt.Sprintf(" Sequence match %d dialogues,", s.fixerConfig.MaxCompareDialogue) + fmt.Sprintf(" %f%% ", perMatch*100)
  302. debugInfos = append(debugInfos, tmpContent)
  303. log_helper.GetLogger().Infoln(tmpContent)
  304. }
  305. // 输出调试的匹配时间轴信息的列表
  306. if debugInfoFileSavePath != "" {
  307. err = my_util.WriteStrings2File(debugInfoFileSavePath, debugInfos)
  308. if err != nil {
  309. return false, 0, 0, err
  310. }
  311. }
  312. // 虽然有条件判断是认为有问题的,但是返回值还是要填写除去的
  313. if perMatch < s.fixerConfig.MinMatchedPercent {
  314. return false, newMean, newSd, nil
  315. }
  316. return true, newMean, newSd, nil
  317. }
  318. // GetOffsetTimeV2 使用内置的字幕校正外置的字幕时间轴
  319. func (s *SubTimelineFixer) GetOffsetTimeV2(infoBase, infoSrc *subparser.FileInfo, staticLineFileSavePath string, debugInfoFileSavePath string) (bool, float64, float64, error) {
  320. srcSubUnitList, err := sub_helper.GetVADINfoFromSub(infoSrc, FrontAndEndPer, SubUnitMaxCount)
  321. if err != nil {
  322. return false, 0, 0, err
  323. }
  324. // 开始针对对白单元进行匹配
  325. for _, srcSubUnit := range srcSubUnitList {
  326. startTimeString, subLength := srcSubUnit.GetFFMPEGCutRange(ExpandTimeRange)
  327. // 导出当前的字幕文件适合与匹配的范围的临时字幕文件
  328. nowTmpSubBaseFPath, errString, err := s.ffmpegHelper.ExportSubArgsByTimeRange(infoBase.FileFullPath, "base", startTimeString, subLength)
  329. if err != nil {
  330. log_helper.GetLogger().Errorln("ExportSubArgsByTimeRange base", errString, err)
  331. return false, 0, 0, err
  332. }
  333. // 导出当前的字幕文件适合与匹配的范围的临时字幕文件
  334. startTimeString, subLength = srcSubUnit.GetFFMPEGCutRange(0)
  335. nowTmpSubSrcFPath, errString, err := s.ffmpegHelper.ExportSubArgsByTimeRange(infoSrc.FileFullPath, "src", startTimeString, subLength)
  336. if err != nil {
  337. log_helper.GetLogger().Errorln("ExportSubArgsByTimeRange src", errString, err)
  338. return false, 0, 0, err
  339. }
  340. bok, nowTmpSubBaseFileInfo, err := s.ffmpegHelper.SubParserHub.DetermineFileTypeFromFile(nowTmpSubBaseFPath)
  341. if err != nil {
  342. return false, 0, 0, err
  343. }
  344. if bok == false {
  345. return false, 0, 0, errors.New("DetermineFileTypeFromFile == false")
  346. }
  347. nowTmpBaseSubUnitList, err := sub_helper.GetVADINfoFromSub(nowTmpSubBaseFileInfo, 0, 10000)
  348. if err != nil {
  349. return false, 0, 0, err
  350. }
  351. nowTmpBaseSubVADList := nowTmpBaseSubUnitList[0]
  352. var nowBaseSubTimeLineData = make([]opts.LineData, 0)
  353. var nowBaseSubXAxis = make([]string, 0)
  354. var nowSrcSubTimeLineData = make([]opts.LineData, 0)
  355. var nowSrcSubXAxis = make([]string, 0)
  356. outDir := filepath.Dir(nowTmpSubBaseFPath)
  357. outBaseName := filepath.Base(nowTmpSubBaseFPath)
  358. outSrcName := filepath.Base(nowTmpSubSrcFPath)
  359. outBaseNameWithOutExt := strings.ReplaceAll(outBaseName, filepath.Ext(outBaseName), "")
  360. outSrcNameWithOutExt := strings.ReplaceAll(outSrcName, filepath.Ext(outSrcName), "")
  361. srcSubVADStaticLineFullPath := filepath.Join(outDir, outSrcNameWithOutExt+"_sub_src.html")
  362. baseSubVADStaticLineFullPath := filepath.Join(outDir, outBaseNameWithOutExt+"_sub_base.html")
  363. // src
  364. for _, vadInfo := range srcSubUnit.VADList {
  365. nowSrcSubTimeLineData = append(nowSrcSubTimeLineData, opts.LineData{Value: vadInfo.Active})
  366. baseTime := srcSubUnit.GetOffsetTimeNumber()
  367. nowVADInfoTimeNumber := vadInfo.Time.Seconds()
  368. //println(fmt.Sprintf("%d - %f", index, nowVADInfoTimeNumber-baseTime))
  369. nowOffsetTime := nowVADInfoTimeNumber - baseTime
  370. nowSrcSubXAxis = append(nowSrcSubXAxis, fmt.Sprintf("%f", nowOffsetTime))
  371. }
  372. err = SaveStaticLineV2("Sub src", srcSubVADStaticLineFullPath, nowSrcSubXAxis, nowSrcSubTimeLineData)
  373. if err != nil {
  374. return false, 0, 0, err
  375. }
  376. // base
  377. for _, vadInfo := range nowTmpBaseSubVADList.VADList {
  378. nowBaseSubTimeLineData = append(nowBaseSubTimeLineData, opts.LineData{Value: vadInfo.Active})
  379. //baseTime := srcSubUnit.GetOffsetTimeNumber()
  380. nowVADInfoTimeNumber := vadInfo.Time.Seconds()
  381. //println(fmt.Sprintf("%d - %f", index, nowVADInfoTimeNumber-baseTime))
  382. //nowOffsetTime := nowVADInfoTimeNumber// - baseTime
  383. nowBaseSubXAxis = append(nowBaseSubXAxis, fmt.Sprintf("%f", nowVADInfoTimeNumber))
  384. }
  385. err = SaveStaticLineV2("Sub base", baseSubVADStaticLineFullPath, nowBaseSubXAxis, nowBaseSubTimeLineData)
  386. if err != nil {
  387. return false, 0, 0, err
  388. }
  389. }
  390. return false, -1, -1, nil
  391. }
  392. // GetOffsetTimeV3 使用 VAD 检测语音是否有人声,输出连续的点标记,再通过 SimHash 进行匹配,找到最佳的偏移时间
  393. func (s *SubTimelineFixer) GetOffsetTimeV3(audioInfo vad.AudioInfo, infoSrc *subparser.FileInfo, staticLineFileSavePath string, debugInfoFileSavePath string) (bool, float64, float64, error) {
  394. /*
  395. 分割字幕成若干段,然后得到若干段的时间轴,将这些段从字幕文字转换成 VADInfo
  396. 从上面若干段时间轴,把音频给分割成多段
  397. 然后使用 simhash 的进行比较,输出分析的曲线图等信息
  398. */
  399. //bok, duration, err := s.ffmpegHelper.GetAudioInfo(audioInfo.FileFullPath)
  400. //if err != nil || bok == false {
  401. // return false, 0, 0, err
  402. //}
  403. /*
  404. 这里的字幕要求是完整的一个字幕
  405. 1. 抽取字幕的时间片段的时候,暂定,前 15% 和后 15% 要避开,前奏、主题曲、结尾曲
  406. 2. 将整个字幕,抽取连续 5 句对话为一个单元,提取时间片段信息
  407. */
  408. subUnitList, err := sub_helper.GetVADINfoFromSub(infoSrc, FrontAndEndPer, SubUnitMaxCount)
  409. if err != nil {
  410. return false, 0, 0, err
  411. }
  412. // 开始针对对白单元进行匹配
  413. for _, subUnit := range subUnitList {
  414. startTimeString, subLength := subUnit.GetFFMPEGCutRange(ExpandTimeRange)
  415. // 导出当前的音频文件适合与匹配的范围的临时音频文件
  416. outAudioFPath, _, errString, err := s.ffmpegHelper.ExportAudioAndSubArgsByTimeRange(audioInfo.FileFullPath, infoSrc.FileFullPath, startTimeString, subLength)
  417. if err != nil {
  418. log_helper.GetLogger().Errorln("ExportAudioAndSubArgsByTimeRange", errString, err)
  419. return false, 0, 0, err
  420. }
  421. audioVADInfos, err := vad.GetVADInfoFromAudio(vad.AudioInfo{
  422. FileFullPath: outAudioFPath,
  423. SampleRate: 16000,
  424. BitDepth: 16,
  425. })
  426. if err != nil {
  427. return false, 0, 0, err
  428. }
  429. var subTimeLineData = make([]opts.LineData, 0)
  430. var subTimeLineFFTData = make([]opts.LineData, 0)
  431. var subXAxis = make([]string, 0)
  432. var audioTimeLineData = make([]opts.LineData, 0)
  433. var audioTimeLineFFTData = make([]opts.LineData, 0)
  434. var audioXAxis = make([]string, 0)
  435. subBuf := make([]complex128, my_util.MakePowerOfTwo(int64(len(subUnit.VADList))))
  436. audioBuf := make([]complex128, my_util.MakePowerOfTwo(int64(len(audioVADInfos))))
  437. for index, vadInfo := range subUnit.VADList {
  438. subTimeLineData = append(subTimeLineData, opts.LineData{Value: vadInfo.Active})
  439. baseTime := subUnit.GetOffsetTimeNumber()
  440. nowVADInfoTimeNumber := vadInfo.Time.Seconds()
  441. //println(fmt.Sprintf("%d - %f", index, nowVADInfoTimeNumber-baseTime))
  442. nowOffsetTime := nowVADInfoTimeNumber - baseTime
  443. subXAxis = append(subXAxis, fmt.Sprintf("%f", nowOffsetTime))
  444. subBuf[index] = complex(float64(my_util.Bool2Int(vadInfo.Active)), nowOffsetTime)
  445. }
  446. // FFT 转换
  447. err = fourier.Forward(subBuf)
  448. if err != nil {
  449. return false, 0, 0, err
  450. }
  451. for i := 0; i < len(subUnit.VADList); i++ {
  452. subTimeLineFFTData = append(subTimeLineFFTData, opts.LineData{Value: real(subBuf[i])})
  453. }
  454. outDir := filepath.Dir(outAudioFPath)
  455. outBaseName := filepath.Base(outAudioFPath)
  456. outBaseNameWithOutExt := strings.ReplaceAll(outBaseName, filepath.Ext(outBaseName), "")
  457. subVADStaticLineFullPath := filepath.Join(outDir, outBaseNameWithOutExt+"_sub.html")
  458. err = SaveStaticLineV3("Sub", subVADStaticLineFullPath, subXAxis, subTimeLineData, subTimeLineFFTData)
  459. if err != nil {
  460. return false, 0, 0, err
  461. }
  462. for index, vadInfo := range audioVADInfos {
  463. audioTimeLineData = append(audioTimeLineData, opts.LineData{Value: vadInfo.Active})
  464. audioXAxis = append(audioXAxis, fmt.Sprintf("%f", vadInfo.Time.Seconds()))
  465. audioBuf[index] = complex(float64(my_util.Bool2Int(vadInfo.Active)), vadInfo.Time.Seconds())
  466. }
  467. // FFT 转换
  468. err = fourier.Forward(audioBuf)
  469. if err != nil {
  470. return false, 0, 0, err
  471. }
  472. for i := 0; i < len(audioBuf); i++ {
  473. audioTimeLineFFTData = append(audioTimeLineFFTData, opts.LineData{Value: real(audioBuf[i])})
  474. }
  475. audioVADStaticLineFullPath := filepath.Join(outDir, outBaseNameWithOutExt+"_audio.html")
  476. err = SaveStaticLineV3("Audio", audioVADStaticLineFullPath, audioXAxis, audioTimeLineData, audioTimeLineFFTData)
  477. if err != nil {
  478. return false, 0, 0, err
  479. }
  480. }
  481. return false, -1, -1, nil
  482. }
  483. const FixMask = "-fix"
  484. const FrontAndEndPer = 0.10 // 前百分之 15 和后百分之 15 都不进行识别
  485. const SubUnitMaxCount = 100 // 一个 Sub单元有五句对白
  486. const ExpandTimeRange = 50 // 从字幕的时间轴片段需要向前和向后多匹配一部分的音频,这里定义的就是这个 range 以分钟为单位, 正负 60 秒