// fixer.go (21 KB)

  1. package sub_timeline_fixer
  2. import (
  3. "errors"
  4. "fmt"
  5. "github.com/allanpk716/ChineseSubFinder/internal/pkg/ffmpeg_helper"
  6. "github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
  7. "github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
  8. "github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_helper"
  9. "github.com/allanpk716/ChineseSubFinder/internal/pkg/vad"
  10. "github.com/allanpk716/ChineseSubFinder/internal/types/sub_timeline_fiexer"
  11. "github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
  12. "github.com/emirpasic/gods/maps/treemap"
  13. "github.com/emirpasic/gods/utils"
  14. "github.com/go-echarts/go-echarts/v2/opts"
  15. "github.com/grd/stat"
  16. "github.com/james-bowman/nlp/measures/pairwise"
  17. "github.com/mndrix/tukey"
  18. "github.com/panjf2000/ants/v2"
  19. "golang.org/x/net/context"
  20. "gonum.org/v1/gonum/mat"
  21. "os"
  22. "strings"
  23. "sync"
  24. "time"
  25. )
  26. type SubTimelineFixer struct {
  27. fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig
  28. ffmpegHelper *ffmpeg_helper.FFMPEGHelper
  29. }
  30. func NewSubTimelineFixer(fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig) *SubTimelineFixer {
  31. return &SubTimelineFixer{
  32. fixerConfig: fixerConfig,
  33. ffmpegHelper: ffmpeg_helper.NewFFMPEGHelper(),
  34. }
  35. }
  36. // StopWordCounter 停止词统计
  37. func (s *SubTimelineFixer) StopWordCounter(inString string, per int) []string {
  38. statisticTimes := make(map[string]int)
  39. wordsLength := strings.Fields(inString)
  40. for counts, word := range wordsLength {
  41. // 判断key是否存在,这个word是字符串,这个counts是统计的word的次数。
  42. word, ok := statisticTimes[word]
  43. if ok {
  44. word = word
  45. statisticTimes[wordsLength[counts]] = statisticTimes[wordsLength[counts]] + 1
  46. } else {
  47. statisticTimes[wordsLength[counts]] = 1
  48. }
  49. }
  50. stopWords := make([]string, 0)
  51. mapByValue := sortMapByValue(statisticTimes)
  52. breakIndex := len(mapByValue) * per / 100
  53. for index, wordInfo := range mapByValue {
  54. if index > breakIndex {
  55. break
  56. }
  57. stopWords = append(stopWords, wordInfo.Name)
  58. }
  59. return stopWords
  60. }
  61. // FixSubTimeline 校正时间轴
  62. func (s *SubTimelineFixer) FixSubTimeline(infoSrc *subparser.FileInfo, inOffsetTime float64, desSaveSubFileFullPath string) (string, error) {
  63. /*
  64. 从解析的实例中,正常来说是可以匹配出所有的 Dialogue 对话的 Start 和 End time 的信息
  65. 然后找到对应的字幕的文件,进行文件内容的替换来做时间轴的校正
  66. */
  67. // 偏移时间
  68. offsetTime := time.Duration(inOffsetTime*1000) * time.Millisecond
  69. fixContent := infoSrc.Content
  70. timeFormat := infoSrc.GetTimeFormat()
  71. for _, srcOneDialogue := range infoSrc.Dialogues {
  72. timeStart, err := infoSrc.ParseTime(srcOneDialogue.StartTime)
  73. if err != nil {
  74. return "", err
  75. }
  76. timeEnd, err := infoSrc.ParseTime(srcOneDialogue.EndTime)
  77. if err != nil {
  78. return "", err
  79. }
  80. fixTimeStart := timeStart.Add(offsetTime)
  81. fixTimeEnd := timeEnd.Add(offsetTime)
  82. fixContent = strings.ReplaceAll(fixContent, srcOneDialogue.StartTime, fixTimeStart.Format(timeFormat))
  83. fixContent = strings.ReplaceAll(fixContent, srcOneDialogue.EndTime, fixTimeEnd.Format(timeFormat))
  84. }
  85. dstFile, err := os.Create(desSaveSubFileFullPath)
  86. if err != nil {
  87. return "", err
  88. }
  89. defer func() {
  90. _ = dstFile.Close()
  91. }()
  92. _, err = dstFile.WriteString(fixContent)
  93. if err != nil {
  94. return "", err
  95. }
  96. return fixContent, nil
  97. }
  98. /*
  99. 对于 V1 版本的字幕时间轴校正来说,是有特殊的前置要求的
  100. 1. 视频要有英文字幕
  101. 2. 外置的字幕必须是中文的双语字幕(简英、繁英)
  102. */
  103. // GetOffsetTimeV1 暂时只支持英文的基准字幕,源字幕必须是双语中英字幕
  104. func (s *SubTimelineFixer) GetOffsetTimeV1(infoBase, infoSrc *subparser.FileInfo, staticLineFileSavePath string, debugInfoFileSavePath string) (bool, float64, float64, error) {
  105. var debugInfos = make([]string, 0)
  106. // 构建基准语料库,目前阶段只需要考虑是 En 的就行了
  107. var baseCorpus = make([]string, 0)
  108. var baseDialogueFilterMap = make(map[int]int, 0)
  109. /*
  110. 这里原来的写法是所有的 base 的都放进去匹配,这样会带来一些不必要的对白
  111. 需要剔除空白。那么就需要建立一个转换的字典
  112. */
  113. for index, oneDialogueEx := range infoBase.DialoguesEx {
  114. if oneDialogueEx.EnLine == "" {
  115. continue
  116. }
  117. baseCorpus = append(baseCorpus, oneDialogueEx.EnLine)
  118. baseDialogueFilterMap[len(baseCorpus)-1] = index
  119. }
  120. // 初始化
  121. pipLine, tfidf, err := NewTFIDF(baseCorpus)
  122. if err != nil {
  123. return false, 0, 0, err
  124. }
  125. /*
  126. 确认两个字幕间的偏移,暂定的方案是两边都连续匹配上 5 个索引,再抽取一个对话的时间进行修正计算
  127. */
  128. maxCompareDialogue := s.fixerConfig.MaxCompareDialogue
  129. // 基线的长度
  130. _, docsLength := tfidf.Dims()
  131. var matchIndexList = make([]MatchIndex, 0)
  132. sc := NewSubCompare(maxCompareDialogue)
  133. // 开始比较相似度,默认认为是 Ch_en 就行了
  134. for srcIndex := 0; srcIndex < len(infoSrc.DialoguesEx); {
  135. srcOneDialogueEx := infoSrc.DialoguesEx[srcIndex]
  136. // 这里只考虑 英文 的语言
  137. if srcOneDialogueEx.EnLine == "" {
  138. srcIndex++
  139. continue
  140. }
  141. // run the query through the same pipeline that was fitted to the corpus and
  142. // to project it into the same dimensional space
  143. queryVector, err := pipLine.Transform(srcOneDialogueEx.EnLine)
  144. if err != nil {
  145. return false, 0, 0, err
  146. }
  147. // iterate over document feature vectors (columns) in the LSI matrix and compare
  148. // with the query vector for similarity. Similarity is determined by the difference
  149. // between the angles of the vectors known as the cosine similarity
  150. highestSimilarity := -1.0
  151. // 匹配上的基准的索引
  152. var baseIndex int
  153. // 这里理论上需要把所有的基线遍历一次,但是,一般来说,两个字幕不可能差距在 50 行
  154. // 这样的好处是有助于提高搜索的性能
  155. // 那么就以当前的 src 的位置,向前、向后各 50 来遍历
  156. nowMaxScanLength := srcIndex + 50
  157. nowMinScanLength := srcIndex - 50
  158. if nowMinScanLength < 0 {
  159. nowMinScanLength = 0
  160. }
  161. if nowMaxScanLength > docsLength {
  162. nowMaxScanLength = docsLength
  163. }
  164. for i := nowMinScanLength; i < nowMaxScanLength; i++ {
  165. similarity := pairwise.CosineSimilarity(queryVector.(mat.ColViewer).ColView(0), tfidf.(mat.ColViewer).ColView(i))
  166. if similarity > highestSimilarity {
  167. baseIndex = i
  168. highestSimilarity = similarity
  169. }
  170. }
  171. startBaseIndex, startSrcIndex := sc.GetStartIndex()
  172. if sc.Add(baseIndex, srcIndex) == false {
  173. sc.Clear()
  174. srcIndex = startSrcIndex + 1
  175. continue
  176. //sc.Add(baseIndex, srcIndex)
  177. }
  178. if sc.Check() == false {
  179. srcIndex++
  180. continue
  181. } else {
  182. sc.Clear()
  183. }
  184. matchIndexList = append(matchIndexList, MatchIndex{
  185. BaseNowIndex: startBaseIndex,
  186. //BaseNowIndex: baseDialogueFilterMap[startBaseIndex],
  187. SrcNowIndex: startSrcIndex,
  188. Similarity: highestSimilarity,
  189. })
  190. //println(fmt.Sprintf("Similarity: %f Base[%d] %s-%s '%s' <--> Src[%d] %s-%s '%s'",
  191. // highestSimilarity,
  192. // baseIndex, infoBase.DialoguesEx[baseIndex].relativelyStartTime, infoBase.DialoguesEx[baseIndex].relativelyEndTime, baseCorpus[baseIndex],
  193. // srcIndex, srcOneDialogueEx.relativelyStartTime, srcOneDialogueEx.relativelyEndTime, srcOneDialogueEx.EnLine))
  194. srcIndex++
  195. }
  196. var startDiffTimeLineData = make([]opts.LineData, 0)
  197. var endDiffTimeLineData = make([]opts.LineData, 0)
  198. var tmpStartDiffTime = make([]float64, 0)
  199. var tmpEndDiffTime = make([]float64, 0)
  200. var startDiffTimeList = make(stat.Float64Slice, 0)
  201. var endDiffTimeList = make(stat.Float64Slice, 0)
  202. var xAxis = make([]string, 0)
  203. // 上面找出了连续匹配 maxCompareDialogue:N 次的字幕语句块
  204. // 求出平均时间偏移
  205. for mIndex, matchIndexItem := range matchIndexList {
  206. for i := 0; i < maxCompareDialogue; i++ {
  207. // 这里会统计连续的这 5 句话的时间差
  208. //tmpBaseIndex := matchIndexItem.BaseNowIndex + i
  209. tmpBaseIndex := baseDialogueFilterMap[matchIndexItem.BaseNowIndex+i]
  210. tmpSrcIndex := matchIndexItem.SrcNowIndex + i
  211. baseTimeStart, err := infoBase.ParseTime(infoBase.DialoguesEx[tmpBaseIndex].StartTime)
  212. if err != nil {
  213. return false, 0, 0, err
  214. }
  215. baseTimeEnd, err := infoBase.ParseTime(infoBase.DialoguesEx[tmpBaseIndex].EndTime)
  216. if err != nil {
  217. return false, 0, 0, err
  218. }
  219. srtTimeStart, err := infoBase.ParseTime(infoSrc.DialoguesEx[tmpSrcIndex].StartTime)
  220. if err != nil {
  221. return false, 0, 0, err
  222. }
  223. srtTimeEnd, err := infoBase.ParseTime(infoSrc.DialoguesEx[tmpSrcIndex].EndTime)
  224. if err != nil {
  225. return false, 0, 0, err
  226. }
  227. TimeDiffStart := baseTimeStart.Sub(srtTimeStart)
  228. TimeDiffEnd := baseTimeEnd.Sub(srtTimeEnd)
  229. startDiffTimeLineData = append(startDiffTimeLineData, opts.LineData{Value: TimeDiffStart.Seconds()})
  230. endDiffTimeLineData = append(endDiffTimeLineData, opts.LineData{Value: TimeDiffEnd.Seconds()})
  231. tmpStartDiffTime = append(tmpStartDiffTime, TimeDiffStart.Seconds())
  232. tmpEndDiffTime = append(tmpEndDiffTime, TimeDiffEnd.Seconds())
  233. startDiffTimeList = append(startDiffTimeList, TimeDiffStart.Seconds())
  234. endDiffTimeList = append(endDiffTimeList, TimeDiffEnd.Seconds())
  235. xAxis = append(xAxis, fmt.Sprintf("%d_%d", mIndex, i))
  236. debugInfos = append(debugInfos, "bs "+infoBase.DialoguesEx[tmpBaseIndex].StartTime+" <-> "+infoBase.DialoguesEx[tmpBaseIndex].EndTime)
  237. debugInfos = append(debugInfos, "sc "+infoSrc.DialoguesEx[tmpSrcIndex].StartTime+" <-> "+infoSrc.DialoguesEx[tmpSrcIndex].EndTime)
  238. debugInfos = append(debugInfos, "StartDiffTime: "+fmt.Sprintf("%f", TimeDiffStart.Seconds()))
  239. //println(fmt.Sprintf("Diff Start-End: %s - %s Base[%d] %s-%s '%s' <--> Src[%d] %s-%s '%s'",
  240. // TimeDiffStart, TimeDiffEnd,
  241. // tmpBaseIndex, infoBase.DialoguesEx[tmpBaseIndex].relativelyStartTime, infoBase.DialoguesEx[tmpBaseIndex].relativelyEndTime, infoBase.DialoguesEx[tmpBaseIndex].EnLine,
  242. // tmpSrcIndex, infoSrc.DialoguesEx[tmpSrcIndex].relativelyStartTime, infoSrc.DialoguesEx[tmpSrcIndex].relativelyEndTime, infoSrc.DialoguesEx[tmpSrcIndex].EnLine))
  243. }
  244. debugInfos = append(debugInfos, "---------------------------------------------")
  245. //println("---------------------------------------------")
  246. }
  247. oldMean := stat.Mean(startDiffTimeList)
  248. oldSd := stat.Sd(startDiffTimeList)
  249. newMean := -1.0
  250. newSd := -1.0
  251. per := 1.0
  252. // 如果 SD 较大的时候才需要剔除
  253. if oldSd > 0.1 {
  254. var outliersMap = make(map[float64]int, 0)
  255. outliers, _, _ := tukey.Outliers(0.3, tmpStartDiffTime)
  256. for _, outlier := range outliers {
  257. outliersMap[outlier] = 0
  258. }
  259. var newStartDiffTimeList = make([]float64, 0)
  260. for _, f := range tmpStartDiffTime {
  261. _, ok := outliersMap[f]
  262. if ok == true {
  263. continue
  264. }
  265. newStartDiffTimeList = append(newStartDiffTimeList, f)
  266. }
  267. orgLen := startDiffTimeList.Len()
  268. startDiffTimeList = make(stat.Float64Slice, 0)
  269. for _, f := range newStartDiffTimeList {
  270. startDiffTimeList = append(startDiffTimeList, f)
  271. }
  272. newLen := startDiffTimeList.Len()
  273. per = float64(newLen) / float64(orgLen)
  274. newMean = stat.Mean(startDiffTimeList)
  275. newSd = stat.Sd(startDiffTimeList)
  276. }
  277. if newMean == -1.0 {
  278. newMean = oldMean
  279. }
  280. if newSd == -1.0 {
  281. newSd = oldSd
  282. }
  283. // 不为空的时候,生成调试文件
  284. if staticLineFileSavePath != "" {
  285. //staticLineFileSavePath = "bar.html"
  286. err = SaveStaticLineV1(staticLineFileSavePath, infoBase.Name, infoSrc.Name,
  287. per, oldMean, oldSd, newMean, newSd, xAxis,
  288. startDiffTimeLineData, endDiffTimeLineData)
  289. if err != nil {
  290. return false, 0, 0, err
  291. }
  292. }
  293. // 跳过的逻辑是 mean 是 0 ,那么现在如果判断有问题,缓存的调试文件继续生成,然后强制返回 0 来跳过后续的逻辑
  294. // 这里需要考虑,找到的连续 5 句话匹配的有多少句,占比整体所有的 Dialogue 是多少,太低也需要跳过
  295. matchIndexLineCount := len(matchIndexList) * maxCompareDialogue
  296. //perMatch := float64(matchIndexLineCount) / float64(len(infoSrc.DialoguesEx))
  297. perMatch := float64(matchIndexLineCount) / float64(len(baseCorpus))
  298. if perMatch < s.fixerConfig.MinMatchedPercent {
  299. tmpContent := infoSrc.Name + fmt.Sprintf(" Sequence match %d dialogues (< %f%%), Skip,", s.fixerConfig.MaxCompareDialogue, s.fixerConfig.MinMatchedPercent*100) + fmt.Sprintf(" %f%% ", perMatch*100)
  300. debugInfos = append(debugInfos, tmpContent)
  301. log_helper.GetLogger().Infoln(tmpContent)
  302. } else {
  303. tmpContent := infoSrc.Name + fmt.Sprintf(" Sequence match %d dialogues,", s.fixerConfig.MaxCompareDialogue) + fmt.Sprintf(" %f%% ", perMatch*100)
  304. debugInfos = append(debugInfos, tmpContent)
  305. log_helper.GetLogger().Infoln(tmpContent)
  306. }
  307. // 输出调试的匹配时间轴信息的列表
  308. if debugInfoFileSavePath != "" {
  309. err = my_util.WriteStrings2File(debugInfoFileSavePath, debugInfos)
  310. if err != nil {
  311. return false, 0, 0, err
  312. }
  313. }
  314. // 虽然有条件判断是认为有问题的,但是返回值还是要填写除去的
  315. if perMatch < s.fixerConfig.MinMatchedPercent {
  316. return false, newMean, newSd, nil
  317. }
  318. return true, newMean, newSd, nil
  319. }
  320. // GetOffsetTimeV2 使用内置的字幕校正外置的字幕时间轴
  321. func (s *SubTimelineFixer) GetOffsetTimeV2(baseUnit, srcUnit *sub_helper.SubUnit, audioVadList []vad.VADInfo, audioDuration float64) (bool, float64, float64, error) {
  322. // 时间轴差值数组
  323. var tmpStartDiffTimeList = make([]float64, 0)
  324. var tmpStartDiffTimeMap = treemap.NewWith(utils.Float64Comparator)
  325. var tmpStartDiffTimeListEx = make(stat.Float64Slice, 0)
  326. // -------------------------------------------------
  327. var bUseSubOrAudioAsBase = true
  328. if baseUnit == nil && audioVadList != nil {
  329. // 使用 音频 来进行匹配
  330. bUseSubOrAudioAsBase = false
  331. } else if baseUnit != nil {
  332. // 使用 字幕 来进行匹配
  333. bUseSubOrAudioAsBase = true
  334. } else {
  335. return false, 0, 0, errors.New("GetOffsetTimeV2 input baseUnit or AudioVad is nil")
  336. }
  337. // -------------------------------------------------
  338. /*
  339. 开始针对对白单元进行匹配
  340. 下面的逻辑需要参考 FFT识别流程.jpg 这个图示来理解
  341. 实际实现的时候,会在上述 srcUnit 上,做一个滑动窗口来做匹配,80% 是窗口,20% 用于移动
  342. 步长固定在 10 步
  343. */
  344. audioFloatList := vad.GetFloatSlice(audioVadList)
  345. srcVADLen := len(srcUnit.VADList)
  346. // 滑动窗口的长度
  347. srcWindowLen := int(float64(srcVADLen) * MatchPer)
  348. srcSlideLen := srcVADLen - srcWindowLen
  349. // 窗口可以滑动的长度
  350. srcSlideLenHalf := srcSlideLen / 2
  351. //
  352. oneStep := srcSlideLenHalf / CompareParts
  353. if srcSlideLen <= 0 {
  354. srcSlideLen = 1
  355. }
  356. if oneStep <= 0 {
  357. oneStep = 1
  358. }
  359. insertIndex := 0
  360. // -------------------------------------------------
  361. // 实际 FFT 的匹配逻辑函数
  362. fixFunc := func(i interface{}) error {
  363. inData := i.(InputData)
  364. // -------------------------------------------------
  365. // 开始匹配
  366. // 这里的对白单元,当前的 Base 进行对比,详细示例见图解。Step 2 中橙色的区域
  367. fffAligner := NewFFTAligner()
  368. var bok = false
  369. var nowBaseStartTime = 0.0
  370. var offsetIndex = 0
  371. var score = 0.0
  372. // 图解,参考 Step 3
  373. if bUseSubOrAudioAsBase == false {
  374. // 使用 音频 来进行匹配
  375. // 去掉头和尾,具体百分之多少,见 FrontAndEndPerBase
  376. audioCutLen := int(float64(len(inData.AudioVADList)) * FrontAndEndPerBase)
  377. offsetIndex, score = fffAligner.Fit(inData.AudioVADList[audioCutLen:len(inData.AudioVADList)-audioCutLen], inData.SrcUnit.GetVADFloatSlice()[inData.OffsetIndex:srcWindowLen+inData.OffsetIndex])
  378. if offsetIndex < 0 {
  379. return nil
  380. }
  381. // offsetIndex 这里得到的是 10ms 为一个单位的 OffsetIndex,把去掉的头部时间偏移加回来,以及第一句话的偏移
  382. nowBaseStartTime = vad.GetAudioIndex2Time(offsetIndex + audioCutLen)
  383. nowBaseStartTime = nowBaseStartTime + inData.SrcUnit.GetStartTimeNumber(true)
  384. } else {
  385. // 使用 字幕 来进行匹配
  386. offsetIndex, score = fffAligner.Fit(inData.BaseUnit.GetVADFloatSlice(), inData.SrcUnit.GetVADFloatSlice()[inData.OffsetIndex:inData.OffsetIndex+srcWindowLen])
  387. if offsetIndex < 0 {
  388. return nil
  389. }
  390. bok, nowBaseStartTime = inData.BaseUnit.GetIndexTimeNumber(offsetIndex, true)
  391. if bok == false {
  392. return nil
  393. }
  394. }
  395. // 需要校正的字幕
  396. bok, nowSrcStartTime := inData.SrcUnit.GetIndexTimeNumber(inData.OffsetIndex, true)
  397. if bok == false {
  398. return nil
  399. }
  400. // 时间差值
  401. TimeDiffStartCorrelation := nowBaseStartTime - nowSrcStartTime
  402. println("------------")
  403. println("OffsetTime:", fmt.Sprintf("%v", TimeDiffStartCorrelation), "offsetIndex:", offsetIndex, "score:", fmt.Sprintf("%v", score))
  404. mutexFixV2.Lock()
  405. tmpStartDiffTimeList = append(tmpStartDiffTimeList, TimeDiffStartCorrelation)
  406. tmpStartDiffTimeListEx = append(tmpStartDiffTimeListEx, TimeDiffStartCorrelation)
  407. tmpStartDiffTimeMap.Put(score, insertIndex)
  408. insertIndex++
  409. mutexFixV2.Unlock()
  410. // -------------------------------------------------
  411. return nil
  412. }
  413. // -------------------------------------------------
  414. antPool, err := ants.NewPoolWithFunc(FixThreads, func(inData interface{}) {
  415. data := inData.(InputData)
  416. defer data.Wg.Done()
  417. ctx, cancel := context.WithTimeout(context.Background(), SubOneUnitProcessTimeOut)
  418. defer cancel()
  419. done := make(chan error, 1)
  420. panicChan := make(chan interface{}, 1)
  421. go func() {
  422. defer func() {
  423. if p := recover(); p != nil {
  424. panicChan <- p
  425. }
  426. }()
  427. done <- fixFunc(inData)
  428. }()
  429. select {
  430. case err := <-done:
  431. if err != nil {
  432. log_helper.GetLogger().Errorln("GetOffsetTimeV2.NewPoolWithFunc done with Error", err.Error())
  433. }
  434. return
  435. case p := <-panicChan:
  436. log_helper.GetLogger().Errorln("GetOffsetTimeV2.NewPoolWithFunc got panic", p)
  437. return
  438. case <-ctx.Done():
  439. log_helper.GetLogger().Errorln("GetOffsetTimeV2.NewPoolWithFunc got time out", ctx.Err())
  440. return
  441. }
  442. })
  443. if err != nil {
  444. return false, 0, 0, err
  445. }
  446. defer antPool.Release()
  447. // -------------------------------------------------
  448. wg := sync.WaitGroup{}
  449. for i := srcSlideLenHalf; i < srcSlideLen; {
  450. wg.Add(1)
  451. if bUseSubOrAudioAsBase == true {
  452. // 使用字幕
  453. err = antPool.Invoke(InputData{BaseUnit: *baseUnit, SrcUnit: *srcUnit, OffsetIndex: i, Wg: &wg})
  454. } else {
  455. // 使用音频
  456. err = antPool.Invoke(InputData{AudioVADList: audioFloatList, SrcUnit: *srcUnit, OffsetIndex: i, Wg: &wg})
  457. }
  458. if err != nil {
  459. log_helper.GetLogger().Errorln("GetOffsetTimeV2 ants.Invoke", err)
  460. }
  461. i += oneStep
  462. }
  463. wg.Wait()
  464. // 这里可能遇到匹配的时候没有能够执行够 CompareParts 次,有可能是负数跳过或者时间转换失败导致,前者为主(可能是这两个就是一个东西的时候,或者说没有时间轴偏移的时候)
  465. if insertIndex < CompareParts {
  466. return false, 0, 0, nil
  467. }
  468. outCorrelationFixResult := s.calcMeanAndSD(tmpStartDiffTimeListEx, tmpStartDiffTimeList)
  469. println(fmt.Sprintf("FFTAligner Old Mean: %v SD: %v Per: %v", outCorrelationFixResult.OldMean, outCorrelationFixResult.OldSD, outCorrelationFixResult.Per))
  470. println(fmt.Sprintf("FFTAligner New Mean: %v SD: %v Per: %v", outCorrelationFixResult.NewMean, outCorrelationFixResult.NewSD, outCorrelationFixResult.Per))
  471. value, index := tmpStartDiffTimeMap.Max()
  472. println("FFTAligner Max score:", fmt.Sprintf("%v", value.(float64)), "Time:", fmt.Sprintf("%v", tmpStartDiffTimeList[index.(int)]))
  473. return true, outCorrelationFixResult.NewMean, outCorrelationFixResult.NewSD, nil
  474. }
  475. func (s *SubTimelineFixer) calcMeanAndSD(startDiffTimeList stat.Float64Slice, tmpStartDiffTime []float64) FixResult {
  476. const minValue = -9999.0
  477. oldMean := stat.Mean(startDiffTimeList)
  478. oldSd := stat.Sd(startDiffTimeList)
  479. newMean := minValue
  480. newSd := minValue
  481. per := 1.0
  482. if len(tmpStartDiffTime) < 3 {
  483. return FixResult{
  484. oldMean,
  485. oldSd,
  486. newMean,
  487. newSd,
  488. per,
  489. }
  490. }
  491. // 如果 SD 较大的时候才需要剔除
  492. if oldSd > 0.1 {
  493. var outliersMap = make(map[float64]int, 0)
  494. outliers, _, _ := tukey.Outliers(0.3, tmpStartDiffTime)
  495. for _, outlier := range outliers {
  496. outliersMap[outlier] = 0
  497. }
  498. var newStartDiffTimeList = make([]float64, 0)
  499. for _, f := range tmpStartDiffTime {
  500. _, ok := outliersMap[f]
  501. if ok == true {
  502. continue
  503. }
  504. newStartDiffTimeList = append(newStartDiffTimeList, f)
  505. }
  506. orgLen := startDiffTimeList.Len()
  507. startDiffTimeList = make(stat.Float64Slice, 0)
  508. for _, f := range newStartDiffTimeList {
  509. startDiffTimeList = append(startDiffTimeList, f)
  510. }
  511. newLen := startDiffTimeList.Len()
  512. per = float64(newLen) / float64(orgLen)
  513. newMean = stat.Mean(startDiffTimeList)
  514. newSd = stat.Sd(startDiffTimeList)
  515. }
  516. if newMean == minValue {
  517. newMean = oldMean
  518. }
  519. if newSd == minValue {
  520. newSd = oldSd
  521. }
  522. return FixResult{
  523. oldMean,
  524. oldSd,
  525. newMean,
  526. newSd,
  527. per,
  528. }
  529. }
  // Tuning values of the timeline fixer.
  const (
  	// FixMask is not referenced in this part of the file.
  	// NOTE(review): presumably the suffix that marks a fixed subtitle file; confirm at the call sites.
  	FixMask = "-fix"
  	// SubOneUnitProcessTimeOut is the time limit for aligning one unit.
  	SubOneUnitProcessTimeOut = 5 * time.Minute
  	// FrontAndEndPerBase is the share of the base audio VAD samples dropped
  	// at both the head and the tail before matching. It is currently 0.0,
  	// although an older note mentioned 15%.
  	FrontAndEndPerBase = 0.0
  	// FrontAndEndPerSrc is the matching share for the source side; it is not
  	// referenced in this part of the file. It is currently 0.0, although an
  	// older note mentioned 20%.
  	FrontAndEndPerSrc = 0.0
  	// MatchPer is the share of the source VAD sequence covered by the
  	// sliding window.
  	MatchPer = 0.8
  	// CompareParts divides half of the slide range into steps and is also
  	// the minimum number of samples GetOffsetTimeV2 needs to succeed.
  	CompareParts = 5
  	// FixThreads is the size of the worker pool used by GetOffsetTimeV2.
  	FixThreads = 1
  )

  // mutexFixV2 guards the result collections filled by the GetOffsetTimeV2 workers.
  var mutexFixV2 sync.Mutex
  // OutputData describes the result of one alignment step.
  type OutputData struct {
  	// TimeDiffStartCorrelation is the computed timeline offset.
  	TimeDiffStartCorrelation float64
  	// OffsetIndex is the offset index inside the matched window.
  	OffsetIndex float64
  	// Score is the matching score.
  	Score float64
  	// InsertIndex tells which step produced this result.
  	InsertIndex int
  }
  544. type InputData struct {
  545. BaseUnit sub_helper.SubUnit // 基准 VAD
  546. AudioVADList []float64 // 基准 VAD
  547. SrcUnit sub_helper.SubUnit // 需要匹配的 VAD
  548. OffsetIndex int // 滑动窗体的移动偏移索引
  549. Wg *sync.WaitGroup // 并发锁
  550. }