fixer.go 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. package sub_timeline_fixer
  2. import (
  3. "errors"
  4. "fmt"
  5. "github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
  6. "github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
  7. "github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_helper"
  8. "github.com/allanpk716/ChineseSubFinder/internal/pkg/vad"
  9. "github.com/allanpk716/ChineseSubFinder/internal/types/sub_timeline_fiexer"
  10. "github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
  11. "github.com/emirpasic/gods/maps/treemap"
  12. "github.com/emirpasic/gods/utils"
  13. "github.com/go-echarts/go-echarts/v2/opts"
  14. "github.com/grd/stat"
  15. "github.com/james-bowman/nlp/measures/pairwise"
  16. "github.com/mndrix/tukey"
  17. "github.com/panjf2000/ants/v2"
  18. "golang.org/x/net/context"
  19. "gonum.org/v1/gonum/mat"
  20. "os"
  21. "strings"
  22. "sync"
  23. "time"
  24. )
  // SubTimelineFixer estimates and applies subtitle timeline offsets.
  type SubTimelineFixer struct {
  	// FixerConfig holds the tuning values read by GetOffsetTimeV1 (V1_*)
  	// and GetOffsetTimeV2 (V2_*).
  	FixerConfig sub_timeline_fiexer.SubTimelineFixerConfig
  }
  28. func NewSubTimelineFixer(fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig) *SubTimelineFixer {
  29. return &SubTimelineFixer{
  30. FixerConfig: fixerConfig,
  31. }
  32. }
  33. // StopWordCounter 停止词统计
  34. func (s *SubTimelineFixer) StopWordCounter(inString string, per int) []string {
  35. statisticTimes := make(map[string]int)
  36. wordsLength := strings.Fields(inString)
  37. for counts, word := range wordsLength {
  38. // 判断key是否存在,这个word是字符串,这个counts是统计的word的次数。
  39. word, ok := statisticTimes[word]
  40. if ok {
  41. word = word
  42. statisticTimes[wordsLength[counts]] = statisticTimes[wordsLength[counts]] + 1
  43. } else {
  44. statisticTimes[wordsLength[counts]] = 1
  45. }
  46. }
  47. stopWords := make([]string, 0)
  48. mapByValue := sortMapByValue(statisticTimes)
  49. breakIndex := len(mapByValue) * per / 100
  50. for index, wordInfo := range mapByValue {
  51. if index > breakIndex {
  52. break
  53. }
  54. stopWords = append(stopWords, wordInfo.Name)
  55. }
  56. return stopWords
  57. }
  58. // FixSubTimeline 校正时间轴
  59. func (s *SubTimelineFixer) FixSubTimeline(infoSrc *subparser.FileInfo, inOffsetTime float64, desSaveSubFileFullPath string) (string, error) {
  60. /*
  61. 从解析的实例中,正常来说是可以匹配出所有的 Dialogue 对话的 Start 和 End time 的信息
  62. 然后找到对应的字幕的文件,进行文件内容的替换来做时间轴的校正
  63. */
  64. // 偏移时间
  65. offsetTime := time.Duration(inOffsetTime*1000) * time.Millisecond
  66. fixContent := infoSrc.Content
  67. /*
  68. 这里进行时间转字符串的时候有一点比较特殊
  69. 正常来说输出的格式是类似 15:04:05.00
  70. 那么有个问题,字幕的时间格式是 0:00:12.00, 小时,是个数,除非有跨度到 20 小时的视频,不然小时就应该是个数
  71. 这就需要一个额外的函数去处理这些情况
  72. */
  73. timeFormat := infoSrc.GetTimeFormat()
  74. for _, srcOneDialogue := range infoSrc.Dialogues {
  75. timeStart, err := infoSrc.ParseTime(srcOneDialogue.StartTime)
  76. if err != nil {
  77. return "", err
  78. }
  79. timeEnd, err := infoSrc.ParseTime(srcOneDialogue.EndTime)
  80. if err != nil {
  81. return "", err
  82. }
  83. fixTimeStart := timeStart.Add(offsetTime)
  84. fixTimeEnd := timeEnd.Add(offsetTime)
  85. fixContent = strings.ReplaceAll(fixContent, srcOneDialogue.StartTime, my_util.Time2SubTimeString(fixTimeStart, timeFormat))
  86. fixContent = strings.ReplaceAll(fixContent, srcOneDialogue.EndTime, my_util.Time2SubTimeString(fixTimeEnd, timeFormat))
  87. }
  88. dstFile, err := os.Create(desSaveSubFileFullPath)
  89. if err != nil {
  90. return "", err
  91. }
  92. defer func() {
  93. _ = dstFile.Close()
  94. }()
  95. _, err = dstFile.WriteString(fixContent)
  96. if err != nil {
  97. return "", err
  98. }
  99. return fixContent, nil
  100. }
  101. /*
  102. 对于 V1 版本的字幕时间轴校正来说,是有特殊的前置要求的
  103. 1. 视频要有英文字幕
  104. 2. 外置的字幕必须是中文的双语字幕(简英、繁英)
  105. */
  106. // GetOffsetTimeV1 暂时只支持英文的基准字幕,源字幕必须是双语中英字幕
  107. func (s *SubTimelineFixer) GetOffsetTimeV1(infoBase, infoSrc *subparser.FileInfo, staticLineFileSavePath string, debugInfoFileSavePath string) (bool, float64, float64, error) {
  108. var debugInfos = make([]string, 0)
  109. // 构建基准语料库,目前阶段只需要考虑是 En 的就行了
  110. var baseCorpus = make([]string, 0)
  111. var baseDialogueFilterMap = make(map[int]int, 0)
  112. /*
  113. 这里原来的写法是所有的 base 的都放进去匹配,这样会带来一些不必要的对白
  114. 需要剔除空白。那么就需要建立一个转换的字典
  115. */
  116. for index, oneDialogueEx := range infoBase.DialoguesEx {
  117. if oneDialogueEx.EnLine == "" {
  118. continue
  119. }
  120. baseCorpus = append(baseCorpus, oneDialogueEx.EnLine)
  121. baseDialogueFilterMap[len(baseCorpus)-1] = index
  122. }
  123. // 初始化
  124. pipLine, tfidf, err := NewTFIDF(baseCorpus)
  125. if err != nil {
  126. return false, 0, 0, err
  127. }
  128. /*
  129. 确认两个字幕间的偏移,暂定的方案是两边都连续匹配上 5 个索引,再抽取一个对话的时间进行修正计算
  130. */
  131. maxCompareDialogue := s.FixerConfig.V1_MaxCompareDialogue
  132. // 基线的长度
  133. _, docsLength := tfidf.Dims()
  134. var matchIndexList = make([]MatchIndex, 0)
  135. sc := NewSubCompare(maxCompareDialogue)
  136. // 开始比较相似度,默认认为是 Ch_en 就行了
  137. for srcIndex := 0; srcIndex < len(infoSrc.DialoguesEx); {
  138. srcOneDialogueEx := infoSrc.DialoguesEx[srcIndex]
  139. // 这里只考虑 英文 的语言
  140. if srcOneDialogueEx.EnLine == "" {
  141. srcIndex++
  142. continue
  143. }
  144. // run the query through the same pipeline that was fitted to the corpus and
  145. // to project it into the same dimensional space
  146. queryVector, err := pipLine.Transform(srcOneDialogueEx.EnLine)
  147. if err != nil {
  148. return false, 0, 0, err
  149. }
  150. // iterate over document feature vectors (columns) in the LSI matrix and compare
  151. // with the query vector for similarity. Similarity is determined by the difference
  152. // between the angles of the vectors known as the cosine similarity
  153. highestSimilarity := -1.0
  154. // 匹配上的基准的索引
  155. var baseIndex int
  156. // 这里理论上需要把所有的基线遍历一次,但是,一般来说,两个字幕不可能差距在 50 行
  157. // 这样的好处是有助于提高搜索的性能
  158. // 那么就以当前的 src 的位置,向前、向后各 50 来遍历
  159. nowMaxScanLength := srcIndex + 50
  160. nowMinScanLength := srcIndex - 50
  161. if nowMinScanLength < 0 {
  162. nowMinScanLength = 0
  163. }
  164. if nowMaxScanLength > docsLength {
  165. nowMaxScanLength = docsLength
  166. }
  167. for i := nowMinScanLength; i < nowMaxScanLength; i++ {
  168. similarity := pairwise.CosineSimilarity(queryVector.(mat.ColViewer).ColView(0), tfidf.(mat.ColViewer).ColView(i))
  169. if similarity > highestSimilarity {
  170. baseIndex = i
  171. highestSimilarity = similarity
  172. }
  173. }
  174. startBaseIndex, startSrcIndex := sc.GetStartIndex()
  175. if sc.Add(baseIndex, srcIndex) == false {
  176. sc.Clear()
  177. srcIndex = startSrcIndex + 1
  178. continue
  179. //sc.Add(baseIndex, srcIndex)
  180. }
  181. if sc.Check() == false {
  182. srcIndex++
  183. continue
  184. } else {
  185. sc.Clear()
  186. }
  187. matchIndexList = append(matchIndexList, MatchIndex{
  188. BaseNowIndex: startBaseIndex,
  189. //BaseNowIndex: baseDialogueFilterMap[startBaseIndex],
  190. SrcNowIndex: startSrcIndex,
  191. Similarity: highestSimilarity,
  192. })
  193. //println(fmt.Sprintf("Similarity: %f Base[%d] %s-%s '%s' <--> Src[%d] %s-%s '%s'",
  194. // highestSimilarity,
  195. // baseIndex, infoBase.DialoguesEx[baseIndex].relativelyStartTime, infoBase.DialoguesEx[baseIndex].relativelyEndTime, baseCorpus[baseIndex],
  196. // srcIndex, srcOneDialogueEx.relativelyStartTime, srcOneDialogueEx.relativelyEndTime, srcOneDialogueEx.EnLine))
  197. srcIndex++
  198. }
  199. var startDiffTimeLineData = make([]opts.LineData, 0)
  200. var endDiffTimeLineData = make([]opts.LineData, 0)
  201. var tmpStartDiffTime = make([]float64, 0)
  202. var tmpEndDiffTime = make([]float64, 0)
  203. var startDiffTimeList = make(stat.Float64Slice, 0)
  204. var endDiffTimeList = make(stat.Float64Slice, 0)
  205. var xAxis = make([]string, 0)
  206. // 上面找出了连续匹配 maxCompareDialogue:N 次的字幕语句块
  207. // 求出平均时间偏移
  208. for mIndex, matchIndexItem := range matchIndexList {
  209. for i := 0; i < maxCompareDialogue; i++ {
  210. // 这里会统计连续的这 5 句话的时间差
  211. //tmpBaseIndex := matchIndexItem.BaseNowIndex + i
  212. tmpBaseIndex := baseDialogueFilterMap[matchIndexItem.BaseNowIndex+i]
  213. tmpSrcIndex := matchIndexItem.SrcNowIndex + i
  214. baseTimeStart, err := infoBase.ParseTime(infoBase.DialoguesEx[tmpBaseIndex].StartTime)
  215. if err != nil {
  216. return false, 0, 0, err
  217. }
  218. baseTimeEnd, err := infoBase.ParseTime(infoBase.DialoguesEx[tmpBaseIndex].EndTime)
  219. if err != nil {
  220. return false, 0, 0, err
  221. }
  222. srtTimeStart, err := infoBase.ParseTime(infoSrc.DialoguesEx[tmpSrcIndex].StartTime)
  223. if err != nil {
  224. return false, 0, 0, err
  225. }
  226. srtTimeEnd, err := infoBase.ParseTime(infoSrc.DialoguesEx[tmpSrcIndex].EndTime)
  227. if err != nil {
  228. return false, 0, 0, err
  229. }
  230. TimeDiffStart := baseTimeStart.Sub(srtTimeStart)
  231. TimeDiffEnd := baseTimeEnd.Sub(srtTimeEnd)
  232. startDiffTimeLineData = append(startDiffTimeLineData, opts.LineData{Value: TimeDiffStart.Seconds()})
  233. endDiffTimeLineData = append(endDiffTimeLineData, opts.LineData{Value: TimeDiffEnd.Seconds()})
  234. tmpStartDiffTime = append(tmpStartDiffTime, TimeDiffStart.Seconds())
  235. tmpEndDiffTime = append(tmpEndDiffTime, TimeDiffEnd.Seconds())
  236. startDiffTimeList = append(startDiffTimeList, TimeDiffStart.Seconds())
  237. endDiffTimeList = append(endDiffTimeList, TimeDiffEnd.Seconds())
  238. xAxis = append(xAxis, fmt.Sprintf("%d_%d", mIndex, i))
  239. debugInfos = append(debugInfos, "bs "+infoBase.DialoguesEx[tmpBaseIndex].StartTime+" <-> "+infoBase.DialoguesEx[tmpBaseIndex].EndTime)
  240. debugInfos = append(debugInfos, "sc "+infoSrc.DialoguesEx[tmpSrcIndex].StartTime+" <-> "+infoSrc.DialoguesEx[tmpSrcIndex].EndTime)
  241. debugInfos = append(debugInfos, "StartDiffTime: "+fmt.Sprintf("%f", TimeDiffStart.Seconds()))
  242. //println(fmt.Sprintf("Diff Start-End: %s - %s Base[%d] %s-%s '%s' <--> Src[%d] %s-%s '%s'",
  243. // TimeDiffStart, TimeDiffEnd,
  244. // tmpBaseIndex, infoBase.DialoguesEx[tmpBaseIndex].relativelyStartTime, infoBase.DialoguesEx[tmpBaseIndex].relativelyEndTime, infoBase.DialoguesEx[tmpBaseIndex].EnLine,
  245. // tmpSrcIndex, infoSrc.DialoguesEx[tmpSrcIndex].relativelyStartTime, infoSrc.DialoguesEx[tmpSrcIndex].relativelyEndTime, infoSrc.DialoguesEx[tmpSrcIndex].EnLine))
  246. }
  247. debugInfos = append(debugInfos, "---------------------------------------------")
  248. //println("---------------------------------------------")
  249. }
  250. oldMean := stat.Mean(startDiffTimeList)
  251. oldSd := stat.Sd(startDiffTimeList)
  252. newMean := -1.0
  253. newSd := -1.0
  254. per := 1.0
  255. // 如果 SD 较大的时候才需要剔除
  256. if oldSd > 0.1 {
  257. var outliersMap = make(map[float64]int, 0)
  258. outliers, _, _ := tukey.Outliers(0.3, tmpStartDiffTime)
  259. for _, outlier := range outliers {
  260. outliersMap[outlier] = 0
  261. }
  262. var newStartDiffTimeList = make([]float64, 0)
  263. for _, f := range tmpStartDiffTime {
  264. _, ok := outliersMap[f]
  265. if ok == true {
  266. continue
  267. }
  268. newStartDiffTimeList = append(newStartDiffTimeList, f)
  269. }
  270. orgLen := startDiffTimeList.Len()
  271. startDiffTimeList = make(stat.Float64Slice, 0)
  272. for _, f := range newStartDiffTimeList {
  273. startDiffTimeList = append(startDiffTimeList, f)
  274. }
  275. newLen := startDiffTimeList.Len()
  276. per = float64(newLen) / float64(orgLen)
  277. newMean = stat.Mean(startDiffTimeList)
  278. newSd = stat.Sd(startDiffTimeList)
  279. }
  280. if newMean == -1.0 {
  281. newMean = oldMean
  282. }
  283. if newSd == -1.0 {
  284. newSd = oldSd
  285. }
  286. // 不为空的时候,生成调试文件
  287. if staticLineFileSavePath != "" {
  288. //staticLineFileSavePath = "bar.html"
  289. err = SaveStaticLineV1(staticLineFileSavePath, infoBase.Name, infoSrc.Name,
  290. per, oldMean, oldSd, newMean, newSd, xAxis,
  291. startDiffTimeLineData, endDiffTimeLineData)
  292. if err != nil {
  293. return false, 0, 0, err
  294. }
  295. }
  296. // 跳过的逻辑是 mean 是 0 ,那么现在如果判断有问题,缓存的调试文件继续生成,然后强制返回 0 来跳过后续的逻辑
  297. // 这里需要考虑,找到的连续 5 句话匹配的有多少句,占比整体所有的 Dialogue 是多少,太低也需要跳过
  298. matchIndexLineCount := len(matchIndexList) * maxCompareDialogue
  299. //perMatch := float64(matchIndexLineCount) / float64(len(infoSrc.DialoguesEx))
  300. perMatch := float64(matchIndexLineCount) / float64(len(baseCorpus))
  301. if perMatch < s.FixerConfig.V1_MinMatchedPercent {
  302. tmpContent := infoSrc.Name + fmt.Sprintf(" Sequence match %d dialogues (< %f%%), Skip,", s.FixerConfig.V1_MaxCompareDialogue, s.FixerConfig.V1_MinMatchedPercent*100) + fmt.Sprintf(" %f%% ", perMatch*100)
  303. debugInfos = append(debugInfos, tmpContent)
  304. log_helper.GetLogger().Infoln(tmpContent)
  305. } else {
  306. tmpContent := infoSrc.Name + fmt.Sprintf(" Sequence match %d dialogues,", s.FixerConfig.V1_MaxCompareDialogue) + fmt.Sprintf(" %f%% ", perMatch*100)
  307. debugInfos = append(debugInfos, tmpContent)
  308. log_helper.GetLogger().Infoln(tmpContent)
  309. }
  310. // 输出调试的匹配时间轴信息的列表
  311. if debugInfoFileSavePath != "" {
  312. err = my_util.WriteStrings2File(debugInfoFileSavePath, debugInfos)
  313. if err != nil {
  314. return false, 0, 0, err
  315. }
  316. }
  317. // 虽然有条件判断是认为有问题的,但是返回值还是要填写除去的
  318. if perMatch < s.FixerConfig.V1_MinMatchedPercent {
  319. return false, newMean, newSd, nil
  320. }
  321. return true, newMean, newSd, nil
  322. }
  323. // GetOffsetTimeV2 使用内置的字幕校正外置的字幕时间轴
  324. func (s *SubTimelineFixer) GetOffsetTimeV2(baseUnit, srcUnit *sub_helper.SubUnit, audioVadList []vad.VADInfo) (bool, float64, float64, error) {
  325. // 时间轴差值数组
  326. var tmpStartDiffTimeList = make([]float64, 0)
  327. var tmpStartDiffTimeMap = treemap.NewWith(utils.Float64Comparator)
  328. var tmpStartDiffTimeListEx = make(stat.Float64Slice, 0)
  329. // -------------------------------------------------
  330. var bUseSubOrAudioAsBase = true
  331. if baseUnit == nil && audioVadList != nil {
  332. // 使用 音频 来进行匹配
  333. bUseSubOrAudioAsBase = false
  334. } else if baseUnit != nil {
  335. // 使用 字幕 来进行匹配
  336. bUseSubOrAudioAsBase = true
  337. } else {
  338. return false, 0, 0, errors.New("GetOffsetTimeV2 input baseUnit or AudioVad is nil")
  339. }
  340. // -------------------------------------------------
  341. /*
  342. 开始针对对白单元进行匹配
  343. 下面的逻辑需要参考 FFT识别流程.jpg 这个图示来理解
  344. 实际实现的时候,会在上述 srcUnit 上,做一个滑动窗口来做匹配,80% 是窗口,20% 用于移动
  345. 步长固定在 10 步
  346. */
  347. audioFloatList := vad.GetFloatSlice(audioVadList)
  348. srcVADLen := len(srcUnit.VADList)
  349. // 滑动窗口的长度
  350. srcWindowLen := int(float64(srcVADLen) * s.FixerConfig.V2_WindowMatchPer)
  351. srcSlideLen := srcVADLen - srcWindowLen
  352. // 窗口可以滑动的长度
  353. srcSlideLenHalf := srcSlideLen / 2
  354. //
  355. oneStep := srcSlideLenHalf / s.FixerConfig.V2_CompareParts
  356. if srcSlideLen <= 0 {
  357. srcSlideLen = 1
  358. }
  359. if oneStep <= 0 {
  360. oneStep = 1
  361. }
  362. insertIndex := 0
  363. // -------------------------------------------------
  364. // 实际 FFT 的匹配逻辑函数
  365. fixFunc := func(i interface{}) error {
  366. inData := i.(InputData)
  367. // -------------------------------------------------
  368. // 开始匹配
  369. // 这里的对白单元,当前的 Base 进行对比,详细示例见图解。Step 2 中橙色的区域
  370. fffAligner := NewFFTAligner()
  371. var bok = false
  372. var nowBaseStartTime = 0.0
  373. var offsetIndex = 0
  374. var score = 0.0
  375. // 图解,参考 Step 3
  376. if bUseSubOrAudioAsBase == false {
  377. // 使用 音频 来进行匹配
  378. // 去掉头和尾,具体百分之多少,见 V2_FrontAndEndPerBase
  379. audioCutLen := int(float64(len(inData.AudioVADList)) * s.FixerConfig.V2_FrontAndEndPerBase)
  380. offsetIndex, score = fffAligner.Fit(inData.AudioVADList[audioCutLen:len(inData.AudioVADList)-audioCutLen], inData.SrcUnit.GetVADFloatSlice()[inData.OffsetIndex:srcWindowLen+inData.OffsetIndex])
  381. realOffsetIndex := offsetIndex + audioCutLen
  382. if realOffsetIndex < 0 {
  383. return nil
  384. }
  385. // offsetIndex 这里得到的是 10ms 为一个单位的 OffsetIndex
  386. nowBaseStartTime = vad.GetAudioIndex2Time(realOffsetIndex)
  387. } else {
  388. // 使用 字幕 来进行匹配
  389. offsetIndex, score = fffAligner.Fit(inData.BaseUnit.GetVADFloatSlice(), inData.SrcUnit.GetVADFloatSlice()[inData.OffsetIndex:inData.OffsetIndex+srcWindowLen])
  390. if offsetIndex < 0 {
  391. return nil
  392. }
  393. bok, nowBaseStartTime = inData.BaseUnit.GetIndexTimeNumber(offsetIndex, true)
  394. if bok == false {
  395. return nil
  396. }
  397. }
  398. // 需要校正的字幕
  399. bok, nowSrcStartTime := inData.SrcUnit.GetIndexTimeNumber(inData.OffsetIndex, true)
  400. if bok == false {
  401. return nil
  402. }
  403. // 时间差值
  404. TimeDiffStartCorrelation := nowBaseStartTime - nowSrcStartTime
  405. log_helper.GetLogger().Debugln("------------")
  406. log_helper.GetLogger().Debugln("OffsetTime:", fmt.Sprintf("%v", TimeDiffStartCorrelation),
  407. "offsetIndex:", offsetIndex,
  408. "score:", fmt.Sprintf("%v", score))
  409. mutexFixV2.Lock()
  410. tmpStartDiffTimeList = append(tmpStartDiffTimeList, TimeDiffStartCorrelation)
  411. tmpStartDiffTimeListEx = append(tmpStartDiffTimeListEx, TimeDiffStartCorrelation)
  412. tmpStartDiffTimeMap.Put(score, insertIndex)
  413. insertIndex++
  414. mutexFixV2.Unlock()
  415. // -------------------------------------------------
  416. return nil
  417. }
  418. // -------------------------------------------------
  419. antPool, err := ants.NewPoolWithFunc(s.FixerConfig.V2_FixThreads, func(inData interface{}) {
  420. data := inData.(InputData)
  421. defer data.Wg.Done()
  422. ctx, cancel := context.WithTimeout(context.Background(), time.Duration(s.FixerConfig.V2_SubOneUnitProcessTimeOut)*time.Second)
  423. defer cancel()
  424. done := make(chan error, 1)
  425. panicChan := make(chan interface{}, 1)
  426. go func() {
  427. defer func() {
  428. if p := recover(); p != nil {
  429. panicChan <- p
  430. }
  431. }()
  432. done <- fixFunc(inData)
  433. }()
  434. select {
  435. case err := <-done:
  436. if err != nil {
  437. log_helper.GetLogger().Errorln("GetOffsetTimeV2.NewPoolWithFunc done with Error", err.Error())
  438. }
  439. return
  440. case p := <-panicChan:
  441. log_helper.GetLogger().Errorln("GetOffsetTimeV2.NewPoolWithFunc got panic", p)
  442. return
  443. case <-ctx.Done():
  444. log_helper.GetLogger().Errorln("GetOffsetTimeV2.NewPoolWithFunc got time out", ctx.Err())
  445. return
  446. }
  447. })
  448. if err != nil {
  449. return false, 0, 0, err
  450. }
  451. defer antPool.Release()
  452. // -------------------------------------------------
  453. wg := sync.WaitGroup{}
  454. for i := srcSlideLenHalf; i < srcSlideLen; {
  455. wg.Add(1)
  456. if bUseSubOrAudioAsBase == true {
  457. // 使用字幕
  458. err = antPool.Invoke(InputData{BaseUnit: *baseUnit, SrcUnit: *srcUnit, OffsetIndex: i, Wg: &wg})
  459. } else {
  460. // 使用音频
  461. err = antPool.Invoke(InputData{AudioVADList: audioFloatList, SrcUnit: *srcUnit, OffsetIndex: i, Wg: &wg})
  462. }
  463. if err != nil {
  464. log_helper.GetLogger().Errorln("GetOffsetTimeV2 ants.Invoke", err)
  465. }
  466. i += oneStep
  467. }
  468. wg.Wait()
  469. // 这里可能遇到匹配的时候没有能够执行够 V2_CompareParts 次,有可能是负数跳过或者时间转换失败导致,前者为主(可能是这两个就是一个东西的时候,或者说没有时间轴偏移的时候)
  470. if insertIndex < s.FixerConfig.V2_CompareParts/2 {
  471. return false, 0, 0, nil
  472. }
  473. outCorrelationFixResult := s.calcMeanAndSD(tmpStartDiffTimeListEx, tmpStartDiffTimeList)
  474. log_helper.GetLogger().Infoln(fmt.Sprintf("FFTAligner Old Mean: %v SD: %v Per: %v", outCorrelationFixResult.OldMean, outCorrelationFixResult.OldSD, outCorrelationFixResult.Per))
  475. log_helper.GetLogger().Infoln(fmt.Sprintf("FFTAligner New Mean: %v SD: %v Per: %v", outCorrelationFixResult.NewMean, outCorrelationFixResult.NewSD, outCorrelationFixResult.Per))
  476. value, index := tmpStartDiffTimeMap.Max()
  477. log_helper.GetLogger().Infoln("FFTAligner Max score:", fmt.Sprintf("%v", value.(float64)), "Time:", fmt.Sprintf("%v", tmpStartDiffTimeList[index.(int)]))
  478. return true, outCorrelationFixResult.NewMean, outCorrelationFixResult.NewSD, nil
  479. }
  480. func (s *SubTimelineFixer) calcMeanAndSD(startDiffTimeList stat.Float64Slice, tmpStartDiffTime []float64) FixResult {
  481. const minValue = -9999.0
  482. oldMean := stat.Mean(startDiffTimeList)
  483. oldSd := stat.Sd(startDiffTimeList)
  484. newMean := minValue
  485. newSd := minValue
  486. per := 1.0
  487. if len(tmpStartDiffTime) < 3 {
  488. return FixResult{
  489. oldMean,
  490. oldSd,
  491. newMean,
  492. newSd,
  493. per,
  494. }
  495. }
  496. // 如果 SD 较大的时候才需要剔除
  497. if oldSd > 0.1 {
  498. var outliersMap = make(map[float64]int, 0)
  499. outliers, _, _ := tukey.Outliers(0.3, tmpStartDiffTime)
  500. for _, outlier := range outliers {
  501. outliersMap[outlier] = 0
  502. }
  503. var newStartDiffTimeList = make([]float64, 0)
  504. for _, f := range tmpStartDiffTime {
  505. _, ok := outliersMap[f]
  506. if ok == true {
  507. continue
  508. }
  509. newStartDiffTimeList = append(newStartDiffTimeList, f)
  510. }
  511. orgLen := startDiffTimeList.Len()
  512. startDiffTimeList = make(stat.Float64Slice, 0)
  513. for _, f := range newStartDiffTimeList {
  514. startDiffTimeList = append(startDiffTimeList, f)
  515. }
  516. newLen := startDiffTimeList.Len()
  517. per = float64(newLen) / float64(orgLen)
  518. newMean = stat.Mean(startDiffTimeList)
  519. newSd = stat.Sd(startDiffTimeList)
  520. }
  521. if newMean == minValue {
  522. newMean = oldMean
  523. }
  524. if newSd == minValue {
  525. newSd = oldSd
  526. }
  527. return FixResult{
  528. oldMean,
  529. oldSd,
  530. newMean,
  531. newSd,
  532. per,
  533. }
  534. }
  // FixMask is the "-fix" marker string.
  // NOTE(review): presumably appended to the name of a corrected subtitle
  // file; it is not used in this part of the file, confirm against callers.
  const FixMask = "-fix"

  // mutexFixV2 guards the result collections that the GetOffsetTimeV2 workers
  // append to. It is package-level, so it is shared by all concurrent calls.
  var mutexFixV2 sync.Mutex
  // OutputData describes the result of matching one sliding-window position.
  type OutputData struct {
  	TimeDiffStartCorrelation float64 // computed timeline offset
  	OffsetIndex float64 // OffsetIndex inside the matched window
  	Score float64 // score of the match
  	InsertIndex int // number of the step this result belongs to
  }
  // InputData is the task handed to the GetOffsetTimeV2 worker pool for one
  // sliding-window position. Either BaseUnit or AudioVADList serves as the base.
  type InputData struct {
  	BaseUnit sub_helper.SubUnit // base VAD (subtitle as base)
  	AudioVADList []float64 // base VAD (audio as base)
  	SrcUnit sub_helper.SubUnit // VAD that has to be matched
  	OffsetIndex int // offset index of the sliding window
  	Wg *sync.WaitGroup // wait group the worker signals when the task is done
  }
  549. }