| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624 |
- package sub_helper
- import (
- "errors"
- "github.com/allanpk716/ChineseSubFinder/internal/common"
- "github.com/allanpk716/ChineseSubFinder/internal/pkg/archive_helper"
- "github.com/allanpk716/ChineseSubFinder/internal/pkg/decode"
- "github.com/allanpk716/ChineseSubFinder/internal/pkg/language"
- "github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
- "github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
- "github.com/allanpk716/ChineseSubFinder/internal/pkg/regex_things"
- "github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub"
- "github.com/allanpk716/ChineseSubFinder/internal/pkg/vad"
- "github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
- "github.com/allanpk716/ChineseSubFinder/internal/types/supplier"
- "github.com/go-rod/rod/lib/utils"
- "io/ioutil"
- "math"
- "os"
- "path/filepath"
- "strconv"
- "strings"
- "time"
- )
- // OrganizeDlSubFiles 需要从汇总来是网站字幕中,解压对应的压缩包中的字幕出来
- func OrganizeDlSubFiles(tmpFolderName string, subInfos []supplier.SubInfo) (map[string][]string, error) {
- // 缓存列表,整理后的字幕列表
- // SxEx - []string 字幕的路径
- var siteSubInfoDict = make(map[string][]string)
- tmpFolderFullPath, err := my_util.GetTmpFolderByName(tmpFolderName)
- if err != nil {
- return nil, err
- }
- // 把后缀名给改好
- ChangeVideoExt2SubExt(subInfos)
- // 第三方的解压库,首先不支持 io.Reader 的操作,也就是得缓存到本地硬盘再读取解压
- // 且使用 walk 会无法解压 rar,得指定具体的实例,太麻烦了,直接用通用的接口得了,就是得都缓存下来再判断
- // 基于以上两点,写了一堆啰嗦的逻辑···
- for i := range subInfos {
- // 先存下来,保存是时候需要前缀,前缀就是从那个网站下载来的
- nowFileSaveFullPath := filepath.Join(tmpFolderFullPath, GetFrontNameAndOrgName(&subInfos[i]))
- err = utils.OutputFile(nowFileSaveFullPath, subInfos[i].Data)
- if err != nil {
- log_helper.GetLogger().Errorln("getFrontNameAndOrgName - OutputFile", subInfos[i].FromWhere, subInfos[i].Name, subInfos[i].TopN, err)
- continue
- }
- nowExt := strings.ToLower(subInfos[i].Ext)
- epsKey := my_util.GetEpisodeKeyName(subInfos[i].Season, subInfos[i].Episode)
- _, ok := siteSubInfoDict[epsKey]
- if ok == false {
- // 不存在则实例化
- siteSubInfoDict[epsKey] = make([]string, 0)
- }
- if nowExt != ".zip" && nowExt != ".tar" && nowExt != ".rar" && nowExt != ".7z" {
- // 是否是受支持的字幕类型
- if sub_parser_hub.IsSubExtWanted(nowExt) == false {
- continue
- }
- // 加入缓存列表
- siteSubInfoDict[epsKey] = append(siteSubInfoDict[epsKey], nowFileSaveFullPath)
- } else {
- // 那么就是需要解压的文件了
- // 解压,给一个单独的文件夹
- unzipTmpFolder := filepath.Join(tmpFolderFullPath, subInfos[i].FromWhere)
- err = os.MkdirAll(unzipTmpFolder, os.ModePerm)
- if err != nil {
- return nil, err
- }
- err = archive_helper.UnArchiveFile(nowFileSaveFullPath, unzipTmpFolder)
- // 解压完成后,遍历受支持的字幕列表,加入缓存列表
- if err != nil {
- log_helper.GetLogger().Errorln("archiver.UnArchive", subInfos[i].FromWhere, subInfos[i].Name, subInfos[i].TopN, err)
- continue
- }
- // 搜索这个目录下的所有符合字幕格式的文件
- subFileFullPaths, err := SearchMatchedSubFileByDir(unzipTmpFolder)
- if err != nil {
- log_helper.GetLogger().Errorln("searchMatchedSubFile", subInfos[i].FromWhere, subInfos[i].Name, subInfos[i].TopN, err)
- continue
- }
- // 这里需要给这些下载到的文件进行改名,加是从那个网站来的前缀,后续好查找
- for _, fileFullPath := range subFileFullPaths {
- newSubName := AddFrontName(subInfos[i], filepath.Base(fileFullPath))
- newSubNameFullPath := filepath.Join(tmpFolderFullPath, newSubName)
- // 改名
- err = os.Rename(fileFullPath, newSubNameFullPath)
- if err != nil {
- log_helper.GetLogger().Errorln("os.Rename", subInfos[i].FromWhere, subInfos[i].Name, subInfos[i].TopN, err)
- continue
- }
- // 加入缓存列表
- siteSubInfoDict[epsKey] = append(siteSubInfoDict[epsKey], newSubNameFullPath)
- }
- }
- }
- return siteSubInfoDict, nil
- }
- // ChangeVideoExt2SubExt 检测 Name,如果是视频的后缀名就改为字幕的后缀名
- func ChangeVideoExt2SubExt(subInfos []supplier.SubInfo) {
- for x, info := range subInfos {
- tmpSubFileName := info.Name
- // 如果后缀名是下载字幕目标的后缀名 或者 是压缩包格式的,则跳过
- if strings.Contains(tmpSubFileName, info.Ext) == true || archive_helper.IsWantedArchiveExtName(tmpSubFileName) == true {
- } else {
- subInfos[x].Name = tmpSubFileName + info.Ext
- }
- }
- }
- // SelectChineseBestBilingualSubtitle 找到合适的双语中文字幕,简体->繁体,以及 字幕类型的优先级选择
- func SelectChineseBestBilingualSubtitle(subs []subparser.FileInfo, subTypePriority int) *subparser.FileInfo {
- // 先傻一点实现优先双语的,之前的写法有 bug
- for _, info := range subs {
- // 找到了中文字幕
- if language.HasChineseLang(info.Lang) == true {
- // 字幕的优先级 0 - 原样, 1 - srt , 2 - ass/ssa
- if subTypePriority == 1 {
- // 1 - srt
- if strings.ToLower(info.Ext) == common.SubExtSRT {
- // 优先双语
- if language.IsBilingualSubtitle(info.Lang) == true {
- return &info
- }
- }
- } else if subTypePriority == 2 {
- // 2 - ass/ssa
- if strings.ToLower(info.Ext) == common.SubExtASS || strings.ToLower(info.Ext) == common.SubExtSSA {
- // 优先双语
- if language.IsBilingualSubtitle(info.Lang) == true {
- return &info
- }
- }
- } else {
- // 优先双语
- if language.IsBilingualSubtitle(info.Lang) == true {
- return &info
- }
- }
- }
- }
- return nil
- }
- // SelectChineseBestSubtitle 找到合适的中文字幕,简体->繁体,以及 字幕类型的优先级选择
- func SelectChineseBestSubtitle(subs []subparser.FileInfo, subTypePriority int) *subparser.FileInfo {
- // 先傻一点实现优先双语的,之前的写法有 bug
- for _, info := range subs {
- // 找到了中文字幕
- if language.HasChineseLang(info.Lang) == true {
- // 字幕的优先级 0 - 原样, 1 - srt , 2 - ass/ssa
- if subTypePriority == 1 {
- // 1 - srt
- if strings.ToLower(info.Ext) == common.SubExtSRT {
- return &info
- }
- } else if subTypePriority == 2 {
- // 2 - ass/ssa
- if strings.ToLower(info.Ext) == common.SubExtASS || strings.ToLower(info.Ext) == common.SubExtSSA {
- return &info
- }
- } else {
- return &info
- }
- }
- }
- return nil
- }
- // GetFrontNameAndOrgName 返回的名称包含,那个网站下载的,这个网站中排名第几,文件名
- func GetFrontNameAndOrgName(info *supplier.SubInfo) string {
- infoName := ""
- fileName, err := decode.GetVideoInfoFromFileName(info.Name)
- if err != nil {
- log_helper.GetLogger().Warnln("", err)
- infoName = info.Name
- } else {
- infoName = fileName.Title + "_S" + strconv.Itoa(fileName.Season) + "E" + strconv.Itoa(fileName.Episode) + filepath.Ext(info.Name)
- }
- info.Name = infoName
- return "[" + info.FromWhere + "]_" + strconv.FormatInt(info.TopN, 10) + "_" + infoName
- }
- // AddFrontName 添加文件的前缀
- func AddFrontName(info supplier.SubInfo, orgName string) string {
- return "[" + info.FromWhere + "]_" + strconv.FormatInt(info.TopN, 10) + "_" + orgName
- }
- // SearchMatchedSubFileByDir 搜索符合后缀名的视频文件,排除 Sub_SxE0 这样的文件夹中的文件
- func SearchMatchedSubFileByDir(dir string) ([]string, error) {
- // 这里有个梗,会出现 __MACOSX 这类文件夹,那么里面会有一样的文件,需要用文件大小排除一下,至少大于 1 kb 吧
- var fileFullPathList = make([]string, 0)
- pathSep := string(os.PathSeparator)
- files, err := ioutil.ReadDir(dir)
- if err != nil {
- return nil, err
- }
- for _, curFile := range files {
- fullPath := dir + pathSep + curFile.Name()
- if curFile.IsDir() {
- // 需要排除 Sub_S1E0、Sub_S2E0 这样的整季的字幕文件夹,这里仅仅是缓存,不会被加载的
- matched := regex_things.RegOneSeasonSubFolderNameMatch.FindAllStringSubmatch(curFile.Name(), -1)
- if matched == nil || len(matched) > 0 {
- continue
- }
- // 内层的错误就无视了
- oneList, _ := SearchMatchedSubFileByDir(fullPath)
- if oneList != nil {
- fileFullPathList = append(fileFullPathList, oneList...)
- }
- } else {
- // 这里就是文件了
- if curFile.Size() < 1000 {
- continue
- }
- if sub_parser_hub.IsSubExtWanted(filepath.Ext(curFile.Name())) == true {
- fileFullPathList = append(fileFullPathList, fullPath)
- }
- }
- }
- return fileFullPathList, nil
- }
- // SearchMatchedSubFileByOneVideo 搜索这个视频当前目录下匹配的字幕
- func SearchMatchedSubFileByOneVideo(oneVideoFullPath string) ([]string, error) {
- dir := filepath.Dir(oneVideoFullPath)
- fileName := filepath.Base(oneVideoFullPath)
- fileName = strings.ToLower(fileName)
- fileName = strings.ReplaceAll(fileName, filepath.Ext(fileName), "")
- pathSep := string(os.PathSeparator)
- files, err := ioutil.ReadDir(dir)
- if err != nil {
- return nil, err
- }
- var matchedSubs = make([]string, 0)
- for _, curFile := range files {
- if curFile.IsDir() {
- continue
- }
- // 这里就是文件了
- if curFile.Size() < 1000 {
- continue
- }
- // 判断的时候用小写的,后续重命名的时候用原有的名称
- nowFileName := strings.ToLower(curFile.Name())
- // 后缀名得对
- if sub_parser_hub.IsSubExtWanted(filepath.Ext(nowFileName)) == false {
- continue
- }
- // 字幕文件名应该包含 视频文件名(无后缀)
- if strings.Contains(nowFileName, fileName) == false {
- continue
- }
- oldPath := dir + pathSep + curFile.Name()
- matchedSubs = append(matchedSubs, oldPath)
- }
- return matchedSubs, nil
- }
- // SearchVideoMatchSubFileAndRemoveExtMark 找到找个视频目录下相匹配的字幕,同时去除这些字幕中 .default 或者 .forced 的标记。注意这两个标记不应该同时出现,否则无法正确去除
- func SearchVideoMatchSubFileAndRemoveExtMark(oneVideoFullPath string) error {
- dir := filepath.Dir(oneVideoFullPath)
- fileName := filepath.Base(oneVideoFullPath)
- fileName = strings.ToLower(fileName)
- fileName = strings.ReplaceAll(fileName, filepath.Ext(fileName), "")
- pathSep := string(os.PathSeparator)
- files, err := ioutil.ReadDir(dir)
- if err != nil {
- return err
- }
- for _, curFile := range files {
- if curFile.IsDir() {
- continue
- } else {
- // 这里就是文件了
- if curFile.Size() < 1000 {
- continue
- }
- // 判断的时候用小写的,后续重命名的时候用原有的名称
- nowFileName := strings.ToLower(curFile.Name())
- // 后缀名得对
- if sub_parser_hub.IsSubExtWanted(filepath.Ext(nowFileName)) == false {
- continue
- }
- // 字幕文件名应该包含 视频文件名(无后缀)
- if strings.Contains(nowFileName, fileName) == false {
- continue
- }
- // 得包含 .default. 找个关键词
- if strings.Contains(nowFileName, subparser.Sub_Ext_Mark_Default+".") == true {
- oldPath := dir + pathSep + curFile.Name()
- newPath := dir + pathSep + strings.ReplaceAll(curFile.Name(), subparser.Sub_Ext_Mark_Default+".", ".")
- err = os.Rename(oldPath, newPath)
- if err != nil {
- return err
- }
- } else if strings.Contains(nowFileName, subparser.Sub_Ext_Mark_Forced+".") == true {
- // 得包含 .forced. 找个关键词
- oldPath := dir + pathSep + curFile.Name()
- newPath := dir + pathSep + strings.ReplaceAll(curFile.Name(), subparser.Sub_Ext_Mark_Forced+".", ".")
- err = os.Rename(oldPath, newPath)
- if err != nil {
- return err
- }
- } else {
- continue
- }
- }
- }
- return nil
- }
- // DeleteOneSeasonSubCacheFolder 删除一个连续剧中的所有一季字幕的缓存文件夹
- func DeleteOneSeasonSubCacheFolder(seriesDir string) error {
- debugFolderByName, err := my_util.GetDebugFolderByName([]string{filepath.Base(seriesDir)})
- if err != nil {
- return err
- }
- files, err := ioutil.ReadDir(debugFolderByName)
- if err != nil {
- return err
- }
- pathSep := string(os.PathSeparator)
- for _, curFile := range files {
- if curFile.IsDir() == true {
- matched := regex_things.RegOneSeasonSubFolderNameMatch.FindAllStringSubmatch(curFile.Name(), -1)
- if matched == nil || len(matched) < 1 {
- continue
- }
- fullPath := debugFolderByName + pathSep + curFile.Name()
- err = os.RemoveAll(fullPath)
- if err != nil {
- return err
- }
- }
- }
- return nil
- }
- /*
- 只针对英文字幕进行合并分散的 DialoguesFilter
- 会遇到这样的字幕,如下0
- 2line-The Card Counter (2021) WEBDL-1080p.chinese(inside).ass
- 它的对白一句话分了两个 dialogue 去做。这样做后续字幕时间轴校正就会遇到问题,因为只有一半,匹配占比会很低
- (每一个 Dialogue 的首字母需要分析,大写和小写的占比是多少,统计一下,正常的,和上述特殊的)
- 那么,就需要额外的逻辑去对 DialoguesFilterEx 进行额外的推断
- 暂时考虑的方案是,英文对白每一句的开头应该是英文大写字幕,如果是小写字幕,就应该与上语句合并,且每一句的字符长度有大于一定才触发
- */
- func MergeMultiDialogue4EngSubtitle(inSubParser *subparser.FileInfo) {
- merger := NewDialogueMerger()
- for _, dialogueEx := range inSubParser.DialoguesFilterEx {
- merger.Add(dialogueEx)
- }
- inSubParser.DialoguesFilterEx = merger.Get()
- }
- // GetVADInfoFeatureFromSub 跟下面的 GetVADInfoFeatureFromSubNeedOffsetTimeWillInsert 函数功能一致
- func GetVADInfoFeatureFromSub(fileInfo *subparser.FileInfo, frontAndEndPer float64, subUnitMaxCount int, insert bool) ([]SubUnit, error) {
- return GetVADInfoFeatureFromSubNeedOffsetTimeWillInsert(fileInfo, frontAndEndPer, subUnitMaxCount, 0, insert)
- }
- /*
- GetVADInfoFeatureFromSubNeedOffsetTimeWillInsert 只不过这里可以加一个每一句话固定的偏移时间
- 这里的字幕要求是完整的一个字幕
- 1. 抽取字幕的时间片段的时候,暂定,前 15% 和后 15% 要避开,前奏、主题曲、结尾曲
- 2. 将整个字幕,抽取连续 5 句对话为一个单元,提取时间片段信息
- 3. 这里抽取的是特征,也就有额外的逻辑去找这个特征(本程序内会描述为“钥匙”)
- */
- func GetVADInfoFeatureFromSubNeedOffsetTimeWillInsert(fileInfo *subparser.FileInfo, SkipFrontAndEndPer float64, subUnitMaxCount int, offsetTime float64, insert bool) ([]SubUnit, error) {
- if subUnitMaxCount < 0 {
- subUnitMaxCount = 0
- }
- srcSubUnitList := make([]SubUnit, 0)
- srcSubDialogueList := make([]subparser.OneDialogueEx, 0)
- srcOneSubUnit := NewSubUnit()
- // 最后一个对话的结束时间
- lastDialogueExTimeEnd, err := my_util.ParseTime(fileInfo.DialoguesFilterEx[len(fileInfo.DialoguesFilterEx)-1].EndTime)
- if err != nil {
- return nil, err
- }
- // 相当于总时长
- fullDuration := my_util.Time2SecondNumber(lastDialogueExTimeEnd)
- // 最低的起始时间,因为可能需要裁剪范围
- startRangeTimeMin := fullDuration * SkipFrontAndEndPer
- endRangeTimeMax := fullDuration * (1.0 - SkipFrontAndEndPer)
- println(startRangeTimeMin)
- println(endRangeTimeMax)
- for i := 0; i < len(fileInfo.DialoguesFilterEx); i++ {
- oneDialogueExTimeStart, err := my_util.ParseTime(fileInfo.DialoguesFilterEx[i].StartTime)
- if err != nil {
- return nil, err
- }
- oneDialogueExTimeEnd, err := my_util.ParseTime(fileInfo.DialoguesFilterEx[i].EndTime)
- if err != nil {
- return nil, err
- }
- oneStart := my_util.Time2SecondNumber(oneDialogueExTimeStart)
- if SkipFrontAndEndPer > 0 {
- if fullDuration*SkipFrontAndEndPer > oneStart || fullDuration*(1.0-SkipFrontAndEndPer) < oneStart {
- continue
- }
- }
- // 如果当前的这一句话,为空,或者进过正则表达式剔除特殊字符后为空,则跳过
- if my_util.ReplaceSpecString(fileInfo.GetDialogueExContent(i), "") == "" {
- continue
- }
- // 低于 5句对白,则添加
- if srcOneSubUnit.GetDialogueCount() < subUnitMaxCount {
- // 算上偏移
- offsetTimeDuration := time.Duration(offsetTime * math.Pow10(9))
- oneDialogueExTimeStart = oneDialogueExTimeStart.Add(offsetTimeDuration)
- oneDialogueExTimeEnd = oneDialogueExTimeEnd.Add(offsetTimeDuration)
- // 如果没有偏移就是 0
- if insert == true {
- srcOneSubUnit.AddAndInsert(oneDialogueExTimeStart, oneDialogueExTimeEnd)
- } else {
- srcOneSubUnit.Add(oneDialogueExTimeStart, oneDialogueExTimeEnd)
- }
- // 这一个单元的 Dialogue 需要合并起来,才能判断是否符合“钥匙”的要求
- srcSubDialogueList = append(srcSubDialogueList, fileInfo.DialoguesFilterEx[i])
- } else {
- // 用完清空
- srcSubDialogueList = make([]subparser.OneDialogueEx, 0)
- // 将拼凑起来的对话组成一个单元进行存储起来
- srcSubUnitList = append(srcSubUnitList, *srcOneSubUnit)
- // 然后重置
- srcOneSubUnit = NewSubUnit()
- }
- }
- if srcOneSubUnit.GetDialogueCount() > 0 {
- srcSubUnitList = append(srcSubUnitList, *srcOneSubUnit)
- }
- return srcSubUnitList, nil
- }
- /*
- GetVADInfoFeatureFromSubNew 将 Sub 文件转换为 VAD List 信息
- */
- func GetVADInfoFeatureFromSubNew(fileInfo *subparser.FileInfo, SkipFrontAndEndPer float64) (*SubUnit, error) {
- outSubUnits := NewSubUnit()
- if len(fileInfo.DialoguesFilterEx) <= 0 {
- return nil, errors.New("GetVADInfoFeatureFromSubNew fileInfo Dialogue Length is 0")
- }
- /*
- 先拼凑出完整的一个 VAD List
- 因为 VAD 的窗口是 10ms,那么需要多每一句话按 10 ms 的单位进行取整
- 每一句话开始、结束的时间,需要向下取整
- */
- subStartTimeFloor, subEndTimeFloor, err := ReadSubStartAndEndTime(fileInfo)
- if err != nil {
- return nil, err
- }
- // 如果想要从 0 时间点开始算,那么 subStartTimeFloor 这个值就需要重置到0
- subStartTimeFloor = 0
- subFullSecondTimeFloor := subEndTimeFloor - subStartTimeFloor
- // 根据这个时长就能够得到一个完整的 VAD List,然后再通过每一句对白进行 VAD 值的调整即可,这样就能够保证
- // 相同的一个字幕因为使用 ffmpeg 导出 srt 和 ass 后的,可能存在总体时间轴不一致的问题
- // 123.450 - > 12345
- vadLen := int(subFullSecondTimeFloor * 100)
- subVADs := make([]vad.VADInfo, vadLen)
- subStartTimeFloor10ms := subStartTimeFloor * 100
- for i := 0; i < vadLen; i++ {
- subVADs[i] = *vad.NewVADInfoBase(false, time.Duration((subStartTimeFloor10ms+float64(i))*math.Pow10(7)))
- }
- // 计算出需要截取的片段,起始和结束
- skipLen := int(float64(vadLen) * SkipFrontAndEndPer)
- skipStartIndex := skipLen
- skipEndIndex := vadLen - skipLen
- // 现在需要从 fileInfo 的每一句对白也就对应一段连续的 VAD active = true 来进行改写,记得向下取整
- lastDialogueIndex := 0
- for index, dialogueEx := range fileInfo.DialoguesFilterEx {
- // 如果当前的这一句话,为空,或者进过正则表达式剔除特殊字符后为空,则跳过
- if my_util.ReplaceSpecString(fileInfo.GetDialogueExContent(index), "") == "" {
- continue
- }
- // 字幕的开始时间
- oneDialogueStartTime, err := my_util.ParseTime(dialogueEx.StartTime)
- if err != nil {
- return nil, err
- }
- // 字幕的结束时间
- oneDialogueEndTime, err := my_util.ParseTime(dialogueEx.EndTime)
- if err != nil {
- return nil, err
- }
- // 字幕的时长,对时间进行向下取整
- oneDialogueStartTimeFloor := my_util.MakeCeil10msMultipleFromFloat(my_util.Time2SecondNumber(oneDialogueStartTime))
- oneDialogueEndTimeFloor := my_util.MakeFloor10msMultipleFromFloat(my_util.Time2SecondNumber(oneDialogueEndTime))
- // 得到一句对白的时长
- changeVADStartIndex := int(oneDialogueStartTimeFloor * 100)
- changeVADEndIndex := int(oneDialogueEndTimeFloor * 100)
- // 不能超过 最后一句话的时常
- if changeVADStartIndex > int(subEndTimeFloor*100) {
- continue
- }
- // 也不能比起始的第一句话时间轴更低
- if changeVADStartIndex < int(subStartTimeFloor10ms) {
- continue
- }
- // 当前这句话的开始和结束信息
- changerStartIndex := changeVADStartIndex - int(subStartTimeFloor10ms)
- if changerStartIndex < 0 {
- continue
- }
- changerEndIndex := changeVADEndIndex - int(subStartTimeFloor10ms)
- if changerEndIndex < 0 {
- continue
- }
- // 如果上一个对白的最后一个 OffsetIndex 连接着当前这一句的索引的 VAD 信息 active 是 true 就设置为 false
- if lastDialogueIndex == changerStartIndex {
- for i := 1; i <= 2; i++ {
- if lastDialogueIndex-i >= 0 && subVADs[lastDialogueIndex-i].Active == true {
- subVADs[lastDialogueIndex-i].Active = false
- }
- }
- }
- // 开始根据当前这句话进行 VAD 信息的设置
- // 调整之前做好的整体 VAD 的信息,符合 VAD active = true
- if changerEndIndex >= vadLen {
- changerEndIndex = vadLen - 1
- }
- for i := changerStartIndex; i <= changerEndIndex; i++ {
- subVADs[i].Active = true
- }
- lastDialogueIndex = changerEndIndex
- }
- // 截取出来当前这一段
- tmpVADList := subVADs[skipStartIndex:skipEndIndex]
- outSubUnits.VADList = tmpVADList
- tmpStartTime := time.Time{}
- tmpStartTime = tmpStartTime.Add(tmpVADList[0].Time)
- tmpEndTime := time.Time{}
- tmpEndTime = tmpEndTime.Add(tmpVADList[len(tmpVADList)-1].Time)
- outSubUnits.SetBaseTime(tmpStartTime)
- outSubUnits.SetOffsetStartTime(tmpStartTime)
- outSubUnits.SetOffsetEndTime(tmpEndTime)
- return outSubUnits, nil
- }
- func ReadSubStartAndEndTime(fileInfo *subparser.FileInfo) (float64, float64, error) {
- /*
- 因为是先构建完整的时间轴 VAD ,然后再用每一句话去修改对应的 VAD 段
- 那么,如果字幕第一句话时间轴有问题,就会出问题,所以这里返回的时候需要判断是否 StartTime 正确
- 因为可能会有一种情况,读取到的字幕是经过 V1 校正时间的,那么第一句和前几句话,可能时间是 Dialogue: 0,23:59:31.32,23:59:33.23
- 明显时间过大,导致减出来的值是负值,会越界访问
- */
- getTimeFunc := func(fileInfo *subparser.FileInfo, startIndex int) (bool, float64, float64, error) {
- // 字幕的开始时间
- subStartTime, err := my_util.ParseTime(fileInfo.DialoguesFilterEx[startIndex].StartTime)
- if err != nil {
- return false, 0, 0, err
- }
- // 字幕的结束时间
- subEndTime, err := my_util.ParseTime(fileInfo.DialoguesFilterEx[len(fileInfo.DialoguesFilterEx)-1].EndTime)
- if err != nil {
- return false, 0, 0, err
- }
- // 字幕的时长,对时间进行向下取整
- subStartTimeFloor := my_util.MakeFloor10msMultipleFromFloat(my_util.Time2SecondNumber(subStartTime))
- subEndTimeFloor := my_util.MakeFloor10msMultipleFromFloat(my_util.Time2SecondNumber(subEndTime))
- if subEndTimeFloor-subStartTimeFloor < 0 {
- // 说明 StartTime 的数值太大,不正常,超过 EndTime 了,startIndex 需要累加
- return false, 0, 0, nil
- }
- return true, subStartTimeFloor, subEndTimeFloor, nil
- }
- startIndex := 0
- var err error
- var subStartTimeFloor, subEndTimeFloor float64
- bok := false
- for bok == false {
- bok, subStartTimeFloor, subEndTimeFloor, err = getTimeFunc(fileInfo, startIndex)
- if err != nil {
- return 0, 0, err
- }
- if bok == true {
- break
- }
- startIndex++
- }
- return subStartTimeFloor, subEndTimeFloor, err
- }
|