Browse Source

重写 SRT 字幕的解析器

Signed-off-by: allan716 <[email protected]>
allan716 3 years ago
parent
commit
16163f9eaf

+ 168 - 36
internal/logic/sub_parser/srt/srt.go

@@ -3,10 +3,12 @@ package srt
 import (
 import (
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/language"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/language"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
+	"github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/regex_things"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/regex_things"
 	"github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
 	"github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
 	"os"
 	"os"
 	"path/filepath"
 	"path/filepath"
+	"strconv"
 	"strings"
 	"strings"
 )
 )
 
 
@@ -45,50 +47,48 @@ func (p Parser) DetermineFileTypeFromFile(filePath string) (bool, *subparser.Fil
 // DetermineFileTypeFromBytes 确定字幕文件的类型,是双语字幕或者某一种语言等等信息
 // DetermineFileTypeFromBytes 确定字幕文件的类型,是双语字幕或者某一种语言等等信息
 func (p Parser) DetermineFileTypeFromBytes(inBytes []byte, nowExt string) (bool, *subparser.FileInfo, error) {
 func (p Parser) DetermineFileTypeFromBytes(inBytes []byte, nowExt string) (bool, *subparser.FileInfo, error) {
 
 
-	allString := string(inBytes)
-	// 注意,需要替换掉 \r 不然正则表达式会有问题
-	allString = strings.ReplaceAll(allString, "\r", "")
-
-	// 找到 start end text
-	matched := regex_things.ReMatchDialogueSRT.FindAllStringSubmatch(allString, -1)
-	if matched == nil || len(matched) < 1 {
-		matched = regex_things.ReMatchDialogueSRT2.FindAllStringSubmatch(allString, -1)
-		if matched == nil || len(matched) < 1 {
-			log_helper.GetLogger().Debugln("DetermineFileTypeFromBytes can't found DialoguesFilter, Skip")
-			return false, nil, nil
-		}
-	}
 	subFileInfo := subparser.FileInfo{}
 	subFileInfo := subparser.FileInfo{}
 	subFileInfo.Content = string(inBytes)
 	subFileInfo.Content = string(inBytes)
 	subFileInfo.Ext = nowExt
 	subFileInfo.Ext = nowExt
 	subFileInfo.Dialogues = make([]subparser.OneDialogue, 0)
 	subFileInfo.Dialogues = make([]subparser.OneDialogue, 0)
 	subFileInfo.DialoguesFilter = make([]subparser.OneDialogue, 0)
 	subFileInfo.DialoguesFilter = make([]subparser.OneDialogue, 0)
+
+	orgDialogues := p.parseContent(inBytes)
+	if len(orgDialogues) <= 0 {
+		log_helper.GetLogger().Debugln("DetermineFileTypeFromBytes can't found DialoguesFilter, Skip")
+		return false, nil, nil
+	}
+	subFileInfo.Dialogues = orgDialogues
 	// 这里需要统计一共有几个 \N,以及这个数量在整体行数中的比例,这样就知道是不是双语字幕了
 	// 这里需要统计一共有几个 \N,以及这个数量在整体行数中的比例,这样就知道是不是双语字幕了
 	countLineFeed := 0
 	countLineFeed := 0
-	for _, oneDial := range matched {
-		startTime := oneDial[2]
-		endTime := oneDial[3]
-		nowText := oneDial[4]
-		odl := subparser.OneDialogue{
-			StartTime: startTime,
-			EndTime:   endTime,
+	for _, oneDialogue := range orgDialogues {
+
+		if len(oneDialogue.Lines) == 0 || my_util.ReplaceSpecString(oneDialogue.Lines[0], "") == "" {
+			continue
 		}
 		}
-		odl.Lines = make([]string, 0)
-		nowText = strings.TrimRight(nowText, "\n")
-		texts := strings.Split(nowText, "\n")
-		for i, text := range texts {
-			if i == 1 {
-				// 这样说明有两行字幕,也就是双语啦
-				countLineFeed++
-			}
+		ol := oneDialogue
+		for i, line := range oneDialogue.Lines {
+			fixedLine := line
+
 			// 剔除 {\fn微软雅黑\fs14}C'mon, Rick. We're -- We're almost there. {} 这一段
 			// 剔除 {\fn微软雅黑\fs14}C'mon, Rick. We're -- We're almost there. {} 这一段
-			text = regex_things.ReMatchBrace.ReplaceAllString(text, "")
-			text = regex_things.ReMatchBracket.ReplaceAllString(text, "")
-			text = strings.ReplaceAll(text, `\N`, "")
-			odl.Lines = append(odl.Lines, text)
+			fixedLine = regex_things.ReMatchBrace.ReplaceAllString(line, "")
+			fixedLine = regex_things.ReMatchBracket.ReplaceAllString(fixedLine, "")
+			fixedLine = strings.ReplaceAll(fixedLine, `\N`, "")
+			if my_util.ReplaceSpecString(fixedLine, "") == "" {
+				ol.Lines[i] = ""
+				break
+			} else {
+				if i == 1 {
+					// 这样说明有两行字幕,也就是双语啦
+					countLineFeed++
+				}
+				ol.Lines[i] = fixedLine
+			}
+		}
+		if ol.Lines[0] == "" {
+			continue
 		}
 		}
-		subFileInfo.Dialogues = append(subFileInfo.Dialogues, odl)
-		subFileInfo.DialoguesFilter = append(subFileInfo.DialoguesFilter, odl)
+		subFileInfo.DialoguesFilter = append(subFileInfo.DialoguesFilter, ol)
 	}
 	}
 	// 再分析
 	// 再分析
 	// 需要判断每一个 Line 是啥语言,[语言的code]次数
 	// 需要判断每一个 Line 是啥语言,[语言的code]次数
@@ -105,7 +105,7 @@ func (p Parser) DetermineFileTypeFromBytes(inBytes []byte, nowExt string) (bool,
 		emptyLines += language.DetectSubLangAndStatistics(dialogue, langDict, &usefulDialogueExs, &chLines, &otherLines)
 		emptyLines += language.DetectSubLangAndStatistics(dialogue, langDict, &usefulDialogueExs, &chLines, &otherLines)
 	}
 	}
 	// 从统计出来的字典,找出 Top 1 或者 2 的出来,然后计算出是什么语言的字幕
 	// 从统计出来的字典,找出 Top 1 或者 2 的出来,然后计算出是什么语言的字幕
-	detectLang := language.SubLangStatistics2SubLangType(float32(countLineFeed), float32(len(matched)-emptyLines), langDict, chLines)
+	detectLang := language.SubLangStatistics2SubLangType(float32(countLineFeed), float32(len(subFileInfo.DialoguesFilter)-emptyLines), langDict, chLines)
 	subFileInfo.Lang = detectLang
 	subFileInfo.Lang = detectLang
 	subFileInfo.Data = inBytes
 	subFileInfo.Data = inBytes
 	subFileInfo.DialoguesFilterEx = usefulDialogueExs
 	subFileInfo.DialoguesFilterEx = usefulDialogueExs
@@ -114,9 +114,141 @@ func (p Parser) DetermineFileTypeFromBytes(inBytes []byte, nowExt string) (bool,
 	return true, &subFileInfo, nil
 	return true, &subFileInfo, nil
 }
 }
 
 
-func (p Parser) parseContent(inBytes []byte) {
+func (p Parser) parseContent(inBytes []byte) []subparser.OneDialogue {
 
 
 	allString := string(inBytes)
 	allString := string(inBytes)
 	// 注意,需要替换掉 \r 不然正则表达式会有问题
 	// 注意,需要替换掉 \r 不然正则表达式会有问题
 	allString = strings.ReplaceAll(allString, "\r", "")
 	allString = strings.ReplaceAll(allString, "\r", "")
+
+	lines := strings.Split(allString, "\n")
+	// 需要把每一行如果是多余的特殊剔除掉
+	// 这里的目标是后续的匹配更加容易,但是,后续也得注意
+	// 因为这个样的操作,那么匹配对白内容的时候,可能是不存在的,只要是 index 和 时间匹配上了,就应该算一句话,只要在 dialogue 上是没得问题的
+	// 而 dialogueFilter 中则可以把这样没有内容的排除,但是实际时间轴匹配的时候还是用 dialogue 而不是 dialogueFilter
+	filterLines := make([]string, 0)
+	for _, line := range lines {
+		// 如果当前的这一句话,为空,或者进过正则表达式剔除特殊字符后为空,则跳过
+		if my_util.ReplaceSpecString(line, "") == "" {
+			continue
+		}
+		filterLines = append(filterLines, line)
+	}
+
+	dialogues := make([]subparser.OneDialogue, 0)
+	/*
+		这里可以确定,srt 格式,开始一定是第一句话,那么首先就需要找到,第一行,一定是数字的,从这里开始算起
+		1. 先将 content 进行 \r 的替换为空
+		2. 将 content 进行 \n 来分割
+		3. 将分割的数组进行筛选,把空行剔除掉
+		4. 然后使用循环,用下面的 steps 进行解析一句对白
+		steps:
+				0	找对白的 ID
+				1	找时间轴
+				2	找对白内容,可能有多行,停止的方式,一个是向后能找到 0以及2 或者 是最后一行
+	*/
+	steps := 0
+	nowDialogue := subparser.NewOneDialogue()
+	newOneDialogueFun := func() {
+		// 重新新建一个缓存对白,从新开始
+		steps = 0
+		nowDialogue = subparser.NewOneDialogue()
+	}
+	// 使用过滤后的列表
+	for i, line := range filterLines {
+
+		if steps == 0 {
+			// 匹配对白的索引
+			line = my_util.ReplaceSpecString(line, "")
+			dialogueIndex, err := strconv.Atoi(line)
+			if err != nil {
+				newOneDialogueFun()
+				continue
+			}
+			nowDialogue.Index = dialogueIndex
+			// 继续
+			steps = 1
+			continue
+		}
+
+		if steps == 1 {
+			// 匹配时间
+			matched := regex_things.ReMatchDialogueTimeSRT.FindAllStringSubmatch(line, -1)
+			if matched == nil || len(matched) < 1 {
+				matched = regex_things.ReMatchDialogueTimeSRT2.FindAllStringSubmatch(line, -1)
+				if matched == nil || len(matched) < 1 {
+					newOneDialogueFun()
+					continue
+				}
+			}
+			nowDialogue.StartTime = matched[0][1]
+			nowDialogue.EndTime = matched[0][2]
+
+			// 是否到结尾
+			if i+1 > len(filterLines)-1 {
+				// 是尾部
+				// 那么这一个对白就需要 add 到总列表中了
+				dialogues = append(dialogues, nowDialogue)
+				newOneDialogueFun()
+				continue
+			}
+			// 如上面提到的,因为把特殊字符的行去除了,那么一个对话,如果只有 index 和 时间,也是需要添加进去的
+			if p.needMatchNextContentLine(filterLines, i+1) == true {
+				// 是,那么也认为当前这个对话完成了,需要 add 到总列表中了
+				dialogues = append(dialogues, nowDialogue)
+				newOneDialogueFun()
+				continue
+			}
+			// 非上述特殊情况,继续
+			steps = 2
+			continue
+		}
+
+		if steps == 2 {
+			// 在上述情况排除后,才继续
+			// 匹配内容
+			nowDialogue.Lines = append(nowDialogue.Lines, line)
+			// 是否到结尾
+			if i+1 > len(filterLines)-1 {
+				// 是尾部
+				// 那么这一个对白就需要 add 到总列表中了
+				dialogues = append(dialogues, nowDialogue)
+				newOneDialogueFun()
+				continue
+			}
+
+			// 不是尾部,那么就需要往后看两句话,是否是下一个对白的头部(index 和 时间)
+			if p.needMatchNextContentLine(filterLines, i+1) == true {
+				// 是,那么也认为当前这个对话完成了,需要 add 到总列表中了
+				dialogues = append(dialogues, nowDialogue)
+				newOneDialogueFun()
+				continue
+			} else {
+				// 如果还不是,那么就可能是这个对白有多行,有可能是同一种语言的多行,也可能是多语言的多行
+				// 那么 step 应该不变继续是 2
+				continue
+			}
+		}
+	}
+
+	return dialogues
+}
+
+// needMatchNextContentLine 是否需要继续匹配下一句话作为一个对白的对话内容
+func (p Parser) needMatchNextContentLine(lines []string, index int) bool {
+
+	// 匹配到对白的 Index
+	_, err := strconv.Atoi(lines[index])
+	if err != nil {
+		return false
+	}
+	// 匹配到字幕的时间
+	matched := regex_things.ReMatchDialogueTimeSRT.FindAllStringSubmatch(lines[index+1], -1)
+	if matched == nil || len(matched) < 1 {
+		matched = regex_things.ReMatchDialogueTimeSRT2.FindAllStringSubmatch(lines[index+1], -1)
+		if matched == nil || len(matched) < 1 {
+			return false
+		}
+	}
+
+	return true
 }
 }

+ 24 - 26
internal/logic/sub_parser/srt/srt_test.go

@@ -20,16 +20,17 @@ func TestParser_DetermineFileType(t *testing.T) {
 		filePath string
 		filePath string
 	}
 	}
 	tests := []struct {
 	tests := []struct {
-		name     string
-		args     args
-		wantNil  bool
-		wantErr  bool
-		wantLang language.MyLanguage
+		name            string
+		args            args
+		wantNil         bool
+		wantErr         bool
+		wantDialogueLen int
+		wantLang        language.MyLanguage
 	}{
 	}{
-		{name: "1", args: args{filePath: filepath.Join(testRootDir, "[zimuku]_5_Loki.S01E02.The.Variant.1080p.DSNP.WEB-DL.DDP5.1.Atmos.H.264-CM.chs&eng.srt")}, wantNil: false, wantErr: false, wantLang: language.ChineseSimpleEnglish},
-		{name: "2", args: args{filePath: filepath.Join(testRootDir, "[zimuku]_5_Loki.S01E03.Lamentis.1080p.DSNP.WEB-DL.DDP5.1.H.264-TOMMY.chs&eng.srt")}, wantNil: false, wantErr: false, wantLang: language.ChineseSimpleEnglish},
-		{name: "3", args: args{filePath: filepath.Join(testRootDir, "Bridge of Spies (2015) (1080p BluRay x265 Silence).zh-cn.srt")}, wantNil: false, wantErr: false, wantLang: language.ChineseSimpleEnglish},
-		{name: "4", args: args{filePath: filepath.Join(testRootDir, "Resident Evil Welcome to Raccoon City (2021) WEBRip-1080p.1.zh-cn.srt")}, wantNil: false, wantErr: false, wantLang: language.ChineseSimple},
+		{name: "1", args: args{filePath: filepath.Join(testRootDir, "[zimuku]_5_Loki.S01E02.The.Variant.1080p.DSNP.WEB-DL.DDP5.1.Atmos.H.264-CM.chs&eng.srt")}, wantNil: false, wantErr: false, wantLang: language.ChineseSimpleEnglish, wantDialogueLen: 809},
+		{name: "2", args: args{filePath: filepath.Join(testRootDir, "[zimuku]_5_Loki.S01E03.Lamentis.1080p.DSNP.WEB-DL.DDP5.1.H.264-TOMMY.chs&eng.srt")}, wantNil: false, wantErr: false, wantLang: language.ChineseSimpleEnglish, wantDialogueLen: 484},
+		{name: "3", args: args{filePath: filepath.Join(testRootDir, "Bridge of Spies (2015) (1080p BluRay x265 Silence).zh-cn.srt")}, wantNil: false, wantErr: false, wantLang: language.ChineseSimpleEnglish, wantDialogueLen: 2069},
+		{name: "4", args: args{filePath: filepath.Join(testRootDir, "Resident Evil Welcome to Raccoon City (2021) WEBRip-1080p.1.zh-cn.srt")}, wantNil: false, wantErr: false, wantLang: language.ChineseSimple, wantDialogueLen: 1472},
 		// 特殊一点的字幕
 		// 特殊一点的字幕
 		// 这一个不确定是什么类型的字幕
 		// 这一个不确定是什么类型的字幕
 		//{name: "4", args: args{filePath: filepath.Join(testRootDir, "SP-Empire.Of.Dreams.The.Story.Of.The.Star.Wars.Trilogy.2004.1080p.BluRay.x264.AAC5.1-[YTS.MX].zh-cn.srt")}, wantNil: false, wantErr: false, wantLang: types.ChineseSimple},
 		//{name: "4", args: args{filePath: filepath.Join(testRootDir, "SP-Empire.Of.Dreams.The.Story.Of.The.Star.Wars.Trilogy.2004.1080p.BluRay.x264.AAC5.1-[YTS.MX].zh-cn.srt")}, wantNil: false, wantErr: false, wantLang: types.ChineseSimple},
@@ -59,24 +60,21 @@ func TestParser_DetermineFileType(t *testing.T) {
 				t.Fatal("DetermineFileTypeFromFile got:", got, "wantNil:", tt.wantNil)
 				t.Fatal("DetermineFileTypeFromFile got:", got, "wantNil:", tt.wantNil)
 			}
 			}
 
 
-			println(got.Name, got.Ext, got.Lang)
-		})
-	}
-}
-
-func TestParser_parseContent(t *testing.T) {
-
-	testDataPath := "../../../../TestData/sub_parser/org"
-	srtFileFPath := filepath.Join(testDataPath, "[zimuku]_5_Loki.S01E02.The.Variant.1080p.DSNP.WEB-DL.DDP5.1.Atmos.H.264-CM.chs&eng.srt")
+			fBytes, err := os.ReadFile(tt.args.filePath)
+			if err != nil {
+				t.Fatal(err)
+			}
+			inBytes, err := lan.ChangeFileCoding2UTF8(fBytes)
+			if err != nil {
 
 
-	fBytes, err := os.ReadFile(srtFileFPath)
-	if err != nil {
-		t.Fatal(err)
-	}
-	inBytes, err := lan.ChangeFileCoding2UTF8(fBytes)
-	if err != nil {
+				t.Fatal(err)
+			}
+			dialogueCount := NewParser().parseContent(inBytes)
+			if len(dialogueCount) != tt.wantDialogueLen || len(got.Dialogues) != tt.wantDialogueLen {
+				t.Fatal("parse content dialogue error")
+			}
 
 
-		t.Fatal(err)
+			println(got.Name, got.Ext, got.Lang)
+		})
 	}
 	}
-	NewParser().parseContent(inBytes)
 }
 }

+ 9 - 2
internal/pkg/language/string_encoding.go

@@ -24,7 +24,7 @@ func ConvertToString(src string, srcCode string, tagCode string) string {
 
 
 // 感谢: https://blog.csdn.net/gaoluhua/article/details/109128154,解决了编码问题
 // 感谢: https://blog.csdn.net/gaoluhua/article/details/109128154,解决了编码问题
 
 
-// ChangeFileCoding2UTF8 自动检测文件的编码,然后转换到 UTF-8
+// ChangeFileCoding2UTF8 自动检测文件的编码,然后转换到 UTF-8,但是导出 bytes 的时候会把头部的 BOM 信息去除
 func ChangeFileCoding2UTF8(inBytes []byte) ([]byte, error) {
 func ChangeFileCoding2UTF8(inBytes []byte) ([]byte, error) {
 	best, err := detector.DetectBest(inBytes)
 	best, err := detector.DetectBest(inBytes)
 	utf8String := ""
 	utf8String := ""
@@ -43,5 +43,12 @@ func ChangeFileCoding2UTF8(inBytes []byte) ([]byte, error) {
 	if utf8String == "" {
 	if utf8String == "" {
 		return inBytes, nil
 		return inBytes, nil
 	}
 	}
-	return []byte(utf8String), nil
+
+	// 然后返回的时候需要去除头部的 BOM 信息
+	dat := []byte(utf8String)
+	if dat[0] == 0xef || dat[1] == 0xbb || dat[2] == 0xbf {
+		dat = dat[3:]
+	}
+
+	return dat, nil
 }
 }

+ 2 - 2
internal/pkg/my_util/util.go

@@ -356,8 +356,8 @@ func Time2Duration(inTime time.Time) time.Duration {
 }
 }
 
 
 // ReplaceSpecString 替换特殊的字符
 // ReplaceSpecString 替换特殊的字符
-func ReplaceSpecString(instring string, rep string) string {
-	return regex_things.RegMatchSpString.ReplaceAllString(instring, rep)
+func ReplaceSpecString(inString string, rep string) string {
+	return regex_things.RegMatchSpString.ReplaceAllString(inString, rep)
 }
 }
 
 
 func Bool2Int(inBool bool) int {
 func Bool2Int(inBool bool) int {

+ 7 - 1
internal/pkg/regex_things/regex_things.go

@@ -15,7 +15,8 @@ var ReMatchBracket = regexp.MustCompile(`(?m)((?i)\[[^]]*\])`)
 var ReCutDoubleLanguage = regexp.MustCompile(`(?i)(.*)\\N(.*)`)
 var ReCutDoubleLanguage = regexp.MustCompile(`(?i)(.*)\\N(.*)`)
 
 
 // RegMatchSpString 替换特殊字符
 // RegMatchSpString 替换特殊字符
-var RegMatchSpString = regexp.MustCompile(`(?i)[^\w\s]`)
+//var RegMatchSpString = regexp.MustCompile(`(?i)[^\w\s]`)
+var RegMatchSpString = regexp.MustCompile(`(?m)[\p{P}|\p{Z}}}|\p{S}\s|\t|\v]`)
 
 
 // 字幕文件对话的每一行
 // 字幕文件对话的每一行
 // regStringASS = `Dialogue: [^,.]*[0-9]*,([1-9]?[0-9]*:[0-9]*:[0-9]*.[0-9]*),([1-9]?[0-9]*:[0-9]*:[0-9]*.[0-9]*),[^,.]*,[^,.]*,[0-9]*,[0-9]*,[0-9]*,[^,.]*,(.*)`
 // regStringASS = `Dialogue: [^,.]*[0-9]*,([1-9]?[0-9]*:[0-9]*:[0-9]*.[0-9]*),([1-9]?[0-9]*:[0-9]*:[0-9]*.[0-9]*),[^,.]*,[^,.]*,[0-9]*,[0-9]*,[0-9]*,[^,.]*,(.*)`
@@ -23,9 +24,14 @@ const regStringASS = `Dialogue: [^,.]*[0-9]*,([1-9]?[0-9]*:[0-9]*:[0-9]*.[0-9]*)
 const regStringSRT = `(\d+)\n([\d:,]+)\s+-{2}\>\s+([\d:,]+)\n([\s\S]*?(\n{1,2}|$))`
 const regStringSRT = `(\d+)\n([\d:,]+)\s+-{2}\>\s+([\d:,]+)\n([\s\S]*?(\n{1,2}|$))`
 const regStringSRT2 = `(\d+)\n([\d:.]+)\s+-{2}\>\s+([\d:.]+)\n([\s\S]*?(\n{1,2}|$))`
 const regStringSRT2 = `(\d+)\n([\d:.]+)\s+-{2}\>\s+([\d:.]+)\n([\s\S]*?(\n{1,2}|$))`
 
 
+const regStringSRTime = `([\d:,]+)\s+-{2}\>\s+([\d:,]+)`
+const regStringSRTime2 = `([\d:.]+)\s+-{2}\>\s+([\d:.]+)`
+
 var ReMatchDialogueASS = regexp.MustCompile(regStringASS)
 var ReMatchDialogueASS = regexp.MustCompile(regStringASS)
 var ReMatchDialogueSRT = regexp.MustCompile(regStringSRT)
 var ReMatchDialogueSRT = regexp.MustCompile(regStringSRT)
 var ReMatchDialogueSRT2 = regexp.MustCompile(regStringSRT2)
 var ReMatchDialogueSRT2 = regexp.MustCompile(regStringSRT2)
+var ReMatchDialogueTimeSRT = regexp.MustCompile(regStringSRTime)
+var ReMatchDialogueTimeSRT2 = regexp.MustCompile(regStringSRTime2)
 
 
 // RegOneSeasonSubFolderNameMatch 每个视频文件夹下的缓存文件夹名称,一个季度的
 // RegOneSeasonSubFolderNameMatch 每个视频文件夹下的缓存文件夹名称,一个季度的
 var RegOneSeasonSubFolderNameMatch = regexp.MustCompile(`(?m)^Sub_S\dE0`)
 var RegOneSeasonSubFolderNameMatch = regexp.MustCompile(`(?m)^Sub_S\dE0`)

+ 15 - 3
internal/pkg/sub_helper/sub_helper.go

@@ -221,7 +221,11 @@ func SearchMatchedSubFileByDir(dir string) ([]string, error) {
 			}
 			}
 		} else {
 		} else {
 			// 这里就是文件了
 			// 这里就是文件了
-			if curFile.Size() < 1000 {
+			info, err := curFile.Info()
+			if err != nil {
+				return nil, err
+			}
+			if info.Size() < 1000 {
 				continue
 				continue
 			}
 			}
 			if sub_parser_hub.IsSubExtWanted(filepath.Ext(curFile.Name())) == true {
 			if sub_parser_hub.IsSubExtWanted(filepath.Ext(curFile.Name())) == true {
@@ -251,7 +255,11 @@ func SearchMatchedSubFileByOneVideo(oneVideoFullPath string) ([]string, error) {
 			continue
 			continue
 		}
 		}
 		// 这里就是文件了
 		// 这里就是文件了
-		if curFile.Size() < 1000 {
+		info, err := curFile.Info()
+		if err != nil {
+			return nil, err
+		}
+		if info.Size() < 1000 {
 			continue
 			continue
 		}
 		}
 		// 判断的时候用小写的,后续重命名的时候用原有的名称
 		// 判断的时候用小写的,后续重命名的时候用原有的名称
@@ -289,7 +297,11 @@ func SearchVideoMatchSubFileAndRemoveExtMark(oneVideoFullPath string) error {
 			continue
 			continue
 		} else {
 		} else {
 			// 这里就是文件了
 			// 这里就是文件了
-			if curFile.Size() < 1000 {
+			info, err := curFile.Info()
+			if err != nil {
+				return err
+			}
+			if info.Size() < 1000 {
 				continue
 				continue
 			}
 			}
 			// 判断的时候用小写的,后续重命名的时候用原有的名称
 			// 判断的时候用小写的,后续重命名的时候用原有的名称

+ 7 - 0
internal/types/subparser/fileinfo.go

@@ -134,12 +134,19 @@ func (f FileInfo) GetNumFrames() int {
 
 
 // OneDialogue 一句对话
 // OneDialogue 一句对话
 type OneDialogue struct {
 type OneDialogue struct {
+	Index     int      // 对白的索引
 	StartTime string   // 开始时间
 	StartTime string   // 开始时间
 	EndTime   string   // 结束时间
 	EndTime   string   // 结束时间
 	StyleName string   // StyleName
 	StyleName string   // StyleName
 	Lines     []string // 台词
 	Lines     []string // 台词
 }
 }
 
 
+func NewOneDialogue() OneDialogue {
+	return OneDialogue{
+		Lines: make([]string, 0),
+	}
+}
+
 func (o OneDialogue) GetStartTime() time.Time {
 func (o OneDialogue) GetStartTime() time.Time {
 	srcTimeStartNow, err := my_util.ParseTime(o.StartTime)
 	srcTimeStartNow, err := my_util.ParseTime(o.StartTime)
 	if err != nil {
 	if err != nil {