Browse Source

修复 srt 字幕内带有特效,导致语言识别错误的问题

Signed-off-by: allan716 <[email protected]>
allan716 3 years ago
parent
commit
96c1b8600c

+ 5 - 0
internal/logic/sub_parser/srt/srt.go

@@ -206,6 +206,11 @@ func (p Parser) parseContent(inBytes []byte) []subparser.OneDialogue {
 		if steps == 2 {
 			// 在上述情况排除后,才继续
 			// 匹配内容
+
+			if len(regex_things.ReMatchSrtSubtitleEffects.FindAllString(line, -1)) > 5 {
+				continue
+			}
+
 			nowDialogue.Lines = append(nowDialogue.Lines, line)
 			// 是否到结尾
 			if i+1 > len(filterLines)-1 {

+ 3 - 0
internal/pkg/regex_things/regex_things.go

@@ -27,6 +27,9 @@ const regStringSRT2 = `(\d+)\n([\d:.]+)\s+-{2}\>\s+([\d:.]+)\n([\s\S]*?(\n{1,2}|
 const regStringSRTime = `([\d:,]+)\s+-{2}\>\s+([\d:,]+)`
 const regStringSRTime2 = `([\d:.]+)\s+-{2}\>\s+([\d:.]+)`
 
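+// ReMatchSrtSubtitleEffects matches non-zero numeric tokens (integers or decimals).
+// It is used to detect SRT subtitle effect lines, which need to be removed: the SRT
+// parser skips a content line when it contains more than five such matches.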
+var ReMatchSrtSubtitleEffects = regexp.MustCompile(`(?m)([1-9]\d*\.?\d*)|(0\.\d*[1-9])`)
+
 var ReMatchDialogueASS = regexp.MustCompile(regStringASS)
 var ReMatchDialogueSRT = regexp.MustCompile(regStringSRT)
 var ReMatchDialogueSRT2 = regexp.MustCompile(regStringSRT2)

+ 1 - 0
internal/pkg/sub_parser_hub/subParserHub_test.go

@@ -27,6 +27,7 @@ func TestSubParserHubIsSubHasChinese(t *testing.T) {
 		{name: "5", args: args{filePath: filepath.Join(testRootDir, "oslo.2021.1080p.web.h264-naisu.繁体.ass")}, want: true},
 		{name: "6", args: args{filePath: filepath.Join(testRootDir, "[zimuku]_5_Loki.S01E02.The.Variant.1080p.DSNP.WEB-DL.DDP5.1.Atmos.H.264-CM.chs&eng.srt")}, want: true},
 		{name: "7", args: args{filePath: filepath.Join(testRootDir, "[zimuku]_5_Loki.S01E03.Lamentis.1080p.DSNP.WEB-DL.DDP5.1.H.264-TOMMY.chs&eng.srt")}, want: true},
+		{name: "8", args: args{filePath: filepath.Join(testRootDir, "苍穹浩瀚 - S02E06 - 范式转换.chinese(简英,xunlei).default.srt")}, want: true},
 	}
 
 	subParserHub := NewSubParserHub(ass.NewParser(), srt.NewParser())