Browse Source

增强读取字幕文件转换到 UTF8 时候的兼容性

Signed-off-by: 716 <[email protected]>
716 3 years ago
parent
commit
ac5953ba0e
3 changed files with 6 additions and 2 deletions
  1. 1 1
      go.mod
  2. 1 0
      go.sum
  3. 4 1
      internal/pkg/language/string_encoding.go

+ 1 - 1
go.mod

@@ -21,6 +21,7 @@ require (
 	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
 	github.com/gorilla/websocket v1.4.2
 	github.com/grd/stat v0.0.0-20130623202159-138af3fd5012
+	github.com/huandu/go-clone v1.3.0
 	github.com/james-bowman/nlp v0.0.0-20210511120306-26d441fa0ded
 	github.com/james-bowman/sparse v0.0.0-20210729090128-1e6c7dd483e9 // indirect
 	github.com/jinzhu/now v1.1.2
@@ -78,7 +79,6 @@ require (
 	github.com/hashicorp/errwrap v1.0.0 // indirect
 	github.com/hashicorp/go-multierror v1.1.0 // indirect
 	github.com/hashicorp/hcl v1.0.0 // indirect
-	github.com/huandu/go-clone v1.3.0 // indirect
 	github.com/jinzhu/inflection v1.0.0 // indirect
 	github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect
 	github.com/klauspost/pgzip v1.2.4 // indirect

+ 1 - 0
go.sum

@@ -244,6 +244,7 @@ github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0m
 github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I=
 github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc=
 github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
+github.com/huandu/go-assert v1.1.5 h1:fjemmA7sSfYHJD7CUqs9qTwwfdNAx7/j2/ZlHXzNB3c=
 github.com/huandu/go-assert v1.1.5/go.mod h1:yOLvuqZwmcHIC5rIzrBhT7D3Q9c3GFnd0JrPVhn/06U=
 github.com/huandu/go-clone v1.3.0 h1:gZ0HVFnzdal9t6p12QAoeuRW1Q8tp8gLCRUvLbj0hY0=
 github.com/huandu/go-clone v1.3.0/go.mod h1:bPJ9bAG8fjyAEBRFt6toaGUZcGFGL3f6g5u6yW+9W14=

+ 4 - 1
internal/pkg/language/string_encoding.go

@@ -5,6 +5,7 @@ import (
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
 	"github.com/axgle/mahonia"
 	nzlov "github.com/nzlov/chardet"
+	"strings"
 )
 
 // ConvertToString 将字符串从原始编码转换到目标编码,需要配合字符串检测编码库使用 chardet.NewTextDetector()
@@ -49,6 +50,8 @@ func ChangeFileCoding2UTF8(inBytes []byte) ([]byte, error) {
 	if dat[0] == 0xef || dat[1] == 0xbb || dat[2] == 0xbf {
 		dat = dat[3:]
 	}
+	// 再确认一次：剔除残留的无效 UTF-8 字节序列
+	validUTF8String := strings.ToValidUTF8(string(dat[:]), "")
 
-	return dat, nil
+	return []byte(validUTF8String), nil
 }