string_encoding.go 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. package language
  2. import (
  3. "github.com/allanpk716/ChineseSubFinder/internal/logic/charset"
  4. "github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
  5. "github.com/axgle/mahonia"
  6. nzlov "github.com/nzlov/chardet"
  7. )
  8. // ConvertToString 将字符串从原始编码转换到目标编码,需要配合字符串检测编码库使用 chardet.NewTextDetector()
  9. func ConvertToString(src string, srcCode string, tagCode string) string {
  10. defer func() {
  11. if err := recover(); err != nil {
  12. log_helper.GetLogger().Errorln("ConvertToString panic:", err)
  13. }
  14. }()
  15. srcCoder := mahonia.NewDecoder(srcCode)
  16. srcResult := srcCoder.ConvertString(src)
  17. tagCoder := mahonia.NewDecoder(tagCode)
  18. _, cdata, _ := tagCoder.Translate([]byte(srcResult), true)
  19. result := string(cdata)
  20. return result
  21. }
  22. // 感谢: https://blog.csdn.net/gaoluhua/article/details/109128154,解决了编码问题
  23. // ChangeFileCoding2UTF8 自动检测文件的编码,然后转换到 UTF-8
  24. func ChangeFileCoding2UTF8(inBytes []byte) ([]byte, error) {
  25. best, err := detector.DetectBest(inBytes)
  26. utf8String := ""
  27. if err != nil {
  28. return nil, err
  29. }
  30. if best.Confidence < 90 {
  31. detectBest := nzlov.Mostlike(inBytes)
  32. utf8String, err = charset.ToUTF8(charset.Charset(detectBest), string(inBytes))
  33. } else {
  34. utf8String, err = charset.ToUTF8(charset.Charset(best.Charset), string(inBytes))
  35. }
  36. if err != nil {
  37. return nil, err
  38. }
  39. if utf8String == "" {
  40. return inBytes, nil
  41. }
  42. return []byte(utf8String), nil
  43. }