charset.go 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. package charset
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "golang.org/x/text/encoding"
  8. "golang.org/x/text/encoding/ianaindex"
  9. "golang.org/x/text/transform"
  10. )
  11. type Charset string
  12. //中文
  13. const (
  14. GBK Charset = "GBK"
  15. GB18030 = "GB18030"
  16. GB2312 = "GB2312"
  17. Big5 = "Big5"
  18. )
  19. //日文
  20. const (
  21. EUCJP Charset = "EUCJP"
  22. ISO2022JP = "ISO2022JP"
  23. ShiftJIS = "ShiftJIS"
  24. )
  25. //韩文
  26. const (
  27. EUCKR Charset = "EUCKR"
  28. )
  29. //Unicode
  30. const (
  31. UTF_8 Charset = "UTF-8"
  32. UTF_16 = "UTF-16"
  33. UTF_16BE = "UTF-16BE"
  34. UTF_16LE = "UTF-16LE"
  35. )
  36. //其他编码
  37. const (
  38. Macintosh Charset = "macintosh"
  39. IBM = "IBM*"
  40. Windows = "Windows*"
  41. ISO = "ISO-*"
  42. )
  43. var charsetAlias = map[string]string{
  44. "HZGB2312": "HZ-GB-2312",
  45. "hzgb2312": "HZ-GB-2312",
  46. "GB2312": "HZ-GB-2312",
  47. "gb2312": "HZ-GB-2312",
  48. "GB-18030": "GB18030",
  49. }
  50. func Convert(dstCharset Charset, srcCharset Charset, src string) (dst string, err error) {
  51. if dstCharset == srcCharset {
  52. return src, nil
  53. }
  54. dst = src
  55. // Converting <src> to UTF-8.
  56. if srcCharset != "UTF-8" {
  57. if e := getEncoding(srcCharset); e != nil {
  58. tmp, err := io.ReadAll(
  59. transform.NewReader(bytes.NewReader([]byte(src)), e.NewDecoder()),
  60. )
  61. if err != nil {
  62. return "", fmt.Errorf("%s to utf8 failed. %v", srcCharset, err)
  63. }
  64. src = string(tmp)
  65. } else {
  66. return dst, errors.New(fmt.Sprintf("unsupport srcCharset: %s", srcCharset))
  67. }
  68. }
  69. // Do the converting from UTF-8 to <dstCharset>.
  70. if dstCharset != "UTF-8" {
  71. if e := getEncoding(dstCharset); e != nil {
  72. tmp, err := io.ReadAll(
  73. transform.NewReader(bytes.NewReader([]byte(src)), e.NewEncoder()),
  74. )
  75. if err != nil {
  76. return "", fmt.Errorf("utf to %s failed. %v", dstCharset, err)
  77. }
  78. dst = string(tmp)
  79. } else {
  80. return dst, errors.New(fmt.Sprintf("unsupport dstCharset: %s", dstCharset))
  81. }
  82. } else {
  83. dst = src
  84. }
  85. return dst, nil
  86. }
  87. func ToUTF8(srcCharset Charset, src string) (dst string, err error) {
  88. return Convert("UTF-8", srcCharset, src)
  89. }
  90. func UTF8To(dstCharset Charset, src string) (dst string, err error) {
  91. return Convert(dstCharset, "UTF-8", src)
  92. }
  93. func getEncoding(charset Charset) encoding.Encoding {
  94. if c, ok := charsetAlias[string(charset)]; ok {
  95. charset = Charset(c)
  96. }
  97. if e, err := ianaindex.MIB.Encoding(string(charset)); err == nil && e != nil {
  98. return e
  99. }
  100. return nil
  101. }