TextEncodingDetector.cs 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.Text;
  5. namespace Masuit.Tools.Files;
  /// <summary>
  /// Detects the encoding of text data from the byte-order mark (BOM) at its start.
  /// </summary>
  /// <remarks>
  /// Only the first four bytes are inspected. Data that does not start with one of the
  /// recognised BOMs (UTF-8, UTF-16 LE/BE, UTF-32 LE/BE) is reported as ASCII.
  /// </remarks>
  public static class TextEncodingDetector
  {
      /// <summary>
      /// Length in bytes of the longest BOM this class recognises (UTF-32).
      /// </summary>
      private const int MaxBomLength = 4;

      /// <summary>
      /// Detects the encoding of the text file at the given path.
      /// </summary>
      /// <param name="file">Path of the file to inspect.</param>
      /// <returns>The encoding indicated by the file's BOM, or ASCII when no known BOM is present.</returns>
      public static Encoding GetEncoding(string file)
      {
          return GetEncoding(new FileInfo(file));
      }

      /// <summary>
      /// Detects the encoding of the given text file.
      /// </summary>
      /// <param name="file">The file to inspect; it is opened for reading and closed again before returning.</param>
      /// <returns>The encoding indicated by the file's BOM, or ASCII when no known BOM is present.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="file"/> is <c>null</c>.</exception>
      public static Encoding GetEncoding(this FileInfo file)
      {
          if (file is null)
          {
              throw new ArgumentNullException(nameof(file));
          }

          using var fs = file.OpenRead();
          return GetEncoding(fs);
      }

      /// <summary>
      /// Detects the encoding of a text stream from the bytes at its current position.
      /// </summary>
      /// <remarks>
      /// Up to four bytes are consumed from the stream and its position is not restored afterwards.
      /// </remarks>
      /// <param name="stream">The stream to inspect.</param>
      /// <returns>The encoding indicated by the leading BOM, or ASCII when no known BOM is present.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
      public static Encoding GetEncoding(this Stream stream)
      {
          if (stream is null)
          {
              throw new ArgumentNullException(nameof(stream));
          }

          var buffer = new byte[MaxBomLength];
          var filled = 0;

          // Stream.Read may return fewer bytes than requested, so keep reading until the
          // buffer is full or the stream reports end-of-data (a return value of 0).
          while (filled < buffer.Length)
          {
              var read = stream.Read(buffer, filled, buffer.Length - filled);
              if (read <= 0)
              {
                  break;
              }

              filled += read;
          }

          // Hand over only the bytes actually read, so the zero padding of a short stream
          // is never mistaken for part of a BOM (e.g. "FF FE" followed by padding looking like UTF-32 LE).
          return GetEncoding(new ArraySegment<byte>(buffer, 0, filled));
      }

      /// <summary>
      /// Maps the leading bytes of some text data to the encoding announced by their BOM.
      /// </summary>
      /// <param name="bytes">The leading bytes of the data; may hold fewer than four bytes.</param>
      /// <returns>The matching encoding, or ASCII when no known BOM is present.</returns>
      private static Encoding GetEncoding(IReadOnlyList<byte> bytes)
      {
          // UTF-32 LE must be tested before UTF-16 LE because its BOM begins with the UTF-16 LE BOM.
          if (StartsWith(bytes, 0xFF, 0xFE, 0x00, 0x00))
          {
              return Encoding.UTF32;
          }

          // The UTF-16 BE BOM is just FE FF; the byte after it belongs to the first character.
          if (StartsWith(bytes, 0xFE, 0xFF))
          {
              return Encoding.BigEndianUnicode;
          }

          if (StartsWith(bytes, 0xFF, 0xFE))
          {
              return Encoding.Unicode;
          }

          if (StartsWith(bytes, 0x00, 0x00, 0xFE, 0xFF))
          {
              return Encoding.GetEncoding("utf-32BE");
          }

          if (StartsWith(bytes, 0xEF, 0xBB, 0xBF))
          {
              return Encoding.UTF8;
          }

          return Encoding.ASCII;
      }

      /// <summary>
      /// Reports whether <paramref name="bytes"/> begins with the given byte sequence.
      /// </summary>
      /// <param name="bytes">The data to examine.</param>
      /// <param name="prefix">The byte sequence expected at the start of <paramref name="bytes"/>.</param>
      /// <returns><c>true</c> when every byte of <paramref name="prefix"/> matches; otherwise <c>false</c>.</returns>
      private static bool StartsWith(IReadOnlyList<byte> bytes, params byte[] prefix)
      {
          if (bytes.Count < prefix.Length)
          {
              return false;
          }

          for (var i = 0; i < prefix.Length; i++)
          {
              if (bytes[i] != prefix[i])
              {
                  return false;
              }
          }

          return true;
      }
  }