12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576 |
- using System;
- using System.Collections.Generic;
- using System.IO;
- using System.Text;
- namespace Masuit.Tools.Files;
/// <summary>
/// Detects the encoding of text files and streams by inspecting the leading byte-order mark (BOM).
/// </summary>
/// <remarks>
/// Only the BOM is examined; the content itself is never analysed. Input without a recognised BOM
/// is reported as <see cref="Encoding.ASCII"/>.
/// </remarks>
public static class TextEncodingDetector
{
    /// <summary>
    /// Length in bytes of the longest BOM recognised here (UTF-32).
    /// </summary>
    private const int MaxBomLength = 4;

    /// <summary>
    /// Detects the encoding of the text file at the given path.
    /// </summary>
    /// <param name="file">Path of the file to inspect.</param>
    /// <returns>The encoding indicated by the file's BOM, or <see cref="Encoding.ASCII"/> when no BOM is present.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="file"/> is <c>null</c>.</exception>
    public static Encoding GetEncoding(string file)
    {
        if (file is null)
        {
            throw new ArgumentNullException(nameof(file));
        }

        return GetEncoding(new FileInfo(file));
    }

    /// <summary>
    /// Detects the encoding of the given text file.
    /// </summary>
    /// <param name="file">The file to inspect; it is opened read-only and closed before returning.</param>
    /// <returns>The encoding indicated by the file's BOM, or <see cref="Encoding.ASCII"/> when no BOM is present.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="file"/> is <c>null</c>.</exception>
    public static Encoding GetEncoding(this FileInfo file)
    {
        if (file is null)
        {
            throw new ArgumentNullException(nameof(file));
        }

        using var fs = file.OpenRead();
        return GetEncoding(fs);
    }

    /// <summary>
    /// Detects the encoding of a text stream from the bytes at its current position.
    /// </summary>
    /// <param name="stream">
    /// A readable stream. Up to four bytes are consumed from the current position; the stream is
    /// neither rewound nor disposed.
    /// </param>
    /// <returns>The encoding indicated by the BOM, or <see cref="Encoding.ASCII"/> when no BOM is present.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
    public static Encoding GetEncoding(this Stream stream)
    {
        if (stream is null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        var buffer = new byte[MaxBomLength];
        var filled = 0;

        // Stream.Read may return fewer bytes than requested even before end of stream,
        // so keep reading until the buffer is full or the stream is exhausted.
        while (filled < buffer.Length)
        {
            var read = stream.Read(buffer, filled, buffer.Length - filled);
            if (read <= 0)
            {
                break;
            }

            filled += read;
        }

        return DetectFromBom(buffer, filled);
    }

    /// <summary>
    /// Maps a BOM prefix to its encoding.
    /// </summary>
    /// <param name="bom">Buffer holding the first bytes of the text.</param>
    /// <param name="length">Number of valid bytes in <paramref name="bom"/>; bytes beyond it are ignored.</param>
    /// <returns>The matching encoding, or <see cref="Encoding.ASCII"/> when no BOM matches.</returns>
    private static Encoding DetectFromBom(byte[] bom, int length)
    {
        // The UTF-32 BOMs are tested first: FF FE 00 00 starts with the UTF-16 LE BOM (FF FE),
        // so checking UTF-16 first would misreport UTF-32 LE input.
        if (length >= 4)
        {
            if (bom[0] == 0xFF && bom[1] == 0xFE && bom[2] == 0x00 && bom[3] == 0x00)
            {
                return Encoding.UTF32;
            }

            if (bom[0] == 0x00 && bom[1] == 0x00 && bom[2] == 0xFE && bom[3] == 0xFF)
            {
                return Encoding.GetEncoding("utf-32BE");
            }
        }

        if (length >= 3 && bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF)
        {
            return Encoding.UTF8;
        }

        if (length >= 2)
        {
            // The UTF-16 BOM is exactly two bytes; whatever follows is content and must not
            // influence detection (the first character may be any code unit, e.g. CJK text).
            if (bom[0] == 0xFE && bom[1] == 0xFF)
            {
                return Encoding.BigEndianUnicode;
            }

            if (bom[0] == 0xFF && bom[1] == 0xFE)
            {
                return Encoding.Unicode;
            }
        }

        return Encoding.ASCII;
    }
}
|