AbstractFullRegexDetector.cs 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. using System.Collections.Generic;
  2. using System.IO;
  3. using System.Linq;
  4. using System.Reflection;
  5. using System.Text;
  6. using System.Text.RegularExpressions;
  7. using Masuit.Tools.Mime;
  8. namespace Masuit.Tools.Files.FileDetector;
  /// <summary>
  /// Base class for detectors that recognise a text-based format by checking that the
  /// whole content of a stream is consumed by a regular expression.
  /// </summary>
  /// <remarks>
  /// The stream is decoded with a series of candidate encodings; for each decoding every
  /// match of <see cref="Pattern"/> is removed, and the content is accepted when nothing
  /// remains.
  /// </remarks>
  public abstract class AbstractFullRegexDetector : IDetector
  {
      /// <summary>
      /// Extension of the detected format, without the leading dot
      /// (<see cref="MimeType"/> prepends the dot itself).
      /// </summary>
      public abstract string Extension { get; }

      /// <summary>
      /// Precondition reported by this detector; defaults to <c>"txt"</c>.
      /// NOTE(review): presumably the extension of a detector that must match first - confirm against IDetector.
      /// </summary>
      public virtual string Precondition => "txt";

      /// <summary>
      /// Expression whose matches are stripped from the decoded text in <see cref="Detect"/>.
      /// </summary>
      protected abstract Regex Pattern { get; }

      /// <summary>
      /// MIME type looked up from <see cref="Extension"/> through a new <c>MimeMapper</c>.
      /// </summary>
      public virtual string MimeType => new MimeMapper().GetMimeFromExtension("." + Extension);

      /// <summary>
      /// Categories declared on the concrete detector type through <c>FormatCategoryAttribute</c>.
      /// </summary>
      public virtual List<FormatCategory> FormatCategories => GetType().GetCustomAttributes<FormatCategoryAttribute>().Select(a => a.Category).ToList();

      /// <summary>
      /// Determines whether the stream content, decoded with any of the candidate encodings,
      /// is entirely made up of matches of <see cref="Pattern"/>.
      /// </summary>
      /// <param name="stream">
      /// Stream to inspect. Its position is reset to 0 before each decoding attempt, so it
      /// must support seeking. The stream is left open.
      /// </param>
      /// <returns>
      /// <c>true</c> as soon as one decoding leaves no text after all matches are removed;
      /// otherwise <c>false</c>. An empty stream decodes to an empty string and therefore
      /// yields <c>true</c>.
      /// </returns>
      public virtual bool Detect(Stream stream)
      {
          // Read the property once; it is abstract and may be non-trivial in subclasses.
          var pattern = Pattern;

          foreach (var encoding in CandidateEncodings())
          {
              stream.Position = 0;

              // detectEncodingFromByteOrderMarks is true, so a BOM in the data takes
              // precedence over the candidate encoding. leaveOpen is true, so disposing
              // the reader does not close the caller's stream.
              using var reader = new StreamReader(stream, encoding, true, 4096, true);
              string text = reader.ReadToEnd();

              if (pattern.Replace(text, "") == string.Empty)
              {
                  return true;
              }
          }

          return false;
      }

      /// <summary>
      /// Lazily yields the encodings to try, in order. Encodings looked up by name are
      /// only resolved when reached, and names the runtime does not support are skipped.
      /// </summary>
      private static IEnumerable<Encoding> CandidateEncodings()
      {
          yield return Encoding.UTF8;
          yield return Encoding.Unicode;

          foreach (var encoding in ResolveByName("utf-7", "utf-32"))
          {
              yield return encoding;
          }

          yield return Encoding.BigEndianUnicode;

          foreach (var encoding in ResolveByName(
                       "ascii",
                       "ks_c_5601-1987",
                       "iso-2022-kr",
                       "shift_jis",
                       "csISO2022JP",
                       "windows-1250",
                       "windows-1251",
                       "windows-1252",
                       "windows-1253",
                       "windows-1254"))
          {
              yield return encoding;
          }
      }

      /// <summary>
      /// Resolves each name with <see cref="Encoding.GetEncoding(string)"/>, skipping names
      /// for which the lookup throws instead of letting one missing encoding abort detection.
      /// </summary>
      private static IEnumerable<Encoding> ResolveByName(params string[] names)
      {
          foreach (var name in names)
          {
              Encoding resolved;
              try
              {
                  resolved = Encoding.GetEncoding(name);
              }
              catch (System.ArgumentException)
              {
                  // Name unknown to the runtime (e.g. code-page provider not registered).
                  continue;
              }
              catch (System.NotSupportedException)
              {
                  // Encoding known but not available on this platform/runtime.
                  continue;
              }

              // yield must stay outside the try/catch: C# forbids yield inside a try with catch.
              yield return resolved;
          }
      }
  }