StringCompressor.cs 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. using System.Text;
  2. namespace Masuit.Tools.TextDiff;
  /// <summary>
  /// Maps every distinct line of a text to a single <see cref="char"/> index so that a
  /// sequence of lines can be represented as a compact string, and maps such strings back
  /// to the original text. The line table is kept on the instance, so texts compressed
  /// with the same instance share indexes for identical lines.
  /// </summary>
  internal class StringCompressor
  {
      // e.g. _lineArray[4] == "Hello\n"
      // e.g. _lineHash["Hello\n"] == (char)4
      private readonly List<string> _lineArray = [];
      private readonly Dictionary<string, char> _lineHash = new();

      /// <summary>
      /// Compresses all lines of <paramref name="text"/> into a string holding one index
      /// character per line. The first distinct line stored by an instance receives index
      /// <c>\u0000</c>, the next one <c>\u0001</c>, and so on.
      /// </summary>
      /// <param name="text">
      /// Text to compress. Lines are split after each <c>'\n'</c>; the newline stays part of the line.
      /// </param>
      /// <param name="maxLines">
      /// Once the number of stored distinct lines equals this value, the remainder of the text is
      /// stored as one single line. Indexes are cast to <see cref="char"/> without a range check,
      /// so values above <see cref="char.MaxValue"/> are not guarded against wrap-around.
      /// </param>
      /// <returns>One index character per encoded line; an empty string for empty input.</returns>
      public string Compress(ReadOnlySpan<char> text, int maxLines = char.MaxValue) => Encode(text, maxLines);

      /// <summary>
      /// Expands a string produced by <see cref="Compress"/> on this instance back into text by
      /// concatenating the stored line for every index character.
      /// </summary>
      /// <param name="text">String of index characters.</param>
      /// <returns>The concatenated lines.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
      public string Decompress(string text)
      {
          if (text is null)
          {
              throw new ArgumentNullException(nameof(text));
          }

          var sb = new StringBuilder();
          foreach (var index in text)
          {
              sb.Append(_lineArray[index]);
          }

          // Kept from the original implementation: an input of exactly char.MaxValue
          // characters gets the line stored at index char.MaxValue appended once more.
          if (text.Length == char.MaxValue)
          {
              sb.Append(_lineArray[char.MaxValue]);
          }

          return sb.ToString();
      }

      /// <summary>
      /// Walks <paramref name="text"/> line by line and appends the index of each line to the result.
      /// </summary>
      private string Encode(ReadOnlySpan<char> text, int maxLines)
      {
          var sb = new StringBuilder();
          var start = 0;
          var end = -1;
          while (end < text.Length - 1)
          {
              var newline = text[start..].IndexOf('\n');

              // Take everything that is left when the line budget is used up or when no
              // further newline exists; otherwise stop at (and include) the newline.
              end = _lineArray.Count == maxLines || newline == -1 ? text.Length - 1 : newline + start;

              var line = text[start..(end + 1)].ToString();
              sb.Append(GetOrAddIndex(line));
              start = end + 1;
          }

          return sb.ToString();
      }

      /// <summary>
      /// Returns the index of <paramref name="line"/>, storing the line first if it is not yet known.
      /// Uses a single lookup for known lines instead of ContainsKey followed by an indexer read.
      /// </summary>
      private char GetOrAddIndex(string line)
      {
          if (_lineHash.TryGetValue(line, out var index))
          {
              return index;
          }

          // The new line goes to the end of the list, so its index is the current count.
          index = (char)_lineArray.Count;
          _lineArray.Add(line);
          _lineHash.Add(line, index);
          return index;
      }
  }