// PatchExtension.cs (9.5 KB)
  using System.Collections.Immutable;
  using System.Globalization;
  using System.Text;
  using System.Text.RegularExpressions;
  using static Masuit.Tools.TextDiff.DiffOperation;

  namespace Masuit.Tools.TextDiff;
  /// <summary>
  /// Extension methods that pad, serialise, parse, split and apply sequences of <see cref="DiffPatch"/>.
  /// </summary>
  public static class PatchExtension
  {
      /// <summary>
      /// Padding string made of the four characters U+0001..U+0004. <c>Apply</c> wraps the target text
      /// with it before matching and strips it again afterwards.
      /// </summary>
      internal static readonly string NullPadding = new(Enumerable.Range(1, 4).Select(i => (char)i).ToArray());

      /// <summary>
      /// Matches a patch header line of the form <c>@@ -start1,length1 +start2,length2 @@</c>;
      /// the comma and both lengths are optional.
      /// </summary>
      private static readonly Regex PatchHeader = new("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$");

      /// <summary>
      /// Adds padding at the start and end of the text covered by the patches so that edges have
      /// something to match against. Called internally by <c>Apply</c>.
      /// </summary>
      /// <param name="patches">Patches to pad; every patch is first shifted with <c>Bump(padding.Length)</c>.</param>
      /// <param name="padding">The padding string.</param>
      /// <returns>
      /// A lazily produced sequence: the first patch gets leading padding, the last patch gets trailing
      /// padding, a single patch gets both, and patches in between are only shifted.
      /// </returns>
      internal static IEnumerable<DiffPatch> AddPadding(this IEnumerable<DiffPatch> patches, string padding)
      {
          var shift = padding.Length;
          using var source = patches.GetEnumerator();
          if (!source.MoveNext())
          {
              yield break;
          }

          var current = source.Current.Bump(shift);
          var isFirst = true;
          while (true)
          {
              // Look one element ahead so we know whether `current` is the last patch.
              var hasNext = source.MoveNext();
              var next = hasNext ? source.Current.Bump(shift) : current;

              yield return (isFirst, hasNext) switch
              {
                  (true, false) => current.AddPadding(padding), // the list has only one patch
                  (true, true) => current.AddPaddingBegin(padding),
                  (false, true) => current,
                  (false, false) => current.AddPaddingEnd(padding)
              };

              if (!hasNext)
              {
                  yield break;
              }

              isFirst = false;
              current = next;
          }
      }

      /// <summary>
      /// Takes a list of patches and rebuilds their textual representation.
      /// </summary>
      /// <param name="patches">The patches to serialise.</param>
      /// <returns>The concatenation of every patch appended, in order, to a <see cref="StringBuilder"/>.</returns>
      public static string ToText(this IEnumerable<DiffPatch> patches)
      {
          var builder = new StringBuilder();
          foreach (var patch in patches)
          {
              builder.Append(patch);
          }

          return builder.ToString();
      }

      /// <summary>
      /// Parses the textual representation of patches and returns the list of patch objects.
      /// </summary>
      /// <param name="text">Patch text; an empty string yields an empty list.</param>
      /// <returns>The parsed patches.</returns>
      /// <exception cref="ArgumentException">A line expected to be a patch header does not match the header format.</exception>
      public static ImmutableList<DiffPatch> ParsePatches(this string text) => ParseCore(text).ToImmutableList();

      /// <summary>
      /// Lazily parses <paramref name="text"/> line by line: each header line starts a patch, and the
      /// following lines up to the next line starting with <c>@</c> become its diffs.
      /// </summary>
      private static IEnumerable<DiffPatch> ParseCore(string text)
      {
          if (text.Length == 0)
          {
              yield break;
          }

          var lines = text.SplitBy('\n').ToArray();
          var index = 0;
          while (index < lines.Length)
          {
              var headerLine = lines[index];
              var header = PatchHeader.Match(headerLine);
              if (!header.Success)
              {
                  throw new ArgumentException("Invalid patch string: " + headerLine);
              }

              var (start1, length1) = header.GetStartAndLength(1, 2);
              var (start2, length2) = header.GetStartAndLength(3, 4);
              index++;

              var diffs = ImmutableList.CreateBuilder<TextDiffer>();
              for (; index < lines.Length; index++)
              {
                  var line = lines[index];
                  if (string.IsNullOrEmpty(line))
                  {
                      // Blank lines are skipped.
                      continue;
                  }

                  var sign = line[0];
                  if (sign == '@')
                  {
                      // Start of the next patch; leave `index` on the header for the outer loop.
                      break;
                  }

                  // Escape literal '+' before decoding so the decoder cannot turn it into a space.
                  var payload = line[1..].Replace("+", "%2b").UrlDecoded();
                  diffs.Add(sign switch
                  {
                      '+' => TextDiffer.Insert(payload),
                      '-' => TextDiffer.Delete(payload),
                      _ => TextDiffer.Equal(payload)
                  });
              }

              yield return new DiffPatch(start1, length1, start2, length2, diffs.ToImmutable());
          }
      }

      /// <summary>
      /// Reads a start/length pair from a header match.
      /// </summary>
      /// <param name="m">A successful <see cref="PatchHeader"/> match.</param>
      /// <param name="startIndex">Group number holding the start value.</param>
      /// <param name="lengthIndex">Group number holding the (possibly empty) length value.</param>
      /// <returns>
      /// <c>(start, 0)</c> when the length is "0"; <c>(start - 1, 1)</c> when the length is omitted;
      /// otherwise <c>(start - 1, length)</c>.
      /// </returns>
      private static (int start, int length) GetStartAndLength(this Match m, int startIndex, int lengthIndex)
      {
          var lengthStr = m.Groups[lengthIndex].Value;
          // Patch headers are machine-readable, so parse independently of the current culture.
          var value = int.Parse(m.Groups[startIndex].Value, CultureInfo.InvariantCulture);
          return lengthStr switch
          {
              "0" => (value, 0),
              "" => (value - 1, 1),
              _ => (value - 1, int.Parse(lengthStr, CultureInfo.InvariantCulture))
          };
      }

      /// <summary>
      /// Merges a set of patches onto the text using <c>MatchOption.Default</c> and <c>PatchOption.Default</c>.
      /// </summary>
      /// <param name="patches">The patches to apply.</param>
      /// <param name="text">The text to patch.</param>
      /// <returns>The patched text and one flag per (split) patch telling whether it was applied.</returns>
      public static (string newText, bool[] results) Apply(this IEnumerable<DiffPatch> patches, string text) => Apply(patches, text, MatchOption.Default, PatchOption.Default);

      /// <summary>
      /// Merges a set of patches onto the text using the given match options and <c>PatchOption.Default</c>.
      /// </summary>
      /// <param name="patches">The patches to apply.</param>
      /// <param name="text">The text to patch.</param>
      /// <param name="matchOption">Options forwarded to the match search.</param>
      /// <returns>The patched text and one flag per (split) patch telling whether it was applied.</returns>
      public static (string newText, bool[] results) Apply(this IEnumerable<DiffPatch> patches, string text, MatchOption matchOption) => Apply(patches, text, matchOption, PatchOption.Default);

      /// <summary>
      /// Merges a set of patches onto the text. Returns the patched text together with an array
      /// indicating which patches were applied successfully.
      /// </summary>
      /// <param name="input">The patches to apply; enumerated exactly once.</param>
      /// <param name="text">The text to patch.</param>
      /// <param name="matchOption">Options forwarded to the match search.</param>
      /// <param name="option">Supplies <c>PatchDeleteThreshold</c>, the limit for accepting an imperfect match of a long patch.</param>
      /// <returns>
      /// The patched text and one flag per patch after padding and splitting. An empty
      /// <paramref name="input"/> returns <paramref name="text"/> unchanged with an empty array.
      /// </returns>
      public static (string newText, bool[] results) Apply(this IEnumerable<DiffPatch> input, string text, MatchOption matchOption, PatchOption option)
      {
          // Materialise once: the sequence is needed both for the emptiness check and for the
          // padding pass, and a lazy source must not be enumerated twice.
          var source = input as IReadOnlyCollection<DiffPatch> ?? input.ToList();
          if (source.Count == 0)
          {
              return (text, []);
          }

          var nullPadding = NullPadding;
          text = nullPadding + text + nullPadding;
          var patches = source.AddPadding(nullPadding).SplitMax().ToList();
          var results = new bool[patches.Count];

          // delta tracks the offset between the expected and the actual location of the previous
          // patch, so that a run of consistently shifted patches is still found.
          var delta = 0;
          for (var x = 0; x < patches.Count; x++)
          {
              var aPatch = patches[x];
              var expectedLoc = aPatch.Start2 + delta;
              var text1 = aPatch.Diffs.Text1();
              var (startLoc, endLoc) = LocatePatch(text, text1, expectedLoc, matchOption);

              if (startLoc == -1)
              {
                  // No match: results[x] stays false. Subtract the size change this patch would
                  // have caused so later patches keep their expected positions.
                  delta -= aPatch.Length2 - aPatch.Length1;
                  continue;
              }

              results[x] = true;
              delta = startLoc - expectedLoc;
              var actualEndLoc = endLoc == -1
                  ? Math.Min(startLoc + text1.Length, text.Length)
                  : Math.Min(endLoc + TextDiffConstants.MatchMaxBits, text.Length);
              var text2 = text[startLoc..actualEndLoc];

              if (text1 == text2)
              {
                  // Perfect match: replace the matched span wholesale.
                  text = text[..startLoc] + aPatch.Diffs.Text2() + text[(startLoc + text1.Length)..];
              }
              else
              {
                  (results[x], text) = ApplyImperfectMatch(aPatch, text, text1, text2, startLoc, option);
              }
          }

          // Strip the padding added above.
          text = text.Substring(nullPadding.Length, text.Length - 2 * nullPadding.Length);
          return (text, results);
      }

      /// <summary>
      /// Finds where <paramref name="text1"/> occurs in <paramref name="text"/> near <paramref name="expectedLoc"/>.
      /// </summary>
      /// <returns>
      /// <c>start</c> is the match index or -1 when nothing acceptable was found. <c>end</c> is -1 unless
      /// <paramref name="text1"/> is longer than <c>MatchMaxBits</c>, in which case its head and tail are
      /// matched separately and <c>end</c> is the index where the tail matched.
      /// </returns>
      private static (int start, int end) LocatePatch(string text, string text1, int expectedLoc, MatchOption matchOption)
      {
          if (text1.Length <= TextDiffConstants.MatchMaxBits)
          {
              return (text.FindBestMatchIndex(text1, expectedLoc, matchOption), -1);
          }

          var start = text.FindBestMatchIndex(text1[..TextDiffConstants.MatchMaxBits], expectedLoc, matchOption);
          if (start == -1)
          {
              return (-1, -1);
          }

          var end = text.FindBestMatchIndex(text1[^TextDiffConstants.MatchMaxBits..], expectedLoc + text1.Length - TextDiffConstants.MatchMaxBits, matchOption);

          // The tail must be found, and strictly after the head; otherwise reject the match.
          return end == -1 || start >= end ? (-1, -1) : (start, end);
      }

      /// <summary>
      /// Applies <paramref name="aPatch"/> when the text found at <paramref name="startLoc"/>
      /// (<paramref name="text2"/>) differs from the text the patch expects (<paramref name="text1"/>).
      /// </summary>
      /// <returns>
      /// <c>(false, text)</c> with the text untouched when a long patch differs from the found text by
      /// more than <c>PatchDeleteThreshold</c>; otherwise <c>(true, patchedText)</c>.
      /// </returns>
      private static (bool applied, string text) ApplyImperfectMatch(DiffPatch aPatch, string text, string text1, string text2, int startLoc, PatchOption option)
      {
          var diffs = TextDiffer.Compute(text1, text2, 0f, false);
          if (text1.Length > TextDiffConstants.MatchMaxBits && diffs.Levenshtein() / (float)text1.Length > option.PatchDeleteThreshold)
          {
              // The end points match but the content in between is too different.
              return (false, text);
          }

          diffs = diffs.CleanupSemanticLossless().ToImmutableList();

          // index1 walks the patch's expected text; each edit is translated to the matching
          // position in the text that was actually found.
          var index1 = 0;
          foreach (var aDiff in aPatch.Diffs)
          {
              if (aDiff.Operation != Equal)
              {
                  var index2 = diffs.FindEquivalentLocation2(index1);
                  if (aDiff.Operation == Insert)
                  {
                      text = text.Insert(startLoc + index2, aDiff.Text);
                  }
                  else if (aDiff.Operation == Delete)
                  {
                      text = text.Remove(startLoc + index2, diffs.FindEquivalentLocation2(index1 + aDiff.Text.Length) - index2);
                  }
              }

              if (aDiff.Operation != Delete)
              {
                  index1 += aDiff.Text.Length;
              }
          }

          return (true, text);
      }

      /// <summary>
      /// Breaks up every patch whose <c>Length1</c> exceeds <c>TextDiffConstants.MatchMaxBits</c> into
      /// smaller patches that carry up to <paramref name="patchMargin"/> characters of context on each
      /// side. Patches that are already small enough are passed through unchanged.
      /// </summary>
      /// <param name="patches">The patches to split.</param>
      /// <param name="patchMargin">Number of context characters kept around each chunk.</param>
      /// <returns>A lazily produced sequence of patches.</returns>
      /// <exception cref="ArgumentOutOfRangeException">
      /// A patch needs splitting and <paramref name="patchMargin"/> is negative or not smaller than the
      /// maximum patch size. Thrown during enumeration.
      /// </exception>
      internal static IEnumerable<DiffPatch> SplitMax(this IEnumerable<DiffPatch> patches, short patchMargin = 4)
      {
          const short patchSize = TextDiffConstants.MatchMaxBits;
          foreach (var patch in patches)
          {
              if (patch.Length1 <= patchSize)
              {
                  yield return patch;
                  continue;
              }

              // With a margin this large the chunking loop below can never consume a diff, and a
              // negative margin makes the context slice invalid. Fail fast instead.
              // NOTE(review): margins of half the patch size or more look like they can still stall
              // once a chunk leaves a full-length precontext - confirm before relying on them.
              if (patchMargin < 0 || patchMargin >= patchSize)
              {
                  throw new ArgumentOutOfRangeException(nameof(patchMargin), patchMargin, "Patch margin must be non-negative and smaller than the maximum patch size.");
              }

              var (start1, _, start2, _, diffs) = patch;
              var precontext = string.Empty;
              while (diffs.Any())
              {
                  // Each chunk starts with the tail of the previous chunk as leading context.
                  var s1 = start1 - precontext.Length;
                  var s2 = start2 - precontext.Length;
                  var l1 = precontext.Length;
                  var l2 = precontext.Length;
                  var thediffs = new List<TextDiffer>();
                  var empty = true; // stays true while the chunk holds nothing but context
                  if (precontext.Length != 0)
                  {
                      thediffs.Add(TextDiffer.Equal(precontext));
                  }

                  while (diffs.Any() && l1 < patchSize - patchMargin)
                  {
                      var first = diffs[0];
                      var diffText = first.Text;
                      if (first.Operation == Insert)
                      {
                          // Insertions do not consume source text, so they are taken whole.
                          l2 += diffText.Length;
                          start2 += diffText.Length;
                          thediffs.Add(TextDiffer.Insert(diffText));
                          diffs = diffs.RemoveAt(0);
                          empty = false;
                      }
                      else if (first.IsLargeDelete(2 * patchSize) && thediffs.Count == 1 && thediffs[0].Operation == Equal)
                      {
                          // A large deletion directly after the leading context is kept in one piece.
                          l1 += diffText.Length;
                          start1 += diffText.Length;
                          thediffs.Add(TextDiffer.Delete(diffText));
                          diffs = diffs.RemoveAt(0);
                          empty = false;
                      }
                      else
                      {
                          // Deletion or equality: take only as much as still fits into this chunk.
                          var cutoff = diffText[..Math.Min(diffText.Length, patchSize - l1 - patchMargin)];
                          l1 += cutoff.Length;
                          start1 += cutoff.Length;
                          if (first.Operation == Equal)
                          {
                              l2 += cutoff.Length;
                              start2 += cutoff.Length;
                          }
                          else
                          {
                              empty = false;
                          }

                          thediffs.Add(TextDiffer.Create(first.Operation, cutoff));
                          diffs = diffs.RemoveAt(0);
                          if (cutoff != first.Text)
                          {
                              // Put the unconsumed remainder back at the front of the queue.
                              diffs = diffs.Insert(0, first with
                              {
                                  Text = first.Text[cutoff.Length..]
                              });
                          }
                      }
                  }

                  // Leading context for the next chunk: the last patchMargin characters of this one.
                  precontext = thediffs.Text2();
                  precontext = precontext[Math.Max(0, precontext.Length - patchMargin)..];

                  // Trailing context for this chunk: the first patchMargin characters still to come.
                  var text1 = diffs.Text1();
                  var postcontext = text1.Length > patchMargin ? text1[..patchMargin] : text1;
                  if (postcontext.Length != 0)
                  {
                      l1 += postcontext.Length;
                      l2 += postcontext.Length;

                      // Check the count before touching the last element.
                      if (thediffs.Count > 0 && thediffs[^1].Operation == Equal)
                      {
                          thediffs[^1] = thediffs[^1].Append(postcontext);
                      }
                      else
                      {
                          thediffs.Add(TextDiffer.Equal(postcontext));
                      }
                  }

                  if (!empty)
                  {
                      yield return new DiffPatch(s1, l1, s2, l2, thediffs.ToImmutableList());
                  }
              }
          }
      }
  }