PatchExtension.cs 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. using System.Collections.Immutable;
  2. using System.Text;
  3. using System.Text.RegularExpressions;
  4. using static Masuit.Tools.TextDiff.DiffOperation;
  5. namespace Masuit.Tools.TextDiff;
  6. public static class PatchExtension
  7. {
  /// <summary>
  /// Sentinel padding made of the four control characters U+0001 through U+0004.
  /// <c>Apply</c> wraps the text in it on both sides and strips it again before returning.
  /// </summary>
  internal static readonly string NullPadding = new string(new[] { '\u0001', '\u0002', '\u0003', '\u0004' });

  /// <summary>
  /// Matches a patch header line such as <c>@@ -382,8 +481,9 @@</c>.
  /// Groups 1 and 2 capture the first start/length pair, groups 3 and 4 the second one; both lengths may be empty.
  /// </summary>
  private static readonly Regex PatchHeader = new Regex("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$");
  /// <summary>
  /// Adds padding at the start and the end of the patched region so that the edges have something to match against.
  /// Intended to be called from the patch-apply routine only.
  /// </summary>
  /// <param name="patches">The patches to pad. The sequence is consumed lazily, one element ahead of the one being returned.</param>
  /// <param name="padding">The padding text; its length is passed to <c>Bump</c> for every patch.</param>
  /// <returns>
  /// Every patch after <c>Bump(padding.Length)</c>; in addition the first one goes through <c>AddPaddingBegin</c>,
  /// the last one through <c>AddPaddingEnd</c>, and a lone patch through <c>AddPadding</c>.
  /// An empty input yields nothing.
  /// </returns>
  internal static IEnumerable<DiffPatch> AddPadding(this IEnumerable<DiffPatch> patches, string padding)
  {
      var shift = padding.Length;
      using var cursor = patches.GetEnumerator();
      if (!cursor.MoveNext())
      {
          yield break;
      }

      // NOTE(review): Bump presumably shifts the patch offsets by the padding length - confirm in DiffPatch.
      var pending = cursor.Current.Bump(shift);
      var atStart = true;

      // Look one element ahead so we know whether "pending" is the last patch before emitting it.
      while (cursor.MoveNext())
      {
          var upcoming = cursor.Current.Bump(shift);
          yield return atStart ? pending.AddPaddingBegin(padding) : pending;
          atStart = false;
          pending = upcoming;
      }

      // "pending" is now the final patch; it is also the first one when the list held a single patch.
      yield return atStart ? pending.AddPadding(padding) : pending.AddPaddingEnd(padding);
  }
  /// <summary>
  /// Takes a list of patches and rebuilds their textual representation.
  /// </summary>
  /// <param name="patches">The patches to serialize, in order.</param>
  /// <returns>The result of appending every patch to a single <see cref="StringBuilder"/>.</returns>
  public static string ToText(this IEnumerable<DiffPatch> patches)
  {
      // Fold step: append one patch and hand the same builder to the next iteration.
      static StringBuilder AppendPatch(StringBuilder buffer, DiffPatch patch) => buffer.Append(patch);

      var text = patches.Aggregate(new StringBuilder(), AppendPatch);
      return text.ToString();
  }
  /// <summary>
  /// Parses the textual representation of patches and returns the corresponding list of patch objects.
  /// </summary>
  /// <param name="text">The patch text; an empty string produces an empty list.</param>
  /// <returns>The parsed patches, in the order they appear in <paramref name="text"/>.</returns>
  /// <exception cref="ArgumentException">A line that should be a patch header does not match the header format.</exception>
  public static ImmutableList<DiffPatch> ParsePatches(this string text)
  {
      var parsed = ParseCore(text);
      return parsed.ToImmutableList();
  }
  /// <summary>
  /// Lazily parses patch text: each header line starts a patch, and the following lines up to the next
  /// line beginning with '@' become its diffs.
  /// </summary>
  /// <param name="text">The patch text, with lines separated by '\n'.</param>
  /// <returns>One <c>DiffPatch</c> per header line; nothing for an empty string.</returns>
  /// <exception cref="ArgumentException">The line at a patch boundary does not match <c>PatchHeader</c>.</exception>
  private static IEnumerable<DiffPatch> ParseCore(string text)
  {
      if (text.Length == 0)
      {
          yield break;
      }

      var lines = text.SplitBy('\n').ToArray();
      var cursor = 0;
      while (cursor < lines.Length)
      {
          var header = PatchHeader.Match(lines[cursor]);
          if (!header.Success)
          {
              throw new ArgumentException("Invalid patch string: " + lines[cursor]);
          }

          var (start1, length1) = header.GetStartAndLength(1, 2);
          var (start2, length2) = header.GetStartAndLength(3, 4);
          cursor++;

          // Collect the body lines of this patch; the loop stops without advancing on the next header.
          var body = new List<TextDiffer>();
          for (; cursor < lines.Length; cursor++)
          {
              var entry = lines[cursor];
              if (string.IsNullOrEmpty(entry))
              {
                  // Blank lines are skipped.
                  continue;
              }

              var sign = entry[0];
              if (sign == '@')
              {
                  break;
              }

              // '+' is escaped first - presumably because UrlDecoded would otherwise turn it into a space.
              var payload = entry[1..].Replace("+", "%2b").UrlDecoded();

              // '+' is an insertion, '-' a deletion; every other leading character is treated as an equality.
#if NETSTANDARD2_1_OR_GREATER
              body.Add(sign switch
              {
                  '+' => TextDiffer.Insert(payload),
                  '-' => TextDiffer.Delete(payload),
                  _ => TextDiffer.Equal(payload)
              });
#else
              body.Add(sign switch
              {
                  '+' => TextDiffer.Insert(payload.AsSpan()),
                  '-' => TextDiffer.Delete(payload.AsSpan()),
                  _ => TextDiffer.Equal(payload.AsSpan())
              });
#endif
          }

          yield return new DiffPatch(start1, length1, start2, length2, body.ToImmutableList());
      }
  }
  /// <summary>
  /// Reads one start/length pair from a matched patch header.
  /// </summary>
  /// <param name="m">A successful header match.</param>
  /// <param name="startIndex">Index of the group holding the start value.</param>
  /// <param name="lengthIndex">Index of the group holding the (possibly empty) length value.</param>
  /// <returns>
  /// <c>(start, 0)</c> when the length is "0"; <c>(start - 1, 1)</c> when the length is omitted;
  /// otherwise <c>(start - 1, length)</c>.
  /// </returns>
  private static (int start, int length) GetStartAndLength(this Match m, int startIndex, int lengthIndex)
  {
      var start = Convert.ToInt32(m.Groups[startIndex].Value);
      var rawLength = m.Groups[lengthIndex].Value;

      if (rawLength == "0")
      {
          // A zero-length range keeps the start exactly as written.
          return (start, 0);
      }

      if (rawLength.Length == 0)
      {
          // An omitted length means a single character.
          return (start - 1, 1);
      }

      return (start - 1, Convert.ToInt32(rawLength));
  }
  /// <summary>
  /// Merges a set of patches onto the text using <c>MatchOption.Default</c> and <c>PatchOption.Default</c>.
  /// Returns the patched text together with an array telling which patches were applied successfully.
  /// </summary>
  /// <param name="patches">The patches to apply.</param>
  /// <param name="text">The text to patch.</param>
  /// <returns>The new text and the per-patch success flags.</returns>
  public static (string newText, bool[] results) Apply(this IEnumerable<DiffPatch> patches, string text)
  {
      return patches.Apply(text, MatchOption.Default, PatchOption.Default);
  }

  /// <summary>
  /// Merges a set of patches onto the text using the given match options and <c>PatchOption.Default</c>.
  /// Returns the patched text together with an array telling which patches were applied successfully.
  /// </summary>
  /// <param name="patches">The patches to apply.</param>
  /// <param name="text">The text to patch.</param>
  /// <param name="matchOption">Options used when locating each patch in the text.</param>
  /// <returns>The new text and the per-patch success flags.</returns>
  public static (string newText, bool[] results) Apply(this IEnumerable<DiffPatch> patches, string text, MatchOption matchOption)
  {
      return patches.Apply(text, matchOption, PatchOption.Default);
  }
  /// <summary>
  /// Merges a set of patches onto the text. Returns the patched text together with an array telling
  /// which patches were applied successfully.
  /// </summary>
  /// <param name="input">The patches to apply. The sequence is snapshotted once, so a lazy source is enumerated a single time.</param>
  /// <param name="text">The text to patch.</param>
  /// <param name="matchOption">Options forwarded to <c>FindBestMatchIndex</c> when locating each patch in the text.</param>
  /// <param name="option">Supplies <c>PatchDeleteThreshold</c>, the highest accepted ratio of Levenshtein distance to expected length for long patches whose match is imperfect.</param>
  /// <returns>
  /// The new text and one flag per patch as obtained after padding and <c>SplitMax</c>
  /// (so the array can be longer than <paramref name="input"/>). When there are no patches the text is returned unchanged with an empty array.
  /// </returns>
  public static (string newText, bool[] results) Apply(this IEnumerable<DiffPatch> input, string text, MatchOption matchOption, PatchOption option)
  {
      // Snapshot the input so a lazily produced sequence is not enumerated twice
      // (once for the emptiness check and once for the actual work).
      var source = input as IReadOnlyCollection<DiffPatch> ?? input.ToList();
      if (source.Count == 0)
      {
          return (text, []);
      }

      // Surround the text with sentinel characters so patches touching the edges have context to match.
      var nullPadding = NullPadding;
      text = nullPadding + text + nullPadding;
      var patches = source.AddPadding(nullPadding).SplitMax().ToList();

      var x = 0; // index of the current patch inside results
      // delta is the offset between the expected and the actual location of the previous patch;
      // it is added to the expected location of the next one.
      var delta = 0;
      var results = new bool[patches.Count];
      foreach (var aPatch in patches)
      {
          var expectedLoc = aPatch.Start2 + delta;
          var text1 = aPatch.Diffs.Text1();
          int startLoc;
          var endLoc = -1;
          if (text1.Length > TextDiffConstants.MatchMaxBits)
          {
              // Pattern longer than MatchMaxBits: locate its first and last MatchMaxBits characters separately.
              startLoc = text.FindBestMatchIndex(text1[..TextDiffConstants.MatchMaxBits], expectedLoc, matchOption);
              if (startLoc != -1)
              {
                  endLoc = text.FindBestMatchIndex(text1[^TextDiffConstants.MatchMaxBits..], expectedLoc + text1.Length - TextDiffConstants.MatchMaxBits, matchOption);
                  if (endLoc == -1 || startLoc >= endLoc)
                  {
                      // No end match, or the end was found before the start: drop the start match as well.
                      startLoc = -1;
                  }
              }
          }
          else
          {
              startLoc = text.FindBestMatchIndex(text1, expectedLoc, matchOption);
          }

          if (startLoc == -1)
          {
              // No match found: mark the patch as failed and subtract its length change from delta
              // so later patches are still looked up at the right place.
              results[x] = false;
              delta -= aPatch.Length2 - aPatch.Length1;
          }
          else
          {
              results[x] = true;
              delta = startLoc - expectedLoc;
              var actualEndLoc = endLoc == -1
                  ? Math.Min(startLoc + text1.Length, text.Length)
                  : Math.Min(endLoc + TextDiffConstants.MatchMaxBits, text.Length);
              var text2 = text[startLoc..actualEndLoc];
              if (text1 == text2)
              {
                  // Perfect match: splice the replacement text straight in.
                  text = text[..startLoc] + aPatch.Diffs.Text2() + text[(startLoc + text1.Length)..];
              }
              else
              {
                  // Imperfect match: diff expected against actual text to translate patch offsets.
                  var diffs = TextDiffer.Compute(text1, text2, 0f, false);
                  if (text1.Length > TextDiffConstants.MatchMaxBits && diffs.Levenshtein() / (float)text1.Length > option.PatchDeleteThreshold)
                  {
                      // The end points match but the content in between differs too much.
                      results[x] = false;
                  }
                  else
                  {
                      diffs = diffs.CleanupSemanticLossless().ToImmutableList();
                      var index1 = 0; // position inside text1 (the expected text)
                      foreach (var aDiff in aPatch.Diffs)
                      {
                          if (aDiff.Operation != Equal)
                          {
                              var index2 = diffs.FindEquivalentLocation2(index1);
                              if (aDiff.Operation == Insert)
                              {
                                  text = text.Insert(startLoc + index2, aDiff.Text);
                              }
                              else if (aDiff.Operation == Delete)
                              {
                                  text = text.Remove(startLoc + index2, diffs.FindEquivalentLocation2(index1 + aDiff.Text.Length) - index2);
                              }
                          }

                          if (aDiff.Operation != Delete)
                          {
                              index1 += aDiff.Text.Length;
                          }
                      }
                  }
              }
          }

          x++;
      }

      // Strip the sentinel padding added above.
      text = text.Substring(nullPadding.Length, text.Length - 2 * nullPadding.Length);
      return (text, results);
  }
  /// <summary>
  /// Breaks up every patch whose <c>Length1</c> exceeds <c>TextDiffConstants.MatchMaxBits</c> into smaller patches;
  /// patches within the limit are returned unchanged.
  /// </summary>
  /// <param name="patches">The patches to inspect.</param>
  /// <param name="patchMargin">
  /// Number of context characters carried over before and after each chunk, and the room reserved
  /// for them when filling a chunk.
  /// </param>
  /// <returns>The original small patches and, in place of each oversized patch, the chunks that contain at least one insertion or deletion.</returns>
  internal static IEnumerable<DiffPatch> SplitMax(this IEnumerable<DiffPatch> patches, short patchMargin = 4)
  {
      const short patchSize = TextDiffConstants.MatchMaxBits;
      foreach (var patch in patches)
      {
          if (patch.Length1 <= patchSize)
          {
              yield return patch;
              continue;
          }

          var (start1, _, start2, _, diffs) = patch;
          var precontext = string.Empty;
          while (diffs.Any())
          {
              // Start a new chunk, seeded with the trailing context of the previous one.
              var s1 = start1 - precontext.Length;
              var s2 = start2 - precontext.Length;
              var l1 = precontext.Length;
              var l2 = precontext.Length;
              var thediffs = new List<TextDiffer>();
              var empty = true; // stays true while the chunk holds nothing but equalities
              if (precontext.Length != 0)
              {
#if NETSTANDARD2_1_OR_GREATER
                  thediffs.Add(TextDiffer.Equal(precontext));
#else
                  thediffs.Add(TextDiffer.Equal(precontext.AsSpan()));
#endif
              }

              while (diffs.Any() && l1 < patchSize - patchMargin)
              {
                  var first = diffs[0];
                  var diffType = first.Operation;
                  var diffText = first.Text;
                  if (diffType == Insert)
                  {
                      // Insertions do not consume source text, so they are taken whole.
                      l2 += diffText.Length;
                      start2 += diffText.Length;
#if NETSTANDARD2_1_OR_GREATER
                      thediffs.Add(TextDiffer.Insert(diffText));
#else
                      thediffs.Add(TextDiffer.Insert(diffText.AsSpan()));
#endif
                      diffs = diffs.RemoveAt(0);
                      empty = false;
                  }
                  else if (first.IsLargeDelete(2 * patchSize) && thediffs.Count == 1 && thediffs[0].Operation == Equal)
                  {
                      // A large deletion directly after the leading context is passed through in one piece.
                      l1 += diffText.Length;
                      start1 += diffText.Length;
#if NETSTANDARD2_1_OR_GREATER
                      thediffs.Add(TextDiffer.Delete(diffText));
#else
                      thediffs.Add(TextDiffer.Delete(diffText.AsSpan()));
#endif
                      diffs = diffs.RemoveAt(0);
                      empty = false;
                  }
                  else
                  {
                      // Deletion or equality: take as much as still fits into this chunk.
                      var cutoff = diffText[..Math.Min(diffText.Length, patchSize - l1 - patchMargin)];
                      l1 += cutoff.Length;
                      start1 += cutoff.Length;
                      if (diffType == Equal)
                      {
                          l2 += cutoff.Length;
                          start2 += cutoff.Length;
                      }
                      else
                      {
                          empty = false;
                      }

                      thediffs.Add(TextDiffer.Create(diffType, cutoff));
                      if (cutoff == first.Text)
                      {
                          diffs = diffs.RemoveAt(0);
                      }
                      else
                      {
                          // Leave the unconsumed remainder at the head of the queue.
                          diffs = diffs.RemoveAt(0).Insert(0, first with
                          {
                              Text = first.Text[cutoff.Length..]
                          });
                      }
                  }
              }

              // Context for the next chunk: the last patchMargin characters of this chunk's result text.
              precontext = thediffs.Text2();
              precontext = precontext[Math.Max(0, precontext.Length - patchMargin)..];

              // Trailing context for this chunk: the first patchMargin characters of what is still to come.
              var text1 = diffs.Text1();
              var postcontext = text1.Length > patchMargin ? text1[..patchMargin] : text1;
              if (postcontext.Length != 0)
              {
                  l1 += postcontext.Length;
                  l2 += postcontext.Length;

                  // Check the count BEFORE touching the last element: Last() on an empty list throws.
                  if (thediffs.Count > 0 && thediffs[^1].Operation == Equal)
                  {
                      thediffs[^1] = thediffs[^1].Append(postcontext);
                  }
                  else
                  {
#if NETSTANDARD2_1_OR_GREATER
                      thediffs.Add(TextDiffer.Equal(postcontext));
#else
                      thediffs.Add(TextDiffer.Equal(postcontext.AsSpan()));
#endif
                  }
              }

              if (!empty)
              {
                  yield return new DiffPatch(s1, l1, s2, l2, thediffs.ToImmutableList());
              }
          }
      }
  }
  332. }