using System.Collections.Immutable; using System.Text; using System.Text.RegularExpressions; using static Masuit.Tools.TextDiff.DiffOperation; namespace Masuit.Tools.TextDiff; public static class PatchExtension { internal static readonly string NullPadding = new(Enumerable.Range(1, 4).Select(i => (char)i).ToArray()); private static readonly Regex PatchHeader = new("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$"); /// /// 在文本的开始和结束处添加一些填充,以便边缘可以匹配某些内容。patch_apply内部调用 /// /// /// /// internal static IEnumerable AddPadding(this IEnumerable patches, string padding) { var paddingLength = padding.Length; using var enumerator = patches.GetEnumerator(); if (!enumerator.MoveNext()) { yield break; } var current = enumerator.Current.Bump(paddingLength); var next = current; var isfirst = true; while (true) { var hasnext = enumerator.MoveNext(); if (hasnext) { next = enumerator.Current.Bump(paddingLength); } yield return (isfirst, hasnext) switch { (true, false) => current.AddPadding(padding), // list has only one patch (true, true) => current.AddPaddingBegin(padding), (false, true) => current, (false, false) => current.AddPaddingEnd(padding) }; isfirst = false; if (!hasnext) yield break; current = next; } } /// /// 获取补丁列表并重建文本 /// /// /// public static string ToText(this IEnumerable patches) => patches.Aggregate(new StringBuilder(), (sb, patch) => sb.Append(patch)).ToString(); /// /// 解析补丁的文本表示,并返回补丁对象列表 /// /// /// public static ImmutableList ParsePatches(this string text) => ParseCore(text).ToImmutableList(); private static IEnumerable ParseCore(string text) { if (text.Length == 0) { yield break; } var lines = text.SplitBy('\n').ToArray(); var index = 0; while (index < lines.Length) { var line = lines[index]; var m = PatchHeader.Match(line); if (!m.Success) { throw new ArgumentException("Invalid patch string: " + line); } var (start1, length1) = m.GetStartAndLength(1, 2); var (start2, length2) = m.GetStartAndLength(3, 4); index++; IEnumerable CreateDiffs() { while (index < lines.Length) { line = lines[index]; if (!string.IsNullOrEmpty(line)) { var sign = line[0]; if (sign == '@') { break; } yield return sign switch { #if NETSTANDARD2_1_OR_GREATER '+' => TextDiffer.Insert(line[1..].Replace("+", "%2b").UrlDecoded()), '-' => TextDiffer.Delete(line[1..].Replace("+", "%2b").UrlDecoded()), _ => TextDiffer.Equal(line[1..].Replace("+", "%2b").UrlDecoded()) #else '+' => TextDiffer.Insert(line[1..].Replace("+", "%2b").UrlDecoded().AsSpan()), '-' => TextDiffer.Delete(line[1..].Replace("+", "%2b").UrlDecoded().AsSpan()), _ => TextDiffer.Equal(line[1..].Replace("+", "%2b").UrlDecoded().AsSpan()) #endif }; } index++; } } yield return new DiffPatch(start1, length1, start2, length2, CreateDiffs().ToImmutableList()); } } private static (int start, int length) GetStartAndLength(this Match m, int startIndex, int lengthIndex) { var lengthStr = m.Groups[lengthIndex].Value; var value = Convert.ToInt32(m.Groups[startIndex].Value); return lengthStr switch { "0" => (value, 0), "" => (value - 1, 1), _ => (value - 1, Convert.ToInt32(lengthStr)) }; } /// /// 将一组补丁合并到文本上。返回一个补丁文本,以及一个指示应用了哪些补丁应用成功 /// /// /// /// public static (string newText, bool[] results) Apply(this IEnumerable patches, string text) => Apply(patches, text, MatchOption.Default, PatchOption.Default); public static (string newText, bool[] results) Apply(this IEnumerable patches, string text, MatchOption matchOption) => Apply(patches, text, matchOption, PatchOption.Default); /// /// 将一组补丁合并到文本上。返回一个补丁文本,以及一个指示应用了哪些补丁应用成功 /// /// /// /// /// /// public static (string newText, bool[] results) Apply(this IEnumerable input, string text, MatchOption matchOption, PatchOption option) { if (!input.Any()) { return (text, []); } var nullPadding = NullPadding; text = nullPadding + text + nullPadding; var patches = input.AddPadding(nullPadding).SplitMax().ToList(); var x = 0; var delta = 0; var results = new bool[patches.Count]; foreach (var aPatch in patches) { var expectedLoc = aPatch.Start2 + delta; var text1 = aPatch.Diffs.Text1(); int startLoc; var endLoc = -1; if (text1.Length > TextDiffConstants.MatchMaxBits) { startLoc = text.FindBestMatchIndex(text1[..TextDiffConstants.MatchMaxBits], expectedLoc, matchOption); if (startLoc != -1) { endLoc = text.FindBestMatchIndex(text1[^TextDiffConstants.MatchMaxBits..], expectedLoc + text1.Length - TextDiffConstants.MatchMaxBits, matchOption); if (endLoc == -1 || startLoc >= endLoc) { startLoc = -1; } } } else { startLoc = text.FindBestMatchIndex(text1, expectedLoc, matchOption); } if (startLoc == -1) { results[x] = false; delta -= aPatch.Length2 - aPatch.Length1; } else { results[x] = true; delta = startLoc - expectedLoc; var actualEndLoc = endLoc == -1 ? Math.Min(startLoc + text1.Length, text.Length) : Math.Min(endLoc + TextDiffConstants.MatchMaxBits, text.Length); var text2 = text[startLoc..actualEndLoc]; if (text1 == text2) { text = text[..startLoc] + aPatch.Diffs.Text2() + text[(startLoc + text1.Length)..]; } else { var diffs = TextDiffer.Compute(text1, text2, 0f, false); if (text1.Length > TextDiffConstants.MatchMaxBits && diffs.Levenshtein() / (float)text1.Length > option.PatchDeleteThreshold) { results[x] = false; } else { diffs = diffs.CleanupSemanticLossless().ToImmutableList(); var index1 = 0; foreach (var aDiff in aPatch.Diffs) { if (aDiff.Operation != Equal) { var index2 = diffs.FindEquivalentLocation2(index1); if (aDiff.Operation == Insert) { text = text.Insert(startLoc + index2, aDiff.Text); } else if (aDiff.Operation == Delete) { text = text.Remove(startLoc + index2, diffs.FindEquivalentLocation2(index1 + aDiff.Text.Length) - index2); } } if (aDiff.Operation != Delete) { index1 += aDiff.Text.Length; } } } } } x++; } text = text.Substring(nullPadding.Length, text.Length - 2 * nullPadding.Length); return (text, results); } internal static IEnumerable SplitMax(this IEnumerable patches, short patchMargin = 4) { const short patchSize = TextDiffConstants.MatchMaxBits; foreach (var patch in patches) { if (patch.Length1 <= patchSize) { yield return patch; continue; } var (start1, _, start2, _, diffs) = patch; var precontext = string.Empty; while (diffs.Any()) { var (s1, l1, s2, l2, thediffs) = (start1 - precontext.Length, precontext.Length, start2 - precontext.Length, precontext.Length, new List()); var empty = true; if (precontext.Length != 0) { #if NETSTANDARD2_1_OR_GREATER thediffs.Add(TextDiffer.Equal(precontext)); #else thediffs.Add(TextDiffer.Equal(precontext.AsSpan())); #endif } while (diffs.Any() && l1 < patchSize - patchMargin) { var first = diffs[0]; var diffType = diffs[0].Operation; var diffText = diffs[0].Text; if (first.Operation == Insert) { l2 += diffText.Length; start2 += diffText.Length; #if NETSTANDARD2_1_OR_GREATER thediffs.Add(TextDiffer.Insert(diffText)); #else thediffs.Add(TextDiffer.Insert(diffText.AsSpan())); #endif diffs = diffs.RemoveAt(0); empty = false; } else if (first.IsLargeDelete(2 * patchSize) && thediffs.Count == 1 && thediffs[0].Operation == Equal) { l1 += diffText.Length; start1 += diffText.Length; #if NETSTANDARD2_1_OR_GREATER thediffs.Add(TextDiffer.Delete(diffText)); #else thediffs.Add(TextDiffer.Delete(diffText.AsSpan())); #endif diffs = diffs.RemoveAt(0); empty = false; } else { var cutoff = diffText[..Math.Min(diffText.Length, patchSize - l1 - patchMargin)]; l1 += cutoff.Length; start1 += cutoff.Length; if (diffType == Equal) { l2 += cutoff.Length; start2 += cutoff.Length; } else { empty = false; } thediffs.Add(TextDiffer.Create(diffType, cutoff)); if (cutoff == first.Text) { diffs = diffs.RemoveAt(0); } else { diffs = diffs.RemoveAt(0).Insert(0, first with { Text = first.Text[cutoff.Length..] }); } } } precontext = thediffs.Text2(); precontext = precontext[Math.Max(0, precontext.Length - patchMargin)..]; var text1 = diffs.Text1(); var postcontext = text1.Length > patchMargin ? text1[..patchMargin] : text1; if (postcontext.Length != 0) { l1 += postcontext.Length; l2 += postcontext.Length; var lastDiff = thediffs.Last(); if (thediffs.Count > 0 && lastDiff.Operation == Equal) { thediffs[^1] = lastDiff.Append(postcontext); } else { #if NETSTANDARD2_1_OR_GREATER thediffs.Add(TextDiffer.Equal(postcontext)); #else thediffs.Add(TextDiffer.Equal(postcontext.AsSpan())); #endif } } if (!empty) { yield return new DiffPatch(s1, l1, s2, l2, thediffs.ToImmutableList()); } } } } }