Browse Source

fix: improve search and replace symbol parsing (#9456)

Daniel 1 month ago
parent
commit
a49e81f37c
1 changed files with 24 additions and 22 deletions
  1. 24 22
      src/core/diff/strategies/multi-file-search-replace.ts

+ 24 - 22
src/core/diff/strategies/multi-file-search-replace.ts

@@ -251,14 +251,16 @@ Each file requires its own path, start_line, and diff elements.
 
 		const state = { current: State.START, line: 0 }
 
-		// Pattern allows optional '>' after SEARCH to handle AI-generated diffs
-		// (e.g., Sonnet 4 sometimes adds an extra '>')
-		const SEARCH_PATTERN = /^<<<<<<< SEARCH>?$/
-		const SEARCH = SEARCH_PATTERN.source.replace(/[\^$]/g, "") // Remove regex anchors for display
+		// Pattern allows optional extra '<' or '>' for SEARCH to handle AI-generated diffs
+		// (e.g., Sonnet 4 sometimes adds extra markers)
+		const SEARCH_PATTERN = /^<{7,8} SEARCH>?$/
+		const SEARCH = "<<<<<<< SEARCH" // Simplified for display
 		const SEP = "======="
-		const REPLACE = ">>>>>>> REPLACE"
-		const SEARCH_PREFIX = "<<<<<<< "
-		const REPLACE_PREFIX = ">>>>>>> "
+		// Pattern allows optional extra '>' or '<' for REPLACE
+		const REPLACE_PATTERN = /^>{7,8} REPLACE<?$/
+		const REPLACE = ">>>>>>> REPLACE" // Simplified for display
+		const SEARCH_PREFIX_PATTERN = /^<{7,8} /
+		const REPLACE_PREFIX_PATTERN = /^>{7,8} /
 
 		const reportMergeConflictError = (found: string, _expected: string) => ({
 			success: false,
@@ -326,7 +328,7 @@ Each file requires its own path, start_line, and diff elements.
 		const lines = diffContent.split("\n")
 		const searchCount = lines.filter((l) => SEARCH_PATTERN.test(l.trim())).length
 		const sepCount = lines.filter((l) => l.trim() === SEP).length
-		const replaceCount = lines.filter((l) => l.trim() === REPLACE).length
+		const replaceCount = lines.filter((l) => REPLACE_PATTERN.test(l.trim())).length
 
 		const likelyBadStructure = searchCount !== replaceCount || sepCount < searchCount
 
@@ -350,29 +352,29 @@ Each file requires its own path, start_line, and diff elements.
 						return likelyBadStructure
 							? reportInvalidDiffError(SEP, SEARCH)
 							: reportMergeConflictError(SEP, SEARCH)
-					if (marker === REPLACE) return reportInvalidDiffError(REPLACE, SEARCH)
-					if (marker.startsWith(REPLACE_PREFIX)) return reportMergeConflictError(marker, SEARCH)
+					if (REPLACE_PATTERN.test(marker)) return reportInvalidDiffError(REPLACE, SEARCH)
+					if (REPLACE_PREFIX_PATTERN.test(marker)) return reportMergeConflictError(marker, SEARCH)
 					if (SEARCH_PATTERN.test(marker)) state.current = State.AFTER_SEARCH
-					else if (marker.startsWith(SEARCH_PREFIX)) return reportMergeConflictError(marker, SEARCH)
+					else if (SEARCH_PREFIX_PATTERN.test(marker)) return reportMergeConflictError(marker, SEARCH)
 					break
 
 				case State.AFTER_SEARCH:
-					if (SEARCH_PATTERN.test(marker)) return reportInvalidDiffError(SEARCH_PATTERN.source, SEP)
-					if (marker.startsWith(SEARCH_PREFIX)) return reportMergeConflictError(marker, SEARCH)
-					if (marker === REPLACE) return reportInvalidDiffError(REPLACE, SEP)
-					if (marker.startsWith(REPLACE_PREFIX)) return reportMergeConflictError(marker, SEARCH)
+					if (SEARCH_PATTERN.test(marker)) return reportInvalidDiffError(SEARCH, SEP)
+					if (SEARCH_PREFIX_PATTERN.test(marker)) return reportMergeConflictError(marker, SEARCH)
+					if (REPLACE_PATTERN.test(marker)) return reportInvalidDiffError(REPLACE, SEP)
+					if (REPLACE_PREFIX_PATTERN.test(marker)) return reportMergeConflictError(marker, SEARCH)
 					if (marker === SEP) state.current = State.AFTER_SEPARATOR
 					break
 
 				case State.AFTER_SEPARATOR:
-					if (SEARCH_PATTERN.test(marker)) return reportInvalidDiffError(SEARCH_PATTERN.source, REPLACE)
-					if (marker.startsWith(SEARCH_PREFIX)) return reportMergeConflictError(marker, REPLACE)
+					if (SEARCH_PATTERN.test(marker)) return reportInvalidDiffError(SEARCH, REPLACE)
+					if (SEARCH_PREFIX_PATTERN.test(marker)) return reportMergeConflictError(marker, REPLACE)
 					if (marker === SEP)
 						return likelyBadStructure
 							? reportInvalidDiffError(SEP, REPLACE)
 							: reportMergeConflictError(SEP, REPLACE)
-					if (marker === REPLACE) state.current = State.START
-					else if (marker.startsWith(REPLACE_PREFIX)) return reportMergeConflictError(marker, REPLACE)
+					if (REPLACE_PATTERN.test(marker)) state.current = State.START
+					else if (REPLACE_PREFIX_PATTERN.test(marker)) return reportMergeConflictError(marker, REPLACE)
 					break
 			}
 		}
@@ -451,18 +453,18 @@ Each file requires its own path, start_line, and diff elements.
 
 		/* Regex parts:
 		1. (?:^|\n)   Ensures the first marker starts at the beginning of the file or right after a newline.
-		2. (?<!\\)<<<<<<< SEARCH>?\s*\n   Matches the line "<<<<<<< SEARCH" with optional '>' (ignoring any trailing spaces) – the negative lookbehind makes sure it isn't escaped.
+		2. (?<!\\)<{7,8} SEARCH>?\s*\n   Matches "<<<<<<< SEARCH" or "<<<<<<< SEARCH>" or "<<<<<<<<" with 7-8 '<' chars (ignoring any trailing spaces) – the negative lookbehind makes sure it isn't escaped.
 		3. ((?:\:start_line:\s*(\d+)\s*\n))?   Optionally matches a ":start_line:" line. The outer capturing group is group 1 and the inner (\d+) is group 2.
 		4. ((?:\:end_line:\s*(\d+)\s*\n))?   Optionally matches a ":end_line:" line. Group 3 is the whole match and group 4 is the digits.
 		5. ((?<!\\)-------\s*\n)?   Optionally matches the "-------" marker line (group 5).
 		6. ([\s\S]*?)(?:\n)?   Non‐greedy match for the "search content" (group 6) up to the next marker.
 		7. (?:(?<=\n)(?<!\\)=======\s*\n)   Matches the "=======" marker on its own line.
 		8. ([\s\S]*?)(?:\n)?   Non‐greedy match for the "replace content" (group 7).
-		9. (?:(?<=\n)(?<!\\)>>>>>>> REPLACE)(?=\n|$)   Matches the final ">>>>>>> REPLACE" marker on its own line (and requires a following newline or the end of file).
+		9. (?:(?<=\n)(?<!\\)>{7,8} REPLACE<?)(?=\n|$)   Matches ">>>>>>> REPLACE" or ">>>>>>> REPLACE<" or ">>>>>>>>" with 7-8 '>' chars on its own line (and requires a following newline or the end of file).
 		*/
 		let matches = [
 			...diffContent.matchAll(
-				/(?:^|\n)(?<!\\)<<<<<<< SEARCH>?\s*\n((?:\:start_line:\s*(\d+)\s*\n))?((?:\:end_line:\s*(\d+)\s*\n))?((?<!\\)-------\s*\n)?([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)=======\s*\n)([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)>>>>>>> REPLACE)(?=\n|$)/g,
+				/(?:^|\n)(?<!\\)<{7,8} SEARCH>?\s*\n((?:\:start_line:\s*(\d+)\s*\n))?((?:\:end_line:\s*(\d+)\s*\n))?((?<!\\)-------\s*\n)?([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)=======\s*\n)([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)>{7,8} REPLACE<?)(?=\n|$)/g,
 			),
 		]