10 bulan lalu · c18e25f4bc
--- a/src/core/diff/strategies/__tests__/multi-search-replace.test.ts
+++ b/src/core/diff/strategies/__tests__/multi-search-replace.test.ts
@@ -815,23 +815,6 @@ function five() {
 
				 					}
			
 
				 				})
			
 
				 
			
 
				-				it("should not strip when not all lines have numbers in either section", async () => {
			
 
				-					const originalContent = "function test() {\n    return true;\n}\n"
			
 
				-					const diffContent = `test.ts
			
 
				-<<<<<<< SEARCH
			
 
				-1 | function test() {
			
 
				-2 |     return true;
			
 
				-3 | }
			
 
				-=======
			
 
				-1 | function test() {
			
 
				-    return false;
			
 
				-3 | }
			
 
				->>>>>>> REPLACE`
			
 
				-
			
 
				-					const result = await strategy.applyDiff(originalContent, diffContent)
			
 
				-					expect(result.success).toBe(false)
			
 
				-				})
			
 
				-
			
 
				 				it("should preserve content that naturally starts with pipe", async () => {
			
 
				 					const originalContent = "|header|another|\n|---|---|\n|data|more|\n"
			
 
				 					const diffContent = `test.ts
			
@@ -852,6 +835,59 @@ function five() {
 
				 					}
			
 
				 				})
			
 
				 
			
 
				+				describe("aggressive line number stripping fallback", () => {
			
 
				+					// Tests for aggressive line number stripping fallback
			
 
				+					it("should use aggressive line number stripping when line numbers are inconsistent", async () => {
			
 
				+						const originalContent = "function test() {\n    return true;\n}\n"
			
 
				+
			
 
				+						const diffContent = [
			
 
				+							"<<<<<<< SEARCH",
			
 
				+							":start_line:1",
			
 
				+							":end_line:3",
			
 
				+							"-------",
			
 
				+							"1 | function test() {",
			
 
				+							"    return true;", // missing line number
			
 
				+							"3 | }",
			
 
				+							"=======",
			
 
				+							"function test() {",
			
 
				+							"    return fallback;",
			
 
				+							"}",
			
 
				+							">>>>>>> REPLACE",
			
 
				+						].join("\n")
			
 
				+
			
 
				+						const result = await strategy.applyDiff(originalContent, diffContent)
			
 
				+						expect(result.success).toBe(true)
			
 
				+						if (result.success) {
			
 
				+							expect(result.content).toBe("function test() {\n    return fallback;\n}\n")
			
 
				+						}
			
 
				+					})
			
 
				+
			
 
				+					it("should handle pipe characters without numbers using aggressive fallback", async () => {
			
 
				+						const originalContent = "function test() {\n    return true;\n}\n"
			
 
				+
			
 
				+						const diffContent = [
			
 
				+							"<<<<<<< SEARCH",
			
 
				+							":start_line:1",
			
 
				+							":end_line:3",
			
 
				+							"-------",
			
 
				+							"| function test() {",
			
 
				+							"|     return true;",
			
 
				+							"| }",
			
 
				+							"=======",
			
 
				+							"function test() {",
			
 
				+							"    return piped;",
			
 
				+							"}",
			
 
				+							">>>>>>> REPLACE",
			
 
				+						].join("\n")
			
 
				+
			
 
				+						const result = await strategy.applyDiff(originalContent, diffContent)
			
 
				+						expect(result.success).toBe(true)
			
 
				+						if (result.success) {
			
 
				+							expect(result.content).toBe("function test() {\n    return piped;\n}\n")
			
 
				+						}
			
 
				+					})
			
 
				+				})
			
 
				+
			
 
				 				it("should preserve indentation when stripping line numbers", async () => {
			
 
				 					const originalContent = "    function test() {\n        return true;\n    }\n"
			
 
				 					const diffContent = `test.ts
			
--- a/src/core/diff/strategies/multi-search-replace.ts
+++ b/src/core/diff/strategies/multi-search-replace.ts
@@ -29,6 +29,48 @@ function getSimilarity(original: string, search: string): number {
 
				 	return 1 - dist / maxLength
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * Performs a "middle-out" search of `lines` (between [startIndex, endIndex]) to find
			
 
				+ * the slice that is most similar to `searchChunk`. Returns the best score, index, and matched text.
			
 
				+ */
			
 
				+function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, endIndex: number) {
			
 
				+	let bestScore = 0
			
 
				+	let bestMatchIndex = -1
			
 
				+	let bestMatchContent = ""
			
 
				+	const searchLen = searchChunk.split(/\r?\n/).length
			
 
				+
			
 
				+	// Middle-out from the midpoint
			
 
				+	const midPoint = Math.floor((startIndex + endIndex) / 2)
			
 
				+	let leftIndex = midPoint
			
 
				+	let rightIndex = midPoint + 1
			
 
				+
			
 
				+	while (leftIndex >= startIndex || rightIndex <= endIndex - searchLen) {
			
 
				+		if (leftIndex >= startIndex) {
			
 
				+			const originalChunk = lines.slice(leftIndex, leftIndex + searchLen).join("\n")
			
 
				+			const similarity = getSimilarity(originalChunk, searchChunk)
			
 
				+			if (similarity > bestScore) {
			
 
				+				bestScore = similarity
			
 
				+				bestMatchIndex = leftIndex
			
 
				+				bestMatchContent = originalChunk
			
 
				+			}
			
 
				+			leftIndex--
			
 
				+		}
			
 
				+
			
 
				+		if (rightIndex <= endIndex - searchLen) {
			
 
				+			const originalChunk = lines.slice(rightIndex, rightIndex + searchLen).join("\n")
			
 
				+			const similarity = getSimilarity(originalChunk, searchChunk)
			
 
				+			if (similarity > bestScore) {
			
 
				+				bestScore = similarity
			
 
				+				bestMatchIndex = rightIndex
			
 
				+				bestMatchContent = originalChunk
			
 
				+			}
			
 
				+			rightIndex++
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return { bestScore, bestMatchIndex, bestMatchContent }
			
 
				+}
			
 
				+
			
 
				 export class MultiSearchReplaceDiffStrategy implements DiffStrategy {
			
 
				 	private fuzzyThreshold: number
			
 
				 	private bufferLines: number
			
@@ -253,7 +295,9 @@ Only use a single line of '=======' between search and replacement content, beca
 
				 			? { success: true }
			
 
				 			: {
			
 
				 					success: false,
			
 
				-					error: `ERROR: Unexpected end of sequence: Expected '${state.current === State.AFTER_SEARCH ? SEP : REPLACE}' was not found.`,
			
 
				+					error: `ERROR: Unexpected end of sequence: Expected '${
			
 
				+						state.current === State.AFTER_SEARCH ? "=======" : ">>>>>>> REPLACE"
			
 
				+					}' was not found.`,
			
 
				 				}
			
 
				 	}
			
 
				 
			
@@ -329,19 +373,21 @@ Only use a single line of '=======' between search and replacement content, beca
 
				 			}))
			
 
				 			.sort((a, b) => a.startLine - b.startLine)
			
 
				 
			
 
				-		for (let { searchContent, replaceContent, startLine, endLine } of replacements) {
			
 
				-			startLine += startLine === 0 ? 0 : delta
			
 
				-			endLine += delta
			
 
				+		for (const replacement of replacements) {
			
 
				+			let { searchContent, replaceContent } = replacement
			
 
				+			let startLine = replacement.startLine + (replacement.startLine === 0 ? 0 : delta)
			
 
				+			let endLine = replacement.endLine + delta
			
 
				 
			
 
				 			// First unescape any escaped markers in the content
			
 
				 			searchContent = this.unescapeMarkers(searchContent)
			
 
				 			replaceContent = this.unescapeMarkers(replaceContent)
			
 
				 
			
 
				 			// Strip line numbers from search and replace content if every line starts with a line number
			
 
				-			if (
			
 
				+			const hasAllLineNumbers =
			
 
				 				(everyLineHasLineNumbers(searchContent) && everyLineHasLineNumbers(replaceContent)) ||
			
 
				 				(everyLineHasLineNumbers(searchContent) && replaceContent.trim() === "")
			
 
				-			) {
			
 
				+
			
 
				+			if (hasAllLineNumbers) {
			
 
				 				searchContent = stripLineNumbers(searchContent)
			
 
				 				replaceContent = stripLineNumbers(replaceContent)
			
 
				 			}
			
@@ -360,8 +406,8 @@ Only use a single line of '=======' between search and replacement content, beca
 
				 			}
			
 
				 
			
 
				 			// Split content into lines, handling both \n and \r\n
			
 
				-			const searchLines = searchContent === "" ? [] : searchContent.split(/\r?\n/)
			
 
				-			const replaceLines = replaceContent === "" ? [] : replaceContent.split(/\r?\n/)
			
 
				+			let searchLines = searchContent === "" ? [] : searchContent.split(/\r?\n/)
			
 
				+			let replaceLines = replaceContent === "" ? [] : replaceContent.split(/\r?\n/)
			
 
				 
			
 
				 			// Validate that empty search requires start line
			
 
				 			if (searchLines.length === 0 && !startLine) {
			
@@ -385,7 +431,7 @@ Only use a single line of '=======' between search and replacement content, beca
 
				 			let matchIndex = -1
			
 
				 			let bestMatchScore = 0
			
 
				 			let bestMatchContent = ""
			
 
				-			const searchChunk = searchLines.join("\n")
			
 
				+			let searchChunk = searchLines.join("\n")
			
 
				 
			
 
				 			// Determine search bounds
			
 
				 			let searchStartIndex = 0
			
@@ -421,68 +467,70 @@ Only use a single line of '=======' between search and replacement content, beca
 
				 
			
 
				 			// If no match found yet, try middle-out search within bounds
			
 
				 			if (matchIndex === -1) {
			
 
				-				const midPoint = Math.floor((searchStartIndex + searchEndIndex) / 2)
			
 
				-				let leftIndex = midPoint
			
 
				-				let rightIndex = midPoint + 1
			
 
				-
			
 
				-				// Search outward from the middle within bounds
			
 
				-				while (leftIndex >= searchStartIndex || rightIndex <= searchEndIndex - searchLines.length) {
			
 
				-					// Check left side if still in range
			
 
				-					if (leftIndex >= searchStartIndex) {
			
 
				-						const originalChunk = resultLines.slice(leftIndex, leftIndex + searchLines.length).join("\n")
			
 
				-						const similarity = getSimilarity(originalChunk, searchChunk)
			
 
				-						if (similarity > bestMatchScore) {
			
 
				-							bestMatchScore = similarity
			
 
				-							matchIndex = leftIndex
			
 
				-							bestMatchContent = originalChunk
			
 
				-						}
			
 
				-						leftIndex--
			
 
				-					}
			
 
				-
			
 
				-					// Check right side if still in range
			
 
				-					if (rightIndex <= searchEndIndex - searchLines.length) {
			
 
				-						const originalChunk = resultLines.slice(rightIndex, rightIndex + searchLines.length).join("\n")
			
 
				-						const similarity = getSimilarity(originalChunk, searchChunk)
			
 
				-						if (similarity > bestMatchScore) {
			
 
				-							bestMatchScore = similarity
			
 
				-							matchIndex = rightIndex
			
 
				-							bestMatchContent = originalChunk
			
 
				-						}
			
 
				-						rightIndex++
			
 
				-					}
			
 
				-				}
			
 
				+				const {
			
 
				+					bestScore,
			
 
				+					bestMatchIndex,
			
 
				+					bestMatchContent: midContent,
			
 
				+				} = fuzzySearch(resultLines, searchChunk, searchStartIndex, searchEndIndex)
			
 
				+				matchIndex = bestMatchIndex
			
 
				+				bestMatchScore = bestScore
			
 
				+				bestMatchContent = midContent
			
 
				 			}
			
 
				 
			
 
				-			// Require similarity to meet threshold
			
 
				+			// Try aggressive line number stripping as a fallback if regular matching fails
			
 
				 			if (matchIndex === -1 || bestMatchScore < this.fuzzyThreshold) {
			
 
				-				const searchChunk = searchLines.join("\n")
			
 
				-				const originalContentSection =
			
 
				-					startLine !== undefined && endLine !== undefined
			
 
				-						? `\n\nOriginal Content:\n${addLineNumbers(
			
 
				-								resultLines
			
 
				-									.slice(
			
 
				-										Math.max(0, startLine - 1 - this.bufferLines),
			
 
				-										Math.min(resultLines.length, endLine + this.bufferLines),
			
 
				-									)
			
 
				-									.join("\n"),
			
 
				-								Math.max(1, startLine - this.bufferLines),
			
 
				-							)}`
			
 
				-						: `\n\nOriginal Content:\n${addLineNumbers(resultLines.join("\n"))}`
			
 
				-
			
 
				-				const bestMatchSection = bestMatchContent
			
 
				-					? `\n\nBest Match Found:\n${addLineNumbers(bestMatchContent, matchIndex + 1)}`
			
 
				-					: `\n\nBest Match Found:\n(no match)`
			
 
				-
			
 
				-				const lineRange =
			
 
				-					startLine || endLine
			
 
				-						? ` at ${startLine ? `start: ${startLine}` : "start"} to ${endLine ? `end: ${endLine}` : "end"}`
			
 
				-						: ""
			
 
				+				// Strip both search and replace content once (simultaneously)
			
 
				+				const aggressiveSearchContent = stripLineNumbers(searchContent, true)
			
 
				+				const aggressiveReplaceContent = stripLineNumbers(replaceContent, true)
			
 
				+
			
 
				+				const aggressiveSearchLines = aggressiveSearchContent ? aggressiveSearchContent.split(/\r?\n/) : []
			
 
				+				const aggressiveSearchChunk = aggressiveSearchLines.join("\n")
			
 
				+
			
 
				+				// Try middle-out search again with aggressive stripped content (respecting the same search bounds)
			
 
				+				const {
			
 
				+					bestScore,
			
 
				+					bestMatchIndex,
			
 
				+					bestMatchContent: aggContent,
			
 
				+				} = fuzzySearch(resultLines, aggressiveSearchChunk, searchStartIndex, searchEndIndex)
			
 
				+				if (bestMatchIndex !== -1 && bestScore >= this.fuzzyThreshold) {
			
 
				+					matchIndex = bestMatchIndex
			
 
				+					bestMatchScore = bestScore
			
 
				+					bestMatchContent = aggContent
			
 
				+					// Replace the original search/replace with their stripped versions
			
 
				+					searchContent = aggressiveSearchContent
			
 
				+					replaceContent = aggressiveReplaceContent
			
 
				+					searchLines = aggressiveSearchLines
			
 
				+					replaceLines = replaceContent ? replaceContent.split(/\r?\n/) : []
			
 
				+				} else {
			
 
				+					// No match found with either method
			
 
				+					const originalContentSection =
			
 
				+						startLine !== undefined && endLine !== undefined
			
 
				+							? `\n\nOriginal Content:\n${addLineNumbers(
			
 
				+									resultLines
			
 
				+										.slice(
			
 
				+											Math.max(0, startLine - 1 - this.bufferLines),
			
 
				+											Math.min(resultLines.length, endLine + this.bufferLines),
			
 
				+										)
			
 
				+										.join("\n"),
			
 
				+									Math.max(1, startLine - this.bufferLines),
			
 
				+								)}`
			
 
				+							: `\n\nOriginal Content:\n${addLineNumbers(resultLines.join("\n"))}`
			
 
				+
			
 
				+					const bestMatchSection = bestMatchContent
			
 
				+						? `\n\nBest Match Found:\n${addLineNumbers(bestMatchContent, matchIndex + 1)}`
			
 
				+						: `\n\nBest Match Found:\n(no match)`
			
 
				+
			
 
				+					const lineRange =
			
 
				+						startLine || endLine
			
 
				+							? ` at ${startLine ? `start: ${startLine}` : "start"} to ${endLine ? `end: ${endLine}` : "end"}`
			
 
				+							: ""
			
 
				 
			
 
				-				diffResults.push({
			
 
				-					success: false,
			
 
				-					error: `No sufficiently similar match found${lineRange} (${Math.floor(bestMatchScore * 100)}% similar, needs ${Math.floor(this.fuzzyThreshold * 100)}%)\n\nDebug Info:\n- Similarity Score: ${Math.floor(bestMatchScore * 100)}%\n- Required Threshold: ${Math.floor(this.fuzzyThreshold * 100)}%\n- Search Range: ${startLine && endLine ? `lines ${startLine}-${endLine}` : "start to end"}\n- Tip: Use the read_file tool to get the latest content of the file before attempting to use the apply_diff tool again, as the file content may have changed\n\nSearch Content:\n${searchChunk}${bestMatchSection}${originalContentSection}`,
			
 
				-				})
			
 
				-				continue
			
 
				+					diffResults.push({
			
 
				+						success: false,
			
 
				+						error: `No sufficiently similar match found${lineRange} (${Math.floor(bestMatchScore * 100)}% similar, needs ${Math.floor(this.fuzzyThreshold * 100)}%)\n\nDebug Info:\n- Similarity Score: ${Math.floor(bestMatchScore * 100)}%\n- Required Threshold: ${Math.floor(this.fuzzyThreshold * 100)}%\n- Search Range: ${startLine && endLine ? `lines ${startLine}-${endLine}` : "start to end"}\n- Tried both standard and aggressive line number stripping\n- Tip: Use the read_file tool to get the latest content of the file before attempting to use the apply_diff tool again, as the file content may have changed\n\nSearch Content:\n${searchChunk}${bestMatchSection}${originalContentSection}`,
			
 
				+					})
			
 
				+					continue
			
 
				+				}
			
 
				 			}
			
 
				 
			
 
				 			// Get the matched lines from the original content
			
--- a/src/integrations/misc/__tests__/extract-text.test.ts
+++ b/src/integrations/misc/__tests__/extract-text.test.ts
@@ -137,6 +137,46 @@ describe("stripLineNumbers", () => {
 
				 		const expected = "line one\nline two\nline three"
			
 
				 		expect(stripLineNumbers(input)).toBe(expected)
			
 
				 	})
			
 
				+
			
 
				+	describe("aggressive mode", () => {
			
 
				+		it("should strip content with just a pipe character", () => {
			
 
				+			const input = "| line one\n| line two\n| line three"
			
 
				+			const expected = "line one\nline two\nline three"
			
 
				+			expect(stripLineNumbers(input, true)).toBe(expected)
			
 
				+		})
			
 
				+
			
 
				+		it("should strip content with mixed formats in aggressive mode", () => {
			
 
				+			const input = "1 | line one\n| line two\n123 | line three"
			
 
				+			const expected = "line one\nline two\nline three"
			
 
				+			expect(stripLineNumbers(input, true)).toBe(expected)
			
 
				+		})
			
 
				+
			
 
				+		it("should not strip content with pipe characters not at start in aggressive mode", () => {
			
 
				+			const input = "text | more text\nx | y"
			
 
				+			expect(stripLineNumbers(input, true)).toBe(input)
			
 
				+		})
			
 
				+
			
 
				+		it("should handle empty content in aggressive mode", () => {
			
 
				+			expect(stripLineNumbers("", true)).toBe("")
			
 
				+		})
			
 
				+
			
 
				+		it("should preserve padding after pipe in aggressive mode", () => {
			
 
				+			const input = "|  line with extra spaces\n1 |  indented content"
			
 
				+			const expected = " line with extra spaces\n indented content"
			
 
				+			expect(stripLineNumbers(input, true)).toBe(expected)
			
 
				+		})
			
 
				+
			
 
				+		it("should preserve windows-style line endings in aggressive mode", () => {
			
 
				+			const input = "| line one\r\n| line two\r\n| line three"
			
 
				+			const expected = "line one\r\nline two\r\nline three"
			
 
				+			expect(stripLineNumbers(input, true)).toBe(expected)
			
 
				+		})
			
 
				+
			
 
				+		it("should not affect regular content when using aggressive mode", () => {
			
 
				+			const input = "regular line\nanother line\nno pipes here"
			
 
				+			expect(stripLineNumbers(input, true)).toBe(input)
			
 
				+		})
			
 
				+	})
			
 
				 })
			
 
				 
			
 
				 describe("truncateOutput", () => {
			
--- a/src/integrations/misc/extract-text.ts
+++ b/src/integrations/misc/extract-text.ts
@@ -86,17 +86,23 @@ export function everyLineHasLineNumbers(content: string): boolean {
 
				 	return lines.length > 0 && lines.every((line) => /^\s*\d+\s+\|(?!\|)/.test(line))
			
 
				 }
			
 
				 
			
 
				-// Strips line numbers from content while preserving the actual content
			
 
				-// Handles formats like "1 | content", " 12 | content", "123 | content"
			
 
				-// Preserves content that naturally starts with pipe characters
			
 
				-export function stripLineNumbers(content: string): string {
			
 
				+/**
			
 
				+ * Strips line numbers from content while preserving the actual content.
			
 
				+ *
			
 
				+ * @param content The content to process
			
 
				+ * @param aggressive When false (default): Only strips lines with clear number patterns like "123 | content"
			
 
				+ *                   When true: Uses a more lenient pattern that also matches lines with just a pipe character,
			
 
				+ *                   which can be useful when LLMs don't perfectly format the line numbers in diffs
			
 
				+ * @returns The content with line numbers removed
			
 
				+ */
			
 
				+export function stripLineNumbers(content: string, aggressive: boolean = false): string {
			
 
				 	// Split into lines to handle each line individually
			
 
				 	const lines = content.split(/\r?\n/)
			
 
				 
			
 
				 	// Process each line
			
 
				 	const processedLines = lines.map((line) => {
			
 
				 		// Match line number pattern and capture everything after the pipe
			
 
				-		const match = line.match(/^\s*\d+\s+\|(?!\|)\s?(.*)$/)
			
 
				+		const match = aggressive ? line.match(/^\s*(?:\d+\s)?\|\s(.*)$/) : line.match(/^\s*\d+\s+\|(?!\|)\s?(.*)$/)
			
 
				 		return match ? match[1] : line
			
 
				 	})