| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365 |
- import { DiffStrategy, DiffResult } from "../types"
- import { addLineNumbers, everyLineHasLineNumbers, stripLineNumbers } from "../../../integrations/misc/extract-text"
- import { distance } from "fastest-levenshtein"
- const BUFFER_LINES = 40 // Number of extra context lines to show before and after matches
- function getSimilarity(original: string, search: string): number {
- if (search === "") {
- return 1
- }
- // Normalize strings by removing extra whitespace but preserve case
- const normalizeStr = (str: string) => str.replace(/\s+/g, " ").trim()
- const normalizedOriginal = normalizeStr(original)
- const normalizedSearch = normalizeStr(search)
- if (normalizedOriginal === normalizedSearch) {
- return 1
- }
- // Calculate Levenshtein distance using fastest-levenshtein's distance function
- const dist = distance(normalizedOriginal, normalizedSearch)
- // Calculate similarity ratio (0 to 1, where 1 is an exact match)
- const maxLength = Math.max(normalizedOriginal.length, normalizedSearch.length)
- return 1 - dist / maxLength
- }
- export class MultiSearchReplaceDiffStrategy implements DiffStrategy {
- private fuzzyThreshold: number
- private bufferLines: number
- constructor(fuzzyThreshold?: number, bufferLines?: number) {
- // Use provided threshold or default to exact matching (1.0)
- // Note: fuzzyThreshold is inverted in UI (0% = 1.0, 10% = 0.9)
- // so we use it directly here
- this.fuzzyThreshold = fuzzyThreshold ?? 1.0
- this.bufferLines = bufferLines ?? BUFFER_LINES
- }
- getToolDescription(args: { cwd: string; toolOptions?: { [key: string]: string } }): string {
- return `## apply_diff
- Description: Request to replace existing code using a search and replace block.
- This tool allows for precise, surgical replaces to files by specifying exactly what content to search for and what to replace it with.
- The tool will maintain proper indentation and formatting while making changes.
- Only a single operation is allowed per tool use.
- The SEARCH section must exactly match existing content including whitespace and indentation.
- If you're not confident in the exact content to search for, use the read_file tool first to get the exact content.
- When applying the diffs, be extra careful to remember to change any closing brackets or other syntax that may be affected by the diff farther down in the file.
- ALWAYS make as many changes in a single 'apply_diff' request as possible using multiple SEARCH/REPLACE blocks
- Parameters:
- - path: (required) The path of the file to modify (relative to the current working directory ${args.cwd})
- - diff: (required) The search/replace block defining the changes.
- Diff format:
- \`\`\`
- <<<<<<< SEARCH
- :start_line: (required) The line number of original content where the search block starts.
- :end_line: (required) The line number of original content where the search block ends.
- -------
- [exact content to find including whitespace]
- =======
- [new content to replace with]
- >>>>>>> REPLACE
- \`\`\`
- Example:
- Original file:
- \`\`\`
- 1 | def calculate_total(items):
- 2 | total = 0
- 3 | for item in items:
- 4 | total += item
- 5 | return total
- \`\`\`
- Search/Replace content:
- \`\`\`
- <<<<<<< SEARCH
- :start_line:1
- :end_line:5
- -------
- def calculate_total(items):
- total = 0
- for item in items:
- total += item
- return total
- =======
- def calculate_total(items):
- """Calculate total with 10% markup"""
- return sum(item * 1.1 for item in items)
- >>>>>>> REPLACE
- \`\`\`
- Search/Replace content with multi edits:
- \`\`\`
- <<<<<<< SEARCH
- :start_line:1
- :end_line:2
- -------
- def calculate_sum(items):
- sum = 0
- =======
- def calculate_sum(items):
- sum = 0
- >>>>>>> REPLACE
- <<<<<<< SEARCH
- :start_line:4
- :end_line:5
- -------
- total += item
- return total
- =======
- sum += item
- return sum
- >>>>>>> REPLACE
- \`\`\`
- Usage:
- <apply_diff>
- <path>File path here</path>
- <diff>
- Your search/replace content here
- You can use multi search/replace block in one diff block, but make sure to include the line numbers for each block.
- Only use a single line of '=======' between search and replacement content, because multiple '=======' will corrupt the file.
- </diff>
- </apply_diff>`
- }
- async applyDiff(
- originalContent: string,
- diffContent: string,
- _paramStartLine?: number,
- _paramEndLine?: number,
- ): Promise<DiffResult> {
- let matches = [
- ...diffContent.matchAll(
- /<<<<<<< SEARCH\n(:start_line:\s*(\d+)\n){0,1}(:end_line:\s*(\d+)\n){0,1}(-------\n){0,1}([\s\S]*?)\n?=======\n([\s\S]*?)\n?>>>>>>> REPLACE/g,
- ),
- ]
- if (matches.length === 0) {
- return {
- success: false,
- error: `Invalid diff format - missing required sections\n\nDebug Info:\n- Expected Format: <<<<<<< SEARCH\\n:start_line: start line\\n:end_line: end line\\n-------\\n[search content]\\n=======\\n[replace content]\\n>>>>>>> REPLACE\n- Tip: Make sure to include start_line/end_line/SEARCH/REPLACE sections with correct markers`,
- }
- }
- // Detect line ending from original content
- const lineEnding = originalContent.includes("\r\n") ? "\r\n" : "\n"
- let resultLines = originalContent.split(/\r?\n/)
- let delta = 0
- let diffResults: DiffResult[] = []
- let appliedCount = 0
- const replacements = matches
- .map((match) => ({
- startLine: Number(match[2] ?? 0),
- endLine: Number(match[4] ?? resultLines.length),
- searchContent: match[6],
- replaceContent: match[7],
- }))
- .sort((a, b) => a.startLine - b.startLine)
- for (let { searchContent, replaceContent, startLine, endLine } of replacements) {
- startLine += startLine === 0 ? 0 : delta
- endLine += delta
- // Strip line numbers from search and replace content if every line starts with a line number
- if (everyLineHasLineNumbers(searchContent) && everyLineHasLineNumbers(replaceContent)) {
- searchContent = stripLineNumbers(searchContent)
- replaceContent = stripLineNumbers(replaceContent)
- }
- // Split content into lines, handling both \n and \r\n
- const searchLines = searchContent === "" ? [] : searchContent.split(/\r?\n/)
- const replaceLines = replaceContent === "" ? [] : replaceContent.split(/\r?\n/)
- // Validate that empty search requires start line
- if (searchLines.length === 0 && !startLine) {
- diffResults.push({
- success: false,
- error: `Empty search content requires start_line to be specified\n\nDebug Info:\n- Empty search content is only valid for insertions at a specific line\n- For insertions, specify the line number where content should be inserted`,
- })
- continue
- }
- // Validate that empty search requires same start and end line
- if (searchLines.length === 0 && startLine && endLine && startLine !== endLine) {
- diffResults.push({
- success: false,
- error: `Empty search content requires start_line and end_line to be the same (got ${startLine}-${endLine})\n\nDebug Info:\n- Empty search content is only valid for insertions at a specific line\n- For insertions, use the same line number for both start_line and end_line`,
- })
- continue
- }
- // Initialize search variables
- let matchIndex = -1
- let bestMatchScore = 0
- let bestMatchContent = ""
- const searchChunk = searchLines.join("\n")
- // Determine search bounds
- let searchStartIndex = 0
- let searchEndIndex = resultLines.length
- // Validate and handle line range if provided
- if (startLine && endLine) {
- // Convert to 0-based index
- const exactStartIndex = startLine - 1
- const exactEndIndex = endLine - 1
- if (exactStartIndex < 0 || exactEndIndex > resultLines.length || exactStartIndex > exactEndIndex) {
- diffResults.push({
- success: false,
- error: `Line range ${startLine}-${endLine} is invalid (file has ${resultLines.length} lines)\n\nDebug Info:\n- Requested Range: lines ${startLine}-${endLine}\n- File Bounds: lines 1-${resultLines.length}`,
- })
- continue
- }
- // Try exact match first
- const originalChunk = resultLines.slice(exactStartIndex, exactEndIndex + 1).join("\n")
- const similarity = getSimilarity(originalChunk, searchChunk)
- if (similarity >= this.fuzzyThreshold) {
- matchIndex = exactStartIndex
- bestMatchScore = similarity
- bestMatchContent = originalChunk
- } else {
- // Set bounds for buffered search
- searchStartIndex = Math.max(0, startLine - (this.bufferLines + 1))
- searchEndIndex = Math.min(resultLines.length, endLine + this.bufferLines)
- }
- }
- // If no match found yet, try middle-out search within bounds
- if (matchIndex === -1) {
- const midPoint = Math.floor((searchStartIndex + searchEndIndex) / 2)
- let leftIndex = midPoint
- let rightIndex = midPoint + 1
- // Search outward from the middle within bounds
- while (leftIndex >= searchStartIndex || rightIndex <= searchEndIndex - searchLines.length) {
- // Check left side if still in range
- if (leftIndex >= searchStartIndex) {
- const originalChunk = resultLines.slice(leftIndex, leftIndex + searchLines.length).join("\n")
- const similarity = getSimilarity(originalChunk, searchChunk)
- if (similarity > bestMatchScore) {
- bestMatchScore = similarity
- matchIndex = leftIndex
- bestMatchContent = originalChunk
- }
- leftIndex--
- }
- // Check right side if still in range
- if (rightIndex <= searchEndIndex - searchLines.length) {
- const originalChunk = resultLines.slice(rightIndex, rightIndex + searchLines.length).join("\n")
- const similarity = getSimilarity(originalChunk, searchChunk)
- if (similarity > bestMatchScore) {
- bestMatchScore = similarity
- matchIndex = rightIndex
- bestMatchContent = originalChunk
- }
- rightIndex++
- }
- }
- }
- // Require similarity to meet threshold
- if (matchIndex === -1 || bestMatchScore < this.fuzzyThreshold) {
- const searchChunk = searchLines.join("\n")
- const originalContentSection =
- startLine !== undefined && endLine !== undefined
- ? `\n\nOriginal Content:\n${addLineNumbers(
- resultLines
- .slice(
- Math.max(0, startLine - 1 - this.bufferLines),
- Math.min(resultLines.length, endLine + this.bufferLines),
- )
- .join("\n"),
- Math.max(1, startLine - this.bufferLines),
- )}`
- : `\n\nOriginal Content:\n${addLineNumbers(resultLines.join("\n"))}`
- const bestMatchSection = bestMatchContent
- ? `\n\nBest Match Found:\n${addLineNumbers(bestMatchContent, matchIndex + 1)}`
- : `\n\nBest Match Found:\n(no match)`
- const lineRange =
- startLine || endLine
- ? ` at ${startLine ? `start: ${startLine}` : "start"} to ${endLine ? `end: ${endLine}` : "end"}`
- : ""
- diffResults.push({
- success: false,
- error: `No sufficiently similar match found${lineRange} (${Math.floor(bestMatchScore * 100)}% similar, needs ${Math.floor(this.fuzzyThreshold * 100)}%)\n\nDebug Info:\n- Similarity Score: ${Math.floor(bestMatchScore * 100)}%\n- Required Threshold: ${Math.floor(this.fuzzyThreshold * 100)}%\n- Search Range: ${startLine && endLine ? `lines ${startLine}-${endLine}` : "start to end"}\n- Tip: Use read_file to get the latest content of the file before attempting the diff again, as the file content may have changed\n\nSearch Content:\n${searchChunk}${bestMatchSection}${originalContentSection}`,
- })
- continue
- }
- // Get the matched lines from the original content
- const matchedLines = resultLines.slice(matchIndex, matchIndex + searchLines.length)
- // Get the exact indentation (preserving tabs/spaces) of each line
- const originalIndents = matchedLines.map((line) => {
- const match = line.match(/^[\t ]*/)
- return match ? match[0] : ""
- })
- // Get the exact indentation of each line in the search block
- const searchIndents = searchLines.map((line) => {
- const match = line.match(/^[\t ]*/)
- return match ? match[0] : ""
- })
- // Apply the replacement while preserving exact indentation
- const indentedReplaceLines = replaceLines.map((line, i) => {
- // Get the matched line's exact indentation
- const matchedIndent = originalIndents[0] || ""
- // Get the current line's indentation relative to the search content
- const currentIndentMatch = line.match(/^[\t ]*/)
- const currentIndent = currentIndentMatch ? currentIndentMatch[0] : ""
- const searchBaseIndent = searchIndents[0] || ""
- // Calculate the relative indentation level
- const searchBaseLevel = searchBaseIndent.length
- const currentLevel = currentIndent.length
- const relativeLevel = currentLevel - searchBaseLevel
- // If relative level is negative, remove indentation from matched indent
- // If positive, add to matched indent
- const finalIndent =
- relativeLevel < 0
- ? matchedIndent.slice(0, Math.max(0, matchedIndent.length + relativeLevel))
- : matchedIndent + currentIndent.slice(searchBaseLevel)
- return finalIndent + line.trim()
- })
- // Construct the final content
- const beforeMatch = resultLines.slice(0, matchIndex)
- const afterMatch = resultLines.slice(matchIndex + searchLines.length)
- resultLines = [...beforeMatch, ...indentedReplaceLines, ...afterMatch]
- delta = delta - matchedLines.length + replaceLines.length
- appliedCount++
- }
- const finalContent = resultLines.join(lineEnding)
- if (appliedCount === 0) {
- return {
- success: false,
- failParts: diffResults,
- }
- }
- return {
- success: true,
- content: finalContent,
- failParts: diffResults,
- }
- }
- }
|