| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316 |
- import { DiffStrategy, DiffResult } from "../types"
- import { addLineNumbers, everyLineHasLineNumbers, stripLineNumbers } from "../../../integrations/misc/extract-text"
- const BUFFER_LINES = 20; // Number of extra context lines to show before and after matches
- function levenshteinDistance(a: string, b: string): number {
- const matrix: number[][] = [];
- // Initialize matrix
- for (let i = 0; i <= a.length; i++) {
- matrix[i] = [i];
- }
- for (let j = 0; j <= b.length; j++) {
- matrix[0][j] = j;
- }
- // Fill matrix
- for (let i = 1; i <= a.length; i++) {
- for (let j = 1; j <= b.length; j++) {
- if (a[i-1] === b[j-1]) {
- matrix[i][j] = matrix[i-1][j-1];
- } else {
- matrix[i][j] = Math.min(
- matrix[i-1][j-1] + 1, // substitution
- matrix[i][j-1] + 1, // insertion
- matrix[i-1][j] + 1 // deletion
- );
- }
- }
- }
- return matrix[a.length][b.length];
- }
- function getSimilarity(original: string, search: string): number {
- if (search === '') {
- return 1;
- }
- // Normalize strings by removing extra whitespace but preserve case
- const normalizeStr = (str: string) => str.replace(/\s+/g, ' ').trim();
-
- const normalizedOriginal = normalizeStr(original);
- const normalizedSearch = normalizeStr(search);
-
- if (normalizedOriginal === normalizedSearch) { return 1; }
-
- // Calculate Levenshtein distance
- const distance = levenshteinDistance(normalizedOriginal, normalizedSearch);
-
- // Calculate similarity ratio (0 to 1, where 1 is exact match)
- const maxLength = Math.max(normalizedOriginal.length, normalizedSearch.length);
- return 1 - (distance / maxLength);
- }
- export class SearchReplaceDiffStrategy implements DiffStrategy {
- private fuzzyThreshold: number;
- private bufferLines: number;
- constructor(fuzzyThreshold?: number, bufferLines?: number) {
- // Use provided threshold or default to exact matching (1.0)
- // Note: fuzzyThreshold is inverted in UI (0% = 1.0, 10% = 0.9)
- // so we use it directly here
- this.fuzzyThreshold = fuzzyThreshold ?? 1.0;
- this.bufferLines = bufferLines ?? BUFFER_LINES;
- }
- getToolDescription(cwd: string): string {
- return `## apply_diff
- Description: Request to replace existing code using a search and replace block.
- This tool allows for precise, surgical replaces to files by specifying exactly what content to search for and what to replace it with.
- The tool will maintain proper indentation and formatting while making changes.
- Only a single operation is allowed per tool use.
- The SEARCH section must exactly match existing content including whitespace and indentation.
- If you're not confident in the exact content to search for, use the read_file tool first to get the exact content.
- Parameters:
- - path: (required) The path of the file to modify (relative to the current working directory ${cwd})
- - diff: (required) The search/replace block defining the changes.
- - start_line: (required) The line number where the search block starts.
- - end_line: (required) The line number where the search block ends.
- Diff format:
- \`\`\`
- <<<<<<< SEARCH
- [exact content to find including whitespace]
- =======
- [new content to replace with]
- >>>>>>> REPLACE
- \`\`\`
- Example:
- Original file:
- \`\`\`
- 1 | def calculate_total(items):
- 2 | total = 0
- 3 | for item in items:
- 4 | total += item
- 5 | return total
- \`\`\`
- Search/Replace content:
- \`\`\`
- <<<<<<< SEARCH
- def calculate_total(items):
- total = 0
- for item in items:
- total += item
- return total
- =======
- def calculate_total(items):
- """Calculate total with 10% markup"""
- return sum(item * 1.1 for item in items)
- >>>>>>> REPLACE
- \`\`\`
- Usage:
- <apply_diff>
- <path>File path here</path>
- <diff>
- Your search/replace content here
- </diff>
- <start_line>1</start_line>
- <end_line>5</end_line>
- </apply_diff>`
- }
- applyDiff(originalContent: string, diffContent: string, startLine?: number, endLine?: number): DiffResult {
- // Extract the search and replace blocks
- const match = diffContent.match(/<<<<<<< SEARCH\n([\s\S]*?)\n?=======\n([\s\S]*?)\n?>>>>>>> REPLACE/);
- if (!match) {
- return {
- success: false,
- error: `Invalid diff format - missing required SEARCH/REPLACE sections\n\nDebug Info:\n- Expected Format: <<<<<<< SEARCH\\n[search content]\\n=======\\n[replace content]\\n>>>>>>> REPLACE\n- Tip: Make sure to include both SEARCH and REPLACE sections with correct markers`
- };
- }
- let [_, searchContent, replaceContent] = match;
- // Detect line ending from original content
- const lineEnding = originalContent.includes('\r\n') ? '\r\n' : '\n';
- // Strip line numbers from search and replace content if every line starts with a line number
- if (everyLineHasLineNumbers(searchContent) && everyLineHasLineNumbers(replaceContent)) {
- searchContent = stripLineNumbers(searchContent);
- replaceContent = stripLineNumbers(replaceContent);
- }
-
- // Split content into lines, handling both \n and \r\n
- const searchLines = searchContent === '' ? [] : searchContent.split(/\r?\n/);
- const replaceLines = replaceContent === '' ? [] : replaceContent.split(/\r?\n/);
- const originalLines = originalContent.split(/\r?\n/);
- // Validate that empty search requires start line
- if (searchLines.length === 0 && !startLine) {
- return {
- success: false,
- error: `Empty search content requires start_line to be specified\n\nDebug Info:\n- Empty search content is only valid for insertions at a specific line\n- For insertions, specify the line number where content should be inserted`
- };
- }
- // Validate that empty search requires same start and end line
- if (searchLines.length === 0 && startLine && endLine && startLine !== endLine) {
- return {
- success: false,
- error: `Empty search content requires start_line and end_line to be the same (got ${startLine}-${endLine})\n\nDebug Info:\n- Empty search content is only valid for insertions at a specific line\n- For insertions, use the same line number for both start_line and end_line`
- };
- }
-
- // Initialize search variables
- let matchIndex = -1;
- let bestMatchScore = 0;
- let bestMatchContent = "";
- const searchChunk = searchLines.join('\n');
- // Determine search bounds
- let searchStartIndex = 0;
- let searchEndIndex = originalLines.length;
- // Validate and handle line range if provided
- if (startLine && endLine) {
- // Convert to 0-based index
- const exactStartIndex = startLine - 1;
- const exactEndIndex = endLine - 1;
- if (exactStartIndex < 0 || exactEndIndex > originalLines.length || exactStartIndex > exactEndIndex) {
- return {
- success: false,
- error: `Line range ${startLine}-${endLine} is invalid (file has ${originalLines.length} lines)\n\nDebug Info:\n- Requested Range: lines ${startLine}-${endLine}\n- File Bounds: lines 1-${originalLines.length}`,
- };
- }
- // Try exact match first
- const originalChunk = originalLines.slice(exactStartIndex, exactEndIndex + 1).join('\n');
- const similarity = getSimilarity(originalChunk, searchChunk);
- if (similarity >= this.fuzzyThreshold) {
- matchIndex = exactStartIndex;
- bestMatchScore = similarity;
- bestMatchContent = originalChunk;
- } else {
- // Set bounds for buffered search
- searchStartIndex = Math.max(0, startLine - (this.bufferLines + 1));
- searchEndIndex = Math.min(originalLines.length, endLine + this.bufferLines);
- }
- }
- // If no match found yet, try middle-out search within bounds
- if (matchIndex === -1) {
- const midPoint = Math.floor((searchStartIndex + searchEndIndex) / 2);
- let leftIndex = midPoint;
- let rightIndex = midPoint + 1;
- // Search outward from the middle within bounds
- while (leftIndex >= searchStartIndex || rightIndex <= searchEndIndex - searchLines.length) {
- // Check left side if still in range
- if (leftIndex >= searchStartIndex) {
- const originalChunk = originalLines.slice(leftIndex, leftIndex + searchLines.length).join('\n');
- const similarity = getSimilarity(originalChunk, searchChunk);
- if (similarity > bestMatchScore) {
- bestMatchScore = similarity;
- matchIndex = leftIndex;
- bestMatchContent = originalChunk;
- }
- leftIndex--;
- }
- // Check right side if still in range
- if (rightIndex <= searchEndIndex - searchLines.length) {
- const originalChunk = originalLines.slice(rightIndex, rightIndex + searchLines.length).join('\n');
- const similarity = getSimilarity(originalChunk, searchChunk);
- if (similarity > bestMatchScore) {
- bestMatchScore = similarity;
- matchIndex = rightIndex;
- bestMatchContent = originalChunk;
- }
- rightIndex++;
- }
- }
- }
- // Require similarity to meet threshold
- if (matchIndex === -1 || bestMatchScore < this.fuzzyThreshold) {
- const searchChunk = searchLines.join('\n');
- const originalContentSection = startLine !== undefined && endLine !== undefined
- ? `\n\nOriginal Content:\n${addLineNumbers(
- originalLines.slice(
- Math.max(0, startLine - 1 - this.bufferLines),
- Math.min(originalLines.length, endLine + this.bufferLines)
- ).join('\n'),
- Math.max(1, startLine - this.bufferLines)
- )}`
- : `\n\nOriginal Content:\n${addLineNumbers(originalLines.join('\n'))}`;
- const bestMatchSection = bestMatchContent
- ? `\n\nBest Match Found:\n${addLineNumbers(bestMatchContent, matchIndex + 1)}`
- : `\n\nBest Match Found:\n(no match)`;
- const lineRange = startLine || endLine ?
- ` at ${startLine ? `start: ${startLine}` : 'start'} to ${endLine ? `end: ${endLine}` : 'end'}` : '';
- return {
- success: false,
- error: `No sufficiently similar match found${lineRange} (${Math.floor(bestMatchScore * 100)}% similar, needs ${Math.floor(this.fuzzyThreshold * 100)}%)\n\nDebug Info:\n- Similarity Score: ${Math.floor(bestMatchScore * 100)}%\n- Required Threshold: ${Math.floor(this.fuzzyThreshold * 100)}%\n- Search Range: ${startLine && endLine ? `lines ${startLine}-${endLine}` : 'start to end'}\n\nSearch Content:\n${searchChunk}${bestMatchSection}${originalContentSection}`
- };
- }
- // Get the matched lines from the original content
- const matchedLines = originalLines.slice(matchIndex, matchIndex + searchLines.length);
-
- // Get the exact indentation (preserving tabs/spaces) of each line
- const originalIndents = matchedLines.map(line => {
- const match = line.match(/^[\t ]*/);
- return match ? match[0] : '';
- });
- // Get the exact indentation of each line in the search block
- const searchIndents = searchLines.map(line => {
- const match = line.match(/^[\t ]*/);
- return match ? match[0] : '';
- });
- // Apply the replacement while preserving exact indentation
- const indentedReplaceLines = replaceLines.map((line, i) => {
- // Get the matched line's exact indentation
- const matchedIndent = originalIndents[0] || '';
-
- // Get the current line's indentation relative to the search content
- const currentIndentMatch = line.match(/^[\t ]*/);
- const currentIndent = currentIndentMatch ? currentIndentMatch[0] : '';
- const searchBaseIndent = searchIndents[0] || '';
-
- // Calculate the relative indentation level
- const searchBaseLevel = searchBaseIndent.length;
- const currentLevel = currentIndent.length;
- const relativeLevel = currentLevel - searchBaseLevel;
-
- // If relative level is negative, remove indentation from matched indent
- // If positive, add to matched indent
- const finalIndent = relativeLevel < 0
- ? matchedIndent.slice(0, Math.max(0, matchedIndent.length + relativeLevel))
- : matchedIndent + currentIndent.slice(searchBaseLevel);
-
- return finalIndent + line.trim();
- });
- // Construct the final content
- const beforeMatch = originalLines.slice(0, matchIndex);
- const afterMatch = originalLines.slice(matchIndex + searchLines.length);
-
- const finalContent = [...beforeMatch, ...indentedReplaceLines, ...afterMatch].join(lineEnding);
- return {
- success: true,
- content: finalContent
- };
- }
- }
|