search-replace.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. import { DiffStrategy, DiffResult } from "../types"
  2. import { addLineNumbers, everyLineHasLineNumbers, stripLineNumbers } from "../../../integrations/misc/extract-text"
  3. const BUFFER_LINES = 20; // Number of extra context lines to show before and after matches
  4. function levenshteinDistance(a: string, b: string): number {
  5. const matrix: number[][] = [];
  6. // Initialize matrix
  7. for (let i = 0; i <= a.length; i++) {
  8. matrix[i] = [i];
  9. }
  10. for (let j = 0; j <= b.length; j++) {
  11. matrix[0][j] = j;
  12. }
  13. // Fill matrix
  14. for (let i = 1; i <= a.length; i++) {
  15. for (let j = 1; j <= b.length; j++) {
  16. if (a[i-1] === b[j-1]) {
  17. matrix[i][j] = matrix[i-1][j-1];
  18. } else {
  19. matrix[i][j] = Math.min(
  20. matrix[i-1][j-1] + 1, // substitution
  21. matrix[i][j-1] + 1, // insertion
  22. matrix[i-1][j] + 1 // deletion
  23. );
  24. }
  25. }
  26. }
  27. return matrix[a.length][b.length];
  28. }
  29. function getSimilarity(original: string, search: string): number {
  30. if (search === '') {
  31. return 1;
  32. }
  33. // Normalize strings by removing extra whitespace but preserve case
  34. const normalizeStr = (str: string) => str.replace(/\s+/g, ' ').trim();
  35. const normalizedOriginal = normalizeStr(original);
  36. const normalizedSearch = normalizeStr(search);
  37. if (normalizedOriginal === normalizedSearch) { return 1; }
  38. // Calculate Levenshtein distance
  39. const distance = levenshteinDistance(normalizedOriginal, normalizedSearch);
  40. // Calculate similarity ratio (0 to 1, where 1 is exact match)
  41. const maxLength = Math.max(normalizedOriginal.length, normalizedSearch.length);
  42. return 1 - (distance / maxLength);
  43. }
  44. export class SearchReplaceDiffStrategy implements DiffStrategy {
  45. private fuzzyThreshold: number;
  46. private bufferLines: number;
  47. constructor(fuzzyThreshold?: number, bufferLines?: number) {
  48. // Use provided threshold or default to exact matching (1.0)
  49. // Note: fuzzyThreshold is inverted in UI (0% = 1.0, 10% = 0.9)
  50. // so we use it directly here
  51. this.fuzzyThreshold = fuzzyThreshold ?? 1.0;
  52. this.bufferLines = bufferLines ?? BUFFER_LINES;
  53. }
  54. getToolDescription(cwd: string): string {
  55. return `## apply_diff
  56. Description: Request to replace existing code using a search and replace block.
  57. This tool allows for precise, surgical replaces to files by specifying exactly what content to search for and what to replace it with.
  58. The tool will maintain proper indentation and formatting while making changes.
  59. Only a single operation is allowed per tool use.
  60. The SEARCH section must exactly match existing content including whitespace and indentation.
  61. If you're not confident in the exact content to search for, use the read_file tool first to get the exact content.
  62. Parameters:
  63. - path: (required) The path of the file to modify (relative to the current working directory ${cwd})
  64. - diff: (required) The search/replace block defining the changes.
  65. - start_line: (required) The line number where the search block starts.
  66. - end_line: (required) The line number where the search block ends.
  67. Diff format:
  68. \`\`\`
  69. <<<<<<< SEARCH
  70. [exact content to find including whitespace]
  71. =======
  72. [new content to replace with]
  73. >>>>>>> REPLACE
  74. \`\`\`
  75. Example:
  76. Original file:
  77. \`\`\`
  78. 1 | def calculate_total(items):
  79. 2 | total = 0
  80. 3 | for item in items:
  81. 4 | total += item
  82. 5 | return total
  83. \`\`\`
  84. Search/Replace content:
  85. \`\`\`
  86. <<<<<<< SEARCH
  87. def calculate_total(items):
  88. total = 0
  89. for item in items:
  90. total += item
  91. return total
  92. =======
  93. def calculate_total(items):
  94. """Calculate total with 10% markup"""
  95. return sum(item * 1.1 for item in items)
  96. >>>>>>> REPLACE
  97. \`\`\`
  98. Usage:
  99. <apply_diff>
  100. <path>File path here</path>
  101. <diff>
  102. Your search/replace content here
  103. </diff>
  104. <start_line>1</start_line>
  105. <end_line>5</end_line>
  106. </apply_diff>`
  107. }
  108. applyDiff(originalContent: string, diffContent: string, startLine?: number, endLine?: number): DiffResult {
  109. // Extract the search and replace blocks
  110. const match = diffContent.match(/<<<<<<< SEARCH\n([\s\S]*?)\n?=======\n([\s\S]*?)\n?>>>>>>> REPLACE/);
  111. if (!match) {
  112. return {
  113. success: false,
  114. error: `Invalid diff format - missing required SEARCH/REPLACE sections\n\nDebug Info:\n- Expected Format: <<<<<<< SEARCH\\n[search content]\\n=======\\n[replace content]\\n>>>>>>> REPLACE\n- Tip: Make sure to include both SEARCH and REPLACE sections with correct markers`
  115. };
  116. }
  117. let [_, searchContent, replaceContent] = match;
  118. // Detect line ending from original content
  119. const lineEnding = originalContent.includes('\r\n') ? '\r\n' : '\n';
  120. // Strip line numbers from search and replace content if every line starts with a line number
  121. if (everyLineHasLineNumbers(searchContent) && everyLineHasLineNumbers(replaceContent)) {
  122. searchContent = stripLineNumbers(searchContent);
  123. replaceContent = stripLineNumbers(replaceContent);
  124. }
  125. // Split content into lines, handling both \n and \r\n
  126. const searchLines = searchContent === '' ? [] : searchContent.split(/\r?\n/);
  127. const replaceLines = replaceContent === '' ? [] : replaceContent.split(/\r?\n/);
  128. const originalLines = originalContent.split(/\r?\n/);
  129. // Validate that empty search requires start line
  130. if (searchLines.length === 0 && !startLine) {
  131. return {
  132. success: false,
  133. error: `Empty search content requires start_line to be specified\n\nDebug Info:\n- Empty search content is only valid for insertions at a specific line\n- For insertions, specify the line number where content should be inserted`
  134. };
  135. }
  136. // Validate that empty search requires same start and end line
  137. if (searchLines.length === 0 && startLine && endLine && startLine !== endLine) {
  138. return {
  139. success: false,
  140. error: `Empty search content requires start_line and end_line to be the same (got ${startLine}-${endLine})\n\nDebug Info:\n- Empty search content is only valid for insertions at a specific line\n- For insertions, use the same line number for both start_line and end_line`
  141. };
  142. }
  143. // Initialize search variables
  144. let matchIndex = -1;
  145. let bestMatchScore = 0;
  146. let bestMatchContent = "";
  147. const searchChunk = searchLines.join('\n');
  148. // Determine search bounds
  149. let searchStartIndex = 0;
  150. let searchEndIndex = originalLines.length;
  151. // Validate and handle line range if provided
  152. if (startLine && endLine) {
  153. // Convert to 0-based index
  154. const exactStartIndex = startLine - 1;
  155. const exactEndIndex = endLine - 1;
  156. if (exactStartIndex < 0 || exactEndIndex > originalLines.length || exactStartIndex > exactEndIndex) {
  157. return {
  158. success: false,
  159. error: `Line range ${startLine}-${endLine} is invalid (file has ${originalLines.length} lines)\n\nDebug Info:\n- Requested Range: lines ${startLine}-${endLine}\n- File Bounds: lines 1-${originalLines.length}`,
  160. };
  161. }
  162. // Try exact match first
  163. const originalChunk = originalLines.slice(exactStartIndex, exactEndIndex + 1).join('\n');
  164. const similarity = getSimilarity(originalChunk, searchChunk);
  165. if (similarity >= this.fuzzyThreshold) {
  166. matchIndex = exactStartIndex;
  167. bestMatchScore = similarity;
  168. bestMatchContent = originalChunk;
  169. } else {
  170. // Set bounds for buffered search
  171. searchStartIndex = Math.max(0, startLine - (this.bufferLines + 1));
  172. searchEndIndex = Math.min(originalLines.length, endLine + this.bufferLines);
  173. }
  174. }
  175. // If no match found yet, try middle-out search within bounds
  176. if (matchIndex === -1) {
  177. const midPoint = Math.floor((searchStartIndex + searchEndIndex) / 2);
  178. let leftIndex = midPoint;
  179. let rightIndex = midPoint + 1;
  180. // Search outward from the middle within bounds
  181. while (leftIndex >= searchStartIndex || rightIndex <= searchEndIndex - searchLines.length) {
  182. // Check left side if still in range
  183. if (leftIndex >= searchStartIndex) {
  184. const originalChunk = originalLines.slice(leftIndex, leftIndex + searchLines.length).join('\n');
  185. const similarity = getSimilarity(originalChunk, searchChunk);
  186. if (similarity > bestMatchScore) {
  187. bestMatchScore = similarity;
  188. matchIndex = leftIndex;
  189. bestMatchContent = originalChunk;
  190. }
  191. leftIndex--;
  192. }
  193. // Check right side if still in range
  194. if (rightIndex <= searchEndIndex - searchLines.length) {
  195. const originalChunk = originalLines.slice(rightIndex, rightIndex + searchLines.length).join('\n');
  196. const similarity = getSimilarity(originalChunk, searchChunk);
  197. if (similarity > bestMatchScore) {
  198. bestMatchScore = similarity;
  199. matchIndex = rightIndex;
  200. bestMatchContent = originalChunk;
  201. }
  202. rightIndex++;
  203. }
  204. }
  205. }
  206. // Require similarity to meet threshold
  207. if (matchIndex === -1 || bestMatchScore < this.fuzzyThreshold) {
  208. const searchChunk = searchLines.join('\n');
  209. const originalContentSection = startLine !== undefined && endLine !== undefined
  210. ? `\n\nOriginal Content:\n${addLineNumbers(
  211. originalLines.slice(
  212. Math.max(0, startLine - 1 - this.bufferLines),
  213. Math.min(originalLines.length, endLine + this.bufferLines)
  214. ).join('\n'),
  215. Math.max(1, startLine - this.bufferLines)
  216. )}`
  217. : `\n\nOriginal Content:\n${addLineNumbers(originalLines.join('\n'))}`;
  218. const bestMatchSection = bestMatchContent
  219. ? `\n\nBest Match Found:\n${addLineNumbers(bestMatchContent, matchIndex + 1)}`
  220. : `\n\nBest Match Found:\n(no match)`;
  221. const lineRange = startLine || endLine ?
  222. ` at ${startLine ? `start: ${startLine}` : 'start'} to ${endLine ? `end: ${endLine}` : 'end'}` : '';
  223. return {
  224. success: false,
  225. error: `No sufficiently similar match found${lineRange} (${Math.floor(bestMatchScore * 100)}% similar, needs ${Math.floor(this.fuzzyThreshold * 100)}%)\n\nDebug Info:\n- Similarity Score: ${Math.floor(bestMatchScore * 100)}%\n- Required Threshold: ${Math.floor(this.fuzzyThreshold * 100)}%\n- Search Range: ${startLine && endLine ? `lines ${startLine}-${endLine}` : 'start to end'}\n\nSearch Content:\n${searchChunk}${bestMatchSection}${originalContentSection}`
  226. };
  227. }
  228. // Get the matched lines from the original content
  229. const matchedLines = originalLines.slice(matchIndex, matchIndex + searchLines.length);
  230. // Get the exact indentation (preserving tabs/spaces) of each line
  231. const originalIndents = matchedLines.map(line => {
  232. const match = line.match(/^[\t ]*/);
  233. return match ? match[0] : '';
  234. });
  235. // Get the exact indentation of each line in the search block
  236. const searchIndents = searchLines.map(line => {
  237. const match = line.match(/^[\t ]*/);
  238. return match ? match[0] : '';
  239. });
  240. // Apply the replacement while preserving exact indentation
  241. const indentedReplaceLines = replaceLines.map((line, i) => {
  242. // Get the matched line's exact indentation
  243. const matchedIndent = originalIndents[0] || '';
  244. // Get the current line's indentation relative to the search content
  245. const currentIndentMatch = line.match(/^[\t ]*/);
  246. const currentIndent = currentIndentMatch ? currentIndentMatch[0] : '';
  247. const searchBaseIndent = searchIndents[0] || '';
  248. // Calculate the relative indentation level
  249. const searchBaseLevel = searchBaseIndent.length;
  250. const currentLevel = currentIndent.length;
  251. const relativeLevel = currentLevel - searchBaseLevel;
  252. // If relative level is negative, remove indentation from matched indent
  253. // If positive, add to matched indent
  254. const finalIndent = relativeLevel < 0
  255. ? matchedIndent.slice(0, Math.max(0, matchedIndent.length + relativeLevel))
  256. : matchedIndent + currentIndent.slice(searchBaseLevel);
  257. return finalIndent + line.trim();
  258. });
  259. // Construct the final content
  260. const beforeMatch = originalLines.slice(0, matchIndex);
  261. const afterMatch = originalLines.slice(matchIndex + searchLines.length);
  262. const finalContent = [...beforeMatch, ...indentedReplaceLines, ...afterMatch].join(lineEnding);
  263. return {
  264. success: true,
  265. content: finalContent
  266. };
  267. }
  268. }