multi-search-replace.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
  1. import { DiffStrategy, DiffResult } from "../types"
  2. import { addLineNumbers, everyLineHasLineNumbers, stripLineNumbers } from "../../../integrations/misc/extract-text"
  3. import { distance } from "fastest-levenshtein"
  4. const BUFFER_LINES = 40 // Number of extra context lines to show before and after matches
  5. function getSimilarity(original: string, search: string): number {
  6. if (search === "") {
  7. return 1
  8. }
  9. // Normalize strings by removing extra whitespace but preserve case
  10. const normalizeStr = (str: string) => str.replace(/\s+/g, " ").trim()
  11. const normalizedOriginal = normalizeStr(original)
  12. const normalizedSearch = normalizeStr(search)
  13. if (normalizedOriginal === normalizedSearch) {
  14. return 1
  15. }
  16. // Calculate Levenshtein distance using fastest-levenshtein's distance function
  17. const dist = distance(normalizedOriginal, normalizedSearch)
  18. // Calculate similarity ratio (0 to 1, where 1 is an exact match)
  19. const maxLength = Math.max(normalizedOriginal.length, normalizedSearch.length)
  20. return 1 - dist / maxLength
  21. }
  22. export class MultiSearchReplaceDiffStrategy implements DiffStrategy {
  23. private fuzzyThreshold: number
  24. private bufferLines: number
  25. constructor(fuzzyThreshold?: number, bufferLines?: number) {
  26. // Use provided threshold or default to exact matching (1.0)
  27. // Note: fuzzyThreshold is inverted in UI (0% = 1.0, 10% = 0.9)
  28. // so we use it directly here
  29. this.fuzzyThreshold = fuzzyThreshold ?? 1.0
  30. this.bufferLines = bufferLines ?? BUFFER_LINES
  31. }
  32. getToolDescription(args: { cwd: string; toolOptions?: { [key: string]: string } }): string {
  33. return `## apply_diff
  34. Description: Request to replace existing code using a search and replace block.
  35. This tool allows for precise, surgical replaces to files by specifying exactly what content to search for and what to replace it with.
  36. The tool will maintain proper indentation and formatting while making changes.
  37. Only a single operation is allowed per tool use.
  38. The SEARCH section must exactly match existing content including whitespace and indentation.
  39. If you're not confident in the exact content to search for, use the read_file tool first to get the exact content.
  40. When applying the diffs, be extra careful to remember to change any closing brackets or other syntax that may be affected by the diff farther down in the file.
  41. ALWAYS make as many changes in a single 'apply_diff' request as possible using multiple SEARCH/REPLACE blocks
  42. Parameters:
  43. - path: (required) The path of the file to modify (relative to the current working directory ${args.cwd})
  44. - diff: (required) The search/replace block defining the changes.
  45. Diff format:
  46. \`\`\`
  47. <<<<<<< SEARCH
  48. :start_line: (required) The line number of original content where the search block starts.
  49. :end_line: (required) The line number of original content where the search block ends.
  50. -------
  51. [exact content to find including whitespace]
  52. =======
  53. [new content to replace with]
  54. >>>>>>> REPLACE
  55. \`\`\`
  56. Example:
  57. Original file:
  58. \`\`\`
  59. 1 | def calculate_total(items):
  60. 2 | total = 0
  61. 3 | for item in items:
  62. 4 | total += item
  63. 5 | return total
  64. \`\`\`
  65. Search/Replace content:
  66. \`\`\`
  67. <<<<<<< SEARCH
  68. :start_line:1
  69. :end_line:5
  70. -------
  71. def calculate_total(items):
  72. total = 0
  73. for item in items:
  74. total += item
  75. return total
  76. =======
  77. def calculate_total(items):
  78. """Calculate total with 10% markup"""
  79. return sum(item * 1.1 for item in items)
  80. >>>>>>> REPLACE
  81. \`\`\`
  82. Search/Replace content with multi edits:
  83. \`\`\`
  84. <<<<<<< SEARCH
  85. :start_line:1
  86. :end_line:2
  87. -------
  88. def calculate_sum(items):
  89. sum = 0
  90. =======
  91. def calculate_sum(items):
  92. sum = 0
  93. >>>>>>> REPLACE
  94. <<<<<<< SEARCH
  95. :start_line:4
  96. :end_line:5
  97. -------
  98. total += item
  99. return total
  100. =======
  101. sum += item
  102. return sum
  103. >>>>>>> REPLACE
  104. \`\`\`
  105. Usage:
  106. <apply_diff>
  107. <path>File path here</path>
  108. <diff>
  109. Your search/replace content here
  110. You can use multi search/replace block in one diff block, but make sure to include the line numbers for each block.
  111. Only use a single line of '=======' between search and replacement content, because multiple '=======' will corrupt the file.
  112. </diff>
  113. </apply_diff>`
  114. }
  115. async applyDiff(
  116. originalContent: string,
  117. diffContent: string,
  118. _paramStartLine?: number,
  119. _paramEndLine?: number,
  120. ): Promise<DiffResult> {
  121. let matches = [
  122. ...diffContent.matchAll(
  123. /<<<<<<< SEARCH\n(:start_line:\s*(\d+)\n){0,1}(:end_line:\s*(\d+)\n){0,1}(-------\n){0,1}([\s\S]*?)\n?=======\n([\s\S]*?)\n?>>>>>>> REPLACE/g,
  124. ),
  125. ]
  126. if (matches.length === 0) {
  127. return {
  128. success: false,
  129. error: `Invalid diff format - missing required sections\n\nDebug Info:\n- Expected Format: <<<<<<< SEARCH\\n:start_line: start line\\n:end_line: end line\\n-------\\n[search content]\\n=======\\n[replace content]\\n>>>>>>> REPLACE\n- Tip: Make sure to include start_line/end_line/SEARCH/REPLACE sections with correct markers`,
  130. }
  131. }
  132. // Detect line ending from original content
  133. const lineEnding = originalContent.includes("\r\n") ? "\r\n" : "\n"
  134. let resultLines = originalContent.split(/\r?\n/)
  135. let delta = 0
  136. let diffResults: DiffResult[] = []
  137. let appliedCount = 0
  138. const replacements = matches
  139. .map((match) => ({
  140. startLine: Number(match[2] ?? 0),
  141. endLine: Number(match[4] ?? resultLines.length),
  142. searchContent: match[6],
  143. replaceContent: match[7],
  144. }))
  145. .sort((a, b) => a.startLine - b.startLine)
  146. for (let { searchContent, replaceContent, startLine, endLine } of replacements) {
  147. startLine += startLine === 0 ? 0 : delta
  148. endLine += delta
  149. // Strip line numbers from search and replace content if every line starts with a line number
  150. if (everyLineHasLineNumbers(searchContent) && everyLineHasLineNumbers(replaceContent)) {
  151. searchContent = stripLineNumbers(searchContent)
  152. replaceContent = stripLineNumbers(replaceContent)
  153. }
  154. // Split content into lines, handling both \n and \r\n
  155. const searchLines = searchContent === "" ? [] : searchContent.split(/\r?\n/)
  156. const replaceLines = replaceContent === "" ? [] : replaceContent.split(/\r?\n/)
  157. // Validate that empty search requires start line
  158. if (searchLines.length === 0 && !startLine) {
  159. diffResults.push({
  160. success: false,
  161. error: `Empty search content requires start_line to be specified\n\nDebug Info:\n- Empty search content is only valid for insertions at a specific line\n- For insertions, specify the line number where content should be inserted`,
  162. })
  163. continue
  164. }
  165. // Validate that empty search requires same start and end line
  166. if (searchLines.length === 0 && startLine && endLine && startLine !== endLine) {
  167. diffResults.push({
  168. success: false,
  169. error: `Empty search content requires start_line and end_line to be the same (got ${startLine}-${endLine})\n\nDebug Info:\n- Empty search content is only valid for insertions at a specific line\n- For insertions, use the same line number for both start_line and end_line`,
  170. })
  171. continue
  172. }
  173. // Initialize search variables
  174. let matchIndex = -1
  175. let bestMatchScore = 0
  176. let bestMatchContent = ""
  177. const searchChunk = searchLines.join("\n")
  178. // Determine search bounds
  179. let searchStartIndex = 0
  180. let searchEndIndex = resultLines.length
  181. // Validate and handle line range if provided
  182. if (startLine && endLine) {
  183. // Convert to 0-based index
  184. const exactStartIndex = startLine - 1
  185. const exactEndIndex = endLine - 1
  186. if (exactStartIndex < 0 || exactEndIndex > resultLines.length || exactStartIndex > exactEndIndex) {
  187. diffResults.push({
  188. success: false,
  189. error: `Line range ${startLine}-${endLine} is invalid (file has ${resultLines.length} lines)\n\nDebug Info:\n- Requested Range: lines ${startLine}-${endLine}\n- File Bounds: lines 1-${resultLines.length}`,
  190. })
  191. continue
  192. }
  193. // Try exact match first
  194. const originalChunk = resultLines.slice(exactStartIndex, exactEndIndex + 1).join("\n")
  195. const similarity = getSimilarity(originalChunk, searchChunk)
  196. if (similarity >= this.fuzzyThreshold) {
  197. matchIndex = exactStartIndex
  198. bestMatchScore = similarity
  199. bestMatchContent = originalChunk
  200. } else {
  201. // Set bounds for buffered search
  202. searchStartIndex = Math.max(0, startLine - (this.bufferLines + 1))
  203. searchEndIndex = Math.min(resultLines.length, endLine + this.bufferLines)
  204. }
  205. }
  206. // If no match found yet, try middle-out search within bounds
  207. if (matchIndex === -1) {
  208. const midPoint = Math.floor((searchStartIndex + searchEndIndex) / 2)
  209. let leftIndex = midPoint
  210. let rightIndex = midPoint + 1
  211. // Search outward from the middle within bounds
  212. while (leftIndex >= searchStartIndex || rightIndex <= searchEndIndex - searchLines.length) {
  213. // Check left side if still in range
  214. if (leftIndex >= searchStartIndex) {
  215. const originalChunk = resultLines.slice(leftIndex, leftIndex + searchLines.length).join("\n")
  216. const similarity = getSimilarity(originalChunk, searchChunk)
  217. if (similarity > bestMatchScore) {
  218. bestMatchScore = similarity
  219. matchIndex = leftIndex
  220. bestMatchContent = originalChunk
  221. }
  222. leftIndex--
  223. }
  224. // Check right side if still in range
  225. if (rightIndex <= searchEndIndex - searchLines.length) {
  226. const originalChunk = resultLines.slice(rightIndex, rightIndex + searchLines.length).join("\n")
  227. const similarity = getSimilarity(originalChunk, searchChunk)
  228. if (similarity > bestMatchScore) {
  229. bestMatchScore = similarity
  230. matchIndex = rightIndex
  231. bestMatchContent = originalChunk
  232. }
  233. rightIndex++
  234. }
  235. }
  236. }
  237. // Require similarity to meet threshold
  238. if (matchIndex === -1 || bestMatchScore < this.fuzzyThreshold) {
  239. const searchChunk = searchLines.join("\n")
  240. const originalContentSection =
  241. startLine !== undefined && endLine !== undefined
  242. ? `\n\nOriginal Content:\n${addLineNumbers(
  243. resultLines
  244. .slice(
  245. Math.max(0, startLine - 1 - this.bufferLines),
  246. Math.min(resultLines.length, endLine + this.bufferLines),
  247. )
  248. .join("\n"),
  249. Math.max(1, startLine - this.bufferLines),
  250. )}`
  251. : `\n\nOriginal Content:\n${addLineNumbers(resultLines.join("\n"))}`
  252. const bestMatchSection = bestMatchContent
  253. ? `\n\nBest Match Found:\n${addLineNumbers(bestMatchContent, matchIndex + 1)}`
  254. : `\n\nBest Match Found:\n(no match)`
  255. const lineRange =
  256. startLine || endLine
  257. ? ` at ${startLine ? `start: ${startLine}` : "start"} to ${endLine ? `end: ${endLine}` : "end"}`
  258. : ""
  259. diffResults.push({
  260. success: false,
  261. error: `No sufficiently similar match found${lineRange} (${Math.floor(bestMatchScore * 100)}% similar, needs ${Math.floor(this.fuzzyThreshold * 100)}%)\n\nDebug Info:\n- Similarity Score: ${Math.floor(bestMatchScore * 100)}%\n- Required Threshold: ${Math.floor(this.fuzzyThreshold * 100)}%\n- Search Range: ${startLine && endLine ? `lines ${startLine}-${endLine}` : "start to end"}\n- Tip: Use read_file to get the latest content of the file before attempting the diff again, as the file content may have changed\n\nSearch Content:\n${searchChunk}${bestMatchSection}${originalContentSection}`,
  262. })
  263. continue
  264. }
  265. // Get the matched lines from the original content
  266. const matchedLines = resultLines.slice(matchIndex, matchIndex + searchLines.length)
  267. // Get the exact indentation (preserving tabs/spaces) of each line
  268. const originalIndents = matchedLines.map((line) => {
  269. const match = line.match(/^[\t ]*/)
  270. return match ? match[0] : ""
  271. })
  272. // Get the exact indentation of each line in the search block
  273. const searchIndents = searchLines.map((line) => {
  274. const match = line.match(/^[\t ]*/)
  275. return match ? match[0] : ""
  276. })
  277. // Apply the replacement while preserving exact indentation
  278. const indentedReplaceLines = replaceLines.map((line, i) => {
  279. // Get the matched line's exact indentation
  280. const matchedIndent = originalIndents[0] || ""
  281. // Get the current line's indentation relative to the search content
  282. const currentIndentMatch = line.match(/^[\t ]*/)
  283. const currentIndent = currentIndentMatch ? currentIndentMatch[0] : ""
  284. const searchBaseIndent = searchIndents[0] || ""
  285. // Calculate the relative indentation level
  286. const searchBaseLevel = searchBaseIndent.length
  287. const currentLevel = currentIndent.length
  288. const relativeLevel = currentLevel - searchBaseLevel
  289. // If relative level is negative, remove indentation from matched indent
  290. // If positive, add to matched indent
  291. const finalIndent =
  292. relativeLevel < 0
  293. ? matchedIndent.slice(0, Math.max(0, matchedIndent.length + relativeLevel))
  294. : matchedIndent + currentIndent.slice(searchBaseLevel)
  295. return finalIndent + line.trim()
  296. })
  297. // Construct the final content
  298. const beforeMatch = resultLines.slice(0, matchIndex)
  299. const afterMatch = resultLines.slice(matchIndex + searchLines.length)
  300. resultLines = [...beforeMatch, ...indentedReplaceLines, ...afterMatch]
  301. delta = delta - matchedLines.length + replaceLines.length
  302. appliedCount++
  303. }
  304. const finalContent = resultLines.join(lineEnding)
  305. if (appliedCount === 0) {
  306. return {
  307. success: false,
  308. failParts: diffResults,
  309. }
  310. }
  311. return {
  312. success: true,
  313. content: finalContent,
  314. failParts: diffResults,
  315. }
  316. }
  317. }