diff-06-26-25.ts 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960
  1. const SEARCH_BLOCK_START = "------- SEARCH"
  2. const SEARCH_BLOCK_END = "======="
  3. const REPLACE_BLOCK_END = "+++++++ REPLACE"
  4. const SEARCH_BLOCK_CHAR = "-"
  5. const REPLACE_BLOCK_CHAR = "+"
  6. const LEGACY_SEARCH_BLOCK_CHAR = "<"
  7. const LEGACY_REPLACE_BLOCK_CHAR = ">"
  8. // Replace the exact string constants with flexible regex patterns
  9. const SEARCH_BLOCK_START_REGEX = /^[-]{3,} SEARCH>?$/
  10. const LEGACY_SEARCH_BLOCK_START_REGEX = /^[<]{3,} SEARCH>?$/
  11. const SEARCH_BLOCK_END_REGEX = /^[=]{3,}$/
  12. const REPLACE_BLOCK_END_REGEX = /^[+]{3,} REPLACE>?$/
  13. const LEGACY_REPLACE_BLOCK_END_REGEX = /^[>]{3,} REPLACE>?$/
  14. // Similarity thresholds for block anchor fallback matching
  15. const SINGLE_CANDIDATE_SIMILARITY_THRESHOLD = 0.0
  16. const MULTIPLE_CANDIDATES_SIMILARITY_THRESHOLD = 0.0
  17. /**
  18. * Levenshtein distance algorithm implementation
  19. */
  20. function levenshtein(a: string, b: string): number {
  21. // Handle empty strings
  22. if (a === "" || b === "") {
  23. return Math.max(a.length, b.length)
  24. }
  25. const matrix = Array.from({ length: a.length + 1 }, (_, i) =>
  26. Array.from({ length: b.length + 1 }, (_, j) => (i === 0 ? j : j === 0 ? i : 0)),
  27. )
  28. for (let i = 1; i <= a.length; i++) {
  29. for (let j = 1; j <= b.length; j++) {
  30. const cost = a[i - 1] === b[j - 1] ? 0 : 1
  31. matrix[i][j] = Math.min(matrix[i - 1][j] + 1, matrix[i][j - 1] + 1, matrix[i - 1][j - 1] + cost)
  32. }
  33. }
  34. return matrix[a.length][b.length]
  35. }
  36. // Helper functions to check if a line matches the flexible patterns
  37. function isSearchBlockStart(line: string): boolean {
  38. return SEARCH_BLOCK_START_REGEX.test(line) || LEGACY_SEARCH_BLOCK_START_REGEX.test(line)
  39. }
  40. function isSearchBlockEnd(line: string): boolean {
  41. return SEARCH_BLOCK_END_REGEX.test(line)
  42. }
  43. function isReplaceBlockEnd(line: string): boolean {
  44. return REPLACE_BLOCK_END_REGEX.test(line) || LEGACY_REPLACE_BLOCK_END_REGEX.test(line)
  45. }
  46. /**
  47. * Attempts a line-trimmed fallback match for the given search content in the original content.
  48. * It tries to match `searchContent` lines against a block of lines in `originalContent` starting
  49. * from `lastProcessedIndex`. Lines are matched by trimming leading/trailing whitespace and ensuring
  50. * they are identical afterwards.
  51. *
  52. * Returns [matchIndexStart, matchIndexEnd] if found, or false if not found.
  53. */
  54. function lineTrimmedFallbackMatch(originalContent: string, searchContent: string, startIndex: number): [number, number] | false {
  55. // Split both contents into lines
  56. const originalLines = originalContent.split("\n")
  57. const searchLines = searchContent.split("\n")
  58. // Trim trailing empty line if exists (from the trailing \n in searchContent)
  59. if (searchLines[searchLines.length - 1] === "") {
  60. searchLines.pop()
  61. }
  62. // Find the line number where startIndex falls
  63. let startLineNum = 0
  64. let currentIndex = 0
  65. while (currentIndex < startIndex && startLineNum < originalLines.length) {
  66. currentIndex += originalLines[startLineNum].length + 1 // +1 for \n
  67. startLineNum++
  68. }
  69. // For each possible starting position in original content
  70. for (let i = startLineNum; i <= originalLines.length - searchLines.length; i++) {
  71. let matches = true
  72. // Try to match all search lines from this position
  73. for (let j = 0; j < searchLines.length; j++) {
  74. const originalTrimmed = originalLines[i + j].trim()
  75. const searchTrimmed = searchLines[j].trim()
  76. if (originalTrimmed !== searchTrimmed) {
  77. matches = false
  78. break
  79. }
  80. }
  81. // If we found a match, calculate the exact character positions
  82. if (matches) {
  83. // Find start character index
  84. let matchStartIndex = 0
  85. for (let k = 0; k < i; k++) {
  86. matchStartIndex += originalLines[k].length + 1 // +1 for \n
  87. }
  88. // Find end character index
  89. let matchEndIndex = matchStartIndex
  90. for (let k = 0; k < searchLines.length; k++) {
  91. matchEndIndex += originalLines[i + k].length + 1 // +1 for \n
  92. }
  93. return [matchStartIndex, matchEndIndex]
  94. }
  95. }
  96. return false
  97. }
  98. /**
  99. * Attempts to match blocks of code by using the first and last lines as anchors,
  100. * with similarity checking to prevent false positives.
  101. * This is a third-tier fallback strategy that helps match blocks where we can identify
  102. * the correct location by matching the beginning and end, even if the exact content
  103. * differs slightly.
  104. *
  105. * The matching strategy:
  106. * 1. Only attempts to match blocks of 3 or more lines to avoid false positives
  107. * 2. Extracts from the search content:
  108. * - First line as the "start anchor"
  109. * - Last line as the "end anchor"
  110. * 3. Collects all candidate positions where both anchors match
  111. * 4. Uses levenshtein distance to calculate similarity of middle lines
  112. * 5. Returns match only if similarity meets threshold requirements
  113. *
  114. * This approach is particularly useful for matching blocks of code where:
  115. * - The exact content might have minor differences
  116. * - The beginning and end of the block are distinctive enough to serve as anchors
  117. * - The overall structure (number of lines) remains the same
  118. * - The middle content is reasonably similar (prevents false positives)
  119. *
  120. * @param originalContent - The full content of the original file
  121. * @param searchContent - The content we're trying to find in the original file
  122. * @param startIndex - The character index in originalContent where to start searching
  123. * @returns A tuple of [startIndex, endIndex] if a match is found, false otherwise
  124. */
  125. function blockAnchorFallbackMatch(originalContent: string, searchContent: string, startIndex: number): [number, number, number] | false {
  126. const originalLines = originalContent.split("\n")
  127. const searchLines = searchContent.split("\n")
  128. // Only use this approach for blocks of 3+ lines
  129. if (searchLines.length < 3) {
  130. return false
  131. }
  132. // Trim trailing empty line if exists
  133. if (searchLines[searchLines.length - 1] === "") {
  134. searchLines.pop()
  135. }
  136. const firstLineSearch = searchLines[0].trim()
  137. const lastLineSearch = searchLines[searchLines.length - 1].trim()
  138. const searchBlockSize = searchLines.length
  139. // Find the line number where startIndex falls
  140. let startLineNum = 0
  141. let currentIndex = 0
  142. while (currentIndex < startIndex && startLineNum < originalLines.length) {
  143. currentIndex += originalLines[startLineNum].length + 1
  144. startLineNum++
  145. }
  146. // Collect all candidate positions
  147. const candidates: number[] = []
  148. for (let i = startLineNum; i <= originalLines.length - searchBlockSize; i++) {
  149. if (originalLines[i].trim() === firstLineSearch && originalLines[i + searchBlockSize - 1].trim() === lastLineSearch) {
  150. candidates.push(i)
  151. }
  152. }
  153. // Return immediately if no candidates
  154. if (candidates.length === 0) {
  155. return false
  156. }
  157. // Handle single candidate scenario (using relaxed threshold)
  158. if (candidates.length === 1) {
  159. const i = candidates[0]
  160. let similarity = 0
  161. let linesToCheck = searchBlockSize - 2
  162. for (let j = 1; j < searchBlockSize - 1; j++) {
  163. const originalLine = originalLines[i + j].trim()
  164. const searchLine = searchLines[j].trim()
  165. const maxLen = Math.max(originalLine.length, searchLine.length)
  166. if (maxLen === 0) {
  167. continue
  168. }
  169. const distance = levenshtein(originalLine, searchLine)
  170. similarity += (1 - distance / maxLen) / linesToCheck
  171. // Exit early when threshold is reached
  172. if (similarity >= SINGLE_CANDIDATE_SIMILARITY_THRESHOLD) {
  173. break
  174. }
  175. }
  176. if (similarity >= SINGLE_CANDIDATE_SIMILARITY_THRESHOLD) {
  177. let matchStartIndex = 0
  178. for (let k = 0; k < i; k++) {
  179. matchStartIndex += originalLines[k].length + 1
  180. }
  181. let matchEndIndex = matchStartIndex
  182. for (let k = 0; k < searchBlockSize; k++) {
  183. matchEndIndex += originalLines[i + k].length + 1
  184. }
  185. return [matchStartIndex, matchEndIndex, similarity]
  186. }
  187. return false
  188. }
  189. // Calculate similarity for multiple candidates
  190. let bestMatchIndex = -1
  191. let maxSimilarity = -1
  192. for (const i of candidates) {
  193. let similarity = 0
  194. for (let j = 1; j < searchBlockSize - 1; j++) {
  195. const originalLine = originalLines[i + j].trim()
  196. const searchLine = searchLines[j].trim()
  197. const maxLen = Math.max(originalLine.length, searchLine.length)
  198. if (maxLen === 0) {
  199. continue
  200. }
  201. const distance = levenshtein(originalLine, searchLine)
  202. similarity += 1 - distance / maxLen
  203. }
  204. similarity /= searchBlockSize - 2 // Average similarity
  205. if (similarity > maxSimilarity) {
  206. maxSimilarity = similarity
  207. bestMatchIndex = i
  208. }
  209. }
  210. // Threshold judgment
  211. if (maxSimilarity >= MULTIPLE_CANDIDATES_SIMILARITY_THRESHOLD) {
  212. const i = bestMatchIndex
  213. let matchStartIndex = 0
  214. for (let k = 0; k < i; k++) {
  215. matchStartIndex += originalLines[k].length + 1
  216. }
  217. let matchEndIndex = matchStartIndex
  218. for (let k = 0; k < searchBlockSize; k++) {
  219. matchEndIndex += originalLines[i + k].length + 1
  220. }
  221. return [matchStartIndex, matchEndIndex, maxSimilarity]
  222. }
  223. return false
  224. }
  225. /**
  226. * This function reconstructs the file content by applying a streamed diff (in a
  227. * specialized SEARCH/REPLACE block format) to the original file content. It is designed
  228. * to handle both incremental updates and the final resulting file after all chunks have
  229. * been processed.
  230. *
  231. * The diff format is a custom structure that uses three markers to define changes:
  232. *
  233. * ------- SEARCH
  234. * [Exact content to find in the original file]
  235. * =======
  236. * [Content to replace with]
  237. * +++++++ REPLACE
  238. *
  239. * Behavior and Assumptions:
  240. * 1. The file is processed chunk-by-chunk. Each chunk of `diffContent` may contain
  241. * partial or complete SEARCH/REPLACE blocks. By calling this function with each
  242. * incremental chunk (with `isFinal` indicating the last chunk), the final reconstructed
  243. * file content is produced.
  244. *
  245. * 2. Matching Strategy (in order of attempt):
  246. * a. Exact Match: First attempts to find the exact SEARCH block text in the original file
  247. * b. Line-Trimmed Match: Falls back to line-by-line comparison ignoring leading/trailing whitespace
  248. * c. Block Anchor Match: For blocks of 3+ lines, tries to match using first/last lines as anchors
  249. * If all matching strategies fail, an error is thrown.
  250. *
  251. * 3. Empty SEARCH Section:
  252. * - If SEARCH is empty and the original file is empty, this indicates creating a new file
  253. * (pure insertion).
  254. * - If SEARCH is empty and the original file is not empty, this indicates a complete
  255. * file replacement (the entire original content is considered matched and replaced).
  256. *
  257. * 4. Applying Changes:
  258. * - Before encountering the "=======" marker, lines are accumulated as search content.
  259. * - After "=======" and before ">>>>>>> REPLACE", lines are accumulated as replacement content.
  260. * - Once the block is complete (">>>>>>> REPLACE"), the matched section in the original
  261. * file is replaced with the accumulated replacement lines, and the position in the original
  262. * file is advanced.
  263. *
  264. * 5. Incremental Output:
  265. * - As soon as the match location is found and we are in the REPLACE section, each new
  266. * replacement line is appended to the result so that partial updates can be viewed
  267. * incrementally.
  268. *
  269. * 6. Partial Markers:
  270. * - If the final line of the chunk looks like it might be part of a marker but is not one
  271. * of the known markers, it is removed. This prevents incomplete or partial markers
  272. * from corrupting the output.
  273. *
  274. * 7. Finalization:
  275. * - Once all chunks have been processed (when `isFinal` is true), any remaining original
  276. * content after the last replaced section is appended to the result.
  277. * - Trailing newlines are not forcibly added. The code tries to output exactly what is specified.
  278. *
  279. * Errors:
  280. * - If the search block cannot be matched using any of the available matching strategies,
  281. * an error is thrown.
  282. */
  283. export async function constructNewFileContent(
  284. diffContent: string,
  285. originalContent: string,
  286. isFinal: boolean,
  287. version: "v1" | "v2" = "v1",
  288. ): Promise<any> {
  289. const constructor = constructNewFileContentVersionMapping[version]
  290. if (!constructor) {
  291. throw new Error(`Invalid version '${version}' for file content constructor`)
  292. }
  293. return constructor(diffContent, originalContent, isFinal)
  294. }
  295. const constructNewFileContentVersionMapping: Record<
  296. string,
  297. (diffContent: string, originalContent: string, isFinal: boolean) => Promise<any>
  298. > = {
  299. v1: constructNewFileContentV1,
  300. v2: constructNewFileContentV2,
  301. } as const
  302. async function constructNewFileContentV1(diffContent: string, originalContent: string, isFinal: boolean): Promise<{
  303. content: string;
  304. replacements: Array<{
  305. start: number;
  306. end: number;
  307. content: string;
  308. method: string;
  309. similarity: number;
  310. searchContent: string;
  311. matchedText: string;
  312. }>;
  313. }> {
  314. let result = ""
  315. let lastProcessedIndex = 0
  316. let currentSearchContent = ""
  317. let currentReplaceContent = ""
  318. let inSearch = false
  319. let inReplace = false
  320. let searchMatchIndex = -1
  321. let searchEndIndex = -1
  322. let matchMethod = ""
  323. let similarityScore = -1.0
  324. // Track all replacements to handle out-of-order edits
  325. let replacements: Array<{
  326. start: number;
  327. end: number;
  328. content: string;
  329. method: string;
  330. similarity: number;
  331. searchContent: string;
  332. matchedText: string;
  333. }> = []
  334. let pendingOutOfOrderReplacement = false
  335. let lines = diffContent.split("\n")
  336. // If the last line looks like a partial marker but isn't recognized,
  337. // remove it because it might be incomplete.
  338. const lastLine = lines[lines.length - 1]
  339. if (
  340. lines.length > 0 &&
  341. (lastLine.startsWith(SEARCH_BLOCK_CHAR) ||
  342. lastLine.startsWith(LEGACY_SEARCH_BLOCK_CHAR) ||
  343. lastLine.startsWith("=") ||
  344. lastLine.startsWith(REPLACE_BLOCK_CHAR) ||
  345. lastLine.startsWith(LEGACY_REPLACE_BLOCK_CHAR)) &&
  346. !isSearchBlockStart(lastLine) &&
  347. !isSearchBlockEnd(lastLine) &&
  348. !isReplaceBlockEnd(lastLine)
  349. ) {
  350. lines.pop()
  351. }
  352. for (const line of lines) {
  353. if (isSearchBlockStart(line)) {
  354. inSearch = true
  355. currentSearchContent = ""
  356. currentReplaceContent = ""
  357. continue
  358. }
  359. if (isSearchBlockEnd(line)) {
  360. inSearch = false
  361. inReplace = true
  362. // Remove trailing linebreak for adding the === marker
  363. // if (currentSearchContent.endsWith("\r\n")) {
  364. // currentSearchContent = currentSearchContent.slice(0, -2)
  365. // } else if (currentSearchContent.endsWith("\n")) {
  366. // currentSearchContent = currentSearchContent.slice(0, -1)
  367. // }
  368. if (!currentSearchContent) {
  369. // Empty search block
  370. if (originalContent.length === 0) {
  371. // New file scenario: nothing to match, just start inserting
  372. searchMatchIndex = 0
  373. searchEndIndex = 0
  374. matchMethod = "empty_new_file"
  375. } else {
  376. // ERROR: Empty search block with non-empty file indicates malformed SEARCH marker
  377. throw new Error(
  378. "Empty SEARCH block detected with non-empty file. This usually indicates a malformed SEARCH marker.\n" +
  379. "Please ensure your SEARCH marker follows the correct format:\n" +
  380. "- Use '------- SEARCH' (7+ dashes + space + SEARCH)\n",
  381. )
  382. }
  383. } else {
  384. // Add check for inefficient full-file search
  385. // if (currentSearchContent.trim() === originalContent.trim()) {
  386. // throw new Error(
  387. // "The SEARCH block contains the entire file content. Please either:\n" +
  388. // "1. Use an empty SEARCH block to replace the entire file, or\n" +
  389. // "2. Make focused changes to specific parts of the file that need modification.",
  390. // )
  391. // }
  392. // Exact search match scenario
  393. const exactIndex = originalContent.indexOf(currentSearchContent, lastProcessedIndex)
  394. if (exactIndex !== -1) {
  395. searchMatchIndex = exactIndex
  396. searchEndIndex = exactIndex + currentSearchContent.length
  397. matchMethod = "exact_match"
  398. } else {
  399. // Attempt fallback line-trimmed matching
  400. const lineMatch = lineTrimmedFallbackMatch(originalContent, currentSearchContent, lastProcessedIndex)
  401. if (lineMatch) {
  402. ;[searchMatchIndex, searchEndIndex] = lineMatch
  403. matchMethod = "line_trimmed_fallback"
  404. } else {
  405. // Try block anchor fallback for larger blocks
  406. const blockMatch = blockAnchorFallbackMatch(originalContent, currentSearchContent, lastProcessedIndex)
  407. if (blockMatch) {
  408. ;[searchMatchIndex, searchEndIndex, similarityScore] = blockMatch
  409. matchMethod = "block_anchor_fallback"
  410. } else {
  411. // Last resort: search the entire file from the beginning
  412. const fullFileIndex = originalContent.indexOf(currentSearchContent, 0)
  413. if (fullFileIndex !== -1) {
  414. // Found in the file - could be out of order
  415. searchMatchIndex = fullFileIndex
  416. searchEndIndex = fullFileIndex + currentSearchContent.length
  417. matchMethod = "full_file_search"
  418. if (searchMatchIndex < lastProcessedIndex) {
  419. pendingOutOfOrderReplacement = true
  420. }
  421. } else {
  422. throw new Error(
  423. `The SEARCH block:\n${currentSearchContent.trimEnd()}\n...does not match anything in the file.`,
  424. )
  425. }
  426. }
  427. }
  428. }
  429. }
  430. // Check if this is an out-of-order replacement
  431. if (searchMatchIndex < lastProcessedIndex) {
  432. pendingOutOfOrderReplacement = true
  433. }
  434. // For in-order replacements, output everything up to the match location
  435. if (!pendingOutOfOrderReplacement) {
  436. result += originalContent.slice(lastProcessedIndex, searchMatchIndex)
  437. }
  438. continue
  439. }
  440. if (isReplaceBlockEnd(line)) {
  441. // Finished one replace block
  442. if (searchMatchIndex === -1) {
  443. throw new Error(
  444. `The SEARCH block:\n${currentSearchContent.trimEnd()}\n...is malformatted.`,
  445. )
  446. }
  447. // Store this replacement
  448. replacements.push({
  449. start: searchMatchIndex,
  450. end: searchEndIndex,
  451. content: currentReplaceContent,
  452. method: matchMethod,
  453. similarity: similarityScore,
  454. searchContent: currentSearchContent,
  455. matchedText: originalContent.slice(searchMatchIndex, searchEndIndex),
  456. })
  457. // If this was an in-order replacement, advance lastProcessedIndex
  458. if (!pendingOutOfOrderReplacement) {
  459. lastProcessedIndex = searchEndIndex
  460. }
  461. // Reset for next block
  462. inSearch = false
  463. inReplace = false
  464. currentSearchContent = ""
  465. currentReplaceContent = ""
  466. searchMatchIndex = -1
  467. searchEndIndex = -1
  468. similarityScore = -1.0
  469. pendingOutOfOrderReplacement = false
  470. continue
  471. }
  472. // Accumulate content for search or replace
  473. // (currentReplaceContent is not being used for anything right now since we directly append to result.)
  474. // (We artificially add a linebreak since we split on \n at the beginning. In order to not include a trailing linebreak in the final search/result blocks we need to remove it before using them. This allows for partial line matches to be correctly identified.)
  475. // NOTE: search/replace blocks must be arranged in the order they appear in the file due to how we build the content using lastProcessedIndex. We also cannot strip the trailing newline since for non-partial lines it would remove the linebreak from the original content. (If we remove end linebreak from search, then we'd also have to remove it from replace but we can't know if it's a partial line or not since the model may be using the line break to indicate the end of the block rather than as part of the search content.) We require the model to output full lines in order for our fallbacks to work as well.
  476. if (inSearch) {
  477. currentSearchContent += line + "\n"
  478. } else if (inReplace) {
  479. currentReplaceContent += line + "\n"
  480. // Only output replacement lines immediately for in-order replacements
  481. if (searchMatchIndex !== -1 && !pendingOutOfOrderReplacement) {
  482. result += line + "\n"
  483. }
  484. }
  485. }
  486. // If this is the final chunk, we need to apply all replacements and build the final result
  487. if (isFinal) {
  488. // Handle the case where we're still in replace mode when processing ends
  489. // and this is the final chunk - treat it as if we encountered the REPLACE marker
  490. if (inReplace && searchMatchIndex !== -1) {
  491. // Store this replacement
  492. replacements.push({
  493. start: searchMatchIndex,
  494. end: searchEndIndex,
  495. content: currentReplaceContent,
  496. method: matchMethod,
  497. similarity: similarityScore,
  498. searchContent: currentSearchContent,
  499. matchedText: originalContent.slice(searchMatchIndex, searchEndIndex),
  500. })
  501. // If this was an in-order replacement, advance lastProcessedIndex
  502. if (!pendingOutOfOrderReplacement) {
  503. lastProcessedIndex = searchEndIndex
  504. }
  505. // Reset state
  506. inSearch = false
  507. inReplace = false
  508. currentSearchContent = ""
  509. currentReplaceContent = ""
  510. searchMatchIndex = -1
  511. searchEndIndex = -1
  512. pendingOutOfOrderReplacement = false
  513. }
  514. // end of handling missing replace marker
  515. // Sort replacements by start position
  516. replacements.sort((a, b) => a.start - b.start)
  517. // Rebuild the entire result by applying all replacements
  518. result = ""
  519. let currentPos = 0
  520. for (const replacement of replacements) {
  521. // Add original content up to this replacement
  522. result += originalContent.slice(currentPos, replacement.start)
  523. // Add the replacement content
  524. result += replacement.content
  525. // Move position to after the replaced section
  526. currentPos = replacement.end
  527. }
  528. // Add any remaining original content
  529. result += originalContent.slice(currentPos)
  530. }
  531. // For testing - return debug info
  532. return {
  533. content: result,
  534. replacements: replacements
  535. }
  536. }
  537. enum ProcessingState {
  538. Idle = 0,
  539. StateSearch = 1 << 0,
  540. StateReplace = 1 << 1,
  541. }
  542. class NewFileContentConstructor {
  543. private originalContent: string
  544. private isFinal: boolean
  545. private state: number
  546. private pendingNonStandardLines: string[]
  547. private result: string
  548. private lastProcessedIndex: number
  549. private currentSearchContent: string
  550. private currentReplaceContent: string
  551. private searchMatchIndex: number
  552. private searchEndIndex: number
  553. constructor(originalContent: string, isFinal: boolean) {
  554. this.originalContent = originalContent
  555. this.isFinal = isFinal
  556. this.pendingNonStandardLines = []
  557. this.result = ""
  558. this.lastProcessedIndex = 0
  559. this.state = ProcessingState.Idle
  560. this.currentSearchContent = ""
  561. this.currentReplaceContent = ""
  562. this.searchMatchIndex = -1
  563. this.searchEndIndex = -1
  564. }
  565. private resetForNextBlock() {
  566. // Reset for next block
  567. this.state = ProcessingState.Idle
  568. this.currentSearchContent = ""
  569. this.currentReplaceContent = ""
  570. this.searchMatchIndex = -1
  571. this.searchEndIndex = -1
  572. }
  573. private findLastMatchingLineIndex(regx: RegExp, lineLimit: number) {
  574. for (let i = lineLimit; i > 0; ) {
  575. i--
  576. if (this.pendingNonStandardLines[i].match(regx)) {
  577. return i
  578. }
  579. }
  580. return -1
  581. }
  582. private updateProcessingState(newState: ProcessingState) {
  583. const isValidTransition =
  584. (this.state === ProcessingState.Idle && newState === ProcessingState.StateSearch) ||
  585. (this.state === ProcessingState.StateSearch && newState === ProcessingState.StateReplace)
  586. if (!isValidTransition) {
  587. throw new Error(
  588. `Invalid state transition.\n` +
  589. "Valid transitions are:\n" +
  590. "- Idle → StateSearch\n" +
  591. "- StateSearch → StateReplace",
  592. )
  593. }
  594. this.state |= newState
  595. }
  596. private isStateActive(state: ProcessingState): boolean {
  597. return (this.state & state) === state
  598. }
  599. private activateReplaceState() {
  600. this.updateProcessingState(ProcessingState.StateReplace)
  601. }
  602. private activateSearchState() {
  603. this.updateProcessingState(ProcessingState.StateSearch)
  604. this.currentSearchContent = ""
  605. this.currentReplaceContent = ""
  606. }
  607. private isSearchingActive(): boolean {
  608. return this.isStateActive(ProcessingState.StateSearch)
  609. }
  610. private isReplacingActive(): boolean {
  611. return this.isStateActive(ProcessingState.StateReplace)
  612. }
  613. private hasPendingNonStandardLines(pendingNonStandardLineLimit: number): boolean {
  614. return this.pendingNonStandardLines.length - pendingNonStandardLineLimit < this.pendingNonStandardLines.length
  615. }
  616. public processLine(line: string) {
  617. this.internalProcessLine(line, true, this.pendingNonStandardLines.length)
  618. }
  619. public getResult() {
  620. // If this is the final chunk, append any remaining original content
  621. if (this.isFinal && this.lastProcessedIndex < this.originalContent.length) {
  622. this.result += this.originalContent.slice(this.lastProcessedIndex)
  623. }
  624. if (this.isFinal && this.state !== ProcessingState.Idle) {
  625. throw new Error("File processing incomplete - SEARCH/REPLACE operations still active during finalization")
  626. }
  627. return this.result
  628. }
  629. private internalProcessLine(
  630. line: string,
  631. canWritependingNonStandardLines: boolean,
  632. pendingNonStandardLineLimit: number,
  633. ): number {
  634. let removeLineCount = 0
  635. if (isSearchBlockStart(line)) {
  636. removeLineCount = this.trimPendingNonStandardTrailingEmptyLines(pendingNonStandardLineLimit)
  637. if (removeLineCount > 0) {
  638. pendingNonStandardLineLimit = pendingNonStandardLineLimit - removeLineCount
  639. }
  640. if (this.hasPendingNonStandardLines(pendingNonStandardLineLimit)) {
  641. this.tryFixSearchReplaceBlock(pendingNonStandardLineLimit)
  642. canWritependingNonStandardLines && (this.pendingNonStandardLines.length = 0)
  643. }
  644. this.activateSearchState()
  645. } else if (isSearchBlockEnd(line)) {
  646. // 校验非标内容
  647. if (!this.isSearchingActive()) {
  648. this.tryFixSearchBlock(pendingNonStandardLineLimit)
  649. canWritependingNonStandardLines && (this.pendingNonStandardLines.length = 0)
  650. }
  651. this.activateReplaceState()
  652. this.beforeReplace()
  653. } else if (isReplaceBlockEnd(line)) {
  654. if (!this.isReplacingActive()) {
  655. this.tryFixReplaceBlock(pendingNonStandardLineLimit)
  656. canWritependingNonStandardLines && (this.pendingNonStandardLines.length = 0)
  657. }
  658. this.lastProcessedIndex = this.searchEndIndex
  659. this.resetForNextBlock()
  660. } else {
  661. // Accumulate content for search or replace
  662. // (currentReplaceContent is not being used for anything right now since we directly append to result.)
  663. // (We artificially add a linebreak since we split on \n at the beginning. In order to not include a trailing linebreak in the final search/result blocks we need to remove it before using them. This allows for partial line matches to be correctly identified.)
  664. // NOTE: search/replace blocks must be arranged in the order they appear in the file due to how we build the content using lastProcessedIndex. We also cannot strip the trailing newline since for non-partial lines it would remove the linebreak from the original content. (If we remove end linebreak from search, then we'd also have to remove it from replace but we can't know if it's a partial line or not since the model may be using the line break to indicate the end of the block rather than as part of the search content.) We require the model to output full lines in order for our fallbacks to work as well.
  665. if (this.isReplacingActive()) {
  666. this.currentReplaceContent += line + "\n"
  667. // Output replacement lines immediately if we know the insertion point
  668. if (this.searchMatchIndex !== -1) {
  669. this.result += line + "\n"
  670. }
  671. } else if (this.isSearchingActive()) {
  672. this.currentSearchContent += line + "\n"
  673. } else {
  674. let appendToPendingNonStandardLines = canWritependingNonStandardLines
  675. if (appendToPendingNonStandardLines) {
  676. // 处理非标内容
  677. this.pendingNonStandardLines.push(line)
  678. }
  679. }
  680. }
  681. return removeLineCount
  682. }
  683. private beforeReplace() {
  684. // Remove trailing linebreak for adding the === marker
  685. // if (currentSearchContent.endsWith("\r\n")) {
  686. // currentSearchContent = currentSearchContent.slice(0, -2)
  687. // } else if (currentSearchContent.endsWith("\n")) {
  688. // currentSearchContent = currentSearchContent.slice(0, -1)
  689. // }
  690. if (!this.currentSearchContent) {
  691. // Empty search block
  692. if (this.originalContent.length === 0) {
  693. // New file scenario: nothing to match, just start inserting
  694. this.searchMatchIndex = 0
  695. this.searchEndIndex = 0
  696. } else {
  697. // Complete file replacement scenario: treat the entire file as matched
  698. this.searchMatchIndex = 0
  699. this.searchEndIndex = this.originalContent.length
  700. }
  701. } else {
  702. // Add check for inefficient full-file search
  703. // if (currentSearchContent.trim() === originalContent.trim()) {
  704. // throw new Error(
  705. // "The SEARCH block contains the entire file content. Please either:\n" +
  706. // "1. Use an empty SEARCH block to replace the entire file, or\n" +
  707. // "2. Make focused changes to specific parts of the file that need modification.",
  708. // )
  709. // }
  710. // Exact search match scenario
  711. const exactIndex = this.originalContent.indexOf(this.currentSearchContent, this.lastProcessedIndex)
  712. if (exactIndex !== -1) {
  713. this.searchMatchIndex = exactIndex
  714. this.searchEndIndex = exactIndex + this.currentSearchContent.length
  715. } else {
  716. // Attempt fallback line-trimmed matching
  717. const lineMatch = lineTrimmedFallbackMatch(
  718. this.originalContent,
  719. this.currentSearchContent,
  720. this.lastProcessedIndex,
  721. )
  722. if (lineMatch) {
  723. ;[this.searchMatchIndex, this.searchEndIndex] = lineMatch
  724. } else {
  725. // Try block anchor fallback for larger blocks
  726. const blockMatch = blockAnchorFallbackMatch(
  727. this.originalContent,
  728. this.currentSearchContent,
  729. this.lastProcessedIndex,
  730. )
  731. if (blockMatch) {
  732. ;[this.searchMatchIndex, this.searchEndIndex, /* ignore similarity */] = blockMatch
  733. } else {
  734. throw new Error(
  735. `The SEARCH block:\n${this.currentSearchContent.trimEnd()}\n...does not match anything in the file.`,
  736. )
  737. }
  738. }
  739. }
  740. }
  741. if (this.searchMatchIndex < this.lastProcessedIndex) {
  742. throw new Error(
  743. `The SEARCH block:\n${this.currentSearchContent.trimEnd()}\n...matched an incorrect content in the file.`,
  744. )
  745. }
  746. // Output everything up to the match location
  747. this.result += this.originalContent.slice(this.lastProcessedIndex, this.searchMatchIndex)
  748. }
  749. private tryFixSearchBlock(lineLimit: number): number {
  750. let removeLineCount = 0
  751. if (lineLimit < 0) {
  752. lineLimit = this.pendingNonStandardLines.length
  753. }
  754. if (!lineLimit) {
  755. throw new Error("Invalid SEARCH/REPLACE block structure - no lines available to process")
  756. }
  757. let searchTagRegexp = /^([-]{3,}|[<]{3,}) SEARCH$/
  758. const searchTagIndex = this.findLastMatchingLineIndex(searchTagRegexp, lineLimit)
  759. if (searchTagIndex !== -1) {
  760. let fixLines = this.pendingNonStandardLines.slice(searchTagIndex, lineLimit)
  761. fixLines[0] = SEARCH_BLOCK_START
  762. for (const line of fixLines) {
  763. removeLineCount += this.internalProcessLine(line, false, searchTagIndex)
  764. }
  765. } else {
  766. throw new Error(
  767. `Invalid REPLACE marker detected - could not find matching SEARCH block starting from line ${searchTagIndex + 1}`,
  768. )
  769. }
  770. return removeLineCount
  771. }
  772. private tryFixReplaceBlock(lineLimit: number): number {
  773. let removeLineCount = 0
  774. if (lineLimit < 0) {
  775. lineLimit = this.pendingNonStandardLines.length
  776. }
  777. if (!lineLimit) {
  778. throw new Error()
  779. }
  780. let replaceBeginTagRegexp = /^[=]{3,}$/
  781. const replaceBeginTagIndex = this.findLastMatchingLineIndex(replaceBeginTagRegexp, lineLimit)
  782. if (replaceBeginTagIndex !== -1) {
  783. // // 校验非标内容
  784. // if (!this.isSearchingActive()) {
  785. // removeLineCount += this.tryFixSearchBlock(replaceBeginTagIndex)
  786. // }
  787. let fixLines = this.pendingNonStandardLines.slice(replaceBeginTagIndex - removeLineCount, lineLimit - removeLineCount)
  788. fixLines[0] = SEARCH_BLOCK_END
  789. for (const line of fixLines) {
  790. removeLineCount += this.internalProcessLine(line, false, replaceBeginTagIndex - removeLineCount)
  791. }
  792. } else {
  793. throw new Error(`Malformed REPLACE block - missing valid separator after line ${replaceBeginTagIndex + 1}`)
  794. }
  795. return removeLineCount
  796. }
  797. private tryFixSearchReplaceBlock(lineLimit: number): number {
  798. let removeLineCount = 0
  799. if (lineLimit < 0) {
  800. lineLimit = this.pendingNonStandardLines.length
  801. }
  802. if (!lineLimit) {
  803. throw new Error()
  804. }
  805. let replaceEndTagRegexp = /^([+]{3,}|[>]{3,}) REPLACE$/
  806. const replaceEndTagIndex = this.findLastMatchingLineIndex(replaceEndTagRegexp, lineLimit)
  807. const likeReplaceEndTag = replaceEndTagIndex === lineLimit - 1
  808. if (likeReplaceEndTag) {
  809. // // 校验非标内容
  810. // if (!this.isReplacingActive()) {
  811. // removeLineCount += this.tryFixReplaceBlock(replaceEndTagIndex)
  812. // }
  813. let fixLines = this.pendingNonStandardLines.slice(replaceEndTagIndex - removeLineCount, lineLimit - removeLineCount)
  814. fixLines[fixLines.length - 1] = REPLACE_BLOCK_END
  815. for (const line of fixLines) {
  816. removeLineCount += this.internalProcessLine(line, false, replaceEndTagIndex - removeLineCount)
  817. }
  818. } else {
  819. throw new Error("Malformed SEARCH/REPLACE block structure: Missing valid closing REPLACE marker")
  820. }
  821. return removeLineCount
  822. }
  823. /**
  824. * Removes trailing empty lines from the pendingNonStandardLines array
  825. * @param lineLimit - The index to start checking from (exclusive).
  826. * Removes empty lines from lineLimit-1 backwards.
  827. * @returns The number of empty lines removed
  828. */
  829. private trimPendingNonStandardTrailingEmptyLines(lineLimit: number): number {
  830. let removedCount = 0
  831. let i = Math.min(lineLimit, this.pendingNonStandardLines.length) - 1
  832. while (i >= 0 && this.pendingNonStandardLines[i].trim() === "") {
  833. this.pendingNonStandardLines.pop()
  834. removedCount++
  835. i--
  836. }
  837. return removedCount
  838. }
  839. }
  840. export async function constructNewFileContentV2(diffContent: string, originalContent: string, isFinal: boolean): Promise<string> {
  841. let newFileContentConstructor = new NewFileContentConstructor(originalContent, isFinal)
  842. let lines = diffContent.split("\n")
  843. // If the last line looks like a partial marker but isn't recognized,
  844. // remove it because it might be incomplete.
  845. const lastLine = lines[lines.length - 1]
  846. if (
  847. lines.length > 0 &&
  848. (lastLine.startsWith(SEARCH_BLOCK_CHAR) ||
  849. lastLine.startsWith(LEGACY_SEARCH_BLOCK_CHAR) ||
  850. lastLine.startsWith("=") ||
  851. lastLine.startsWith(REPLACE_BLOCK_CHAR) ||
  852. lastLine.startsWith(LEGACY_REPLACE_BLOCK_CHAR)) &&
  853. lastLine !== SEARCH_BLOCK_START &&
  854. lastLine !== SEARCH_BLOCK_END &&
  855. lastLine !== REPLACE_BLOCK_END
  856. ) {
  857. lines.pop()
  858. }
  859. for (const line of lines) {
  860. newFileContentConstructor.processLine(line)
  861. }
  862. let result = newFileContentConstructor.getResult()
  863. return result
  864. }