// diff-06-23-25.ts (30 KB)
  // Canonical marker lines of the SEARCH/REPLACE diff format.
  const SEARCH_BLOCK_START = "------- SEARCH"
  const SEARCH_BLOCK_END = "======="
  const REPLACE_BLOCK_END = "+++++++ REPLACE"

  // First character of each marker family (current and legacy); used to spot a
  // possibly truncated marker on the last line of a streamed chunk.
  const SEARCH_BLOCK_CHAR = "-"
  const LEGACY_SEARCH_BLOCK_CHAR = "<"
  const REPLACE_BLOCK_CHAR = "+"
  const LEGACY_REPLACE_BLOCK_CHAR = ">"

  // Tolerant marker patterns: three or more marker characters are accepted
  // instead of requiring the exact canonical strings above.
  const SEARCH_BLOCK_START_REGEX = /^[-]{3,} SEARCH$/
  const LEGACY_SEARCH_BLOCK_START_REGEX = /^[<]{3,} SEARCH$/
  const SEARCH_BLOCK_END_REGEX = /^[=]{3,}$/
  const REPLACE_BLOCK_END_REGEX = /^[+]{3,} REPLACE$/
  const LEGACY_REPLACE_BLOCK_END_REGEX = /^[>]{3,} REPLACE$/
  /**
   * Tells whether `line` opens a SEARCH section.
   * Both the current ("---… SEARCH") and the legacy ("<<<… SEARCH") marker forms are accepted.
   */
  function isSearchBlockStart(line: string): boolean {
    if (SEARCH_BLOCK_START_REGEX.test(line)) {
      return true
    }
    return LEGACY_SEARCH_BLOCK_START_REGEX.test(line)
  }
  /** Tells whether `line` is the "===…" separator that ends SEARCH and begins REPLACE. */
  function isSearchBlockEnd(line: string): boolean {
    const isSeparator = SEARCH_BLOCK_END_REGEX.test(line)
    return isSeparator
  }
  /**
   * Tells whether `line` closes a REPLACE section.
   * Both the current ("+++… REPLACE") and the legacy (">>>… REPLACE") marker forms are accepted.
   */
  function isReplaceBlockEnd(line: string): boolean {
    if (REPLACE_BLOCK_END_REGEX.test(line)) {
      return true
    }
    return LEGACY_REPLACE_BLOCK_END_REGEX.test(line)
  }
  /**
   * Whitespace-insensitive, line-by-line fallback used when an exact substring match fails.
   *
   * Both inputs are split on "\n". A window of `originalContent` lines matches when every line,
   * after `trim()`, equals the corresponding trimmed line of `searchContent`. Only lines that
   * begin at or after `startIndex` are considered as the first line of a window.
   *
   * @param originalContent - Full text of the file being edited
   * @param searchContent - SEARCH block text; one trailing empty line (from a final "\n") is ignored
   * @param startIndex - Character offset in `originalContent` from which to begin scanning
   * @returns `[start, end]` character offsets covering the matched lines including their line
   *          breaks (end exclusive, never beyond `originalContent.length`), or `false` if no
   *          window matches
   */
  function lineTrimmedFallbackMatch(originalContent: string, searchContent: string, startIndex: number): [number, number] | false {
    const originalLines = originalContent.split("\n")
    const searchLines = searchContent.split("\n")

    // A trailing "\n" in searchContent yields a final empty element that is not a real line.
    if (searchLines[searchLines.length - 1] === "") {
      searchLines.pop()
    }
    const trimmedSearchLines = searchLines.map((searchLine) => searchLine.trim())

    // Advance to the first line whose starting offset is >= startIndex.
    // lineOffset always holds the character offset at which line `lineNum` begins.
    let lineNum = 0
    let lineOffset = 0
    while (lineOffset < startIndex && lineNum < originalLines.length) {
      lineOffset += originalLines[lineNum].length + 1 // +1 for \n
      lineNum++
    }

    for (let i = lineNum; i <= originalLines.length - searchLines.length; i++) {
      const windowMatches = trimmedSearchLines.every((wanted, j) => originalLines[i + j].trim() === wanted)
      if (windowMatches) {
        let matchEndIndex = lineOffset
        for (let k = 0; k < searchLines.length; k++) {
          matchEndIndex += originalLines[i + k].length + 1 // +1 for \n
        }
        // The last line of a file without a trailing newline has no "\n" to count,
        // so clamp to keep the end offset inside the content.
        return [lineOffset, Math.min(matchEndIndex, originalContent.length)]
      }
      lineOffset += originalLines[i].length + 1 // move to the start of the next line
    }
    return false
  }
  /**
   * Third-tier fallback: locates a block by its first and last lines only.
   *
   * The first and last lines of `searchContent` act as anchors. A window of the same number of
   * lines in `originalContent` matches when its first and last lines equal the anchors after
   * `trim()`; the lines in between are not compared. This tolerates small differences inside
   * the block as long as its line count is unchanged.
   *
   * To limit false positives the strategy is only used for blocks of at least three lines,
   * counted after discarding the empty element produced by a trailing "\n".
   *
   * @param originalContent - The full content of the original file
   * @param searchContent - The content we're trying to find in the original file
   * @param startIndex - The character index in originalContent where to start searching
   * @returns `[start, end]` character offsets of the matched window (end exclusive, never beyond
   *          `originalContent.length`), or `false` when the block is too short or no window matches
   */
  function blockAnchorFallbackMatch(originalContent: string, searchContent: string, startIndex: number): [number, number] | false {
    const originalLines = originalContent.split("\n")
    const searchLines = searchContent.split("\n")

    // Drop the artificial trailing empty line BEFORE measuring the block, so the
    // three-line minimum applies to real lines only.
    if (searchLines[searchLines.length - 1] === "") {
      searchLines.pop()
    }
    const searchBlockSize = searchLines.length
    if (searchBlockSize < 3) {
      return false
    }

    const firstLineSearch = searchLines[0].trim()
    const lastLineSearch = searchLines[searchBlockSize - 1].trim()

    // Advance to the first line whose starting offset is >= startIndex.
    // lineOffset always holds the character offset at which line `lineNum` begins.
    let lineNum = 0
    let lineOffset = 0
    while (lineOffset < startIndex && lineNum < originalLines.length) {
      lineOffset += originalLines[lineNum].length + 1 // +1 for \n
      lineNum++
    }

    for (let i = lineNum; i <= originalLines.length - searchBlockSize; i++) {
      const anchorsMatch =
        originalLines[i].trim() === firstLineSearch && originalLines[i + searchBlockSize - 1].trim() === lastLineSearch
      if (anchorsMatch) {
        let matchEndIndex = lineOffset
        for (let k = 0; k < searchBlockSize; k++) {
          matchEndIndex += originalLines[i + k].length + 1 // +1 for \n
        }
        // The final line of a file without a trailing newline has no "\n" to count.
        return [lineOffset, Math.min(matchEndIndex, originalContent.length)]
      }
      lineOffset += originalLines[i].length + 1 // move to the start of the next line
    }
    return false
  }
  /**
   * Rebuilds a file's content by applying a streamed diff, written in SEARCH/REPLACE block
   * format, to the original content. It can be called repeatedly with a growing `diffContent`
   * to obtain incremental previews, and once more with `isFinal` set to obtain the final file.
   *
   * Block format (three or more marker characters are accepted; the legacy
   * "<<<<<<< SEARCH" / ">>>>>>> REPLACE" markers are recognised as well):
   *
   * ------- SEARCH
   * [content to find in the original file]
   * =======
   * [content to replace it with]
   * +++++++ REPLACE
   *
   * How a diff is applied:
   * 1. Lines after a SEARCH marker are collected as search text until "=======".
   * 2. At "=======" the search text is located in the original file, trying in order:
   *    a. an exact match from the last processed position,
   *    b. a line-by-line match ignoring leading/trailing whitespace,
   *    c. a first-line/last-line anchor match for blocks of 3+ lines.
   *    Version "v1" additionally retries an exact match from the start of the file, which
   *    allows blocks that are out of order. If nothing matches, an error is thrown.
   * 3. Lines after "=======" are collected as replacement text until the REPLACE marker;
   *    for in-order blocks they are also appended to the result right away so partial
   *    output can be displayed.
   * 4. When `isFinal` is true, original content following the last replaced section is kept.
   *    No trailing newline is forced onto the output.
   *
   * Empty SEARCH section:
   * - Original file empty: treated as creating a new file (pure insertion).
   * - Original file not empty: "v1" throws (it treats this as a malformed SEARCH marker);
   *   "v2" treats the entire file as matched and replaces it.
   *
   * Partial markers: if the last line of `diffContent` starts like a marker but is not a
   * complete one, it is discarded, since it may be a marker still being streamed.
   *
   * @param diffContent - The diff text received so far
   * @param originalContent - The current content of the file
   * @param isFinal - Whether `diffContent` is the complete diff
   * @param version - Which applier implementation to use; defaults to "v1"
   * @returns The reconstructed (partial or final) file content
   * @throws Error if `version` is not registered, or if a SEARCH block cannot be matched
   */
  export async function constructNewFileContent(
    diffContent: string,
    originalContent: string,
    isFinal: boolean,
    version: "v1" | "v2" = "v1",
  ): Promise<string> {
    const implementation = constructNewFileContentVersionMapping[version]
    if (implementation) {
      return implementation(diffContent, originalContent, isFinal)
    }
    throw new Error(`Invalid version '${version}' for file content constructor`)
  }
  /** Call signature shared by every versioned diff applier. */
  type FileContentConstructor = (diffContent: string, originalContent: string, isFinal: boolean) => Promise<string>

  /** Maps a version key to the applier implementing it; looked up by constructNewFileContent. */
  const constructNewFileContentVersionMapping: Record<string, FileContentConstructor> = {
    v1: constructNewFileContentV1,
    v2: constructNewFileContentV2,
  } as const
  /**
   * Version 1 of the SEARCH/REPLACE applier.
   *
   * Walks the diff line by line, tracking whether it is inside a SEARCH or a REPLACE section.
   * When the "=======" separator is reached, the accumulated SEARCH text is located in
   * `originalContent`: exact match from the last processed position, then the line-trimmed and
   * block-anchor fallbacks, then an exact match from the start of the file (which permits
   * out-of-order blocks). Every completed block is recorded, and when `isFinal` is true the
   * result is rebuilt from scratch by applying all recorded replacements in file order.
   *
   * While streaming (`isFinal` false) the return value is a preview: original text up to each
   * in-order match followed by the replacement lines seen so far. Out-of-order blocks are not
   * part of the preview.
   *
   * A REPLACE end marker that arrives while no SEARCH block has been located is ignored.
   *
   * @param diffContent - The diff received so far (its last line may be a truncated marker)
   * @param originalContent - Current content of the file being edited
   * @param isFinal - True when `diffContent` is the complete diff
   * @returns The preview or final file content
   * @throws Error when a SEARCH block is empty while the file is not, or when it matches nothing
   */
  async function constructNewFileContentV1(diffContent: string, originalContent: string, isFinal: boolean): Promise<string> {
    let result = ""
    let lastProcessedIndex = 0

    let currentSearchContent = ""
    let currentReplaceContent = ""
    let inSearch = false
    let inReplace = false

    let searchMatchIndex = -1
    let searchEndIndex = -1

    // All completed replacements, kept so the final pass can apply them in file order
    // even when the diff listed them out of order.
    const replacements: Array<{ start: number; end: number; content: string }> = []
    let pendingOutOfOrderReplacement = false

    const lines = diffContent.split("\n")

    // If the last line looks like a marker but is not a complete one, drop it:
    // it may be a marker that is still being streamed.
    const lastLine = lines[lines.length - 1]
    if (
      lines.length > 0 &&
      (lastLine.startsWith(SEARCH_BLOCK_CHAR) ||
        lastLine.startsWith(LEGACY_SEARCH_BLOCK_CHAR) ||
        lastLine.startsWith("=") ||
        lastLine.startsWith(REPLACE_BLOCK_CHAR) ||
        lastLine.startsWith(LEGACY_REPLACE_BLOCK_CHAR)) &&
      !isSearchBlockStart(lastLine) &&
      !isSearchBlockEnd(lastLine) &&
      !isReplaceBlockEnd(lastLine)
    ) {
      lines.pop()
    }

    for (const line of lines) {
      if (isSearchBlockStart(line)) {
        inSearch = true
        currentSearchContent = ""
        currentReplaceContent = ""
        continue
      }

      if (isSearchBlockEnd(line)) {
        inSearch = false
        inReplace = true

        if (!currentSearchContent) {
          // Empty SEARCH block
          if (originalContent.length === 0) {
            // New file scenario: nothing to match, just start inserting
            searchMatchIndex = 0
            searchEndIndex = 0
          } else {
            // Empty SEARCH block with a non-empty file indicates a malformed SEARCH marker
            throw new Error(
              "Empty SEARCH block detected with non-empty file. This usually indicates a malformed SEARCH marker.\n" +
                "Please ensure your SEARCH marker follows the correct format:\n" +
                "- Use '------- SEARCH' (7+ dashes + space + SEARCH)\n",
            )
          }
        } else {
          // 1) Exact match at or after the last processed position
          const exactIndex = originalContent.indexOf(currentSearchContent, lastProcessedIndex)
          if (exactIndex !== -1) {
            searchMatchIndex = exactIndex
            searchEndIndex = exactIndex + currentSearchContent.length
          } else {
            // 2) Line-trimmed match, then 3) block-anchor match (only tried if 2 fails)
            const fallbackMatch =
              lineTrimmedFallbackMatch(originalContent, currentSearchContent, lastProcessedIndex) ||
              blockAnchorFallbackMatch(originalContent, currentSearchContent, lastProcessedIndex)
            if (fallbackMatch) {
              ;[searchMatchIndex, searchEndIndex] = fallbackMatch
            } else {
              // 4) Last resort: exact match anywhere in the file (may be out of order)
              const fullFileIndex = originalContent.indexOf(currentSearchContent, 0)
              if (fullFileIndex === -1) {
                throw new Error(
                  `The SEARCH block:\n${currentSearchContent.trimEnd()}\n...does not match anything in the file.`,
                )
              }
              searchMatchIndex = fullFileIndex
              searchEndIndex = fullFileIndex + currentSearchContent.length
            }
          }
        }

        // A match located before the last processed position is an out-of-order edit
        if (searchMatchIndex < lastProcessedIndex) {
          pendingOutOfOrderReplacement = true
        }

        // For in-order replacements, output everything up to the match location
        if (!pendingOutOfOrderReplacement) {
          result += originalContent.slice(lastProcessedIndex, searchMatchIndex)
        }
        continue
      }

      if (isReplaceBlockEnd(line)) {
        // Only record the block if a SEARCH section was actually located. A stray REPLACE
        // marker would otherwise store { start: -1, end: -1 } and move lastProcessedIndex to -1;
        // String.prototype.slice treats negative indices as offsets from the end, which
        // corrupts both the preview and the final rebuild.
        if (searchMatchIndex !== -1) {
          replacements.push({
            start: searchMatchIndex,
            end: searchEndIndex,
            content: currentReplaceContent,
          })
          // Only in-order replacements advance the processed position
          if (!pendingOutOfOrderReplacement) {
            lastProcessedIndex = searchEndIndex
          }
        }

        // Reset for next block
        inSearch = false
        inReplace = false
        currentSearchContent = ""
        currentReplaceContent = ""
        searchMatchIndex = -1
        searchEndIndex = -1
        pendingOutOfOrderReplacement = false
        continue
      }

      // Ordinary line: accumulate it into the section being read. A "\n" is re-added because
      // the diff was split on "\n"; it is kept (not stripped) so that full lines in the SEARCH
      // text line up with full lines of the original content.
      if (inSearch) {
        currentSearchContent += line + "\n"
      } else if (inReplace) {
        currentReplaceContent += line + "\n"
        // Only output replacement lines immediately for in-order replacements
        if (searchMatchIndex !== -1 && !pendingOutOfOrderReplacement) {
          result += line + "\n"
        }
      }
    }

    if (isFinal) {
      // The diff ended while still inside a REPLACE section: treat the end of input
      // as if the closing REPLACE marker had been seen.
      if (inReplace && searchMatchIndex !== -1) {
        replacements.push({
          start: searchMatchIndex,
          end: searchEndIndex,
          content: currentReplaceContent,
        })
      }

      // Apply replacements in file order
      replacements.sort((a, b) => a.start - b.start)

      // Rebuild the entire result from the original content
      result = ""
      let currentPos = 0
      for (const replacement of replacements) {
        // Original content up to this replacement, then the replacement itself
        result += originalContent.slice(currentPos, replacement.start)
        result += replacement.content
        // Continue after the replaced section
        currentPos = replacement.end
      }
      // Any remaining original content
      result += originalContent.slice(currentPos)
    }

    return result
  }
  /**
   * Bit flags describing which sections of a SEARCH/REPLACE block have been entered.
   * Each non-idle value occupies its own bit so flags can be OR-ed together and tested with a mask.
   */
  enum ProcessingState {
    /** No block is being read. */
    Idle = 0,
    /** Bit 0: a SEARCH section has been entered. */
    StateSearch = 1,
    /** Bit 1: a REPLACE section has been entered. */
    StateReplace = 2,
  }
  /**
   * Stateful line-by-line applier behind version 2 of the diff format.
   *
   * Feed every diff line to `processLine`, then read the output with `getResult`. The instance
   * tracks which section of a SEARCH/REPLACE block it is in (as ProcessingState bit flags),
   * locates each SEARCH text in the original content when the "=======" separator arrives, and
   * appends replacement lines to the result as they are read.
   *
   * Lines seen outside any block are buffered in `pendingNonStandardLines`; when a marker then
   * arrives in an unexpected state, the tryFix* methods attempt to re-interpret the buffered
   * lines as a block and throw if they cannot.
   *
   * NOTE(review): the regular expressions used by the tryFix* methods accept exactly the lines
   * that the marker helpers already recognise, and such lines are never buffered. As written,
   * these recovery paths therefore appear to always end in an Error - confirm the intent.
   */
  class NewFileContentConstructor {
    private readonly originalContent: string
    private readonly isFinal: boolean
    /** Bitwise combination of ProcessingState flags. */
    private state: number
    /** Lines received while idle that are neither markers nor block content. */
    private pendingNonStandardLines: string[]
    private result: string
    /** Offset in originalContent up to which content has been emitted or replaced. */
    private lastProcessedIndex: number
    private currentSearchContent: string
    private currentReplaceContent: string
    /** Start/end offsets of the current SEARCH match, or -1 when none has been located. */
    private searchMatchIndex: number
    private searchEndIndex: number

    /**
     * @param originalContent - Content of the file the diff applies to
     * @param isFinal - Whether the diff being fed is complete
     */
    constructor(originalContent: string, isFinal: boolean) {
      this.originalContent = originalContent
      this.isFinal = isFinal
      this.pendingNonStandardLines = []
      this.result = ""
      this.lastProcessedIndex = 0
      this.state = ProcessingState.Idle
      this.currentSearchContent = ""
      this.currentReplaceContent = ""
      this.searchMatchIndex = -1
      this.searchEndIndex = -1
    }

    /** Returns to the idle state and clears per-block data once a block is complete. */
    private resetForNextBlock() {
      this.state = ProcessingState.Idle
      this.currentSearchContent = ""
      this.currentReplaceContent = ""
      this.searchMatchIndex = -1
      this.searchEndIndex = -1
    }

    /**
     * Scans the buffered lines backwards from `lineLimit - 1` down to 0.
     * @returns Index of the last buffered line matching `regx`, or -1 if none does
     */
    private findLastMatchingLineIndex(regx: RegExp, lineLimit: number) {
      for (let i = lineLimit - 1; i >= 0; i--) {
        if (this.pendingNonStandardLines[i].match(regx)) {
          return i
        }
      }
      return -1
    }

    /**
     * Adds `newState` to the active flags. Only Idle -> StateSearch and
     * StateSearch -> StateReplace are permitted.
     * @throws Error on any other transition
     */
    private updateProcessingState(newState: ProcessingState) {
      const isValidTransition =
        (this.state === ProcessingState.Idle && newState === ProcessingState.StateSearch) ||
        (this.state === ProcessingState.StateSearch && newState === ProcessingState.StateReplace)
      if (!isValidTransition) {
        throw new Error(
          `Invalid state transition.\n` +
            "Valid transitions are:\n" +
            "- Idle → StateSearch\n" +
            "- StateSearch → StateReplace",
        )
      }
      // Flags accumulate: after entering REPLACE, the SEARCH bit remains set as well.
      this.state |= newState
    }

    /** True when every bit of `state` is set in the current state. */
    private isStateActive(state: ProcessingState): boolean {
      return (this.state & state) === state
    }

    private activateReplaceState() {
      this.updateProcessingState(ProcessingState.StateReplace)
    }

    private activateSearchState() {
      this.updateProcessingState(ProcessingState.StateSearch)
      this.currentSearchContent = ""
      this.currentReplaceContent = ""
    }

    private isSearchingActive(): boolean {
      return this.isStateActive(ProcessingState.StateSearch)
    }

    private isReplacingActive(): boolean {
      return this.isStateActive(ProcessingState.StateReplace)
    }

    /** True when at least one buffered line lies below `pendingNonStandardLineLimit`. */
    private hasPendingNonStandardLines(pendingNonStandardLineLimit: number): boolean {
      return pendingNonStandardLineLimit > 0
    }

    /** Processes one line of the diff. */
    public processLine(line: string) {
      this.internalProcessLine(line, true, this.pendingNonStandardLines.length)
    }

    /**
     * Returns the content built so far. When the diff is final, the original content that
     * follows the last replaced section is included.
     * Calling this method does not change the accumulated result.
     * @throws Error if the diff is final but a SEARCH/REPLACE block is still open
     */
    public getResult(): string {
      if (this.isFinal && this.state !== ProcessingState.Idle) {
        throw new Error("File processing incomplete - SEARCH/REPLACE operations still active during finalization")
      }
      if (this.isFinal && this.lastProcessedIndex < this.originalContent.length) {
        return this.result + this.originalContent.slice(this.lastProcessedIndex)
      }
      return this.result
    }

    /**
     * Core line handler, shared by `processLine` and the tryFix* recovery methods.
     *
     * @param line - The line to interpret
     * @param canWritependingNonStandardLines - Whether this call may append to or clear the
     *        pending buffer (false while replaying buffered lines)
     * @param pendingNonStandardLineLimit - Number of leading buffered lines that this call
     *        may inspect
     * @returns Number of blank buffered lines removed while handling a SEARCH marker; 0 otherwise
     */
    private internalProcessLine(
      line: string,
      canWritependingNonStandardLines: boolean,
      pendingNonStandardLineLimit: number,
    ): number {
      let removeLineCount = 0
      if (isSearchBlockStart(line)) {
        // Blank lines between blocks are harmless; discard them before checking for leftovers.
        removeLineCount = this.trimPendingNonStandardTrailingEmptyLines(pendingNonStandardLineLimit)
        if (removeLineCount > 0) {
          pendingNonStandardLineLimit = pendingNonStandardLineLimit - removeLineCount
        }
        if (this.hasPendingNonStandardLines(pendingNonStandardLineLimit)) {
          this.tryFixSearchReplaceBlock(pendingNonStandardLineLimit)
          if (canWritependingNonStandardLines) {
            this.pendingNonStandardLines.length = 0
          }
        }
        this.activateSearchState()
      } else if (isSearchBlockEnd(line)) {
        // Separator without an open SEARCH section: validate/recover from the buffered lines
        if (!this.isSearchingActive()) {
          this.tryFixSearchBlock(pendingNonStandardLineLimit)
          if (canWritependingNonStandardLines) {
            this.pendingNonStandardLines.length = 0
          }
        }
        this.activateReplaceState()
        this.beforeReplace()
      } else if (isReplaceBlockEnd(line)) {
        // REPLACE marker without an open REPLACE section: validate/recover from the buffered lines
        if (!this.isReplacingActive()) {
          this.tryFixReplaceBlock(pendingNonStandardLineLimit)
          if (canWritependingNonStandardLines) {
            this.pendingNonStandardLines.length = 0
          }
        }
        this.lastProcessedIndex = this.searchEndIndex
        this.resetForNextBlock()
      } else if (this.isReplacingActive()) {
        // The REPLACE check must come first: the SEARCH bit is still set while replacing.
        // A "\n" is re-added because the diff was split on "\n".
        this.currentReplaceContent += line + "\n"
        // Output replacement lines immediately if we know the insertion point
        if (this.searchMatchIndex !== -1) {
          this.result += line + "\n"
        }
      } else if (this.isSearchingActive()) {
        this.currentSearchContent += line + "\n"
      } else if (canWritependingNonStandardLines) {
        // Line outside any block: buffer it as non-standard content
        this.pendingNonStandardLines.push(line)
      }
      return removeLineCount
    }

    /**
     * Runs when the "=======" separator is reached: locates the SEARCH text in the original
     * content and emits the untouched original text that precedes the match.
     *
     * An empty SEARCH section means "new file" when the original is empty, and "replace the
     * whole file" otherwise. A non-empty SEARCH section is matched exactly first, then with
     * the line-trimmed fallback, then with the block-anchor fallback.
     *
     * @throws Error when nothing matches, or when the match lies before already processed content
     */
    private beforeReplace() {
      if (!this.currentSearchContent) {
        // Empty search block
        this.searchMatchIndex = 0
        // New file: nothing to replace. Existing file: the entire content is considered matched.
        this.searchEndIndex = this.originalContent.length === 0 ? 0 : this.originalContent.length
      } else {
        // Exact search match scenario
        const exactIndex = this.originalContent.indexOf(this.currentSearchContent, this.lastProcessedIndex)
        if (exactIndex !== -1) {
          this.searchMatchIndex = exactIndex
          this.searchEndIndex = exactIndex + this.currentSearchContent.length
        } else {
          // Line-trimmed fallback first; block-anchor fallback only if that fails
          const fallbackMatch =
            lineTrimmedFallbackMatch(this.originalContent, this.currentSearchContent, this.lastProcessedIndex) ||
            blockAnchorFallbackMatch(this.originalContent, this.currentSearchContent, this.lastProcessedIndex)
          if (!fallbackMatch) {
            throw new Error(
              `The SEARCH block:\n${this.currentSearchContent.trimEnd()}\n...does not match anything in the file.`,
            )
          }
          ;[this.searchMatchIndex, this.searchEndIndex] = fallbackMatch
        }
      }
      if (this.searchMatchIndex < this.lastProcessedIndex) {
        throw new Error(
          `The SEARCH block:\n${this.currentSearchContent.trimEnd()}\n...matched an incorrect content in the file.`,
        )
      }
      // Output everything up to the match location
      this.result += this.originalContent.slice(this.lastProcessedIndex, this.searchMatchIndex)
    }

    /**
     * Recovery for a "=======" separator seen without an open SEARCH section: looks for a
     * SEARCH marker among the buffered lines and replays the lines from there.
     * @param lineLimit - Number of leading buffered lines to consider (negative = all)
     * @returns Number of buffered lines removed during the replay
     * @throws Error when there are no buffered lines or no SEARCH marker among them
     */
    private tryFixSearchBlock(lineLimit: number): number {
      let removeLineCount = 0
      if (lineLimit < 0) {
        lineLimit = this.pendingNonStandardLines.length
      }
      if (!lineLimit) {
        throw new Error("Invalid SEARCH/REPLACE block structure - no lines available to process")
      }
      const searchTagRegexp = /^([-]{3,}|[<]{3,}) SEARCH$/
      const searchTagIndex = this.findLastMatchingLineIndex(searchTagRegexp, lineLimit)
      if (searchTagIndex === -1) {
        throw new Error(
          `Invalid REPLACE marker detected - could not find matching SEARCH block starting from line ${searchTagIndex + 1}`,
        )
      }
      const fixLines = this.pendingNonStandardLines.slice(searchTagIndex, lineLimit)
      fixLines[0] = SEARCH_BLOCK_START // normalise the marker to its canonical form
      for (const line of fixLines) {
        removeLineCount += this.internalProcessLine(line, false, searchTagIndex)
      }
      return removeLineCount
    }

    /**
     * Recovery for a REPLACE end marker seen without an open REPLACE section: looks for a
     * "===" separator among the buffered lines and replays the lines from there.
     * @param lineLimit - Number of leading buffered lines to consider (negative = all)
     * @returns Number of buffered lines removed during the replay
     * @throws Error when there are no buffered lines or no separator among them
     */
    private tryFixReplaceBlock(lineLimit: number): number {
      let removeLineCount = 0
      if (lineLimit < 0) {
        lineLimit = this.pendingNonStandardLines.length
      }
      if (!lineLimit) {
        throw new Error("Invalid SEARCH/REPLACE block structure - no lines available to process")
      }
      const replaceBeginTagRegexp = /^[=]{3,}$/
      const replaceBeginTagIndex = this.findLastMatchingLineIndex(replaceBeginTagRegexp, lineLimit)
      if (replaceBeginTagIndex === -1) {
        throw new Error(`Malformed REPLACE block - missing valid separator after line ${replaceBeginTagIndex + 1}`)
      }
      const fixLines = this.pendingNonStandardLines.slice(replaceBeginTagIndex - removeLineCount, lineLimit - removeLineCount)
      fixLines[0] = SEARCH_BLOCK_END // normalise the separator to its canonical form
      for (const line of fixLines) {
        // Lines removed during the replay shift the remaining ones down, hence the adjustment
        removeLineCount += this.internalProcessLine(line, false, replaceBeginTagIndex - removeLineCount)
      }
      return removeLineCount
    }

    /**
     * Recovery for a SEARCH marker seen while non-blank buffered lines exist: the last buffered
     * line (below `lineLimit`) must be a REPLACE end marker, which is then replayed.
     * @param lineLimit - Number of leading buffered lines to consider (negative = all)
     * @returns Number of buffered lines removed during the replay
     * @throws Error when there are no buffered lines or the last one is not a REPLACE end marker
     */
    private tryFixSearchReplaceBlock(lineLimit: number): number {
      let removeLineCount = 0
      if (lineLimit < 0) {
        lineLimit = this.pendingNonStandardLines.length
      }
      if (!lineLimit) {
        throw new Error("Invalid SEARCH/REPLACE block structure - no lines available to process")
      }
      const replaceEndTagRegexp = /^([+]{3,}|[>]{3,}) REPLACE$/
      const replaceEndTagIndex = this.findLastMatchingLineIndex(replaceEndTagRegexp, lineLimit)
      const likeReplaceEndTag = replaceEndTagIndex === lineLimit - 1
      if (!likeReplaceEndTag) {
        throw new Error("Malformed SEARCH/REPLACE block structure: Missing valid closing REPLACE marker")
      }
      const fixLines = this.pendingNonStandardLines.slice(replaceEndTagIndex - removeLineCount, lineLimit - removeLineCount)
      fixLines[fixLines.length - 1] = REPLACE_BLOCK_END // normalise the marker to its canonical form
      for (const line of fixLines) {
        removeLineCount += this.internalProcessLine(line, false, replaceEndTagIndex - removeLineCount)
      }
      return removeLineCount
    }

    /**
     * Removes the run of blank lines that ends just below `lineLimit` in pendingNonStandardLines.
     * @param lineLimit - The index to start checking from (exclusive).
     * Removes empty lines from lineLimit-1 backwards.
     * @returns The number of empty lines removed
     */
    private trimPendingNonStandardTrailingEmptyLines(lineLimit: number): number {
      let removedCount = 0
      let i = Math.min(lineLimit, this.pendingNonStandardLines.length) - 1
      while (i >= 0 && this.pendingNonStandardLines[i].trim() === "") {
        // Remove the line that was inspected. pop() would remove the array's last element,
        // which is a different line whenever lineLimit is below the buffer length.
        this.pendingNonStandardLines.splice(i, 1)
        removedCount++
        i--
      }
      return removedCount
    }
  }
  /**
   * Version 2 of the SEARCH/REPLACE applier: feeds the diff line by line to a
   * NewFileContentConstructor and returns what it produced.
   *
   * @param diffContent - The diff received so far (its last line may be a truncated marker)
   * @param originalContent - Current content of the file being edited
   * @param isFinal - True when `diffContent` is the complete diff
   * @returns The partial or final file content
   * @throws Error propagated from NewFileContentConstructor (unmatched SEARCH block, malformed
   *         block structure, or a block still open when `isFinal` is true)
   */
  export async function constructNewFileContentV2(diffContent: string, originalContent: string, isFinal: boolean): Promise<string> {
    const newFileContentConstructor = new NewFileContentConstructor(originalContent, isFinal)
    const lines = diffContent.split("\n")

    // If the last line looks like a marker but is not a complete one, drop it:
    // it may be a marker that is still being streamed.
    const lastLine = lines[lines.length - 1]
    const startsLikeMarker =
      lastLine.startsWith(SEARCH_BLOCK_CHAR) ||
      lastLine.startsWith(LEGACY_SEARCH_BLOCK_CHAR) ||
      lastLine.startsWith("=") ||
      lastLine.startsWith(REPLACE_BLOCK_CHAR) ||
      lastLine.startsWith(LEGACY_REPLACE_BLOCK_CHAR)
    // Completeness is judged with the same flexible patterns the line processor uses, so a
    // complete legacy or shorter marker (e.g. ">>>>>>> REPLACE") on the last line is kept
    // rather than discarded as if it were truncated.
    const isCompleteMarker = isSearchBlockStart(lastLine) || isSearchBlockEnd(lastLine) || isReplaceBlockEnd(lastLine)
    if (lines.length > 0 && startsLikeMarker && !isCompleteMarker) {
      lines.pop()
    }

    for (const line of lines) {
      newFileContentConstructor.processLine(line)
    }
    return newFileContentConstructor.getResult()
  }