// index.ts — apply_patch parser and applier
  1. import z from "zod"
  2. import * as path from "path"
  3. import * as fs from "fs/promises"
  4. import { readFileSync } from "fs"
  5. import { Log } from "../util/log"
  6. export namespace Patch {
  7. const log = Log.create({ service: "patch" })
  8. // Schema definitions
  9. export const PatchSchema = z.object({
  10. patchText: z.string().describe("The full patch text that describes all changes to be made"),
  11. })
  12. export type PatchParams = z.infer<typeof PatchSchema>
  13. // Core types matching the Rust implementation
  14. export interface ApplyPatchArgs {
  15. patch: string
  16. hunks: Hunk[]
  17. workdir?: string
  18. }
  19. export type Hunk =
  20. | { type: "add"; path: string; contents: string }
  21. | { type: "delete"; path: string }
  22. | { type: "update"; path: string; move_path?: string; chunks: UpdateFileChunk[] }
  23. export interface UpdateFileChunk {
  24. old_lines: string[]
  25. new_lines: string[]
  26. change_context?: string
  27. is_end_of_file?: boolean
  28. }
  29. export interface ApplyPatchAction {
  30. changes: Map<string, ApplyPatchFileChange>
  31. patch: string
  32. cwd: string
  33. }
  34. export type ApplyPatchFileChange =
  35. | { type: "add"; content: string }
  36. | { type: "delete"; content: string }
  37. | { type: "update"; unified_diff: string; move_path?: string; new_content: string }
  38. export interface AffectedPaths {
  39. added: string[]
  40. modified: string[]
  41. deleted: string[]
  42. }
  43. export enum ApplyPatchError {
  44. ParseError = "ParseError",
  45. IoError = "IoError",
  46. ComputeReplacements = "ComputeReplacements",
  47. ImplicitInvocation = "ImplicitInvocation",
  48. }
  49. export enum MaybeApplyPatch {
  50. Body = "Body",
  51. ShellParseError = "ShellParseError",
  52. PatchParseError = "PatchParseError",
  53. NotApplyPatch = "NotApplyPatch",
  54. }
  55. export enum MaybeApplyPatchVerified {
  56. Body = "Body",
  57. ShellParseError = "ShellParseError",
  58. CorrectnessError = "CorrectnessError",
  59. NotApplyPatch = "NotApplyPatch",
  60. }
  61. // Parser implementation
  62. function parsePatchHeader(
  63. lines: string[],
  64. startIdx: number,
  65. ): { filePath: string; movePath?: string; nextIdx: number } | null {
  66. const line = lines[startIdx]
  67. if (line.startsWith("*** Add File:")) {
  68. const filePath = line.split(":", 2)[1]?.trim()
  69. return filePath ? { filePath, nextIdx: startIdx + 1 } : null
  70. }
  71. if (line.startsWith("*** Delete File:")) {
  72. const filePath = line.split(":", 2)[1]?.trim()
  73. return filePath ? { filePath, nextIdx: startIdx + 1 } : null
  74. }
  75. if (line.startsWith("*** Update File:")) {
  76. const filePath = line.split(":", 2)[1]?.trim()
  77. let movePath: string | undefined
  78. let nextIdx = startIdx + 1
  79. // Check for move directive
  80. if (nextIdx < lines.length && lines[nextIdx].startsWith("*** Move to:")) {
  81. movePath = lines[nextIdx].split(":", 2)[1]?.trim()
  82. nextIdx++
  83. }
  84. return filePath ? { filePath, movePath, nextIdx } : null
  85. }
  86. return null
  87. }
  88. function parseUpdateFileChunks(lines: string[], startIdx: number): { chunks: UpdateFileChunk[]; nextIdx: number } {
  89. const chunks: UpdateFileChunk[] = []
  90. let i = startIdx
  91. while (i < lines.length && !lines[i].startsWith("***")) {
  92. if (lines[i].startsWith("@@")) {
  93. // Parse context line
  94. const contextLine = lines[i].substring(2).trim()
  95. i++
  96. const oldLines: string[] = []
  97. const newLines: string[] = []
  98. let isEndOfFile = false
  99. // Parse change lines
  100. while (i < lines.length && !lines[i].startsWith("@@") && !lines[i].startsWith("***")) {
  101. const changeLine = lines[i]
  102. if (changeLine === "*** End of File") {
  103. isEndOfFile = true
  104. i++
  105. break
  106. }
  107. if (changeLine.startsWith(" ")) {
  108. // Keep line - appears in both old and new
  109. const content = changeLine.substring(1)
  110. oldLines.push(content)
  111. newLines.push(content)
  112. } else if (changeLine.startsWith("-")) {
  113. // Remove line - only in old
  114. oldLines.push(changeLine.substring(1))
  115. } else if (changeLine.startsWith("+")) {
  116. // Add line - only in new
  117. newLines.push(changeLine.substring(1))
  118. }
  119. i++
  120. }
  121. chunks.push({
  122. old_lines: oldLines,
  123. new_lines: newLines,
  124. change_context: contextLine || undefined,
  125. is_end_of_file: isEndOfFile || undefined,
  126. })
  127. } else {
  128. i++
  129. }
  130. }
  131. return { chunks, nextIdx: i }
  132. }
  133. function parseAddFileContent(lines: string[], startIdx: number): { content: string; nextIdx: number } {
  134. let content = ""
  135. let i = startIdx
  136. while (i < lines.length && !lines[i].startsWith("***")) {
  137. if (lines[i].startsWith("+")) {
  138. content += lines[i].substring(1) + "\n"
  139. }
  140. i++
  141. }
  142. // Remove trailing newline
  143. if (content.endsWith("\n")) {
  144. content = content.slice(0, -1)
  145. }
  146. return { content, nextIdx: i }
  147. }
  148. function stripHeredoc(input: string): string {
  149. // Match heredoc patterns like: cat <<'EOF'\n...\nEOF or <<EOF\n...\nEOF
  150. const heredocMatch = input.match(/^(?:cat\s+)?<<['"]?(\w+)['"]?\s*\n([\s\S]*?)\n\1\s*$/)
  151. if (heredocMatch) {
  152. return heredocMatch[2]
  153. }
  154. return input
  155. }
  156. export function parsePatch(patchText: string): { hunks: Hunk[] } {
  157. const cleaned = stripHeredoc(patchText.trim())
  158. const lines = cleaned.split("\n")
  159. const hunks: Hunk[] = []
  160. let i = 0
  161. // Look for Begin/End patch markers
  162. const beginMarker = "*** Begin Patch"
  163. const endMarker = "*** End Patch"
  164. const beginIdx = lines.findIndex((line) => line.trim() === beginMarker)
  165. const endIdx = lines.findIndex((line) => line.trim() === endMarker)
  166. if (beginIdx === -1 || endIdx === -1 || beginIdx >= endIdx) {
  167. throw new Error("Invalid patch format: missing Begin/End markers")
  168. }
  169. // Parse content between markers
  170. i = beginIdx + 1
  171. while (i < endIdx) {
  172. const header = parsePatchHeader(lines, i)
  173. if (!header) {
  174. i++
  175. continue
  176. }
  177. if (lines[i].startsWith("*** Add File:")) {
  178. const { content, nextIdx } = parseAddFileContent(lines, header.nextIdx)
  179. hunks.push({
  180. type: "add",
  181. path: header.filePath,
  182. contents: content,
  183. })
  184. i = nextIdx
  185. } else if (lines[i].startsWith("*** Delete File:")) {
  186. hunks.push({
  187. type: "delete",
  188. path: header.filePath,
  189. })
  190. i = header.nextIdx
  191. } else if (lines[i].startsWith("*** Update File:")) {
  192. const { chunks, nextIdx } = parseUpdateFileChunks(lines, header.nextIdx)
  193. hunks.push({
  194. type: "update",
  195. path: header.filePath,
  196. move_path: header.movePath,
  197. chunks,
  198. })
  199. i = nextIdx
  200. } else {
  201. i++
  202. }
  203. }
  204. return { hunks }
  205. }
  206. // Apply patch functionality
  207. export function maybeParseApplyPatch(
  208. argv: string[],
  209. ):
  210. | { type: MaybeApplyPatch.Body; args: ApplyPatchArgs }
  211. | { type: MaybeApplyPatch.PatchParseError; error: Error }
  212. | { type: MaybeApplyPatch.NotApplyPatch } {
  213. const APPLY_PATCH_COMMANDS = ["apply_patch", "applypatch"]
  214. // Direct invocation: apply_patch <patch>
  215. if (argv.length === 2 && APPLY_PATCH_COMMANDS.includes(argv[0])) {
  216. try {
  217. const { hunks } = parsePatch(argv[1])
  218. return {
  219. type: MaybeApplyPatch.Body,
  220. args: {
  221. patch: argv[1],
  222. hunks,
  223. },
  224. }
  225. } catch (error) {
  226. return {
  227. type: MaybeApplyPatch.PatchParseError,
  228. error: error as Error,
  229. }
  230. }
  231. }
  232. // Bash heredoc form: bash -lc 'apply_patch <<"EOF" ...'
  233. if (argv.length === 3 && argv[0] === "bash" && argv[1] === "-lc") {
  234. // Simple extraction - in real implementation would need proper bash parsing
  235. const script = argv[2]
  236. const heredocMatch = script.match(/apply_patch\s*<<['"](\w+)['"]\s*\n([\s\S]*?)\n\1/)
  237. if (heredocMatch) {
  238. const patchContent = heredocMatch[2]
  239. try {
  240. const { hunks } = parsePatch(patchContent)
  241. return {
  242. type: MaybeApplyPatch.Body,
  243. args: {
  244. patch: patchContent,
  245. hunks,
  246. },
  247. }
  248. } catch (error) {
  249. return {
  250. type: MaybeApplyPatch.PatchParseError,
  251. error: error as Error,
  252. }
  253. }
  254. }
  255. }
  256. return { type: MaybeApplyPatch.NotApplyPatch }
  257. }
  258. // File content manipulation
  259. interface ApplyPatchFileUpdate {
  260. unified_diff: string
  261. content: string
  262. }
  263. export function deriveNewContentsFromChunks(filePath: string, chunks: UpdateFileChunk[]): ApplyPatchFileUpdate {
  264. // Read original file content
  265. let originalContent: string
  266. try {
  267. originalContent = readFileSync(filePath, "utf-8")
  268. } catch (error) {
  269. throw new Error(`Failed to read file ${filePath}: ${error}`)
  270. }
  271. let originalLines = originalContent.split("\n")
  272. // Drop trailing empty element for consistent line counting
  273. if (originalLines.length > 0 && originalLines[originalLines.length - 1] === "") {
  274. originalLines.pop()
  275. }
  276. const replacements = computeReplacements(originalLines, filePath, chunks)
  277. let newLines = applyReplacements(originalLines, replacements)
  278. // Ensure trailing newline
  279. if (newLines.length === 0 || newLines[newLines.length - 1] !== "") {
  280. newLines.push("")
  281. }
  282. const newContent = newLines.join("\n")
  283. // Generate unified diff
  284. const unifiedDiff = generateUnifiedDiff(originalContent, newContent)
  285. return {
  286. unified_diff: unifiedDiff,
  287. content: newContent,
  288. }
  289. }
  290. function computeReplacements(
  291. originalLines: string[],
  292. filePath: string,
  293. chunks: UpdateFileChunk[],
  294. ): Array<[number, number, string[]]> {
  295. const replacements: Array<[number, number, string[]]> = []
  296. let lineIndex = 0
  297. for (const chunk of chunks) {
  298. // Handle context-based seeking
  299. if (chunk.change_context) {
  300. const contextIdx = seekSequence(originalLines, [chunk.change_context], lineIndex)
  301. if (contextIdx === -1) {
  302. throw new Error(`Failed to find context '${chunk.change_context}' in ${filePath}`)
  303. }
  304. lineIndex = contextIdx + 1
  305. }
  306. // Handle pure addition (no old lines)
  307. if (chunk.old_lines.length === 0) {
  308. const insertionIdx =
  309. originalLines.length > 0 && originalLines[originalLines.length - 1] === ""
  310. ? originalLines.length - 1
  311. : originalLines.length
  312. replacements.push([insertionIdx, 0, chunk.new_lines])
  313. continue
  314. }
  315. // Try to match old lines in the file
  316. let pattern = chunk.old_lines
  317. let newSlice = chunk.new_lines
  318. let found = seekSequence(originalLines, pattern, lineIndex, chunk.is_end_of_file)
  319. // Retry without trailing empty line if not found
  320. if (found === -1 && pattern.length > 0 && pattern[pattern.length - 1] === "") {
  321. pattern = pattern.slice(0, -1)
  322. if (newSlice.length > 0 && newSlice[newSlice.length - 1] === "") {
  323. newSlice = newSlice.slice(0, -1)
  324. }
  325. found = seekSequence(originalLines, pattern, lineIndex, chunk.is_end_of_file)
  326. }
  327. if (found !== -1) {
  328. replacements.push([found, pattern.length, newSlice])
  329. lineIndex = found + pattern.length
  330. } else {
  331. throw new Error(`Failed to find expected lines in ${filePath}:\n${chunk.old_lines.join("\n")}`)
  332. }
  333. }
  334. // Sort replacements by index to apply in order
  335. replacements.sort((a, b) => a[0] - b[0])
  336. return replacements
  337. }
  338. function applyReplacements(lines: string[], replacements: Array<[number, number, string[]]>): string[] {
  339. // Apply replacements in reverse order to avoid index shifting
  340. const result = [...lines]
  341. for (let i = replacements.length - 1; i >= 0; i--) {
  342. const [startIdx, oldLen, newSegment] = replacements[i]
  343. // Remove old lines
  344. result.splice(startIdx, oldLen)
  345. // Insert new lines
  346. for (let j = 0; j < newSegment.length; j++) {
  347. result.splice(startIdx + j, 0, newSegment[j])
  348. }
  349. }
  350. return result
  351. }
  352. // Normalize Unicode punctuation to ASCII equivalents (like Rust's normalize_unicode)
  353. function normalizeUnicode(str: string): string {
  354. return str
  355. .replace(/[\u2018\u2019\u201A\u201B]/g, "'") // single quotes
  356. .replace(/[\u201C\u201D\u201E\u201F]/g, '"') // double quotes
  357. .replace(/[\u2010\u2011\u2012\u2013\u2014\u2015]/g, "-") // dashes
  358. .replace(/\u2026/g, "...") // ellipsis
  359. .replace(/\u00A0/g, " ") // non-breaking space
  360. }
  361. type Comparator = (a: string, b: string) => boolean
  362. function tryMatch(lines: string[], pattern: string[], startIndex: number, compare: Comparator, eof: boolean): number {
  363. // If EOF anchor, try matching from end of file first
  364. if (eof) {
  365. const fromEnd = lines.length - pattern.length
  366. if (fromEnd >= startIndex) {
  367. let matches = true
  368. for (let j = 0; j < pattern.length; j++) {
  369. if (!compare(lines[fromEnd + j], pattern[j])) {
  370. matches = false
  371. break
  372. }
  373. }
  374. if (matches) return fromEnd
  375. }
  376. }
  377. // Forward search from startIndex
  378. for (let i = startIndex; i <= lines.length - pattern.length; i++) {
  379. let matches = true
  380. for (let j = 0; j < pattern.length; j++) {
  381. if (!compare(lines[i + j], pattern[j])) {
  382. matches = false
  383. break
  384. }
  385. }
  386. if (matches) return i
  387. }
  388. return -1
  389. }
  390. function seekSequence(lines: string[], pattern: string[], startIndex: number, eof = false): number {
  391. if (pattern.length === 0) return -1
  392. // Pass 1: exact match
  393. const exact = tryMatch(lines, pattern, startIndex, (a, b) => a === b, eof)
  394. if (exact !== -1) return exact
  395. // Pass 2: rstrip (trim trailing whitespace)
  396. const rstrip = tryMatch(lines, pattern, startIndex, (a, b) => a.trimEnd() === b.trimEnd(), eof)
  397. if (rstrip !== -1) return rstrip
  398. // Pass 3: trim (both ends)
  399. const trim = tryMatch(lines, pattern, startIndex, (a, b) => a.trim() === b.trim(), eof)
  400. if (trim !== -1) return trim
  401. // Pass 4: normalized (Unicode punctuation to ASCII)
  402. const normalized = tryMatch(
  403. lines,
  404. pattern,
  405. startIndex,
  406. (a, b) => normalizeUnicode(a.trim()) === normalizeUnicode(b.trim()),
  407. eof,
  408. )
  409. return normalized
  410. }
  411. function generateUnifiedDiff(oldContent: string, newContent: string): string {
  412. const oldLines = oldContent.split("\n")
  413. const newLines = newContent.split("\n")
  414. // Simple diff generation - in a real implementation you'd use a proper diff algorithm
  415. let diff = "@@ -1 +1 @@\n"
  416. // Find changes (simplified approach)
  417. const maxLen = Math.max(oldLines.length, newLines.length)
  418. let hasChanges = false
  419. for (let i = 0; i < maxLen; i++) {
  420. const oldLine = oldLines[i] || ""
  421. const newLine = newLines[i] || ""
  422. if (oldLine !== newLine) {
  423. if (oldLine) diff += `-${oldLine}\n`
  424. if (newLine) diff += `+${newLine}\n`
  425. hasChanges = true
  426. } else if (oldLine) {
  427. diff += ` ${oldLine}\n`
  428. }
  429. }
  430. return hasChanges ? diff : ""
  431. }
  432. // Apply hunks to filesystem
  433. export async function applyHunksToFiles(hunks: Hunk[]): Promise<AffectedPaths> {
  434. if (hunks.length === 0) {
  435. throw new Error("No files were modified.")
  436. }
  437. const added: string[] = []
  438. const modified: string[] = []
  439. const deleted: string[] = []
  440. for (const hunk of hunks) {
  441. switch (hunk.type) {
  442. case "add":
  443. // Create parent directories
  444. const addDir = path.dirname(hunk.path)
  445. if (addDir !== "." && addDir !== "/") {
  446. await fs.mkdir(addDir, { recursive: true })
  447. }
  448. await fs.writeFile(hunk.path, hunk.contents, "utf-8")
  449. added.push(hunk.path)
  450. log.info(`Added file: ${hunk.path}`)
  451. break
  452. case "delete":
  453. await fs.unlink(hunk.path)
  454. deleted.push(hunk.path)
  455. log.info(`Deleted file: ${hunk.path}`)
  456. break
  457. case "update":
  458. const fileUpdate = deriveNewContentsFromChunks(hunk.path, hunk.chunks)
  459. if (hunk.move_path) {
  460. // Handle file move
  461. const moveDir = path.dirname(hunk.move_path)
  462. if (moveDir !== "." && moveDir !== "/") {
  463. await fs.mkdir(moveDir, { recursive: true })
  464. }
  465. await fs.writeFile(hunk.move_path, fileUpdate.content, "utf-8")
  466. await fs.unlink(hunk.path)
  467. modified.push(hunk.move_path)
  468. log.info(`Moved file: ${hunk.path} -> ${hunk.move_path}`)
  469. } else {
  470. // Regular update
  471. await fs.writeFile(hunk.path, fileUpdate.content, "utf-8")
  472. modified.push(hunk.path)
  473. log.info(`Updated file: ${hunk.path}`)
  474. }
  475. break
  476. }
  477. }
  478. return { added, modified, deleted }
  479. }
  480. // Main patch application function
  481. export async function applyPatch(patchText: string): Promise<AffectedPaths> {
  482. const { hunks } = parsePatch(patchText)
  483. return applyHunksToFiles(hunks)
  484. }
  485. // Async version of maybeParseApplyPatchVerified
  486. export async function maybeParseApplyPatchVerified(
  487. argv: string[],
  488. cwd: string,
  489. ): Promise<
  490. | { type: MaybeApplyPatchVerified.Body; action: ApplyPatchAction }
  491. | { type: MaybeApplyPatchVerified.CorrectnessError; error: Error }
  492. | { type: MaybeApplyPatchVerified.NotApplyPatch }
  493. > {
  494. // Detect implicit patch invocation (raw patch without apply_patch command)
  495. if (argv.length === 1) {
  496. try {
  497. parsePatch(argv[0])
  498. return {
  499. type: MaybeApplyPatchVerified.CorrectnessError,
  500. error: new Error(ApplyPatchError.ImplicitInvocation),
  501. }
  502. } catch {
  503. // Not a patch, continue
  504. }
  505. }
  506. const result = maybeParseApplyPatch(argv)
  507. switch (result.type) {
  508. case MaybeApplyPatch.Body:
  509. const { args } = result
  510. const effectiveCwd = args.workdir ? path.resolve(cwd, args.workdir) : cwd
  511. const changes = new Map<string, ApplyPatchFileChange>()
  512. for (const hunk of args.hunks) {
  513. const resolvedPath = path.resolve(
  514. effectiveCwd,
  515. hunk.type === "update" && hunk.move_path ? hunk.move_path : hunk.path,
  516. )
  517. switch (hunk.type) {
  518. case "add":
  519. changes.set(resolvedPath, {
  520. type: "add",
  521. content: hunk.contents,
  522. })
  523. break
  524. case "delete":
  525. // For delete, we need to read the current content
  526. const deletePath = path.resolve(effectiveCwd, hunk.path)
  527. try {
  528. const content = await fs.readFile(deletePath, "utf-8")
  529. changes.set(resolvedPath, {
  530. type: "delete",
  531. content,
  532. })
  533. } catch (error) {
  534. return {
  535. type: MaybeApplyPatchVerified.CorrectnessError,
  536. error: new Error(`Failed to read file for deletion: ${deletePath}`),
  537. }
  538. }
  539. break
  540. case "update":
  541. const updatePath = path.resolve(effectiveCwd, hunk.path)
  542. try {
  543. const fileUpdate = deriveNewContentsFromChunks(updatePath, hunk.chunks)
  544. changes.set(resolvedPath, {
  545. type: "update",
  546. unified_diff: fileUpdate.unified_diff,
  547. move_path: hunk.move_path ? path.resolve(effectiveCwd, hunk.move_path) : undefined,
  548. new_content: fileUpdate.content,
  549. })
  550. } catch (error) {
  551. return {
  552. type: MaybeApplyPatchVerified.CorrectnessError,
  553. error: error as Error,
  554. }
  555. }
  556. break
  557. }
  558. }
  559. return {
  560. type: MaybeApplyPatchVerified.Body,
  561. action: {
  562. changes,
  563. patch: args.patch,
  564. cwd: effectiveCwd,
  565. },
  566. }
  567. case MaybeApplyPatch.PatchParseError:
  568. return {
  569. type: MaybeApplyPatchVerified.CorrectnessError,
  570. error: result.error,
  571. }
  572. case MaybeApplyPatch.NotApplyPatch:
  573. return { type: MaybeApplyPatchVerified.NotApplyPatch }
  574. }
  575. }
  576. }