ClaudeDev.ts 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893
  1. import { Anthropic } from "@anthropic-ai/sdk"
  2. import defaultShell from "default-shell"
  3. import * as diff from "diff"
  4. import { execa, ExecaError } from "execa"
  5. import fs from "fs/promises"
  6. import os from "os"
  7. import osName from "os-name"
  8. import pWaitFor from "p-wait-for"
  9. import * as path from "path"
  10. import { serializeError } from "serialize-error"
  11. import * as vscode from "vscode"
  12. import { listFiles, parseSourceCodeForDefinitionsTopLevel } from "./parse-source-code"
  13. import { ClaudeDevProvider } from "./providers/ClaudeDevProvider"
  14. import { ClaudeRequestResult } from "./shared/ClaudeRequestResult"
  15. import { DEFAULT_MAX_REQUESTS_PER_TASK } from "./shared/Constants"
  16. import { ClaudeAsk, ClaudeSay, ClaudeSayTool } from "./shared/ExtensionMessage"
  17. import { Tool, ToolName } from "./shared/Tool"
  18. import { ClaudeAskResponse } from "./shared/WebviewMessage"
  /**
   * Builds the system prompt text.
   *
   * This is a function rather than a constant so that the interpolated environment
   * details (first workspace folder or the Desktop fallback, `process.cwd()`, OS name,
   * default shell, visible editors, open tabs) are read at call time, not at module load.
   *
   * The tool names mentioned in the prompt must match the `name` fields of the tool
   * definitions (e.g. `attempt_completion`), since the model is told to call them by name.
   */
  const SYSTEM_PROMPT =
    () => `You are Claude Dev, a highly skilled software developer with extensive knowledge in many programming languages, frameworks, design patterns, and best practices.
====
CAPABILITIES
- You can read and analyze code in various programming languages, and can write clean, efficient, and well-documented code.
- You can debug complex issues and provide detailed explanations, offering architectural insights and design patterns.
- You have access to tools that let you execute CLI commands on the user's computer, list files in a directory (top level or recursively), extract source code definitions, read and write files, and ask follow-up questions. These tools help you effectively accomplish a wide range of tasks, such as writing code, making edits or improvements to existing files, understanding the current state of a project, performing system operations, and much more.
- You can use the list_files_recursive tool to get an overview of the project's file structure, which can provide key insights into the project from directory/file names (how developers conceptualize and organize their code) or file extensions (the language used). The list_files_top_level tool is better suited for generic directories you don't necessarily need the nested structure of, like the Desktop.
- You can use the view_source_code_definitions_top_level tool to get an overview of source code definitions for all files at the top level of a specified directory. This can be particularly useful when you need to understand the broader context and relationships between certain parts of the code. You may need to call this tool multiple times to understand various parts of the codebase related to the task.
- For example, when asked to make edits or improvements you might use list_files_recursive to get an overview of the project's file structure, then view_source_code_definitions_top_level to get an overview of source code definitions for files located in relevant directories, then read_file to examine the contents of relevant files, analyze the code and suggest improvements or make necessary edits, then use the write_to_file tool to implement changes.
- The execute_command tool lets you run commands on the user's computer and should be used whenever you feel it can help accomplish the user's task. When you need to execute a CLI command, you must provide a clear explanation of what the command does. Prefer to execute complex CLI commands over creating executable scripts, since they are more flexible and easier to run. Interactive and long-running commands are allowed, since the user has the ability to send input to stdin and terminate the command on their own if needed.
====
RULES
- Unless otherwise specified by the user, you MUST accomplish your task within the following directory: ${
      vscode.workspace.workspaceFolders?.map((folder) => folder.uri.fsPath).at(0) ??
      path.join(os.homedir(), "Desktop")
    }
- Your current working directory is '${process.cwd()}', and you cannot \`cd\` into a different directory to complete a task. You are stuck operating from '${process.cwd()}', so be sure to pass in the appropriate 'path' parameter when using tools that require a path.
- If you do not know the contents of an existing file you need to edit, use the read_file tool to help you make informed changes. However if you have already read or written to this file before, you can assume its contents have not changed since then so you would not need to call the read_file tool beforehand.
- When editing files, always provide the complete file content in your response, regardless of the extent of changes. The system handles diff generation automatically.
- Before using the execute_command tool, you must first think about the SYSTEM INFORMATION context provided to understand the user's environment and tailor your commands to ensure they are compatible with their system.
- When creating a new project (such as an app, website, or any software project), unless the user specifies otherwise, organize all new files within a dedicated project directory. Use appropriate file paths when writing files, as the write_to_file tool will automatically create any necessary directories. Structure the project logically, adhering to best practices for the specific type of project being created. Unless otherwise specified, new projects should be easily run without additional setup, for example most projects can be built in HTML, CSS, and JavaScript - which you can open in a browser.
- You must try to use multiple tools in one request when possible. For example if you were to create a website, you would use the write_to_file tool to create the necessary files with their appropriate contents all at once. Or if you wanted to analyze a project, you could use the read_file tool multiple times to look at several key files. This will help you accomplish the user's task more efficiently.
- Be sure to consider the type of project (e.g. Python, JavaScript, web application) when determining the appropriate structure and files to include. Also consider what files may be most relevant to accomplishing the task, for example looking at a project's manifest file would help you understand the project's dependencies, which you could incorporate into any code you write.
- When making changes to code, always consider the context in which the code is being used. Ensure that your changes are compatible with the existing codebase and that they follow the project's coding standards and best practices.
- Do not ask for more information than necessary. Use the tools provided to accomplish the user's request efficiently and effectively. When you've completed your task, you must use the attempt_completion tool to present the result to the user. The user may provide feedback, which you can use to make improvements and try again.
- You are only allowed to ask the user questions using the ask_followup_question tool. Use this tool only when you need additional details to complete a task, and be sure to use a clear and concise question that will help you move forward with the task.
- Your goal is to try to accomplish the user's task, NOT engage in a back and forth conversation.
- NEVER end attempt_completion with a question or request to engage in further conversation! Formulate the end of your result in a way that is final and does not require further input from the user.
- NEVER start your responses with affirmations like "Certainly", "Okay", "Sure", "Great", etc. You should NOT be conversational in your responses, but rather direct and to the point.
- Feel free to use markdown as much as you'd like in your responses. When using code blocks, always include a language specifier.
====
OBJECTIVE
You accomplish a given task iteratively, breaking it down into clear steps and working through them methodically.
1. Analyze the user's task and set clear, achievable goals to accomplish it. Prioritize these goals in a logical order.
2. Work through these goals sequentially, utilizing available tools as necessary. Each goal should correspond to a distinct step in your problem-solving process.
3. Remember, you have extensive capabilities with access to a wide range of tools that can be used in powerful and clever ways as necessary to accomplish each goal. Before calling a tool, do some analysis within <thinking></thinking> tags. First, think about which of the provided tools is the relevant tool to answer the user's request. Second, go through each of the required parameters of the relevant tool and determine if the user has directly provided or given enough information to infer a value. When deciding if the parameter can be inferred, carefully consider all the context to see if it supports a specific value. If all of the required parameters are present or can be reasonably inferred, close the thinking tag and proceed with the tool call. BUT, if one of the values for a required parameter is missing, DO NOT invoke the function (not even with fillers for the missing params) and instead, ask the user to provide the missing parameters using the ask_followup_question tool. DO NOT ask for more information on optional parameters if it is not provided.
4. Once you've completed the user's task, you must use the attempt_completion tool to present the result of the task to the user. You may also provide a CLI command to showcase the result of your task; this can be particularly useful for web development tasks, where you can run e.g. \`open index.html\` to show the website you've built.
5. The user may provide feedback, which you can use to make improvements and try again. But DO NOT continue in pointless back and forth conversations, i.e. don't end your responses with questions or offers for further assistance.
====
SYSTEM INFORMATION
Operating System: ${osName()}
Default Shell: ${defaultShell}
VSCode Visible Files: ${
      vscode.window.visibleTextEditors
        ?.map((editor) => editor.document?.uri?.fsPath)
        .filter(Boolean)
        .join(", ") || "(No files open)"
    }
VSCode Opened Tabs: ${
      vscode.window.tabGroups.all
        .flatMap((group) => group.tabs)
        .map((tab) => (tab.input as vscode.TabInputText)?.uri?.fsPath)
        .filter(Boolean)
        .join(", ") || "(No tabs open)"
    }
`
  /** Describes a single string-typed tool parameter. */
  const toolStringParam = (description: string) => ({ type: "string" as const, description })

  /** Wraps parameter descriptors in the object-shaped input schema shared by every tool below. */
  const toolInputSchema = (
    properties: Record<string, ReturnType<typeof toolStringParam>>,
    required: string[]
  ) => ({
    type: "object" as const,
    properties,
    required,
  })

  /**
   * Tool definitions (name, description, input schema) for the eight tools that
   * `ClaudeDev.executeTool` knows how to dispatch.
   */
  const tools: Tool[] = [
    {
      name: "execute_command",
      description:
        "Execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run.",
      input_schema: toolInputSchema(
        {
          command: toolStringParam(
            "The CLI command to execute. This should be valid for the current operating system. Ensure the command is properly formatted and does not contain any harmful instructions."
          ),
        },
        ["command"]
      ),
    },
    {
      name: "list_files_top_level",
      description:
        "List all files and directories at the top level of the specified directory. This should only be used for generic directories you don't necessarily need the nested structure of, like the Desktop.",
      input_schema: toolInputSchema(
        { path: toolStringParam("The path of the directory to list contents for.") },
        ["path"]
      ),
    },
    {
      name: "list_files_recursive",
      description:
        "Recursively list all files and directories within the specified directory. This provides a comprehensive view of the project structure, and can guide decision-making on which files to process or explore further.",
      input_schema: toolInputSchema(
        { path: toolStringParam("The path of the directory to recursively list contents for.") },
        ["path"]
      ),
    },
    {
      name: "view_source_code_definitions_top_level",
      description:
        "Parse all source code files at the top level of the specified directory to extract names of key elements like classes and functions. This tool provides insights into the codebase structure and important constructs, encapsulating high-level concepts and relationships that are crucial for understanding the overall architecture.",
      input_schema: toolInputSchema(
        {
          path: toolStringParam(
            "The path of the directory to parse top level source code files for to view their definitions."
          ),
        },
        ["path"]
      ),
    },
    {
      name: "read_file",
      description:
        "Read the contents of a file at the specified path. Use this when you need to examine the contents of an existing file, for example to analyze code, review text files, or extract information from configuration files. Be aware that this tool may not be suitable for very large files or binary files, as it returns the raw content as a string.",
      input_schema: toolInputSchema({ path: toolStringParam("The path of the file to read.") }, ["path"]),
    },
    {
      name: "write_to_file",
      description:
        "Write content to a file at the specified path. If the file exists, only the necessary changes will be applied. If the file doesn't exist, it will be created. Always provide the full intended content of the file. This tool will automatically create any directories needed to write the file.",
      input_schema: toolInputSchema(
        {
          path: toolStringParam("The path of the file to write to."),
          content: toolStringParam("The full content to write to the file"),
        },
        ["path", "content"]
      ),
    },
    {
      name: "ask_followup_question",
      description:
        "Ask the user a question to gather additional information needed to complete the task. This tool should be used when you encounter ambiguities, need clarification, or require more details to proceed effectively. It allows for interactive problem-solving by enabling direct communication with the user. Use this tool judiciously to maintain a balance between gathering necessary information and avoiding excessive back-and-forth.",
      input_schema: toolInputSchema(
        {
          question: toolStringParam(
            "The question to ask the user. This should be a clear, specific question that addresses the information you need."
          ),
        },
        ["question"]
      ),
    },
    {
      name: "attempt_completion",
      description:
        "Once you've completed the task, use this tool to present the result to the user. They may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again.",
      input_schema: toolInputSchema(
        {
          // `command` is optional: only `result` is listed as required.
          command: toolStringParam(
            "The CLI command to execute to show a live demo of the result to the user. For example, use 'open -a \"Google Chrome\" index.html' to display a created website. Avoid commands that run indefinitely (like servers) that don't terminate on their own. Instead, if such a command is needed, include instructions for the user to run it in the 'result' parameter."
          ),
          result: toolStringParam(
            "The result of the task. Formulate this result in a way that is final and does not require further input from the user. Don't end your result with questions or offers for further assistance."
          ),
        },
        ["result"]
      ),
    },
  ]
  /**
   * Runs one user task: owns the Anthropic client, relays "ask"/"say" messages to the
   * provider, and executes the tools requested during the task.
   */
  export class ClaudeDev {
  // Anthropic SDK client; rebuilt whenever the API key is updated.
  private client: Anthropic
  // Upper bound compared against requestCount by the task loop in startTask.
  private maxRequestsPerTask: number
  // Requests made so far for this task. NOTE(review): incremented outside the code visible here — confirm.
  private requestCount = 0
  // Response to the pending ask; set by handleWebviewAskResponse, cleared by ask().
  private askResponse?: ClaudeAskResponse
  // Optional free-text that accompanied the response to the pending ask.
  private askResponseText?: string
  // Timestamp of the most recent ask/say; ask() uses it to detect that it was superseded.
  private lastMessageTs?: number
  // Weak reference to the owning provider, dereferenced on every use.
  private providerRef: WeakRef<ClaudeDevProvider>
  // When true, ask() and say() throw instead of posting messages.
  abort: boolean = false
  /**
   * Creates the task runner and immediately starts working on `task` in the background.
   *
   * @param provider Owning provider; only a weak reference to it is kept.
   * @param task The user's task description.
   * @param apiKey Anthropic API key used to build the SDK client.
   * @param maxRequestsPerTask Optional request cap; DEFAULT_MAX_REQUESTS_PER_TASK is used when omitted.
   */
  constructor(provider: ClaudeDevProvider, task: string, apiKey: string, maxRequestsPerTask?: number) {
    this.providerRef = new WeakRef(provider)
    this.client = new Anthropic({ apiKey })
    this.maxRequestsPerTask = maxRequestsPerTask ?? DEFAULT_MAX_REQUESTS_PER_TASK
    // A constructor cannot await, so the task runs as a background promise. Attach a
    // rejection handler so a failure does not become an unhandled promise rejection.
    // Rejections after an abort are expected (ask/say throw on purpose) and are ignored.
    this.startTask(task).catch((error: unknown) => {
      if (!this.abort) {
        console.error("ClaudeDev task failed:", error)
      }
    })
  }
  /**
   * Replaces the Anthropic client with one built from the given API key.
   *
   * @param apiKey The new API key.
   */
  updateApiKey(apiKey: string) {
    const refreshedClient = new Anthropic({ apiKey })
    this.client = refreshedClient
  }
  /**
   * Updates the per-task request cap.
   *
   * @param maxRequestsPerTask New cap; when nullish, DEFAULT_MAX_REQUESTS_PER_TASK is used.
   */
  updateMaxRequestsPerTask(maxRequestsPerTask: number | undefined) {
    if (maxRequestsPerTask == null) {
      this.maxRequestsPerTask = DEFAULT_MAX_REQUESTS_PER_TASK
      return
    }
    this.maxRequestsPerTask = maxRequestsPerTask
  }
  /**
   * Records the user's answer to the pending ask so the poll inside `ask` can pick it up.
   *
   * @param askResponse Which kind of response the user gave.
   * @param text Optional free-text that accompanied the response.
   */
  async handleWebviewAskResponse(askResponse: ClaudeAskResponse, text?: string) {
    // Both fields are set synchronously, so the poller never sees one without the other.
    this.askResponseText = text
    this.askResponse = askResponse
  }
  /**
   * Posts an "ask" message to the provider and waits for the user's response.
   *
   * @param type The kind of ask being sent.
   * @param question Text of the ask.
   * @returns The user's response and any accompanying text.
   * @throws Error("ClaudeDev instance aborted") if this instance was aborted before the ask was sent.
   * @throws Error("Current ask promise was ignored") if a newer ask/say replaced this one while waiting.
   */
  async ask(type: ClaudeAsk, question: string): Promise<{ response: ClaudeAskResponse; text?: string }> {
    // An aborted instance may still have promises running in the background; their results
    // belong to a conversation the webview no longer shows, so bail out instead of posting.
    if (this.abort) {
      throw new Error("ClaudeDev instance aborted")
    }

    // Drop any stale answer left over from a previous ask.
    this.askResponse = undefined
    this.askResponseText = undefined

    const requestTs = Date.now()
    this.lastMessageTs = requestTs
    await this.providerRef.deref()?.addClaudeMessage({ ts: requestTs, type: "ask", ask: type, text: question })
    await this.providerRef.deref()?.postStateToWebview()

    // Poll until an answer arrives or a newer message takes over lastMessageTs.
    // NOTE(review): the predicate does not look at `abort`, so an instance aborted while
    // waiting keeps polling — confirm whether that is intended.
    const wasSuperseded = () => this.lastMessageTs !== requestTs
    await pWaitFor(() => this.askResponse !== undefined || wasSuperseded(), { interval: 100 })

    // Can happen when several asks are sent in a row (e.g. command_output); callers that
    // expect this must handle the failure gracefully.
    if (wasSuperseded()) {
      throw new Error("Current ask promise was ignored")
    }

    const response = this.askResponse!
    const text = this.askResponseText
    this.askResponse = undefined
    this.askResponseText = undefined
    return { response, text }
  }
  /**
   * Posts a "say" message to the provider and refreshes the webview state.
   *
   * @param type The kind of message.
   * @param text Optional message text.
   * @throws Error("ClaudeDev instance aborted") if this instance has been aborted.
   */
  async say(type: ClaudeSay, text?: string): Promise<undefined> {
    if (this.abort) {
      throw new Error("ClaudeDev instance aborted")
    }

    const messageTs = Date.now()
    // Updating lastMessageTs also invalidates any ask that is still waiting.
    this.lastMessageTs = messageTs
    await this.providerRef.deref()?.addClaudeMessage({ ts: messageTs, type: "say", say: type, text: text })
    await this.providerRef.deref()?.postStateToWebview()
  }
  /**
   * Resets the provider's message list, announces the task, and drives the request loop
   * until the task ends or the request cap is reached.
   *
   * @param task The user's task description.
   */
  private async startTask(task: string): Promise<void> {
    // The API conversation history and the webview's claudeMessages must stay in sync.
    // After an extension-process restart claudeMessages may still hold a previous session,
    // so clear it whenever a new ClaudeDev starts (otherwise the webview shows stale messages).
    await this.providerRef.deref()?.setClaudeMessages(undefined)
    await this.providerRef.deref()?.postStateToWebview()

    // Only this first prompt carries the task; later iterations send the nudge below.
    let nextPrompt = `Task: \"${task}\"`

    // TODO: create tools that let Claude interact with VSCode (e.g. open a file, list open files, etc.)
    //const openFiles = vscode.window.visibleTextEditors?.map((editor) => editor.document.uri.fsPath).join("\n")

    await this.say("text", task)

    let totalInputTokens = 0
    let totalOutputTokens = 0

    // Agentic loop: Claude gets the task and calls tools; tool results are fed back until it
    // either calls attempt_completion or stops using tools. In the latter case it is asked
    // to decide whether it is done. maxRequestsPerTask bounds the number of requests.
    while (this.requestCount < this.maxRequestsPerTask) {
      const outcome = await this.recursivelyMakeClaudeRequests([{ type: "text", text: nextPrompt }])
      totalInputTokens += outcome.inputTokens
      totalOutputTokens += outcome.outputTokens

      //const totalCost = this.calculateApiCost(totalInputTokens, totalOutputTokens)
      if (outcome.didEndLoop) {
        //this.say("task_completed", `Task completed. Total API usage cost: ${totalCost}`)
        break
      }

      // this.say(
      // "tool",
      // "Claude responded with only text blocks but has not called attempt_completion yet. Forcing him to continue with task..."
      // )
      nextPrompt =
        "Ask yourself if you have completed the user's task. If you have, use the attempt_completion tool, otherwise proceed to the next step. (This is an automated message, so do not respond to it conversationally. Just proceed with the task.)"
    }
  }
  /**
   * Dispatches a tool call to the method that implements it.
   *
   * @param toolName Name of the tool to run.
   * @param toolInput Tool arguments; only the fields the selected tool needs are read.
   * @param isLastWriteToFile Forwarded to writeToFile as its `isLast` flag.
   * @returns The tool's textual result, or an "Unknown tool" message for unrecognised names.
   */
  async executeTool(toolName: ToolName, toolInput: any, isLastWriteToFile: boolean = false): Promise<string> {
    if (toolName === "write_to_file") {
      return this.writeToFile(toolInput.path, toolInput.content, isLastWriteToFile)
    }
    if (toolName === "read_file") {
      return this.readFile(toolInput.path)
    }
    if (toolName === "list_files_top_level") {
      return this.listFilesTopLevel(toolInput.path)
    }
    if (toolName === "list_files_recursive") {
      return this.listFilesRecursive(toolInput.path)
    }
    if (toolName === "view_source_code_definitions_top_level") {
      return this.viewSourceCodeDefinitionsTopLevel(toolInput.path)
    }
    if (toolName === "execute_command") {
      return this.executeCommand(toolInput.command)
    }
    if (toolName === "ask_followup_question") {
      return this.askFollowupQuestion(toolInput.question)
    }
    if (toolName === "attempt_completion") {
      return this.attemptCompletion(toolInput.result, toolInput.command)
    }
    return `Unknown tool: ${toolName}`
  }
  /**
   * Calculates the dollar cost of a Claude 3.5 Sonnet API request.
   *
   * @param inputTokens Number of input tokens.
   * @param outputTokens Number of output tokens.
   * @returns Cost at $3 per million input tokens plus $15 per million output tokens.
   */
  calculateApiCost(inputTokens: number, outputTokens: number): number {
    const TOKENS_PER_MILLION = 1_000_000
    const USD_PER_MILLION_INPUT = 3.0
    const USD_PER_MILLION_OUTPUT = 15.0
    return (
      (inputTokens / TOKENS_PER_MILLION) * USD_PER_MILLION_INPUT +
      (outputTokens / TOKENS_PER_MILLION) * USD_PER_MILLION_OUTPUT
    )
  }
  /**
   * Shows the proposed content in a diff editor, asks the user to approve it, and on
   * approval writes the file (creating parent directories for a new file).
   *
   * @param filePath Path of the file to create or overwrite.
   * @param newContent Full intended content of the file.
   * @param isLast When true, the claude-dev diff tabs are closed once this write is resolved.
   * @returns A message describing the applied change, the user's denial, or the error.
   */
  async writeToFile(filePath: string, newContent: string, isLast: boolean): Promise<string> {
    // Virtual document URI: the claude-dev-diff scheme (registered in extension.ts) turns the
    // base64 query back into a text document. Keeping the file name, with its extension, in
    // the URI lets VS Code pick the language for syntax highlighting.
    const virtualDocUri = (name: string, content: string) =>
      vscode.Uri.parse(`claude-dev-diff:${name}`).with({
        query: Buffer.from(content).toString("base64"),
      })

    // Shared handling for any response other than approval.
    const describeDenial = async (response: ClaudeAskResponse, text?: string): Promise<string> => {
      if (isLast) {
        await this.closeDiffViews()
      }
      if (response === "textResponse" && text) {
        await this.say("user_feedback", text)
        return `The user denied this operation and provided the following feedback:\n\"${text}\"`
      }
      return "The user denied this operation."
    }

    try {
      let fileExists = true
      try {
        await fs.access(filePath)
      } catch {
        fileExists = false
      }
      const fileName = path.basename(filePath)

      if (!fileExists) {
        // New file: diff an empty virtual document against the proposed content.
        vscode.commands.executeCommand(
          "vscode.diff",
          virtualDocUri(fileName, ""),
          virtualDocUri(fileName, newContent),
          `${fileName}: New File`
        )
        const { response, text } = await this.ask(
          "tool",
          JSON.stringify({ tool: "newFileCreated", path: filePath, content: newContent } as ClaudeSayTool)
        )
        if (response !== "yesButtonTapped") {
          // Awaited here so failures still land in the catch below.
          return await describeDenial(response, text)
        }
        await fs.mkdir(path.dirname(filePath), { recursive: true })
        await fs.writeFile(filePath, newContent)
        await vscode.window.showTextDocument(vscode.Uri.file(filePath), { preview: false })
        if (isLast) {
          await this.closeDiffViews()
        }
        return `New file created and content written to ${filePath}`
      }

      const originalContent = await fs.readFile(filePath, "utf-8")
      // Claude tends to drop the trailing newline; restore it when the original had one.
      if (originalContent.endsWith("\n") && !newContent.endsWith("\n")) {
        newContent += "\n"
      }

      // Condensed patch that is returned to Claude.
      const diffResult = diff.createPatch(filePath, originalContent, newContent)

      // Full line-by-line representation for the webview: "+" added, "-" removed, " " unchanged.
      const renderedParts: string[] = []
      for (const change of diff.diffLines(originalContent, newContent)) {
        let marker = " "
        if (change.added) {
          marker = "+"
        } else if (change.removed) {
          marker = "-"
        }
        const markedLines = (change.value || "").split("\n").map((line) => (line ? marker + line : ""))
        renderedParts.push(markedLines.join("\n"))
      }
      const diffRepresentation = renderedParts.join("")

      // Open the diff editor: file on disk versus virtual document with the suggestion.
      vscode.commands.executeCommand(
        "vscode.diff",
        vscode.Uri.file(filePath),
        virtualDocUri(fileName, newContent),
        `${fileName}: Original ↔ Suggested Changes`
      )
      const { response, text } = await this.ask(
        "tool",
        JSON.stringify({
          tool: "editedExistingFile",
          path: filePath,
          diff: diffRepresentation,
        } as ClaudeSayTool)
      )
      if (response !== "yesButtonTapped") {
        // Awaited here so failures still land in the catch below.
        return await describeDenial(response, text)
      }

      await fs.writeFile(filePath, newContent)
      // Finish by opening the edited file in the editor.
      await vscode.window.showTextDocument(vscode.Uri.file(filePath), { preview: false })
      if (isLast) {
        await this.closeDiffViews()
      }
      return `Changes applied to ${filePath}:\n${diffResult}`
    } catch (error) {
      const errorString = `Error writing file: ${JSON.stringify(serializeError(error))}`
      this.say("error", `Error writing file:\n${error.message ?? JSON.stringify(serializeError(error), null, 2)}`)
      return errorString
    }
  }
  /**
   * Closes, one at a time, every diff tab whose modified side uses the
   * "claude-dev-diff" URI scheme.
   */
  async closeDiffViews() {
    const isClaudeDiffTab = (tab: vscode.Tab) =>
      tab.input instanceof vscode.TabInputTextDiff && tab.input?.modified?.scheme === "claude-dev-diff"

    const diffTabs = vscode.window.tabGroups.all.flatMap((group) => group.tabs).filter(isClaudeDiffTab)
    for (const diffTab of diffTabs) {
      await vscode.window.tabGroups.close(diffTab)
    }
  }
  /**
   * Reads a file as UTF-8 and returns its content once the user approves.
   * The file is read before the approval prompt, because the content is part of the ask.
   *
   * @param filePath Path of the file to read.
   * @returns The file content, a denial message, or an error description.
   */
  async readFile(filePath: string): Promise<string> {
    try {
      const content = await fs.readFile(filePath, "utf-8")
      const approval = await this.ask(
        "tool",
        JSON.stringify({ tool: "readFile", path: filePath, content } as ClaudeSayTool)
      )
      if (approval.response === "yesButtonTapped") {
        return content
      }
      if (approval.response === "textResponse" && approval.text) {
        await this.say("user_feedback", approval.text)
        return `The user denied this operation and provided the following feedback:\n\"${approval.text}\"`
      }
      return "The user denied this operation."
    } catch (error) {
      const errorString = `Error reading file: ${JSON.stringify(serializeError(error))}`
      this.say("error", `Error reading file:\n${error.message ?? JSON.stringify(serializeError(error), null, 2)}`)
      return errorString
    }
  }
  /**
   * Lists the top-level entries of a directory as paths relative to it, directories first,
   * and returns the listing once the user approves.
   *
   * @param dirPath Directory whose top-level contents are listed.
   * @returns Newline-separated listing, a denial message, or an error description.
   */
  async listFilesTopLevel(dirPath: string): Promise<string> {
    // Directories (marked by a trailing "/") sort before files; ties are broken by a
    // case-insensitive, numeric-aware name comparison.
    const directoriesFirst = (a: string, b: string) => {
      const aIsDir = a.endsWith("/")
      const bIsDir = b.endsWith("/")
      if (aIsDir === bIsDir) {
        return a.localeCompare(b, undefined, { numeric: true, sensitivity: "base" })
      }
      return aIsDir ? -1 : 1
    }

    try {
      const files = await listFiles(dirPath, false)
      const entries = files.map((file) => {
        const relativePath = path.relative(dirPath, file)
        // path.relative drops the trailing slash, so put it back for directories.
        return file.endsWith("/") ? relativePath + "/" : relativePath
      })
      entries.sort(directoriesFirst)
      const result = entries.join("\n")

      const approval = await this.ask(
        "tool",
        JSON.stringify({ tool: "listFilesTopLevel", path: dirPath, content: result } as ClaudeSayTool)
      )
      if (approval.response === "yesButtonTapped") {
        return result
      }
      if (approval.response === "textResponse" && approval.text) {
        await this.say("user_feedback", approval.text)
        return `The user denied this operation and provided the following feedback:\n\"${approval.text}\"`
      }
      return "The user denied this operation."
    } catch (error) {
      const errorString = `Error listing files and directories: ${JSON.stringify(serializeError(error))}`
      this.say(
        "error",
        `Error listing files and directories:\n${
          error.message ?? JSON.stringify(serializeError(error), null, 2)
        }`
      )
      return errorString
    }
  }
  /**
   * Lists files under a directory with listFiles called in its `true`
   * (second argument) mode, expresses each path relative to `dirPath`, and
   * asks the user to approve the "listFilesRecursive" tool call.
   *
   * @param dirPath Directory to list; also the base for the relative paths.
   * @returns The newline-joined relative paths on approval, a denial message
   * otherwise, or an "Error listing files recursively: ..." string on failure.
   */
  async listFilesRecursive(dirPath: string): Promise<string> {
    try {
      const files = await listFiles(dirPath, true)
      const relativePaths: string[] = []
      for (const file of files) {
        relativePaths.push(path.relative(dirPath, file))
      }
      const result = relativePaths.join("\n")
      const approvalRequest = JSON.stringify({
        tool: "listFilesRecursive",
        path: dirPath,
        content: result,
      } as ClaudeSayTool)
      const { response, text } = await this.ask("tool", approvalRequest)
      if (response === "yesButtonTapped") {
        return result
      }
      if (response === "textResponse" && text) {
        await this.say("user_feedback", text)
        return `The user denied this operation and provided the following feedback:\n\"${text}\"`
      }
      return "The user denied this operation."
    } catch (error) {
      const failure = `Error listing files recursively: ${JSON.stringify(serializeError(error))}`
      const shown = error.message ?? JSON.stringify(serializeError(error), null, 2)
      this.say("error", `Error listing files recursively:\n${shown}`)
      return failure
    }
  }
  /**
   * Runs parseSourceCodeForDefinitionsTopLevel on a directory and asks the
   * user to approve the "viewSourceCodeDefinitionsTopLevel" tool call, with
   * the parse result attached to the approval request.
   *
   * @param dirPath Directory handed to the parser.
   * @returns The parser's result on approval, a denial message otherwise, or
   * an "Error parsing source code definitions: ..." string on failure.
   */
  async viewSourceCodeDefinitionsTopLevel(dirPath: string): Promise<string> {
    try {
      const result = await parseSourceCodeForDefinitionsTopLevel(dirPath)
      const approvalRequest = JSON.stringify({
        tool: "viewSourceCodeDefinitionsTopLevel",
        path: dirPath,
        content: result,
      } as ClaudeSayTool)
      const { response, text } = await this.ask("tool", approvalRequest)
      if (response === "yesButtonTapped") {
        return result
      }
      if (response === "textResponse" && text) {
        await this.say("user_feedback", text)
        return `The user denied this operation and provided the following feedback:\n\"${text}\"`
      }
      return "The user denied this operation."
    } catch (error) {
      const failure = `Error parsing source code definitions: ${JSON.stringify(serializeError(error))}`
      const shown = error.message ?? JSON.stringify(serializeError(error), null, 2)
      this.say("error", `Error parsing source code definitions:\n${shown}`)
      return failure
    }
  }
  /**
   * Asks the user to approve a shell command, runs it, and streams its output
   * to the user while it runs.
   *
   * While the command is running, each output chunk is sent as a
   * "command_output" ask. If the user answers one of those asks with the
   * primary button the process is sent SIGINT; if they answer with text, that
   * text (plus a newline) is written to the process's stdin.
   *
   * @param command The command line to run through the shell.
   * @param returnEmptyStringOnSuccess When true, a successful run returns ""
   * instead of the collected output (used by attemptCompletion).
   * @returns A denial message if the user rejects the command, "Command Output:\n..."
   * (or "") when the command finishes or is interrupted by the user, or
   * "Error executing command:\n..." when it fails for any other reason.
   */
  async executeCommand(command: string, returnEmptyStringOnSuccess: boolean = false): Promise<string> {
    const { response, text } = await this.ask("command", command)
    if (response !== "yesButtonTapped") {
      if (response === "textResponse" && text) {
        await this.say("user_feedback", text)
        return `The user denied this operation and provided the following feedback:\n\"${text}\"`
      }
      return "The user denied this operation."
    }
    try {
      let result = ""
      // `shell: true` makes execa run the command through sh on unix or cmd.exe on windows.
      // The template-literal form of execa handles escaping of the interpolated value.
      // execa returns an object that is both a promise and a Subprocess (with properties like stdin).
      const subprocess = execa({ shell: true })`${command}`
      try {
        for await (const chunk of subprocess) {
          const line = chunk.toString()
          // stream output to the user in realtime
          // deliberately not awaited: we are not waiting for a response
          this.ask("command_output", line)
            .then(({ response: outputResponse, text: userInput }) => {
              // reaching here means the user responded to this ask, either by clicking the primary button or by typing text
              if (outputResponse === "yesButtonTapped") {
                // SIGINT is typically what's sent when a user interrupts a process (like pressing Ctrl+C)
                subprocess.kill("SIGINT") // will result in the for loop throwing an error
              } else if (userInput != null) {
                // forward the user's input to the command's stdin;
                // add a newline as cli programs expect a newline after each input.
                // Skipped when there is no text, so the literal "undefined" is never sent to the process.
                subprocess.stdin?.write(userInput + "\n")
              }
            })
            .catch(() => {
              // this can only happen if this ask promise was ignored, so ignore this error
            })
          result += `${line}\n`
        }
      } catch (e) {
        if ((e as ExecaError).signal === "SIGINT") {
          const line = `\nUser exited command...`
          await this.say("command_output", line)
          result += line
        } else {
          throw e // if the command was not terminated by the user, let the outer catch handle it as a real error
        }
      }
      // for attemptCompletion, we don't want to return the command output
      if (returnEmptyStringOnSuccess) {
        return ""
      }
      return `Command Output:\n${result}`
    } catch (e) {
      // Prefer the thrown value's message; fall back to a serialized dump when it has none.
      const errorMessage =
        (e as { message?: string } | null | undefined)?.message || JSON.stringify(serializeError(e), null, 2)
      const errorString = `Error executing command:\n${errorMessage}`
      this.say("error", `Error executing command:\n${errorMessage}`) // TODO: in webview show code block for command errors
      return errorString
    }
  }
  /**
   * Asks the user a follow-up question and returns their answer.
   *
   * A missing answer is treated as an empty string in both the recorded
   * "user_feedback" message and the returned text, so the literal
   * "undefined" never ends up in the returned string.
   *
   * @param question The question to show to the user.
   * @returns `User's response:\n"<answer>"`.
   */
  async askFollowupQuestion(question: string): Promise<string> {
    const { text } = await this.ask("followup", question)
    const answer = text ?? ""
    await this.say("user_feedback", answer)
    return `User's response:\n\"${answer}\"`
  }
  /**
   * Presents a completion result to the user, optionally running a command
   * first, and reports whether the user accepted it.
   *
   * When `command` is given, the result is first shown via say, then the
   * command is executed; a non-empty string from executeCommand (rejection or
   * failure) is returned as-is. Otherwise the "completion_result" ask is sent
   * (with an empty text if the result was already shown).
   *
   * @param result The completion text to present.
   * @param command Optional command to run before asking for acceptance.
   * @returns "" when the user accepts; otherwise the command's rejection or
   * failure message, or a message carrying the user's feedback. Missing
   * feedback is rendered as an empty string rather than "undefined".
   */
  async attemptCompletion(result: string, command?: string): Promise<string> {
    let resultToSend = result
    if (command) {
      await this.say("completion_result", resultToSend)
      // TODO: currently we don't handle if this command fails, it could be useful to let claude know and retry
      const commandResult = await this.executeCommand(command, true)
      // a non-empty string means the command was rejected or failed
      if (commandResult) {
        return commandResult
      }
      // the result was already shown above, so the ask below carries no text
      resultToSend = ""
    }
    // this prompts the webview to show the 'new task' button and enable text input (which would be the 'text' here)
    const { response, text } = await this.ask("completion_result", resultToSend)
    if (response === "yesButtonTapped") {
      return ""
    }
    const feedback = text ?? ""
    await this.say("user_feedback", feedback)
    return `The user is not pleased with the results. Use the feedback they provided to successfully complete the task, and then attempt completion again.\nUser's feedback:\n\"${feedback}\"`
  }
  /**
   * Sends the current conversation to the Anthropic Messages API and returns
   * the response, retrying for as long as the user keeps approving retries.
   *
   * On failure the error is shown through an "api_req_failed" ask. If the
   * user answers with the primary button an "api_req_retried" message is
   * recorded and the request is attempted again; any other answer throws.
   *
   * @returns The message returned by the API.
   * @throws Error("API request failed") when a failed request is not retried.
   */
  async attemptApiRequest(): Promise<Anthropic.Messages.Message> {
    while (true) {
      try {
        const systemPrompt = SYSTEM_PROMPT()
        const history = (await this.providerRef.deref()?.getApiConversationHistory()) || []
        return await this.client.messages.create(
          {
            model: "claude-3-5-sonnet-20240620", // https://docs.anthropic.com/en/docs/about-claude/models
            // beta max tokens
            max_tokens: 8192,
            system: systemPrompt,
            messages: history,
            tools: tools,
            tool_choice: { type: "auto" },
          },
          {
            // https://github.com/anthropics/anthropic-sdk-typescript?tab=readme-ov-file#default-headers
            headers: { "anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15" },
          }
        )
      } catch (error) {
        const failureText = error.message ?? JSON.stringify(serializeError(error), null, 2)
        const { response } = await this.ask("api_req_failed", failureText)
        if (response !== "yesButtonTapped") {
          // this will never happen since if noButtonTapped, we will clear current task, aborting this instance
          throw new Error("API request failed")
        }
        await this.say("api_req_retried")
        // fall through to the next loop iteration to retry the request
      }
    }
  }
  /**
   * Runs one request/response round with the API and recurses while the
   * response keeps producing tool results that need to be sent back.
   *
   * Steps, in order: record `userContent` as a user message, enforce the
   * per-task request limit (asking the user whether to reset it), announce
   * the request, call the API, relay text blocks to the user, execute each
   * requested tool, record the assistant turn, run attempt_completion last,
   * and finally either close the conversation or recurse with the tool results.
   *
   * @param userContent Content blocks to append to the conversation as the next user message.
   * @returns Whether the loop ended, plus the input/output token counts of
   * this call and of any nested calls. If anything inside the request phase
   * throws, `{ didEndLoop: true, inputTokens: 0, outputTokens: 0 }` is returned.
   * @throws Error("ClaudeDev instance aborted") when `this.abort` is set on entry.
   */
  async recursivelyMakeClaudeRequests(
    userContent: Array<
      | Anthropic.TextBlockParam
      | Anthropic.ImageBlockParam
      | Anthropic.ToolUseBlockParam
      | Anthropic.ToolResultBlockParam
    >
  ): Promise<ClaudeRequestResult> {
    if (this.abort) {
      throw new Error("ClaudeDev instance aborted")
    }

    await this.providerRef.deref()?.addMessageToApiConversationHistory({ role: "user", content: userContent })

    if (this.requestCount >= this.maxRequestsPerTask) {
      const { response: limitResponse } = await this.ask(
        "request_limit_reached",
        `Claude Dev has reached the maximum number of requests for this task. Would you like to reset the count and allow him to proceed?`
      )
      if (limitResponse !== "yesButtonTapped") {
        await this.providerRef.deref()?.addMessageToApiConversationHistory({
          role: "assistant",
          content: [
            {
              type: "text",
              text: "Failure: I have reached the request limit for this task. Do you have a new task for me?",
            },
          ],
        })
        return { didEndLoop: true, inputTokens: 0, outputTokens: 0 }
      }
      this.requestCount = 0
    }

    // what the user sees in the webview
    const requestPreview = JSON.stringify({
      request: {
        model: "claude-3-5-sonnet-20240620",
        max_tokens: 8192,
        system: "(see SYSTEM_PROMPT in https://github.com/saoudrizwan/claude-dev/blob/main/src/ClaudeDev.ts)",
        messages: [{ conversation_history: "..." }, { role: "user", content: userContent }],
        tools: "(see tools in https://github.com/saoudrizwan/claude-dev/blob/main/src/ClaudeDev.ts)",
        tool_choice: { type: "auto" },
      },
    })
    await this.say("api_req_started", requestPreview)

    try {
      const response = await this.attemptApiRequest()
      this.requestCount++

      const assistantBlocks: Anthropic.Messages.ContentBlock[] = []
      let inputTokens = response.usage.input_tokens
      let outputTokens = response.usage.output_tokens
      await this.say(
        "api_req_finished",
        JSON.stringify({
          tokensIn: inputTokens,
          tokensOut: outputTokens,
          cost: this.calculateApiCost(inputTokens, outputTokens),
        })
      )

      // First pass: text blocks only, so all text is relayed before any tool runs.
      for (const block of response.content) {
        if (block.type !== "text") {
          continue
        }
        assistantBlocks.push(block)
        await this.say("text", block.text)
      }

      const toolResults: Anthropic.ToolResultBlockParam[] = []
      let completionBlock: Anthropic.Messages.ToolUseBlock | undefined
      const totalWrites = response.content.filter(
        (block) => block.type === "tool_use" && (block.name as ToolName) === "write_to_file"
      ).length
      let writesSeen = 0

      // Second pass: tool_use blocks, executed in the order they appear.
      for (const block of response.content) {
        if (block.type !== "tool_use") {
          continue
        }
        assistantBlocks.push(block)
        const toolName = block.name as ToolName
        if (toolName === "attempt_completion") {
          // deferred: executed after every other tool in this response
          completionBlock = block
          continue
        }
        if (toolName === "write_to_file") {
          writesSeen++
        }
        // the third argument is true once the number of write_to_file calls seen equals the total in this response
        const toolOutput = await this.executeTool(toolName, block.input, writesSeen === totalWrites)
        toolResults.push({ type: "tool_result", tool_use_id: block.id, content: toolOutput })
      }

      if (assistantBlocks.length === 0) {
        // this should never happen! no text or tool_use content blocks came back from the API, which we treat as an error
        this.say("error", "Unexpected Error: No assistant messages were found in the API response")
        await this.providerRef.deref()?.addMessageToApiConversationHistory({
          role: "assistant",
          content: [{ type: "text", text: "Failure: I did not have a response to provide." }],
        })
      } else {
        await this.providerRef
          .deref()
          ?.addMessageToApiConversationHistory({ role: "assistant", content: assistantBlocks })
      }

      let didEndLoop = false

      // attempt_completion is always done last, since other tools may have needed to run before the job is finished
      if (completionBlock) {
        let completionOutput = await this.executeTool(completionBlock.name as ToolName, completionBlock.input)
        if (completionOutput === "") {
          // an empty string is treated as acceptance of the result
          didEndLoop = true
          completionOutput = "The user is satisfied with the result."
        }
        toolResults.push({ type: "tool_result", tool_use_id: completionBlock.id, content: completionOutput })
      }

      if (toolResults.length > 0) {
        if (didEndLoop) {
          // close out the conversation: record the tool results, then a final assistant message
          await this.providerRef.deref()?.addMessageToApiConversationHistory({ role: "user", content: toolResults })
          await this.providerRef.deref()?.addMessageToApiConversationHistory({
            role: "assistant",
            content: [
              {
                type: "text",
                text: "I am pleased you are satisfied with the result. Do you have a new task for me?",
              },
            ],
          })
        } else {
          // send the tool results back and fold the nested call's outcome into this one
          const nested = await this.recursivelyMakeClaudeRequests(toolResults)
          didEndLoop = nested.didEndLoop
          inputTokens += nested.inputTokens
          outputTokens += nested.outputTokens
        }
      }

      return { didEndLoop, inputTokens, outputTokens }
    } catch (error) {
      // To avoid an unhandled promise rejection, any error thrown above ends this loop instead of propagating (see startTask).
      return { didEndLoop: true, inputTokens: 0, outputTokens: 0 }
    }
  }
  835. }