فهرست منبع

Move browser_action to a tool file (#2099)

Matt Rubens 9 ماه پیش
والد
کامیت
38365085d7
2 فایلهای تغییر یافته به همراه 156 افزوده شده و 146 حذف شده
  1. 4 146
      src/core/Cline.ts
  2. 152 0
      src/core/tools/browserActionTool.ts

+ 4 - 146
src/core/Cline.ts

@@ -91,6 +91,7 @@ import { insertContentTool } from "./tools/insertContentTool"
 import { searchAndReplaceTool } from "./tools/searchAndReplaceTool"
 import { listCodeDefinitionNamesTool } from "./tools/listCodeDefinitionNamesTool"
 import { searchFilesTool } from "./tools/searchFilesTool"
+import { browserActionTool } from "./tools/browserActionTool"
 
 export type ToolResponse = string | Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam>
 type UserContent = Array<Anthropic.Messages.ContentBlockParam>
@@ -140,7 +141,7 @@ export class Cline extends EventEmitter<ClineEvents> {
 	readonly apiConfiguration: ApiConfiguration
 	api: ApiHandler
 	private urlContentFetcher: UrlContentFetcher
-	private browserSession: BrowserSession
+	browserSession: BrowserSession
 	didEditFile: boolean = false
 	customInstructions?: string
 	diffStrategy?: DiffStrategy
@@ -1613,151 +1614,8 @@ export class Cline extends EventEmitter<ClineEvents> {
 						break
 					}
 					case "browser_action": {
-						const action: BrowserAction | undefined = block.params.action as BrowserAction
-						const url: string | undefined = block.params.url
-						const coordinate: string | undefined = block.params.coordinate
-						const text: string | undefined = block.params.text
-						if (!action || !browserActions.includes(action)) {
-							// checking for action to ensure it is complete and valid
-							if (!block.partial) {
-								// if the block is complete and we don't have a valid action this is a mistake
-								this.consecutiveMistakeCount++
-								pushToolResult(await this.sayAndCreateMissingParamError("browser_action", "action"))
-								await this.browserSession.closeBrowser()
-							}
-							break
-						}
-
-						try {
-							if (block.partial) {
-								if (action === "launch") {
-									await this.ask(
-										"browser_action_launch",
-										removeClosingTag("url", url),
-										block.partial,
-									).catch(() => {})
-								} else {
-									await this.say(
-										"browser_action",
-										JSON.stringify({
-											action: action as BrowserAction,
-											coordinate: removeClosingTag("coordinate", coordinate),
-											text: removeClosingTag("text", text),
-										} satisfies ClineSayBrowserAction),
-										undefined,
-										block.partial,
-									)
-								}
-								break
-							} else {
-								// Initialize with empty object to avoid "used before assigned" errors
-								let browserActionResult: BrowserActionResult = {}
-								if (action === "launch") {
-									if (!url) {
-										this.consecutiveMistakeCount++
-										pushToolResult(
-											await this.sayAndCreateMissingParamError("browser_action", "url"),
-										)
-										await this.browserSession.closeBrowser()
-										break
-									}
-									this.consecutiveMistakeCount = 0
-									const didApprove = await askApproval("browser_action_launch", url)
-									if (!didApprove) {
-										break
-									}
-
-									// NOTE: it's okay that we call this message since the partial inspect_site is finished streaming. The only scenario we have to avoid is sending messages WHILE a partial message exists at the end of the messages array. For example the api_req_finished message would interfere with the partial message, so we needed to remove that.
-									// await this.say("inspect_site_result", "") // no result, starts the loading spinner waiting for result
-									await this.say("browser_action_result", "") // starts loading spinner
-
-									await this.browserSession.launchBrowser()
-									browserActionResult = await this.browserSession.navigateToUrl(url)
-								} else {
-									if (action === "click") {
-										if (!coordinate) {
-											this.consecutiveMistakeCount++
-											pushToolResult(
-												await this.sayAndCreateMissingParamError(
-													"browser_action",
-													"coordinate",
-												),
-											)
-											await this.browserSession.closeBrowser()
-											break // can't be within an inner switch
-										}
-									}
-									if (action === "type") {
-										if (!text) {
-											this.consecutiveMistakeCount++
-											pushToolResult(
-												await this.sayAndCreateMissingParamError("browser_action", "text"),
-											)
-											await this.browserSession.closeBrowser()
-											break
-										}
-									}
-									this.consecutiveMistakeCount = 0
-									await this.say(
-										"browser_action",
-										JSON.stringify({
-											action: action as BrowserAction,
-											coordinate,
-											text,
-										} satisfies ClineSayBrowserAction),
-										undefined,
-										false,
-									)
-									switch (action) {
-										case "click":
-											browserActionResult = await this.browserSession.click(coordinate!)
-											break
-										case "type":
-											browserActionResult = await this.browserSession.type(text!)
-											break
-										case "scroll_down":
-											browserActionResult = await this.browserSession.scrollDown()
-											break
-										case "scroll_up":
-											browserActionResult = await this.browserSession.scrollUp()
-											break
-										case "close":
-											browserActionResult = await this.browserSession.closeBrowser()
-											break
-									}
-								}
-
-								switch (action) {
-									case "launch":
-									case "click":
-									case "type":
-									case "scroll_down":
-									case "scroll_up":
-										await this.say("browser_action_result", JSON.stringify(browserActionResult))
-										pushToolResult(
-											formatResponse.toolResult(
-												`The browser action has been executed. The console logs and screenshot have been captured for your analysis.\n\nConsole logs:\n${
-													browserActionResult?.logs || "(No new logs)"
-												}\n\n(REMEMBER: if you need to proceed to using non-\`browser_action\` tools or launch a new browser, you MUST first close this browser. For example, if after analyzing the logs and screenshot you need to edit a file, you must first close the browser before you can use the write_to_file tool.)`,
-												browserActionResult?.screenshot ? [browserActionResult.screenshot] : [],
-											),
-										)
-										break
-									case "close":
-										pushToolResult(
-											formatResponse.toolResult(
-												`The browser has been closed. You may now proceed to using other tools.`,
-											),
-										)
-										break
-								}
-								break
-							}
-						} catch (error) {
-							await this.browserSession.closeBrowser() // if any error occurs, the browser session is terminated
-							await handleError("executing browser action", error)
-							break
-						}
+						await browserActionTool(this, block, askApproval, handleError, pushToolResult, removeClosingTag)
+						break
 					}
 					case "execute_command": {
 						const command: string | undefined = block.params.command

+ 152 - 0
src/core/tools/browserActionTool.ts

@@ -0,0 +1,152 @@
+import { Cline } from "../Cline"
+import { ToolUse } from "../assistant-message"
+import { AskApproval, HandleError, PushToolResult, RemoveClosingTag } from "./types"
+import {
+	BrowserAction,
+	BrowserActionResult,
+	browserActions,
+	ClineSayBrowserAction,
+} from "../../shared/ExtensionMessage"
+import { formatResponse } from "../prompts/responses"
+
+/**
+ * Handles a `browser_action` tool-use block on behalf of a `Cline` task.
+ *
+ * While the block is still streaming (`block.partial`), only a partial
+ * ask/say message is emitted so the UI can show progress. Once the block is
+ * complete, the required parameters for the requested action are validated,
+ * the action is executed through `cline.browserSession`, and the outcome is
+ * reported through `pushToolResult`.
+ *
+ * Whenever a required parameter is missing, the mistake counter is
+ * incremented, a missing-parameter error is pushed as the tool result, and the
+ * browser is closed. Any exception thrown while handling the action also
+ * closes the browser before being forwarded to `handleError`.
+ *
+ * @param cline - Task instance providing `ask`/`say`, the mistake counter and the browser session.
+ * @param block - The tool-use block; `params` may carry `action`, `url`, `coordinate` and `text`.
+ * @param askApproval - Asks for approval before a `launch`; a falsy result aborts the launch.
+ * @param handleError - Receives any error raised while executing the browser action.
+ * @param pushToolResult - Receives the tool result (or missing-parameter error) for this block.
+ * @param removeClosingTag - Cleans a possibly incomplete parameter value while the block is partial.
+ * @returns A promise that resolves once the block has been handled; it carries no value.
+ */
+export async function browserActionTool(
+	cline: Cline,
+	block: ToolUse,
+	askApproval: AskApproval,
+	handleError: HandleError,
+	pushToolResult: PushToolResult,
+	removeClosingTag: RemoveClosingTag,
+): Promise<void> {
+	const action: BrowserAction | undefined = block.params.action as BrowserAction
+	const url: string | undefined = block.params.url
+	const coordinate: string | undefined = block.params.coordinate
+	const text: string | undefined = block.params.text
+	if (!action || !browserActions.includes(action)) {
+		// checking for action to ensure it is complete and valid
+		if (!block.partial) {
+			// if the block is complete and we don't have a valid action this is a mistake
+			cline.consecutiveMistakeCount++
+			pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "action"))
+			await cline.browserSession.closeBrowser()
+		}
+		// a partial block without a valid action yet: nothing to do until more content arrives
+		return
+	}
+
+	try {
+		if (block.partial) {
+			// Still streaming: only surface a partial message, never execute anything.
+			if (action === "launch") {
+				// a rejected partial ask is deliberately ignored
+				await cline.ask("browser_action_launch", removeClosingTag("url", url), block.partial).catch(() => {})
+			} else {
+				await cline.say(
+					"browser_action",
+					JSON.stringify({
+						action: action as BrowserAction,
+						coordinate: removeClosingTag("coordinate", coordinate),
+						text: removeClosingTag("text", text),
+					} satisfies ClineSayBrowserAction),
+					undefined,
+					block.partial,
+				)
+			}
+			return
+		} else {
+			// Initialize with empty object to avoid "used before assigned" errors
+			let browserActionResult: BrowserActionResult = {}
+			if (action === "launch") {
+				if (!url) {
+					cline.consecutiveMistakeCount++
+					pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "url"))
+					await cline.browserSession.closeBrowser()
+					return
+				}
+				cline.consecutiveMistakeCount = 0
+				const didApprove = await askApproval("browser_action_launch", url)
+				if (!didApprove) {
+					return
+				}
+
+				// NOTE: it's okay that we call this message since the partial inspect_site is finished streaming. The only scenario we have to avoid is sending messages WHILE a partial message exists at the end of the messages array. For example the api_req_finished message would interfere with the partial message, so we needed to remove that.
+				// await cline.say("inspect_site_result", "") // no result, starts the loading spinner waiting for result
+				await cline.say("browser_action_result", "") // starts loading spinner
+
+				await cline.browserSession.launchBrowser()
+				browserActionResult = await cline.browserSession.navigateToUrl(url)
+			} else {
+				// Required parameters are validated up front, before the action is announced and dispatched below.
+				if (action === "click") {
+					if (!coordinate) {
+						cline.consecutiveMistakeCount++
+						pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "coordinate"))
+						await cline.browserSession.closeBrowser()
+						return // can't be within an inner switch
+					}
+				}
+				if (action === "type") {
+					if (!text) {
+						cline.consecutiveMistakeCount++
+						pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "text"))
+						await cline.browserSession.closeBrowser()
+						return
+					}
+				}
+				cline.consecutiveMistakeCount = 0
+				await cline.say(
+					"browser_action",
+					JSON.stringify({
+						action: action as BrowserAction,
+						coordinate,
+						text,
+					} satisfies ClineSayBrowserAction),
+					undefined,
+					false,
+				)
+				switch (action) {
+					case "click":
+						// coordinate was checked above for the "click" action
+						browserActionResult = await cline.browserSession.click(coordinate!)
+						break
+					case "type":
+						// text was checked above for the "type" action
+						browserActionResult = await cline.browserSession.type(text!)
+						break
+					case "scroll_down":
+						browserActionResult = await cline.browserSession.scrollDown()
+						break
+					case "scroll_up":
+						browserActionResult = await cline.browserSession.scrollUp()
+						break
+					case "close":
+						browserActionResult = await cline.browserSession.closeBrowser()
+						break
+				}
+			}
+
+			// Report the outcome: every action except "close" returns logs and an optional screenshot.
+			switch (action) {
+				case "launch":
+				case "click":
+				case "type":
+				case "scroll_down":
+				case "scroll_up":
+					await cline.say("browser_action_result", JSON.stringify(browserActionResult))
+					pushToolResult(
+						formatResponse.toolResult(
+							`The browser action has been executed. The console logs and screenshot have been captured for your analysis.\n\nConsole logs:\n${
+								browserActionResult?.logs || "(No new logs)"
+							}\n\n(REMEMBER: if you need to proceed to using non-\`browser_action\` tools or launch a new browser, you MUST first close this browser. For example, if after analyzing the logs and screenshot you need to edit a file, you must first close the browser before you can use the write_to_file tool.)`,
+							browserActionResult?.screenshot ? [browserActionResult.screenshot] : [],
+						),
+					)
+					break
+				case "close":
+					pushToolResult(
+						formatResponse.toolResult(
+							`The browser has been closed. You may now proceed to using other tools.`,
+						),
+					)
+					break
+			}
+			return
+		}
+	} catch (error) {
+		await cline.browserSession.closeBrowser() // if any error occurs, the browser session is terminated
+		await handleError("executing browser action", error)
+		return
+	}
+}