Просмотр исходного кода

feat(browserTool): Implement resize action (#2370)

* Implement resize action for browser action tool

* Update snapshots
Marco Quinten 9 месяцев назад
Родитель
Коммит
fff8fdd3f3

+ 2 - 1
src/core/assistant-message/index.ts

@@ -60,6 +60,7 @@ export const toolParamNames = [
 	"cwd",
 	"follow_up",
 	"task",
+	"size",
 ] as const
 
 export type ToolParamName = (typeof toolParamNames)[number]
@@ -115,7 +116,7 @@ export interface ListCodeDefinitionNamesToolUse extends ToolUse {
 
 export interface BrowserActionToolUse extends ToolUse {
 	name: "browser_action"
-	params: Partial<Pick<Record<ToolParamName, string>, "action" | "url" | "coordinate" | "text">>
+	params: Partial<Pick<Record<ToolParamName, string>, "action" | "url" | "coordinate" | "text" | "size">>
 }
 
 export interface UseMcpToolToolUse extends ToolUse {

+ 8 - 0
src/core/prompts/__tests__/__snapshots__/system.test.ts.snap

@@ -2719,6 +2719,8 @@ Parameters:
         - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot.
     * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text.
         - Use with the \`text\` parameter to provide the string to type.
+    * resize: Resize the viewport to a specific w,h size.
+        - Use with the \`size\` parameter to specify the new size.
     * scroll_down: Scroll down the page by one page height.
     * scroll_up: Scroll up the page by one page height.
     * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**.
@@ -2727,6 +2729,8 @@ Parameters:
     * Example: <url>https://example.com</url>
 - coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **900x600** resolution.
     * Example: <coordinate>450,300</coordinate>
+- size: (optional) The width and height for the \`resize\` action.
+    * Example: <size>1280,720</size>
 - text: (optional) Use this for providing the text for the \`type\` action.
     * Example: <text>Hello, world!</text>
 Usage:
@@ -3630,6 +3634,8 @@ Parameters:
         - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot.
     * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text.
         - Use with the \`text\` parameter to provide the string to type.
+    * resize: Resize the viewport to a specific w,h size.
+        - Use with the \`size\` parameter to specify the new size.
     * scroll_down: Scroll down the page by one page height.
     * scroll_up: Scroll up the page by one page height.
     * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**.
@@ -3638,6 +3644,8 @@ Parameters:
     * Example: <url>https://example.com</url>
 - coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **1280x800** resolution.
     * Example: <coordinate>450,300</coordinate>
+- size: (optional) The width and height for the \`resize\` action.
+    * Example: <size>1280,720</size>
 - text: (optional) Use this for providing the text for the \`type\` action.
     * Example: <text>Hello, world!</text>
 Usage:

+ 4 - 0
src/core/prompts/tools/browser-action.ts

@@ -20,6 +20,8 @@ Parameters:
         - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot.
     * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text.
         - Use with the \`text\` parameter to provide the string to type.
+    * resize: Resize the viewport to a specific w,h size.
+        - Use with the \`size\` parameter to specify the new size.
     * scroll_down: Scroll down the page by one page height.
     * scroll_up: Scroll up the page by one page height.
     * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**.
@@ -28,6 +30,8 @@ Parameters:
     * Example: <url>https://example.com</url>
 - coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **${args.browserViewportSize}** resolution.
     * Example: <coordinate>450,300</coordinate>
+- size: (optional) The width and height for the \`resize\` action.
+    * Example: <size>1280,720</size>
 - text: (optional) Use this for providing the text for the \`type\` action.
     * Example: <text>Hello, world!</text>
 Usage:

+ 13 - 0
src/core/tools/browserActionTool.ts

@@ -21,6 +21,7 @@ export async function browserActionTool(
 	const url: string | undefined = block.params.url
 	const coordinate: string | undefined = block.params.coordinate
 	const text: string | undefined = block.params.text
+	const size: string | undefined = block.params.size
 	if (!action || !browserActions.includes(action)) {
 		// checking for action to ensure it is complete and valid
 		if (!block.partial) {
@@ -88,6 +89,14 @@ export async function browserActionTool(
 						return
 					}
 				}
+				if (action === "resize") {
+					if (!size) {
+						cline.consecutiveMistakeCount++
+						pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "size"))
+						await cline.browserSession.closeBrowser()
+						return
+					}
+				}
 				cline.consecutiveMistakeCount = 0
 				await cline.say(
 					"browser_action",
@@ -112,6 +121,9 @@ export async function browserActionTool(
 					case "scroll_up":
 						browserActionResult = await cline.browserSession.scrollUp()
 						break
+					case "resize":
+						browserActionResult = await cline.browserSession.resize(size!)
+						break
 					case "close":
 						browserActionResult = await cline.browserSession.closeBrowser()
 						break
@@ -124,6 +136,7 @@ export async function browserActionTool(
 				case "type":
 				case "scroll_down":
 				case "scroll_up":
+				case "resize":
 					await cline.say("browser_action_result", JSON.stringify(browserActionResult))
 					pushToolResult(
 						formatResponse.toolResult(

+ 13 - 0
src/services/browser/BrowserSession.ts

@@ -538,4 +538,17 @@ export class BrowserSession {
 			})
 		})
 	}
+
+	async resize(size: string): Promise<BrowserActionResult> {
+		return this.doAction(async (page) => {
+			const [width, height] = size.split(",").map(Number)
+			const session = await page.createCDPSession()
+			await page.setViewport({ width, height })
+			const { windowId } = await session.send("Browser.getWindowForTarget")
+			await session.send("Browser.setWindowBounds", {
+				bounds: { width, height },
+				windowId,
+			})
+		})
+	}
 }

+ 11 - 1
src/shared/ExtensionMessage.ts

@@ -233,13 +233,23 @@ export interface ClineSayTool {
 }
 
 // Must keep in sync with system prompt.
-export const browserActions = ["launch", "click", "hover", "type", "scroll_down", "scroll_up", "close"] as const
+export const browserActions = [
+	"launch",
+	"click",
+	"hover",
+	"type",
+	"scroll_down",
+	"scroll_up",
+	"resize",
+	"close",
+] as const
 
 export type BrowserAction = (typeof browserActions)[number]
 
 export interface ClineSayBrowserAction {
 	action: BrowserAction
 	coordinate?: string
+	size?: string
 	text?: string
 }