Przeglądaj źródła

Browser Use 2.0 (#8941)

Co-authored-by: roomote[bot] <219738659+roomote[bot]@users.noreply.github.com>
Co-authored-by: daniel-lxs <[email protected]>
Hannes Rudolph 1 miesiąc temu
rodzic
commit
ee93530076
56 zmienionych plików z 3221 dodań i 592 usunięć
  1. 1 1
      knip.json
  2. 1 0
      packages/types/src/message.ts
  3. 30 5
      src/core/assistant-message/presentAssistantMessage.ts
  4. 18 1
      src/core/environment/__tests__/getEnvironmentDetails.spec.ts
  5. 29 0
      src/core/environment/getEnvironmentDetails.ts
  6. 25 10
      src/core/prompts/__tests__/__snapshots__/system-prompt/with-computer-use-support.snap
  7. 25 10
      src/core/prompts/tools/browser-action.ts
  8. 10 32
      src/core/prompts/tools/native-tools/browser_action.ts
  9. 72 1
      src/core/task/Task.ts
  10. 171 157
      src/core/tools/BrowserActionTool.ts
  11. 84 0
      src/core/tools/__tests__/BrowserActionTool.coordinateScaling.spec.ts
  12. 310 0
      src/core/webview/BrowserSessionPanelManager.ts
  13. 6 0
      src/core/webview/ClineProvider.ts
  14. 1 0
      src/core/webview/__tests__/ClineProvider.spec.ts
  15. 96 0
      src/core/webview/webviewMessageHandler.ts
  16. 306 5
      src/services/browser/BrowserSession.ts
  17. 3 3
      src/services/browser/UrlContentFetcher.ts
  18. 222 0
      src/services/browser/__tests__/BrowserSession.spec.ts
  19. 11 0
      src/shared/ExtensionMessage.ts
  20. 10 0
      src/shared/WebviewMessage.ts
  21. 95 0
      src/shared/browserUtils.ts
  22. 12 0
      webview-ui/browser-panel.html
  23. 12 0
      webview-ui/src/browser-panel.tsx
  24. 60 0
      webview-ui/src/components/browser-session/BrowserPanelStateProvider.tsx
  25. 102 0
      webview-ui/src/components/browser-session/BrowserSessionPanel.tsx
  26. 184 0
      webview-ui/src/components/chat/BrowserActionRow.tsx
  27. 776 179
      webview-ui/src/components/chat/BrowserSessionRow.tsx
  28. 34 0
      webview-ui/src/components/chat/BrowserSessionStatusRow.tsx
  29. 20 2
      webview-ui/src/components/chat/ChatRow.tsx
  30. 12 1
      webview-ui/src/components/chat/ChatTextArea.tsx
  31. 86 125
      webview-ui/src/components/chat/ChatView.tsx
  32. 104 50
      webview-ui/src/components/chat/TaskHeader.tsx
  33. 55 0
      webview-ui/src/components/chat/__tests__/BrowserSessionRow.aspect-ratio.spec.tsx
  34. 42 0
      webview-ui/src/components/chat/__tests__/BrowserSessionRow.disconnect-button.spec.tsx
  35. 126 0
      webview-ui/src/components/chat/__tests__/BrowserSessionRow.spec.tsx
  36. 1 0
      webview-ui/src/context/ExtensionStateContext.tsx
  37. 1 0
      webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx
  38. 4 1
      webview-ui/src/i18n/locales/ca/chat.json
  39. 3 0
      webview-ui/src/i18n/locales/de/chat.json
  40. 4 1
      webview-ui/src/i18n/locales/en/chat.json
  41. 4 1
      webview-ui/src/i18n/locales/es/chat.json
  42. 4 1
      webview-ui/src/i18n/locales/fr/chat.json
  43. 3 0
      webview-ui/src/i18n/locales/hi/chat.json
  44. 4 1
      webview-ui/src/i18n/locales/id/chat.json
  45. 3 0
      webview-ui/src/i18n/locales/it/chat.json
  46. 4 1
      webview-ui/src/i18n/locales/ja/chat.json
  47. 3 0
      webview-ui/src/i18n/locales/ko/chat.json
  48. 4 1
      webview-ui/src/i18n/locales/nl/chat.json
  49. 3 0
      webview-ui/src/i18n/locales/pl/chat.json
  50. 3 0
      webview-ui/src/i18n/locales/pt-BR/chat.json
  51. 4 1
      webview-ui/src/i18n/locales/ru/chat.json
  52. 4 1
      webview-ui/src/i18n/locales/tr/chat.json
  53. 3 0
      webview-ui/src/i18n/locales/vi/chat.json
  54. 3 0
      webview-ui/src/i18n/locales/zh-CN/chat.json
  55. 4 1
      webview-ui/src/i18n/locales/zh-TW/chat.json
  56. 4 0
      webview-ui/vite.config.ts

+ 1 - 1
knip.json

@@ -16,7 +16,7 @@
 			"project": ["**/*.ts"]
 		},
 		"webview-ui": {
-			"entry": ["src/index.tsx"],
+			"entry": ["src/index.tsx", "src/browser-panel.tsx"],
 			"project": ["src/**/*.{ts,tsx}", "../src/shared/*.ts"]
 		},
 		"packages/{build,cloud,evals,ipc,telemetry,types}": {

+ 1 - 0
packages/types/src/message.ts

@@ -166,6 +166,7 @@ export const clineSays = [
 	"shell_integration_warning",
 	"browser_action",
 	"browser_action_result",
+	"browser_session_status",
 	"mcp_server_request_started",
 	"mcp_server_response",
 	"subtask_result",

+ 30 - 5
src/core/assistant-message/presentAssistantMessage.ts

@@ -461,8 +461,32 @@ export async function presentAssistantMessage(cline: Task) {
 				return text.replace(tagRegex, "")
 			}
 
-			if (block.name !== "browser_action") {
-				await cline.browserSession.closeBrowser()
+			// Keep browser open during an active session so other tools can run.
+			// Session is active if we've seen any browser_action_result and the last browser_action is not "close".
+			try {
+				const messages = cline.clineMessages || []
+				const hasStarted = messages.some((m: any) => m.say === "browser_action_result")
+				let isClosed = false
+				for (let i = messages.length - 1; i >= 0; i--) {
+					const m = messages[i]
+					if (m.say === "browser_action") {
+						try {
+							const act = JSON.parse(m.text || "{}")
+							isClosed = act.action === "close"
+						} catch {}
+						break
+					}
+				}
+				const sessionActive = hasStarted && !isClosed
+				// Only auto-close when no active browser session is present, and this isn't a browser_action
+				if (!sessionActive && block.name !== "browser_action") {
+					await cline.browserSession.closeBrowser()
+				}
+			} catch {
+				// On any unexpected error, fall back to conservative behavior
+				if (block.name !== "browser_action") {
+					await cline.browserSession.closeBrowser()
+				}
 			}
 
 			if (!block.partial) {
@@ -669,13 +693,14 @@ export async function presentAssistantMessage(cline: Task) {
 					})
 					break
 				case "browser_action":
-					await browserActionTool.handle(cline, block as ToolUse<"browser_action">, {
+					await browserActionTool(
+						cline,
+						block as ToolUse<"browser_action">,
 						askApproval,
 						handleError,
 						pushToolResult,
 						removeClosingTag,
-						toolProtocol,
-					})
+					)
 					break
 				case "execute_command":
 					await executeCommandTool.handle(cline, block as ToolUse<"execute_command">, {

+ 18 - 1
src/core/environment/__tests__/getEnvironmentDetails.spec.ts

@@ -118,6 +118,10 @@ describe("getEnvironmentDetails", () => {
 				deref: vi.fn().mockReturnValue(mockProvider),
 				[Symbol.toStringTag]: "WeakRef",
 			} as unknown as WeakRef<ClineProvider>,
+			browserSession: {
+				isSessionActive: vi.fn().mockReturnValue(false),
+				getViewportSize: vi.fn().mockReturnValue({ width: 900, height: 600 }),
+			} as any,
 		}
 
 		// Mock other dependencies.
@@ -393,7 +397,6 @@ describe("getEnvironmentDetails", () => {
 		const result = await getEnvironmentDetails(cline as Task)
 		expect(result).toContain("REMINDERS")
 	})
-
 	it("should include git status when maxGitStatusFiles > 0", async () => {
 		;(getGitStatus as Mock).mockResolvedValue("## main\nM  file1.ts")
 		mockProvider.getState.mockResolvedValue({
@@ -456,4 +459,18 @@ describe("getEnvironmentDetails", () => {
 
 		expect(getGitStatus).toHaveBeenCalledWith(mockCwd, 5)
 	})
+
+	it("should NOT include Browser Session Status when inactive", async () => {
+		const result = await getEnvironmentDetails(mockCline as Task)
+		expect(result).not.toContain("# Browser Session Status")
+	})
+
+	it("should include Browser Session Status with current viewport when active", async () => {
+		;(mockCline.browserSession as any).isSessionActive = vi.fn().mockReturnValue(true)
+		;(mockCline.browserSession as any).getViewportSize = vi.fn().mockReturnValue({ width: 1280, height: 720 })
+
+		const result = await getEnvironmentDetails(mockCline as Task)
+		expect(result).toContain("Active - A browser session is currently open and ready for browser_action commands")
+		expect(result).toContain("Current viewport size: 1280x720 pixels.")
+	})
 })

+ 29 - 0
src/core/environment/getEnvironmentDetails.ts

@@ -248,6 +248,35 @@ export async function getEnvironmentDetails(cline: Task, includeFileDetails: boo
 		}
 	}
 
+	// Add browser session status - Only show when active to prevent cluttering context
+	const isBrowserActive = cline.browserSession.isSessionActive()
+
+	if (isBrowserActive) {
+		// Build viewport info for status (prefer actual viewport if available, else fallback to configured setting)
+		const configuredViewport = (state?.browserViewportSize as string | undefined) ?? "900x600"
+		let configuredWidth: number | undefined
+		let configuredHeight: number | undefined
+		if (configuredViewport.includes("x")) {
+			const parts = configuredViewport.split("x").map((v) => Number(v))
+			configuredWidth = parts[0]
+			configuredHeight = parts[1]
+		}
+
+		let actualWidth: number | undefined
+		let actualHeight: number | undefined
+		const vp = cline.browserSession.getViewportSize?.()
+		if (vp) {
+			actualWidth = vp.width
+			actualHeight = vp.height
+		}
+
+		const width = actualWidth ?? configuredWidth
+		const height = actualHeight ?? configuredHeight
+		const viewportInfo = width && height ? `\nCurrent viewport size: ${width}x${height} pixels.` : ""
+
+		details += `\n# Browser Session Status\nActive - A browser session is currently open and ready for browser_action commands${viewportInfo}\n`
+	}
+
 	if (includeFileDetails) {
 		details += `\n\n# Current Workspace Directory (${cline.cwd.toPosix()}) Files\n`
 		const isDesktop = arePathsEqual(cline.cwd, path.join(os.homedir(), "Desktop"))

+ 25 - 10
src/core/prompts/__tests__/__snapshots__/system-prompt/with-computer-use-support.snap

@@ -228,10 +228,12 @@ Example for appending to the end of file:
 
 ## browser_action
 Description: Request to interact with a Puppeteer-controlled browser. Every action, except `close`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action.
-- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL.
-- While the browser is active, only the `browser_action` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result.
-- The browser window has a resolution of **1280x800** pixels. When performing any click actions, ensure the coordinates are within this resolution range.
-- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges.
+
+**Browser Session Lifecycle:**
+- Browser sessions **start** with `launch` and **end** with `close`
+- The session remains active across multiple messages and tool uses
+- You can use other tools while the browser session is active - it will stay open in the background
+
 Parameters:
 - action: (required) The action to perform. The available actions are:
     * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**.
@@ -245,6 +247,12 @@ Parameters:
         - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot.
     * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text.
         - Use with the `text` parameter to provide the string to type.
+    * press: Press a single keyboard key or key combination (e.g., Enter, Tab, Escape, Cmd+K, Shift+Enter).
+        - Use with the `text` parameter to provide the key name or combination.
+        - For single keys: Enter, Tab, Escape, etc.
+        - For key combinations: Cmd+K, Ctrl+C, Shift+Enter, Alt+F4, etc.
+        - Supported modifiers: Cmd/Command/Meta, Ctrl/Control, Shift, Alt/Option
+        - Example: <text>Cmd+K</text> or <text>Shift+Enter</text>
     * resize: Resize the viewport to a specific w,h size.
         - Use with the `size` parameter to specify the new size.
     * scroll_down: Scroll down the page by one page height.
@@ -253,17 +261,24 @@ Parameters:
         - Example: `<action>close</action>`
 - url: (optional) Use this for providing the URL for the `launch` action.
     * Example: <url>https://example.com</url>
-- coordinate: (optional) The X and Y coordinates for the `click` and `hover` actions. Coordinates should be within the **1280x800** resolution.
-    * Example: <coordinate>450,300</coordinate>
+- coordinate: (optional) The X and Y coordinates for the `click` and `hover` actions.
+    * **CRITICAL**: Screenshot dimensions are NOT the same as the browser viewport dimensions
+    * Format: <coordinate>x,y@widthxheight</coordinate>
+    * Measure x,y on the screenshot image you see in chat
+    * The widthxheight MUST be the EXACT pixel size of that screenshot image (never the browser viewport)
+    * Never use the browser viewport size for widthxheight - the viewport is only a reference and is often larger than the screenshot
+    * Images are often downscaled before you see them, so the screenshot's dimensions will likely be smaller than the viewport
+    * Example A: If the screenshot you see is 1094x1092 and you want to click (450,300) on that image, use: <coordinate>450,300@1094x1092</coordinate>
+    * Example B: If the browser viewport is 1280x800 but the screenshot is 1000x625 and you want to click (500,300) on the screenshot, use: <coordinate>500,300@1000x625</coordinate>
 - size: (optional) The width and height for the `resize` action.
     * Example: <size>1280,720</size>
 - text: (optional) Use this for providing the text for the `type` action.
     * Example: <text>Hello, world!</text>
 Usage:
 <browser_action>
-<action>Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close)</action>
+<action>Action to perform (e.g., launch, click, type, press, scroll_down, scroll_up, close)</action>
 <url>URL to launch the browser at (optional)</url>
-<coordinate>x,y coordinates (optional)</coordinate>
+<coordinate>x,y@widthxheight coordinates (optional)</coordinate>
 <text>Text to type (optional)</text>
 </browser_action>
 
@@ -273,10 +288,10 @@ Example: Requesting to launch a browser at https://example.com
 <url>https://example.com</url>
 </browser_action>
 
-Example: Requesting to click on the element at coordinates 450,300
+Example: Requesting to click on the element at coordinates 450,300 on a 1024x768 image
 <browser_action>
 <action>click</action>
-<coordinate>450,300</coordinate>
+<coordinate>450,300@1024x768</coordinate>
 </browser_action>
 
 ## ask_followup_question

+ 25 - 10
src/core/prompts/tools/browser-action.ts

@@ -6,10 +6,12 @@ export function getBrowserActionDescription(args: ToolArgs): string | undefined
 	}
 	return `## browser_action
 Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action.
-- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL.
-- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result.
-- The browser window has a resolution of **${args.browserViewportSize}** pixels. When performing any click actions, ensure the coordinates are within this resolution range.
-- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges.
+
+**Browser Session Lifecycle:**
+- Browser sessions **start** with \`launch\` and **end** with \`close\`
+- The session remains active across multiple messages and tool uses
+- You can use other tools while the browser session is active - it will stay open in the background
+
 Parameters:
 - action: (required) The action to perform. The available actions are:
     * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**.
@@ -23,6 +25,12 @@ Parameters:
         - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot.
     * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text.
         - Use with the \`text\` parameter to provide the string to type.
+    * press: Press a single keyboard key or key combination (e.g., Enter, Tab, Escape, Cmd+K, Shift+Enter).
+        - Use with the \`text\` parameter to provide the key name or combination.
+        - For single keys: Enter, Tab, Escape, etc.
+        - For key combinations: Cmd+K, Ctrl+C, Shift+Enter, Alt+F4, etc.
+        - Supported modifiers: Cmd/Command/Meta, Ctrl/Control, Shift, Alt/Option
+        - Example: <text>Cmd+K</text> or <text>Shift+Enter</text>
     * resize: Resize the viewport to a specific w,h size.
         - Use with the \`size\` parameter to specify the new size.
     * scroll_down: Scroll down the page by one page height.
@@ -31,17 +39,24 @@ Parameters:
         - Example: \`<action>close</action>\`
 - url: (optional) Use this for providing the URL for the \`launch\` action.
     * Example: <url>https://example.com</url>
-- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **${args.browserViewportSize}** resolution.
-    * Example: <coordinate>450,300</coordinate>
+- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions.
+    * **CRITICAL**: Screenshot dimensions are NOT the same as the browser viewport dimensions
+    * Format: <coordinate>x,y@widthxheight</coordinate>
+    * Measure x,y on the screenshot image you see in chat
+    * The widthxheight MUST be the EXACT pixel size of that screenshot image (never the browser viewport)
+    * Never use the browser viewport size for widthxheight - the viewport is only a reference and is often larger than the screenshot
+    * Images are often downscaled before you see them, so the screenshot's dimensions will likely be smaller than the viewport
+    * Example A: If the screenshot you see is 1094x1092 and you want to click (450,300) on that image, use: <coordinate>450,300@1094x1092</coordinate>
+    * Example B: If the browser viewport is 1280x800 but the screenshot is 1000x625 and you want to click (500,300) on the screenshot, use: <coordinate>500,300@1000x625</coordinate>
 - size: (optional) The width and height for the \`resize\` action.
     * Example: <size>1280,720</size>
 - text: (optional) Use this for providing the text for the \`type\` action.
     * Example: <text>Hello, world!</text>
 Usage:
 <browser_action>
-<action>Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close)</action>
+<action>Action to perform (e.g., launch, click, type, press, scroll_down, scroll_up, close)</action>
 <url>URL to launch the browser at (optional)</url>
-<coordinate>x,y coordinates (optional)</coordinate>
+<coordinate>x,y@widthxheight coordinates (optional)</coordinate>
 <text>Text to type (optional)</text>
 </browser_action>
 
@@ -51,9 +66,9 @@ Example: Requesting to launch a browser at https://example.com
 <url>https://example.com</url>
 </browser_action>
 
-Example: Requesting to click on the element at coordinates 450,300
+Example: Requesting to click on the element at coordinates 450,300 on a 1024x768 image
 <browser_action>
 <action>click</action>
-<coordinate>450,300</coordinate>
+<coordinate>450,300@1024x768</coordinate>
 </browser_action>`
 }

+ 10 - 32
src/core/prompts/tools/native-tools/browser_action.ts

@@ -5,7 +5,7 @@ export default {
 	function: {
 		name: "browser_action",
 		description:
-			"Interact with a Puppeteer-controlled browser session. Always start by launching at a URL and always finish by closing the browser. While the browser is active, do not call any other tools. Use coordinates within the viewport to hover or click, provide text for typing, and ensure actions are grounded in the latest screenshot and console logs.",
+			"Interact with a browser session. Always start by launching at a URL and always finish by closing the browser. While the browser is active, do not call any other tools. Use coordinates within the viewport to hover or click, provide text for typing, and ensure actions are grounded in the latest screenshot and console logs.",
 		strict: true,
 		parameters: {
 			type: "object",
@@ -13,51 +13,29 @@ export default {
 				action: {
 					type: "string",
 					description: "Browser action to perform",
-					enum: ["launch", "hover", "click", "type", "resize", "scroll_down", "scroll_up", "close"],
+					enum: ["launch", "click", "hover", "type", "press", "scroll_down", "scroll_up", "resize", "close"],
 				},
 				url: {
 					type: ["string", "null"],
 					description: "URL to open when performing the launch action; must include protocol",
 				},
 				coordinate: {
-					type: ["object", "null"],
+					type: ["string", "null"],
 					description:
-						"Screen coordinate for hover or click actions; target the center of the desired element",
-					properties: {
-						x: {
-							type: "number",
-							description: "Horizontal pixel position within the current viewport",
-						},
-						y: {
-							type: "number",
-							description: "Vertical pixel position within the current viewport",
-						},
-					},
-					required: ["x", "y"],
-					additionalProperties: false,
+						"Screen coordinate for hover or click actions in format 'x,y@WIDTHxHEIGHT' where x,y is the target position on the screenshot image and WIDTHxHEIGHT is the exact pixel dimensions of the screenshot image (not the browser viewport). Example: '450,203@900x600' means click at (450,203) on a 900x600 screenshot. The coordinates will be automatically scaled to match the actual viewport dimensions.",
 				},
 				size: {
-					type: ["object", "null"],
-					description: "Viewport dimensions to apply when performing the resize action",
-					properties: {
-						width: {
-							type: "number",
-							description: "Viewport width in pixels",
-						},
-						height: {
-							type: "number",
-							description: "Viewport height in pixels",
-						},
-					},
-					required: ["width", "height"],
-					additionalProperties: false,
+					type: ["string", "null"],
+					description:
+						"Viewport dimensions for the resize action in format 'WIDTHxHEIGHT' or 'WIDTH,HEIGHT'. Example: '1280x800' or '1280,800'",
 				},
 				text: {
 					type: ["string", "null"],
-					description: "Text to type when performing the type action",
+					description:
+						"Text to type when performing the type action, or key name to press when performing the press action (e.g., 'Enter', 'Tab', 'Escape')",
 				},
 			},
-			required: ["action", "url", "coordinate", "size", "text"],
+			required: ["action"],
 			additionalProperties: false,
 		},
 	},

+ 72 - 1
src/core/task/Task.ts

@@ -385,7 +385,28 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		this.autoApprovalHandler = new AutoApprovalHandler()
 
 		this.urlContentFetcher = new UrlContentFetcher(provider.context)
-		this.browserSession = new BrowserSession(provider.context)
+		this.browserSession = new BrowserSession(provider.context, (isActive: boolean) => {
+			// Add a message to indicate browser session status change
+			this.say("browser_session_status", isActive ? "Browser session opened" : "Browser session closed")
+			// Broadcast to browser panel
+			this.broadcastBrowserSessionUpdate()
+
+			// When a browser session becomes active, automatically open/reveal the Browser Session tab
+			if (isActive) {
+				try {
+					// Lazy-load to avoid circular imports at module load time
+					const { BrowserSessionPanelManager } = require("../webview/BrowserSessionPanelManager")
+					const providerRef = this.providerRef.deref()
+					if (providerRef) {
+						BrowserSessionPanelManager.getInstance(providerRef)
+							.show()
+							.catch(() => {})
+					}
+				} catch (err) {
+					console.error("[Task] Failed to auto-open Browser Session panel:", err)
+				}
+			}
+		})
 		this.diffEnabled = enableDiff
 		this.fuzzyMatchThreshold = fuzzyMatchThreshold
 		this.consecutiveMistakeLimit = consecutiveMistakeLimit ?? DEFAULT_CONSECUTIVE_MISTAKE_LIMIT
@@ -1384,6 +1405,11 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				contextCondense,
 			})
 		}
+
+		// Broadcast browser session updates to panel when browser-related messages are added
+		if (type === "browser_action" || type === "browser_action_result" || type === "browser_session_status") {
+			this.broadcastBrowserSessionUpdate()
+		}
 	}
 
 	async sayAndCreateMissingParamError(toolName: ToolName, paramName: string, relPath?: string) {
@@ -1804,6 +1830,16 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		} catch (error) {
 			console.error("Error closing browser session:", error)
 		}
+		// Also close the Browser Session panel when the task is disposed
+		try {
+			const provider = this.providerRef.deref()
+			if (provider) {
+				const { BrowserSessionPanelManager } = require("../webview/BrowserSessionPanelManager")
+				BrowserSessionPanelManager.getInstance(provider).dispose()
+			}
+		} catch (error) {
+			console.error("Error closing browser session panel:", error)
+		}
 
 		try {
 			if (this.rooIgnoreController) {
@@ -3551,6 +3587,41 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		return this.workspacePath
 	}
 
+	/**
+	 * Broadcast browser session updates to the browser panel (if open)
+	 */
+	private broadcastBrowserSessionUpdate(): void {
+		const provider = this.providerRef.deref()
+		if (!provider) {
+			return
+		}
+
+		try {
+			const { BrowserSessionPanelManager } = require("../webview/BrowserSessionPanelManager")
+			const panelManager = BrowserSessionPanelManager.getInstance(provider)
+
+			// Get browser session messages
+			const browserSessionStartIndex = this.clineMessages.findIndex(
+				(m) =>
+					m.ask === "browser_action_launch" ||
+					(m.say === "browser_session_status" && m.text?.includes("opened")),
+			)
+
+			const browserSessionMessages =
+				browserSessionStartIndex !== -1 ? this.clineMessages.slice(browserSessionStartIndex) : []
+
+			const isBrowserSessionActive = this.browserSession?.isSessionActive() ?? false
+
+			// Update the panel asynchronously
+			panelManager.updateBrowserSession(browserSessionMessages, isBrowserSessionActive).catch((error: Error) => {
+				console.error("Failed to broadcast browser session update:", error)
+			})
+		} catch (error) {
+			// Silently fail if panel manager is not available
+			console.debug("Browser panel not available for update:", error)
+		}
+	}
+
 	/**
 	 * Process any queued messages by dequeuing and submitting them.
 	 * This ensures that queued user messages are sent when appropriate,

+ 171 - 157
src/core/tools/BrowserActionTool.ts

@@ -1,7 +1,5 @@
-import type { BrowserActionParams, Coordinate, Size } from "@roo-code/types"
 import { Task } from "../task/Task"
-import { BaseTool, ToolCallbacks } from "./BaseTool"
-import type { ToolUse } from "../../shared/tools"
+import { ToolUse, AskApproval, HandleError, PushToolResult, RemoveClosingTag } from "../../shared/tools"
 import {
 	BrowserAction,
 	BrowserActionResult,
@@ -9,171 +7,186 @@ import {
 	ClineSayBrowserAction,
 } from "../../shared/ExtensionMessage"
 import { formatResponse } from "../prompts/responses"
+import { Anthropic } from "@anthropic-ai/sdk"
+import { scaleCoordinate } from "../../shared/browserUtils"
 
-export class BrowserActionTool extends BaseTool<"browser_action"> {
-	readonly name = "browser_action" as const
-
-	parseLegacy(params: Partial<Record<string, string>>): BrowserActionParams {
-		const action = params.action as BrowserAction | undefined
-
-		// Parse coordinate if present - XML protocol sends "x,y" format
-		let coordinate: Coordinate | undefined
-		if (params.coordinate) {
-			// Try parsing as "x,y" string first (XML protocol)
-			const parts = params.coordinate.split(",")
-			if (parts.length === 2) {
-				const x = parseInt(parts[0], 10)
-				const y = parseInt(parts[1], 10)
-				if (!isNaN(x) && !isNaN(y)) {
-					coordinate = { x, y }
-				}
-			} else {
-				// Try parsing as JSON object (fallback)
-				try {
-					const parsed = JSON.parse(params.coordinate)
-					if (parsed && typeof parsed.x === "number" && typeof parsed.y === "number") {
-						coordinate = { x: parsed.x, y: parsed.y }
-					}
-				} catch (error) {
-					// Invalid coordinate format, leave undefined
-				}
-			}
-		}
+export async function browserActionTool(
+	cline: Task,
+	block: ToolUse,
+	askApproval: AskApproval,
+	handleError: HandleError,
+	pushToolResult: PushToolResult,
+	removeClosingTag: RemoveClosingTag,
+) {
+	const action: BrowserAction | undefined = block.params.action as BrowserAction
+	const url: string | undefined = block.params.url
+	const coordinate: string | undefined = block.params.coordinate
+	const text: string | undefined = block.params.text
+	const size: string | undefined = block.params.size
 
-		// Parse size if present - XML protocol sends "width,height" format
-		let size: Size | undefined
-		if (params.size) {
-			// Try parsing as "width,height" string first (XML protocol)
-			const parts = params.size.split(",")
-			if (parts.length === 2) {
-				const width = parseInt(parts[0], 10)
-				const height = parseInt(parts[1], 10)
-				if (!isNaN(width) && !isNaN(height)) {
-					size = { width, height }
-				}
-			} else {
-				// Try parsing as JSON object (fallback)
-				try {
-					const parsed = JSON.parse(params.size)
-					if (parsed && typeof parsed.width === "number" && typeof parsed.height === "number") {
-						size = { width: parsed.width, height: parsed.height }
-					}
-				} catch (error) {
-					// Invalid size format, leave undefined
-				}
-			}
+	if (!action || !browserActions.includes(action)) {
+		// checking for action to ensure it is complete and valid
+		if (!block.partial) {
+			// if the block is complete and we don't have a valid action, this is a mistake
+			cline.consecutiveMistakeCount++
+			cline.recordToolError("browser_action")
+			pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "action"))
+			// Do not close the browser on parameter validation errors
 		}
 
-		return {
-			action: action!,
-			url: params.url,
-			coordinate,
-			size,
-			text: params.text,
-		}
+		return
 	}
 
-	async execute(params: BrowserActionParams, task: Task, callbacks: ToolCallbacks): Promise<void> {
-		const { action, url, coordinate, text, size } = params
-		const { handleError, pushToolResult } = callbacks
-
-		// Validate action
-		if (!action || !browserActions.includes(action)) {
-			task.consecutiveMistakeCount++
-			task.recordToolError("browser_action")
-			pushToolResult(await task.sayAndCreateMissingParamError("browser_action", "action"))
-			await task.browserSession.closeBrowser()
+	try {
+		if (block.partial) {
+			if (action === "launch") {
+				await cline.ask("browser_action_launch", removeClosingTag("url", url), block.partial).catch(() => {})
+			} else {
+				await cline.say(
+					"browser_action",
+					JSON.stringify({
+						action: action as BrowserAction,
+						coordinate: removeClosingTag("coordinate", coordinate),
+						text: removeClosingTag("text", text),
+						size: removeClosingTag("size", size),
+					} satisfies ClineSayBrowserAction),
+					undefined,
+					block.partial,
+				)
+			}
 			return
-		}
-
-		try {
+		} else {
+			// Initialize with empty object to avoid "used before assigned" errors
 			let browserActionResult: BrowserActionResult = {}
 
 			if (action === "launch") {
 				if (!url) {
-					task.consecutiveMistakeCount++
-					task.recordToolError("browser_action")
-					pushToolResult(await task.sayAndCreateMissingParamError("browser_action", "url"))
-					await task.browserSession.closeBrowser()
+					cline.consecutiveMistakeCount++
+					cline.recordToolError("browser_action")
+					pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "url"))
+					// Do not close the browser on parameter validation errors
 					return
 				}
 
-				task.consecutiveMistakeCount = 0
-				const didApprove = await callbacks.askApproval("browser_action_launch", url)
+				cline.consecutiveMistakeCount = 0
+				const didApprove = await askApproval("browser_action_launch", url)
 
 				if (!didApprove) {
 					return
 				}
 
-				await task.say("browser_action_result", "")
-				await task.browserSession.launchBrowser()
-				browserActionResult = await task.browserSession.navigateToUrl(url)
+				// NOTE: It's okay that we send this message since the partial inspect_site is finished streaming.
+				// The only scenario we have to avoid is sending messages WHILE a partial message exists at the end of the messages array.
+				// For example the api_req_finished message would interfere with the partial message, so we needed to remove that.
+
+				// Launch browser first (this triggers "Browser session opened" status message)
+				await cline.browserSession.launchBrowser()
+
+				// Create browser_action say message AFTER launching so status appears first
+				await cline.say(
+					"browser_action",
+					JSON.stringify({
+						action: "launch" as BrowserAction,
+						text: url,
+					} satisfies ClineSayBrowserAction),
+					undefined,
+					false,
+				)
+
+				browserActionResult = await cline.browserSession.navigateToUrl(url)
 			} else {
-				// Validate parameters for specific actions
+				// Variables to hold validated and processed parameters
+				let processedCoordinate = coordinate
+
 				if (action === "click" || action === "hover") {
 					if (!coordinate) {
-						task.consecutiveMistakeCount++
-						task.recordToolError("browser_action")
-						pushToolResult(await task.sayAndCreateMissingParamError("browser_action", "coordinate"))
-						await task.browserSession.closeBrowser()
+						cline.consecutiveMistakeCount++
+						cline.recordToolError("browser_action")
+						pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "coordinate"))
+						// Do not close the browser on parameter validation errors
+						return // can't be within an inner switch
+					}
+
+					// Get viewport dimensions from the browser session
+					const viewportSize = cline.browserSession.getViewportSize()
+					const viewportWidth = viewportSize.width || 900 // default to 900 if not available
+					const viewportHeight = viewportSize.height || 600 // default to 600 if not available
+
+					// Scale coordinate from image dimensions to viewport dimensions
+					try {
+						processedCoordinate = scaleCoordinate(coordinate, viewportWidth, viewportHeight)
+					} catch (error) {
+						cline.consecutiveMistakeCount++
+						cline.recordToolError("browser_action")
+						pushToolResult(
+							await cline.sayAndCreateMissingParamError(
+								"browser_action",
+								"coordinate",
+								error instanceof Error ? error.message : String(error),
+							),
+						)
 						return
 					}
 				}
 
-				if (action === "type") {
+				if (action === "type" || action === "press") {
 					if (!text) {
-						task.consecutiveMistakeCount++
-						task.recordToolError("browser_action")
-						pushToolResult(await task.sayAndCreateMissingParamError("browser_action", "text"))
-						await task.browserSession.closeBrowser()
+						cline.consecutiveMistakeCount++
+						cline.recordToolError("browser_action")
+						pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "text"))
+						// Do not close the browser on parameter validation errors
 						return
 					}
 				}
 
 				if (action === "resize") {
 					if (!size) {
-						task.consecutiveMistakeCount++
-						task.recordToolError("browser_action")
-						pushToolResult(await task.sayAndCreateMissingParamError("browser_action", "size"))
-						await task.browserSession.closeBrowser()
+						cline.consecutiveMistakeCount++
+						cline.recordToolError("browser_action")
+						pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "size"))
+						// Do not close the browser on parameter validation errors
 						return
 					}
 				}
 
-				task.consecutiveMistakeCount = 0
+				cline.consecutiveMistakeCount = 0
 
-				await task.say(
-					"browser_action",
-					JSON.stringify({
-						action: action as BrowserAction,
-						coordinate: coordinate ? `${coordinate.x},${coordinate.y}` : undefined,
-						text,
-					} satisfies ClineSayBrowserAction),
-					undefined,
-					false,
-				)
+				// Prepare say payload; include executedCoordinate for pointer actions
+				const sayPayload: ClineSayBrowserAction & { executedCoordinate?: string } = {
+					action: action as BrowserAction,
+					coordinate,
+					text,
+					size,
+				}
+				if ((action === "click" || action === "hover") && processedCoordinate) {
+					sayPayload.executedCoordinate = processedCoordinate
+				}
+				await cline.say("browser_action", JSON.stringify(sayPayload), undefined, false)
 
 				switch (action) {
 					case "click":
-						browserActionResult = await task.browserSession.click(`${coordinate!.x},${coordinate!.y}`)
+						browserActionResult = await cline.browserSession.click(processedCoordinate!)
 						break
 					case "hover":
-						browserActionResult = await task.browserSession.hover(`${coordinate!.x},${coordinate!.y}`)
+						browserActionResult = await cline.browserSession.hover(processedCoordinate!)
 						break
 					case "type":
-						browserActionResult = await task.browserSession.type(text!)
+						browserActionResult = await cline.browserSession.type(text!)
+						break
+					case "press":
+						browserActionResult = await cline.browserSession.press(text!)
 						break
 					case "scroll_down":
-						browserActionResult = await task.browserSession.scrollDown()
+						browserActionResult = await cline.browserSession.scrollDown()
 						break
 					case "scroll_up":
-						browserActionResult = await task.browserSession.scrollUp()
+						browserActionResult = await cline.browserSession.scrollUp()
 						break
 					case "resize":
-						browserActionResult = await task.browserSession.resize(`${size!.width},${size!.height}`)
+						browserActionResult = await cline.browserSession.resize(size!)
 						break
 					case "close":
-						browserActionResult = await task.browserSession.closeBrowser()
+						browserActionResult = await cline.browserSession.closeBrowser()
 						break
 				}
 			}
@@ -183,62 +196,63 @@ export class BrowserActionTool extends BaseTool<"browser_action"> {
 				case "click":
 				case "hover":
 				case "type":
+				case "press":
 				case "scroll_down":
 				case "scroll_up":
-				case "resize":
-					await task.say("browser_action_result", JSON.stringify(browserActionResult))
+				case "resize": {
+					await cline.say("browser_action_result", JSON.stringify(browserActionResult))
 
-					pushToolResult(
-						formatResponse.toolResult(
-							`The browser action has been executed. The console logs and screenshot have been captured for your analysis.\n\nConsole logs:\n${
-								browserActionResult?.logs || "(No new logs)"
-							}\n\n(REMEMBER: if you need to proceed to using non-\`browser_action\` tools or launch a new browser, you MUST first close cline browser. For example, if after analyzing the logs and screenshot you need to edit a file, you must first close the browser before you can use the write_to_file tool.)`,
-							browserActionResult?.screenshot ? [browserActionResult.screenshot] : [],
-						),
-					)
-					break
+					const images = browserActionResult?.screenshot ? [browserActionResult.screenshot] : []
+
+					let messageText = `The browser action has been executed.`
+
+					messageText += `\n\n**CRITICAL**: When providing click/hover coordinates:`
+					messageText += `\n1. Screenshot dimensions != Browser viewport dimensions`
+					messageText += `\n2. Measure x,y on the screenshot image you see below`
+					messageText += `\n3. Use format: <coordinate>x,y@WIDTHxHEIGHT</coordinate> where WIDTHxHEIGHT is the EXACT pixel size of the screenshot image`
+					messageText += `\n4. Never use the browser viewport size for WIDTHxHEIGHT - it is only for reference and is often larger than the screenshot`
+					messageText += `\n5. Screenshots are often downscaled - always use the dimensions you see in the image`
+					messageText += `\nExample: Viewport 1280x800, screenshot 1000x625, click (500,300) -> <coordinate>500,300@1000x625</coordinate>`
 
+					// Include browser viewport dimensions (for reference only)
+					if (browserActionResult?.viewportWidth && browserActionResult?.viewportHeight) {
+						messageText += `\n\nBrowser viewport: ${browserActionResult.viewportWidth}x${browserActionResult.viewportHeight}`
+					}
+
+					// Include cursor position if available
+					if (browserActionResult?.currentMousePosition) {
+						messageText += `\nCursor position: ${browserActionResult.currentMousePosition}`
+					}
+
+					messageText += `\n\nConsole logs:\n${browserActionResult?.logs || "(No new logs)"}\n`
+
+					if (images.length > 0) {
+						const blocks = [
+							...formatResponse.imageBlocks(images),
+							{ type: "text", text: messageText } as Anthropic.TextBlockParam,
+						]
+						pushToolResult(blocks)
+					} else {
+						pushToolResult(messageText)
+					}
+
+					break
+				}
 				case "close":
 					pushToolResult(
 						formatResponse.toolResult(
 							`The browser has been closed. You may now proceed to using other tools.`,
 						),
 					)
+
 					break
 			}
-		} catch (error) {
-			await task.browserSession.closeBrowser()
-			await handleError("executing browser action", error as Error)
-		}
-	}
 
-	override async handlePartial(task: Task, block: ToolUse<"browser_action">): Promise<void> {
-		const action: BrowserAction | undefined = block.params.action as BrowserAction
-		const url: string | undefined = block.params.url
-		const coordinate: string | undefined = block.params.coordinate
-		const text: string | undefined = block.params.text
-
-		if (!action || !browserActions.includes(action)) {
 			return
 		}
-
-		if (action === "launch") {
-			await task
-				.ask("browser_action_launch", this.removeClosingTag("url", url, block.partial), block.partial)
-				.catch(() => {})
-		} else {
-			await task.say(
-				"browser_action",
-				JSON.stringify({
-					action: action as BrowserAction,
-					coordinate: this.removeClosingTag("coordinate", coordinate, block.partial),
-					text: this.removeClosingTag("text", text, block.partial),
-				} satisfies ClineSayBrowserAction),
-				undefined,
-				block.partial,
-			)
-		}
+	} catch (error) {
+		// Keep the browser session alive on errors; report the error without terminating the session
+		await handleError("executing browser action", error)
+		return
 	}
 }
-
-export const browserActionTool = new BrowserActionTool()

+ 84 - 0
src/core/tools/__tests__/BrowserActionTool.coordinateScaling.spec.ts

@@ -0,0 +1,84 @@
+// Test coordinate scaling functionality in browser actions
+import { describe, it, expect } from "vitest"
+import { scaleCoordinate } from "../../../shared/browserUtils"
+
+describe("Browser Action Coordinate Scaling", () => {
+	describe("Coordinate format validation", () => {
+		it("should match valid coordinate format with image dimensions", () => {
+			const validFormats = [
+				"450,300@1024x768",
+				"0,0@1920x1080",
+				"1920,1080@1920x1080",
+				"100,200@800x600",
+				" 273 , 273 @ 1280x800 ",
+				"267,273@1280,800", // comma separator for dimensions
+				"450,300@1024,768", // comma separator for dimensions
+			]
+
+			validFormats.forEach((coord) => {
+				// Every accepted format must scale without throwing
+				expect(() => scaleCoordinate(coord, 900, 600)).not.toThrow()
+			})
+		})
+
+		it("should not match invalid coordinate formats", () => {
+			const invalidFormats = [
+				"450,300", // missing image dimensions
+				"450,300@", // incomplete dimensions
+				"450,300@1024", // missing height
+				"450,300@1024x", // missing height value
+				"@1024x768", // missing coordinates
+				"450@1024x768", // missing y coordinate
+				",300@1024x768", // missing x coordinate
+				"450,300@1024x768x2", // extra dimension
+				"a,b@1024x768", // non-numeric coordinates
+				"450,300@axb", // non-numeric dimensions
+			]
+
+			invalidFormats.forEach((coord) => {
+				expect(() => scaleCoordinate(coord, 900, 600)).toThrow()
+			})
+		})
+	})
+
+	describe("Coordinate scaling logic", () => {
+		it("should correctly scale coordinates from image to viewport", () => {
+			// Test case 1: Same dimensions (no scaling)
+			expect(scaleCoordinate("450,300@900x600", 900, 600)).toBe("450,300")
+
+			// Test case 2: Half dimensions (2x upscale)
+			expect(scaleCoordinate("225,150@450x300", 900, 600)).toBe("450,300")
+
+			// Test case 3: Double dimensions (0.5x downscale)
+			expect(scaleCoordinate("900,600@1800x1200", 900, 600)).toBe("450,300")
+
+			// Test case 4: Image and viewport have different aspect ratios (x and y scale independently)
+			expect(scaleCoordinate("512,384@1024x768", 1920, 1080)).toBe("960,540")
+
+			// Test case 5: Edge cases (0,0)
+			expect(scaleCoordinate("0,0@1024x768", 1920, 1080)).toBe("0,0")
+
+			// Test case 6: Edge cases (max coordinates)
+			expect(scaleCoordinate("1024,768@1024x768", 1920, 1080)).toBe("1920,1080")
+		})
+
+		it("should throw error for invalid coordinate format", () => {
+			// A missing or incomplete "@WIDTHxHEIGHT" suffix must be rejected with a descriptive message
+			expect(() => scaleCoordinate("450,300", 900, 600)).toThrow("Invalid coordinate format")
+			expect(() => scaleCoordinate("450,300@1024", 900, 600)).toThrow("Invalid coordinate format")
+			expect(() => scaleCoordinate("invalid", 900, 600)).toThrow("Invalid coordinate format")
+		})
+
+		it("should handle rounding correctly", () => {
+			// Scaled values are expected to be rounded to the nearest integer
+			// 333 / 1000 * 900 = 299.7 -> rounds to 300
+			expect(scaleCoordinate("333,333@1000x1000", 900, 900)).toBe("300,300")
+
+			// 666 / 1000 * 900 = 599.4 -> rounds to 599
+			expect(scaleCoordinate("666,666@1000x1000", 900, 900)).toBe("599,599")
+
+			// 500 / 1000 * 900 = 450.0 -> rounds to 450
+			expect(scaleCoordinate("500,500@1000x1000", 900, 900)).toBe("450,450")
+		})
+	})
+})

+ 310 - 0
src/core/webview/BrowserSessionPanelManager.ts

@@ -0,0 +1,310 @@
+import * as vscode from "vscode"
+import type { ClineMessage } from "@roo-code/types"
+import { getUri } from "./getUri"
+import { getNonce } from "./getNonce"
+import type { ClineProvider } from "./ClineProvider"
+import { webviewMessageHandler } from "./webviewMessageHandler"
+
+/**
+ * Owns the standalone "Browser Session" webview panel for one ClineProvider.
+ *
+ * One manager exists per provider (see getInstance). The panel's webview must
+ * send a `webviewDidLaunch` message before it is considered ready; session
+ * updates and navigation requests issued before that handshake are queued
+ * (latest value wins) and flushed once the handshake arrives. The manager also
+ * remembers whether the user closed the panel, so callers can decide whether
+ * auto-opening is still appropriate.
+ */
+export class BrowserSessionPanelManager {
+	// Keyed weakly so a manager does not keep its provider alive
+	private static instances: WeakMap<ClineProvider, BrowserSessionPanelManager> = new WeakMap()
+	private panel: vscode.WebviewPanel | undefined
+	private disposables: vscode.Disposable[] = []
+	// True once the panel's webview has sent "webviewDidLaunch"
+	private isReady: boolean = false
+	// Latest session snapshot requested while the panel was not ready
+	private pendingUpdate?: { messages: ClineMessage[]; isActive: boolean }
+	// Latest step navigation requested while the panel was not ready
+	private pendingNavigateIndex?: number
+	private userManuallyClosedPanel: boolean = false
+
+	private constructor(private readonly provider: ClineProvider) {}
+
+	/**
+	 * Get or create a BrowserSessionPanelManager instance for the given provider
+	 *
+	 * @param provider The provider whose browser session the panel displays.
+	 * @returns The manager associated with `provider`; the same instance on every call.
+	 */
+	public static getInstance(provider: ClineProvider): BrowserSessionPanelManager {
+		let instance = BrowserSessionPanelManager.instances.get(provider)
+		if (!instance) {
+			instance = new BrowserSessionPanelManager(provider)
+			BrowserSessionPanelManager.instances.set(provider, instance)
+		}
+		return instance
+	}
+
+	/**
+	 * Show the browser session panel, creating it if necessary
+	 *
+	 * When the provider has a current task, the task's browser session messages
+	 * (from the first launch ask / "opened" status message onward) are sent to
+	 * the panel, or queued if the panel has not completed its handshake yet.
+	 */
+	public async show(): Promise<void> {
+		await this.createOrShowPanel()
+
+		// Send initial browser session data
+		const task = this.provider.getCurrentTask()
+		if (task) {
+			const messages = task.clineMessages || []
+			const browserSessionStartIndex = messages.findIndex(
+				(m) =>
+					m.ask === "browser_action_launch" ||
+					(m.say === "browser_session_status" && m.text?.includes("opened")),
+			)
+			const browserSessionMessages =
+				browserSessionStartIndex !== -1 ? messages.slice(browserSessionStartIndex) : []
+			const isBrowserSessionActive = task.browserSession?.isSessionActive() ?? false
+
+			await this.updateBrowserSession(browserSessionMessages, isBrowserSessionActive)
+		}
+	}
+
+	/**
+	 * Reveal the existing panel, or create a new one and wire up its HTML,
+	 * message channel and disposal handling.
+	 */
+	private async createOrShowPanel(): Promise<void> {
+		// If panel already exists, show it
+		if (this.panel) {
+			this.panel.reveal(vscode.ViewColumn.One)
+			return
+		}
+
+		const extensionUri = this.provider.context.extensionUri
+		const extensionMode = this.provider.context.extensionMode
+
+		// Create new panel
+		this.panel = vscode.window.createWebviewPanel("roo.browserSession", "Browser Session", vscode.ViewColumn.One, {
+			enableScripts: true,
+			retainContextWhenHidden: true,
+			localResourceRoots: [extensionUri],
+		})
+
+		// Set up the webview's HTML content
+		this.panel.webview.html =
+			extensionMode === vscode.ExtensionMode.Development
+				? await this.getHMRHtmlContent(this.panel.webview, extensionUri)
+				: this.getHtmlContent(this.panel.webview, extensionUri)
+
+		// Wire message channel for this panel (state handshake + actions)
+		this.panel.webview.onDidReceiveMessage(
+			async (message: any) => {
+				try {
+					// Let the shared handler process commands that work for any webview
+					if (message?.type) {
+						await webviewMessageHandler(this.provider as any, message)
+					}
+					// Panel-specific readiness and initial state
+					if (message?.type === "webviewDidLaunch") {
+						this.isReady = true
+						// Send full extension state to this panel (the sidebar postState targets the main webview)
+						const state = await (this.provider as any).getStateToPostToWebview?.()
+						if (state) {
+							await this.panel?.webview.postMessage({ type: "state", state })
+						}
+						// Flush any pending browser session update queued before readiness
+						if (this.pendingUpdate) {
+							await this.updateBrowserSession(this.pendingUpdate.messages, this.pendingUpdate.isActive)
+							this.pendingUpdate = undefined
+						}
+						// Flush any pending navigation request queued before readiness
+						if (this.pendingNavigateIndex !== undefined) {
+							await this.navigateToStep(this.pendingNavigateIndex)
+							this.pendingNavigateIndex = undefined
+						}
+					}
+				} catch (err) {
+					console.error("[BrowserSessionPanel] onDidReceiveMessage error:", err)
+				}
+			},
+			undefined,
+			this.disposables,
+		)
+
+		// Handle panel disposal - track that user closed it manually
+		this.panel.onDidDispose(
+			() => {
+				// Mark that user manually closed the panel (unless we're programmatically disposing)
+				// dispose() clears this.panel before disposing, so a still-set reference means the user closed it
+				if (this.panel) {
+					this.userManuallyClosedPanel = true
+				}
+				this.panel = undefined
+				this.dispose()
+			},
+			null,
+			this.disposables,
+		)
+	}
+
+	/**
+	 * Post a browser session snapshot to the panel.
+	 * No-op when the panel is closed; queued (latest wins) when the panel has not completed its handshake.
+	 *
+	 * @param messages Browser session messages to display.
+	 * @param isBrowserSessionActive Whether the browser session is currently active.
+	 */
+	public async updateBrowserSession(messages: ClineMessage[], isBrowserSessionActive: boolean): Promise<void> {
+		if (!this.panel) {
+			return
+		}
+		// If the panel isn't ready yet, queue the latest snapshot to post after handshake
+		if (!this.isReady) {
+			this.pendingUpdate = { messages, isActive: isBrowserSessionActive }
+			return
+		}
+
+		await this.panel.webview.postMessage({
+			type: "browserSessionUpdate",
+			browserSessionMessages: messages,
+			isBrowserSessionActive,
+		})
+	}
+
+	/**
+	 * Navigate the Browser Session panel to a specific step index.
+	 * If the panel isn't ready yet, queue the navigation to run after handshake.
+	 *
+	 * @param stepIndex Index of the step the panel should display.
+	 */
+	public async navigateToStep(stepIndex: number): Promise<void> {
+		if (!this.panel) {
+			return
+		}
+		if (!this.isReady) {
+			this.pendingNavigateIndex = stepIndex
+			return
+		}
+
+		await this.panel.webview.postMessage({
+			type: "browserSessionNavigate",
+			stepIndex,
+		})
+	}
+
+	/**
+	 * Reset the manual close flag (call this when a new browser session launches)
+	 */
+	public resetManualCloseFlag(): void {
+		this.userManuallyClosedPanel = false
+	}
+
+	/**
+	 * Check if auto-opening should be allowed (not manually closed by user)
+	 */
+	public shouldAllowAutoOpen(): boolean {
+		return !this.userManuallyClosedPanel
+	}
+
+	/**
+	 * Whether the Browser Session panel is currently open.
+	 */
+	public isOpen(): boolean {
+		return !!this.panel
+	}
+
+	/**
+	 * Toggle the Browser Session panel visibility.
+	 * - If open: closes it
+	 * - If closed: opens it and sends initial session snapshot
+	 */
+	public async toggle(): Promise<void> {
+		if (this.panel) {
+			this.dispose()
+		} else {
+			await this.show()
+		}
+	}
+
+	/**
+	 * Close the panel (if any), release its listeners and reset all per-panel state.
+	 * Safe to call when no panel is open.
+	 */
+	public dispose(): void {
+		// Clear the panel reference before disposing to prevent marking as manual close
+		const panelToDispose = this.panel
+		this.panel = undefined
+
+		while (this.disposables.length) {
+			const disposable = this.disposables.pop()
+			if (disposable) {
+				disposable.dispose()
+			}
+		}
+		try {
+			panelToDispose?.dispose()
+		} catch {}
+		this.isReady = false
+		this.pendingUpdate = undefined
+		// Also drop any queued navigation so it is not replayed, stale, on the next panel's handshake
+		this.pendingNavigateIndex = undefined
+	}
+
+	/**
+	 * Build the development-mode HTML, which loads the panel script from the local Vite dev server.
+	 * The port is read from a `.vite-port` file when present and defaults to 5173.
+	 */
+	private async getHMRHtmlContent(webview: vscode.Webview, extensionUri: vscode.Uri): Promise<string> {
+		const fs = require("fs")
+		const path = require("path")
+		let localPort = "5173"
+
+		try {
+			const portFilePath = path.resolve(__dirname, "../../.vite-port")
+			if (fs.existsSync(portFilePath)) {
+				localPort = fs.readFileSync(portFilePath, "utf8").trim()
+			}
+		} catch (err) {
+			console.error("[BrowserSessionPanel:Vite] Failed to read port file:", err)
+		}
+
+		const localServerUrl = `localhost:${localPort}`
+		const nonce = getNonce()
+
+		const stylesUri = getUri(webview, extensionUri, ["webview-ui", "build", "assets", "index.css"])
+		const codiconsUri = getUri(webview, extensionUri, ["assets", "codicons", "codicon.css"])
+
+		const scriptUri = `http://${localServerUrl}/src/browser-panel.tsx`
+
+		const reactRefresh = `
+			<script nonce="${nonce}" type="module">
+				import RefreshRuntime from "http://localhost:${localPort}/@react-refresh"
+				RefreshRuntime.injectIntoGlobalHook(window)
+				window.$RefreshReg$ = () => {}
+				window.$RefreshSig$ = () => (type) => type
+				window.__vite_plugin_react_preamble_installed__ = true
+			</script>
+		`
+
+		const csp = [
+			"default-src 'none'",
+			`font-src ${webview.cspSource} data:`,
+			`style-src ${webview.cspSource} 'unsafe-inline' https://* http://${localServerUrl}`,
+			`img-src ${webview.cspSource} data:`,
+			`script-src 'unsafe-eval' ${webview.cspSource} http://${localServerUrl} 'nonce-${nonce}'`,
+			`connect-src ${webview.cspSource} ws://${localServerUrl} http://${localServerUrl}`,
+		]
+
+		return `
+			<!DOCTYPE html>
+			<html lang="en">
+				<head>
+					<meta charset="utf-8">
+					<meta name="viewport" content="width=device-width,initial-scale=1">
+					<meta http-equiv="Content-Security-Policy" content="${csp.join("; ")}">
+					<link rel="stylesheet" type="text/css" href="${stylesUri}">
+					<link href="${codiconsUri}" rel="stylesheet" />
+					<title>Browser Session</title>
+				</head>
+				<body>
+					<div id="root"></div>
+					${reactRefresh}
+					<script type="module" src="${scriptUri}"></script>
+				</body>
+			</html>
+		`
+	}
+
+	/**
+	 * Build the production HTML, which loads the bundled panel script and styles from the extension's build output.
+	 */
+	private getHtmlContent(webview: vscode.Webview, extensionUri: vscode.Uri): string {
+		const stylesUri = getUri(webview, extensionUri, ["webview-ui", "build", "assets", "index.css"])
+		const scriptUri = getUri(webview, extensionUri, ["webview-ui", "build", "assets", "browser-panel.js"])
+		const codiconsUri = getUri(webview, extensionUri, ["assets", "codicons", "codicon.css"])
+
+		const nonce = getNonce()
+
+		const csp = [
+			"default-src 'none'",
+			`font-src ${webview.cspSource} data:`,
+			`style-src ${webview.cspSource} 'unsafe-inline'`,
+			`img-src ${webview.cspSource} data:`,
+			`script-src ${webview.cspSource} 'wasm-unsafe-eval' 'nonce-${nonce}'`,
+			`connect-src ${webview.cspSource}`,
+		]
+
+		return `
+			<!DOCTYPE html>
+			<html lang="en">
+				<head>
+					<meta charset="utf-8">
+					<meta name="viewport" content="width=device-width,initial-scale=1">
+					<meta http-equiv="Content-Security-Policy" content="${csp.join("; ")}">
+					<link rel="stylesheet" type="text/css" href="${stylesUri}">
+					<link href="${codiconsUri}" rel="stylesheet" />
+					<title>Browser Session</title>
+				</head>
+				<body>
+					<div id="root"></div>
+					<script nonce="${nonce}" type="module" src="${scriptUri}"></script>
+				</body>
+			</html>
+		`
+	}
+}

+ 6 - 0
src/core/webview/ClineProvider.ts

@@ -1925,6 +1925,7 @@ export class ClineProvider
 			openRouterImageGenerationSelectedModel,
 			openRouterUseMiddleOutTransform,
 			featureRoomoteControlEnabled,
+			isBrowserSessionActive,
 		} = await this.getState()
 
 		let cloudOrganizations: CloudOrganizationMembership[] = []
@@ -1974,6 +1975,7 @@ export class ClineProvider
 			alwaysAllowModeSwitch: alwaysAllowModeSwitch ?? false,
 			alwaysAllowSubtasks: alwaysAllowSubtasks ?? false,
 			alwaysAllowUpdateTodoList: alwaysAllowUpdateTodoList ?? false,
+			isBrowserSessionActive,
 			allowedMaxRequests,
 			allowedMaxCost,
 			autoCondenseContext: autoCondenseContext ?? true,
@@ -2187,6 +2189,9 @@ export class ClineProvider
 			)
 		}
 
+		// Get actual browser session state
+		const isBrowserSessionActive = this.getCurrentTask()?.browserSession?.isSessionActive() ?? false
+
 		// Return the same structure as before.
 		return {
 			apiConfiguration: providerSettings,
@@ -2205,6 +2210,7 @@ export class ClineProvider
 			alwaysAllowSubtasks: stateValues.alwaysAllowSubtasks ?? false,
 			alwaysAllowFollowupQuestions: stateValues.alwaysAllowFollowupQuestions ?? false,
 			alwaysAllowUpdateTodoList: stateValues.alwaysAllowUpdateTodoList ?? false,
+			isBrowserSessionActive,
 			followupAutoApproveTimeoutMs: stateValues.followupAutoApproveTimeoutMs ?? 60000,
 			diagnosticsEnabled: stateValues.diagnosticsEnabled ?? true,
 			allowedMaxRequests: stateValues.allowedMaxRequests,

+ 1 - 0
src/core/webview/__tests__/ClineProvider.spec.ts

@@ -503,6 +503,7 @@ describe("ClineProvider", () => {
 
 		const mockState: ExtensionState = {
 			version: "1.0.0",
+			isBrowserSessionActive: false,
 			clineMessages: [],
 			taskHistory: [],
 			shouldShowAnnouncement: false,

+ 96 - 0
src/core/webview/webviewMessageHandler.ts

@@ -23,6 +23,7 @@ import { type ApiMessage } from "../task-persistence/apiMessages"
 import { saveTaskMessages } from "../task-persistence"
 
 import { ClineProvider } from "./ClineProvider"
+import { BrowserSessionPanelManager } from "./BrowserSessionPanelManager"
 import { handleCheckpointRestoreOperation } from "./checkpointRestoreHandler"
 import { changeLanguage, t } from "../../i18n"
 import { Package } from "../../shared/package"
@@ -1116,6 +1117,101 @@ export const webviewMessageHandler = async (
 		case "cancelTask":
 			await provider.cancelTask()
 			break
+		case "killBrowserSession":
+			{
+				const task = provider.getCurrentTask()
+				if (task?.browserSession) {
+					await task.browserSession.closeBrowser()
+					await provider.postStateToWebview()
+				}
+			}
+			break
+		case "openBrowserSessionPanel":
+			{
+				// Toggle the Browser Session panel (open if closed, close if open)
+				const panelManager = BrowserSessionPanelManager.getInstance(provider)
+				await panelManager.toggle()
+			}
+			break
+		case "showBrowserSessionPanelAtStep":
+			{
+				const panelManager = BrowserSessionPanelManager.getInstance(provider)
+
+				// If this is a launch action, reset the manual close flag
+				if (message.isLaunchAction) {
+					panelManager.resetManualCloseFlag()
+				}
+
+				// Show panel if:
+				// 1. Manual click (forceShow) - always show
+				// 2. Launch action - always show and reset flag
+				// 3. Auto-open for non-launch action - only if user hasn't manually closed
+				if (message.forceShow || message.isLaunchAction || panelManager.shouldAllowAutoOpen()) {
+					// Ensure panel is shown and populated
+					await panelManager.show()
+
+					// Navigate to a specific step if provided
+					// For launch actions: navigate to step 0
+					// For manual clicks: navigate to the clicked step
+					// For auto-opens of regular actions: don't navigate, let BrowserSessionRow's
+					// internal auto-advance logic handle it (only advances if user is on most recent step)
+					if (typeof message.stepIndex === "number" && message.stepIndex >= 0) {
+						await panelManager.navigateToStep(message.stepIndex)
+					}
+				}
+			}
+			break
+		case "refreshBrowserSessionPanel":
+			{
+				// Re-send the latest browser session snapshot to the panel
+				const panelManager = BrowserSessionPanelManager.getInstance(provider)
+				const task = provider.getCurrentTask()
+				if (task) {
+					const messages = task.clineMessages || []
+					const browserSessionStartIndex = messages.findIndex(
+						(m) =>
+							m.ask === "browser_action_launch" ||
+							(m.say === "browser_session_status" && m.text?.includes("opened")),
+					)
+					const browserSessionMessages =
+						browserSessionStartIndex !== -1 ? messages.slice(browserSessionStartIndex) : []
+					const isBrowserSessionActive = task.browserSession?.isSessionActive() ?? false
+					await panelManager.updateBrowserSession(browserSessionMessages, isBrowserSessionActive)
+				}
+			}
+			break
+		case "allowedCommands": {
+			// Validate and sanitize the commands array
+			const commands = message.commands ?? []
+			const validCommands = Array.isArray(commands)
+				? commands.filter((cmd) => typeof cmd === "string" && cmd.trim().length > 0)
+				: []
+
+			await updateGlobalState("allowedCommands", validCommands)
+
+			// Also update workspace settings.
+			await vscode.workspace
+				.getConfiguration(Package.name)
+				.update("allowedCommands", validCommands, vscode.ConfigurationTarget.Global)
+
+			break
+		}
+		case "deniedCommands": {
+			// Validate and sanitize the commands array
+			const commands = message.commands ?? []
+			const validCommands = Array.isArray(commands)
+				? commands.filter((cmd) => typeof cmd === "string" && cmd.trim().length > 0)
+				: []
+
+			await updateGlobalState("deniedCommands", validCommands)
+
+			// Also update workspace settings.
+			await vscode.workspace
+				.getConfiguration(Package.name)
+				.update("deniedCommands", validCommands, vscode.ConfigurationTarget.Global)
+
+			break
+		}
 		case "openCustomModesSettings": {
 			const customModesFilePath = await provider.customModesManager.getCustomModesFilePath()
 

+ 306 - 5
src/services/browser/BrowserSession.ts

@@ -1,7 +1,7 @@
 import * as vscode from "vscode"
 import * as fs from "fs/promises"
 import * as path from "path"
-import { Browser, Page, ScreenshotOptions, TimeoutError, launch, connect } from "puppeteer-core"
+import { Browser, Page, ScreenshotOptions, TimeoutError, launch, connect, KeyInput } from "puppeteer-core"
 // @ts-ignore
 import PCR from "puppeteer-chromium-resolver"
 import pWaitFor from "p-wait-for"
@@ -25,9 +25,15 @@ export class BrowserSession {
 	private currentMousePosition?: string
 	private lastConnectionAttempt?: number
 	private isUsingRemoteBrowser: boolean = false
+	private onStateChange?: (isActive: boolean) => void
 
-	constructor(context: vscode.ExtensionContext) {
+	// Track last known viewport to surface in environment details
+	private lastViewportWidth?: number
+	private lastViewportHeight?: number
+
+	constructor(context: vscode.ExtensionContext, onStateChange?: (isActive: boolean) => void) {
 		this.context = context
+		this.onStateChange = onStateChange
 	}
 
 	private async ensureChromiumExists(): Promise<PCRStats> {
@@ -189,21 +195,31 @@ export class BrowserSession {
 				await this.launchLocalBrowser()
 			}
 		}
+
+		// Notify that browser session is now active
+		if (this.browser && this.onStateChange) {
+			this.onStateChange(true)
+		}
 	}
 
 	/**
 	 * Closes the browser and resets browser state
 	 */
 	async closeBrowser(): Promise<BrowserActionResult> {
-		if (this.browser || this.page) {
-			console.log("closing browser...")
+		const wasActive = !!(this.browser || this.page)
 
+		if (wasActive) {
 			if (this.isUsingRemoteBrowser && this.browser) {
 				await this.browser.disconnect().catch(() => {})
 			} else {
 				await this.browser?.close().catch(() => {})
 			}
 			this.resetBrowserState()
+
+			// Notify that browser session is now inactive
+			if (this.onStateChange) {
+				this.onStateChange(false)
+			}
 		}
 		return {}
 	}
@@ -216,12 +232,14 @@ export class BrowserSession {
 		this.page = undefined
 		this.currentMousePosition = undefined
 		this.isUsingRemoteBrowser = false
+		this.lastViewportWidth = undefined
+		this.lastViewportHeight = undefined
 	}
 
 	async doAction(action: (page: Page) => Promise<void>): Promise<BrowserActionResult> {
 		if (!this.page) {
 			throw new Error(
-				"Browser is not launched. This may occur if the browser was automatically closed by a non-`browser_action` tool.",
+				"Cannot perform browser action: no active browser session. The browser must be launched first using the 'launch' action before other browser actions can be performed.",
 			)
 		}
 
@@ -260,6 +278,11 @@ export class BrowserSession {
 			interval: 100,
 		}).catch(() => {})
 
+		// Draw cursor indicator if we have a cursor position
+		if (this.currentMousePosition) {
+			await this.drawCursorIndicator(this.page, this.currentMousePosition)
+		}
+
 		let options: ScreenshotOptions = {
 			encoding: "base64",
 
@@ -291,15 +314,29 @@ export class BrowserSession {
 			throw new Error("Failed to take screenshot.")
 		}
 
+		// Remove cursor indicator after taking screenshot
+		if (this.currentMousePosition) {
+			await this.removeCursorIndicator(this.page)
+		}
+
 		// this.page.removeAllListeners() <- causes the page to crash!
 		this.page.off("console", consoleListener)
 		this.page.off("pageerror", errorListener)
 
+		// Get actual viewport dimensions
+		const viewport = this.page.viewport()
+
+		// Persist last known viewport dimensions
+		this.lastViewportWidth = viewport?.width
+		this.lastViewportHeight = viewport?.height
+
 		return {
 			screenshot,
 			logs: logs.join("\n"),
 			currentUrl: this.page.url(),
 			currentMousePosition: this.currentMousePosition,
+			viewportWidth: viewport?.width,
+			viewportHeight: viewport?.height,
 		}
 	}
 
@@ -453,6 +490,64 @@ export class BrowserSession {
 		}
 	}
 
+	/**
+	 * Force links and window.open to navigate in the same tab.
+	 * This makes clicks on anchors with target="_blank" stay in the current page
+	 * and also intercepts window.open so SPA/open-in-new-tab patterns don't spawn popups.
+	 *
+	 * The patch is installed at most once per document: a `__ROO_FORCE_SAME_TAB__`
+	 * flag on `window` makes repeat calls return immediately inside the page.
+	 * The one-off `target` rewrite therefore only covers anchors that exist at
+	 * install time; anchors that still carry target="_blank" when clicked are
+	 * caught by the capturing click listener instead.
+	 *
+	 * Best-effort: failures inside the page and failures of `evaluate` itself
+	 * are swallowed so the calling browser action is never interrupted.
+	 *
+	 * @param page The Puppeteer page whose current document should be patched.
+	 */
+	private async forceLinksToSameTab(page: Page): Promise<void> {
+		try {
+			await page.evaluate(() => {
+				try {
+					// Ensure we only install once per document
+					if ((window as any).__ROO_FORCE_SAME_TAB__) return
+					;(window as any).__ROO_FORCE_SAME_TAB__ = true
+
+					// Override window.open to navigate current tab instead of creating a new one
+					const originalOpen = window.open
+					window.open = function (url: string | URL, target?: string, features?: string) {
+						try {
+							const href = typeof url === "string" ? url : String(url)
+							location.href = href
+						} catch {
+							// fall back to original (forced to "_self") if assigning location.href throws
+							try {
+								return originalOpen.apply(window, [url as any, "_self", features]) as any
+							} catch {}
+						}
+						return null as any
+					} as any
+
+					// Rewrite anchors that explicitly open new tabs
+					document.querySelectorAll('a[target="_blank"]').forEach((a) => {
+						a.setAttribute("target", "_self")
+					})
+
+					// Defensive capture: if an element still tries to open in a new tab, force same-tab
+					document.addEventListener(
+						"click",
+						(ev) => {
+							const el = (ev.target as HTMLElement | null)?.closest?.(
+								'a[target="_blank"]',
+							) as HTMLAnchorElement | null
+							if (el && el.href) {
+								ev.preventDefault()
+								try {
+									location.href = el.href
+								} catch {}
+							}
+						},
+						{ capture: true, passive: false },
+					)
+				} catch {
+					// no-op; forcing same-tab is best-effort
+				}
+			})
+		} catch {
+			// If evaluate fails (e.g., cross-origin/state), continue without breaking the action
+		}
+	}
+
 	/**
 	 * Handles mouse interaction with network activity monitoring
 	 */
@@ -463,6 +558,9 @@ export class BrowserSession {
 	): Promise<void> {
 		const [x, y] = coordinate.split(",").map(Number)
 
+		// Force any new-tab behavior (target="_blank", window.open) to stay in the same tab
+		await this.forceLinksToSameTab(page)
+
 		// Set up network request monitoring
 		let hasNetworkActivity = false
 		const requestListener = () => {
@@ -506,6 +604,106 @@ export class BrowserSession {
 		})
 	}
 
+	/**
+	 * Presses a single key or a modifier combination (e.g. "Enter", "Cmd+K",
+	 * "Shift+Enter") on the active page, then waits briefly for any resulting
+	 * navigation or network burst to settle.
+	 *
+	 * Parts before the last "+" are treated as modifiers; unrecognized modifier
+	 * names are ignored. Common lowercase aliases for the main key are mapped
+	 * to Puppeteer key names.
+	 *
+	 * Request listeners are always detached and held modifiers are always
+	 * released, even when the key press or one of the waits fails.
+	 *
+	 * @param key Key name or "+"-separated combination.
+	 * @returns The action result produced by `doAction`.
+	 * @throws Error if no main key can be derived from `key`, or if `doAction` rejects.
+	 */
+	async press(key: string): Promise<BrowserActionResult> {
+		return this.doAction(async (page) => {
+			// Parse key combinations (e.g., "Cmd+K", "Shift+Enter")
+			const parts = key.split("+").map((k) => k.trim())
+			const modifiers: KeyInput[] = []
+			let mainKey = parts[parts.length - 1]
+
+			// Fail early with a readable message instead of an opaque keyboard error
+			if (!mainKey) {
+				throw new Error(
+					`Invalid key: "${key}". Expected a key name such as "Enter" or a combination such as "Shift+Enter".`,
+				)
+			}
+
+			// Identify modifiers
+			for (let i = 0; i < parts.length - 1; i++) {
+				const part = parts[i].toLowerCase()
+				if (part === "cmd" || part === "command" || part === "meta") {
+					modifiers.push("Meta")
+				} else if (part === "ctrl" || part === "control") {
+					modifiers.push("Control")
+				} else if (part === "shift") {
+					modifiers.push("Shift")
+				} else if (part === "alt" || part === "option") {
+					modifiers.push("Alt")
+				}
+			}
+
+			// Map common key aliases to Puppeteer KeyInput values.
+			// A Map (rather than an object literal) avoids matching inherited keys such as "constructor".
+			const aliases = new Map<string, KeyInput | string>([
+				["esc", "Escape"],
+				["return", "Enter"],
+				["escape", "Escape"],
+				["enter", "Enter"],
+				["tab", "Tab"],
+				["space", "Space"],
+				["backspace", "Backspace"],
+				["delete", "Delete"],
+				["home", "Home"],
+				["end", "End"],
+				["pageup", "PageUp"],
+				["pagedown", "PageDown"],
+				["arrowup", "ArrowUp"],
+				["arrowdown", "ArrowDown"],
+				["arrowleft", "ArrowLeft"],
+				["arrowright", "ArrowRight"],
+			])
+			mainKey = (aliases.get(mainKey.toLowerCase()) ?? mainKey) as string
+
+			// Avoid new-tab behavior from Enter on links/buttons
+			await this.forceLinksToSameTab(page)
+
+			// Track inflight requests so we can detect brief network bursts
+			let inflight = 0
+			const onRequest = () => {
+				inflight++
+			}
+			const onRequestDone = () => {
+				inflight = Math.max(0, inflight - 1)
+			}
+			page.on("request", onRequest)
+			page.on("requestfinished", onRequestDone)
+			page.on("requestfailed", onRequestDone)
+
+			try {
+				// Start a short navigation wait in parallel; if no nav, it times out harmlessly
+				const HARD_CAP_MS = 3000
+				const navPromise = page
+					.waitForNavigation({
+						// domcontentloaded is enough to confirm a submit navigated
+						waitUntil: ["domcontentloaded"],
+						timeout: HARD_CAP_MS,
+					})
+					.catch(() => undefined)
+
+				// Press the key with modifiers held; only modifiers that actually went down are released
+				const held: KeyInput[] = []
+				try {
+					for (const modifier of modifiers) {
+						await page.keyboard.down(modifier)
+						held.push(modifier)
+					}
+
+					await page.keyboard.press(mainKey as KeyInput)
+				} finally {
+					// Release in reverse order so no modifier stays stuck after a failed press
+					for (const modifier of held.reverse()) {
+						await page.keyboard.up(modifier)
+					}
+				}
+
+				// Give time for any requests to kick off
+				await delay(120)
+
+				// Hard-cap the wait to avoid UI hangs
+				await Promise.race([
+					navPromise,
+					pWaitFor(() => inflight === 0, { timeout: HARD_CAP_MS, interval: 100 }).catch(() => {}),
+					delay(HARD_CAP_MS),
+				])
+
+				// Stabilize DOM briefly before capturing screenshot (shorter cap)
+				await this.waitTillHTMLStable(page, 2_000)
+			} finally {
+				// Cleanup must run on failure too, otherwise listeners pile up on the page
+				page.off("request", onRequest)
+				page.off("requestfinished", onRequestDone)
+				page.off("requestfailed", onRequestDone)
+			}
+		})
+	}
+
 	/**
 	 * Scrolls the page by the specified amount
 	 */
@@ -557,4 +755,107 @@ export class BrowserSession {
 			})
 		})
 	}
+
+	/**
+	 * Draws a cursor indicator on the page at the specified position.
+	 *
+	 * The indicator is a fixed-position, non-interactive overlay with a
+	 * well-known element id so that `removeCursorIndicator` can find it again.
+	 * Any indicator left over from an earlier call is removed first, so repeated
+	 * calls never stack overlays. Failures are logged and otherwise ignored.
+	 *
+	 * @param page The Puppeteer page to draw on.
+	 * @param coordinate Cursor position as an "x,y" string in viewport pixels.
+	 */
+	private async drawCursorIndicator(page: Page, coordinate: string): Promise<void> {
+		const [x, y] = coordinate.split(",").map(Number)
+
+		// A malformed coordinate would yield invalid CSS and a misplaced cursor; draw nothing instead
+		if (!Number.isFinite(x) || !Number.isFinite(y)) {
+			return
+		}
+
+		try {
+			await page.evaluate(
+				(cursorX: number, cursorY: number) => {
+					// Drop stale indicators (e.g. a previous screenshot failed before cleanup ran)
+					document.querySelectorAll('[id="__roo_cursor_indicator__"]').forEach((stale) => stale.remove())
+
+					// Create a cursor indicator element
+					const cursor = document.createElement("div")
+					cursor.id = "__roo_cursor_indicator__"
+					cursor.style.cssText = `
+						position: fixed;
+						left: ${cursorX}px;
+						top: ${cursorY}px;
+						width: 35px;
+						height: 35px;
+						pointer-events: none;
+						z-index: 2147483647;
+					`
+
+					// Create SVG cursor pointer
+					const svg = `
+						<svg width="35" height="35" viewBox="0 0 35 35" fill="none" xmlns="http://www.w3.org/2000/svg">
+							<path d="M5 3L5 17L9 13L12 19L14 18L11 12L17 12L5 3Z"
+								  fill="white"
+								  stroke="black"
+								  stroke-width="1.5"/>
+							<path d="M5 3L5 17L9 13L12 19L14 18L11 12L17 12L5 3Z"
+								  fill="black"
+								  stroke="white"
+								  stroke-width=".5"/>
+						</svg>
+					`
+					cursor.innerHTML = svg
+
+					// document.body can be missing on documents that are still loading
+					;(document.body ?? document.documentElement).appendChild(cursor)
+				},
+				x,
+				y,
+			)
+		} catch (error) {
+			console.error("Failed to draw cursor indicator:", error)
+		}
+	}
+
+	/**
+	 * Deletes the cursor overlay element from the page, if one is present.
+	 * Errors raised while evaluating in the page are logged, not rethrown.
+	 */
+	private async removeCursorIndicator(page: Page): Promise<void> {
+		try {
+			await page.evaluate(() => {
+				document.getElementById("__roo_cursor_indicator__")?.remove()
+			})
+		} catch (error) {
+			console.error("Failed to remove cursor indicator:", error)
+		}
+	}
+
+	/**
+	 * Reports whether both a browser handle and a page handle are currently held.
+	 */
+	isSessionActive(): boolean {
+		return Boolean(this.browser && this.page)
+	}
+
+	/**
+	 * Resolves the viewport size to report for this session.
+	 *
+	 * Resolution order:
+	 * 1. The live page viewport, when a page exists (also refreshes the cache).
+	 * 2. The cached dimensions from the most recent observation.
+	 * 3. The configured default returned by `getViewport()`.
+	 */
+	getViewportSize(): { width?: number; height?: number } {
+		// Refresh the cache from the live page so resizes are picked up
+		const liveViewport = this.page?.viewport()
+		if (liveViewport?.width) this.lastViewportWidth = liveViewport.width
+		if (liveViewport?.height) this.lastViewportHeight = liveViewport.height
+
+		// Nothing observed yet: report the configured default
+		if (!(this.lastViewportWidth && this.lastViewportHeight)) {
+			const { width, height } = this.getViewport()
+			return { width, height }
+		}
+
+		return {
+			width: this.lastViewportWidth,
+			height: this.lastViewportHeight,
+		}
+	}
 }

+ 3 - 3
src/services/browser/UrlContentFetcher.ts

@@ -90,9 +90,9 @@ export class UrlContentFetcher {
 			throw new Error("Browser not initialized")
 		}
 		/*
-		- networkidle2 is equivalent to playwright's networkidle where it waits until there are no more than 2 network connections for at least 500 ms.
-		- domcontentloaded is when the basic DOM is loaded
-		this should be sufficient for most doc sites
+		- In Puppeteer, "networkidle2" waits until there are no more than 2 network connections for at least 500 ms (roughly equivalent to Playwright's "networkidle").
+		- "domcontentloaded" is when the basic DOM is loaded.
+		This should be sufficient for most doc sites.
 		*/
 		try {
 			await this.page.goto(url, {

+ 222 - 0
src/services/browser/__tests__/BrowserSession.spec.ts

@@ -229,4 +229,226 @@ describe("BrowserSession", () => {
 			expect(mockBrowser.close).not.toHaveBeenCalled()
 		})
 	})
+
+	it("forces same-tab behavior before click", async () => {
+		// Prepare a minimal mock page exposing only the APIs touched by this test
+		const page: any = {
+			on: vi.fn(),
+			off: vi.fn(),
+			screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"),
+			url: vi.fn().mockReturnValue("https://example.com"),
+			viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }),
+			waitForNavigation: vi.fn().mockResolvedValue(undefined),
+			evaluate: vi.fn().mockResolvedValue(undefined),
+			mouse: {
+				click: vi.fn().mockResolvedValue(undefined),
+				move: vi.fn().mockResolvedValue(undefined),
+			},
+		}
+
+		;(browserSession as any).page = page
+
+		// Replace the private forceLinksToSameTab helper with a stub so its invocation can be asserted
+		const forceSpy = vi.fn().mockResolvedValue(undefined)
+		;(browserSession as any).forceLinksToSameTab = forceSpy
+
+		await browserSession.click("10,20")
+
+		expect(forceSpy).toHaveBeenCalledTimes(1)
+		expect(forceSpy).toHaveBeenCalledWith(page)
+		expect(page.mouse.click).toHaveBeenCalledWith(10, 20)
+	})
+})
+
+describe("keyboard press", () => {
+	it("presses a keyboard key", async () => {
+		// Prepare a minimal mock page with required APIs
+		const page: any = {
+			on: vi.fn(),
+			off: vi.fn(),
+			screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"),
+			url: vi.fn().mockReturnValue("https://example.com"),
+			viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }),
+			waitForNavigation: vi.fn().mockResolvedValue(undefined),
+			evaluate: vi.fn().mockResolvedValue(undefined),
+			keyboard: {
+				press: vi.fn().mockResolvedValue(undefined),
+				type: vi.fn().mockResolvedValue(undefined),
+			},
+		}
+
+		// Create a fresh BrowserSession with a mock context
+		const mockCtx: any = {
+			globalState: { get: vi.fn(), update: vi.fn() },
+			globalStorageUri: { fsPath: "/mock/global/storage/path" },
+			extensionUri: { fsPath: "/mock/extension/path" },
+		}
+		const session = new BrowserSession(mockCtx)
+
+		;(session as any).page = page
+
+		await session.press("Enter")
+
+		expect(page.keyboard.press).toHaveBeenCalledTimes(1)
+		expect(page.keyboard.press).toHaveBeenCalledWith("Enter")
+	})
+})
+
+describe("cursor visualization", () => {
+	it("should draw cursor indicator when cursor position exists", async () => {
+		// Prepare a minimal mock page with required APIs
+		const page: any = {
+			on: vi.fn(),
+			off: vi.fn(),
+			screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"),
+			url: vi.fn().mockReturnValue("https://example.com"),
+			viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }),
+			evaluate: vi.fn().mockResolvedValue(undefined),
+			mouse: {
+				click: vi.fn().mockResolvedValue(undefined),
+			},
+		}
+
+		// Create a fresh BrowserSession with a mock context
+		const mockCtx: any = {
+			globalState: { get: vi.fn(), update: vi.fn() },
+			globalStorageUri: { fsPath: "/mock/global/storage/path" },
+			extensionUri: { fsPath: "/mock/extension/path" },
+		}
+		const session = new BrowserSession(mockCtx)
+
+		;(session as any).page = page
+
+		// Perform a click action which sets cursor position
+		const result = await session.click("100,200")
+
+		// Loose check only: this asserts that page.evaluate ran, not how often or with which script.
+		// Expected callers are forceLinksToSameTab, draw cursor and remove cursor, but no call count is pinned.
+		expect(page.evaluate).toHaveBeenCalled()
+
+		// Verify the result includes cursor position
+		expect(result.currentMousePosition).toBe("100,200")
+	})
+
+	it("should include cursor position in action result", async () => {
+		// Prepare a minimal mock page with required APIs
+		const page: any = {
+			on: vi.fn(),
+			off: vi.fn(),
+			screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"),
+			url: vi.fn().mockReturnValue("https://example.com"),
+			viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }),
+			evaluate: vi.fn().mockResolvedValue(undefined),
+			mouse: {
+				move: vi.fn().mockResolvedValue(undefined),
+			},
+		}
+
+		// Create a fresh BrowserSession with a mock context
+		const mockCtx: any = {
+			globalState: { get: vi.fn(), update: vi.fn() },
+			globalStorageUri: { fsPath: "/mock/global/storage/path" },
+			extensionUri: { fsPath: "/mock/extension/path" },
+		}
+		const session = new BrowserSession(mockCtx)
+
+		;(session as any).page = page
+
+		// Perform a hover action which sets cursor position
+		const result = await session.hover("150,250")
+
+		// Verify the result includes cursor position
+		expect(result.currentMousePosition).toBe("150,250")
+		expect(result.viewportWidth).toBe(900)
+		expect(result.viewportHeight).toBe(600)
+	})
+
+	it("should not draw cursor indicator when no cursor position exists", async () => {
+		// Prepare a minimal mock page with required APIs
+		const page: any = {
+			on: vi.fn(),
+			off: vi.fn(),
+			screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"),
+			url: vi.fn().mockReturnValue("https://example.com"),
+			viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }),
+			evaluate: vi.fn().mockResolvedValue(undefined),
+		}
+
+		// Create a fresh BrowserSession with a mock context
+		const mockCtx: any = {
+			globalState: { get: vi.fn(), update: vi.fn() },
+			globalStorageUri: { fsPath: "/mock/global/storage/path" },
+			extensionUri: { fsPath: "/mock/extension/path" },
+		}
+		const session = new BrowserSession(mockCtx)
+
+		;(session as any).page = page
+
+		// Perform scroll action which doesn't set cursor position
+		const result = await session.scrollDown()
+
+		// Verify evaluate was called only for scroll operation (not for cursor drawing/removal)
+		// scrollDown calls evaluate once for scrolling
+		expect(page.evaluate).toHaveBeenCalledTimes(1)
+
+		// Verify no cursor position in result
+		expect(result.currentMousePosition).toBeUndefined()
+	})
+
+	describe("getViewportSize", () => {
+		it("falls back to configured viewport when no page or last viewport is available", () => {
+			const localCtx: any = {
+				globalState: {
+					get: vi.fn((key: string) => {
+						if (key === "browserViewportSize") return "1024x768"
+						return undefined
+					}),
+					update: vi.fn(),
+				},
+				globalStorageUri: { fsPath: "/mock/global/storage/path" },
+				extensionUri: { fsPath: "/mock/extension/path" },
+			}
+
+			const session = new BrowserSession(localCtx)
+			const vp = (session as any).getViewportSize()
+			expect(vp).toEqual({ width: 1024, height: 768 })
+		})
+
+		it("returns live page viewport when available and updates lastViewport cache", () => {
+			const localCtx: any = {
+				globalState: {
+					get: vi.fn(),
+					update: vi.fn(),
+				},
+				globalStorageUri: { fsPath: "/mock/global/storage/path" },
+				extensionUri: { fsPath: "/mock/extension/path" },
+			}
+			const session = new BrowserSession(localCtx)
+			;(session as any).page = {
+				viewport: vi.fn().mockReturnValue({ width: 1111, height: 555 }),
+			}
+
+			const vp = (session as any).getViewportSize()
+			expect(vp).toEqual({ width: 1111, height: 555 })
+			expect((session as any).lastViewportWidth).toBe(1111)
+			expect((session as any).lastViewportHeight).toBe(555)
+		})
+
+		it("returns cached last viewport when page no longer exists", () => {
+			const localCtx: any = {
+				globalState: {
+					get: vi.fn(),
+					update: vi.fn(),
+				},
+				globalStorageUri: { fsPath: "/mock/global/storage/path" },
+				extensionUri: { fsPath: "/mock/extension/path" },
+			}
+			const session = new BrowserSession(localCtx)
+			;(session as any).lastViewportWidth = 800
+			;(session as any).lastViewportHeight = 600
+
+			const vp = (session as any).getViewportSize()
+			expect(vp).toEqual({ width: 800, height: 600 })
+		})
+	})
 })

+ 11 - 0
src/shared/ExtensionMessage.ts

@@ -129,6 +129,8 @@ export interface ExtensionMessage {
 		| "dismissedUpsells"
 		| "organizationSwitchResult"
 		| "interactionRequired"
+		| "browserSessionUpdate"
+		| "browserSessionNavigate"
 	text?: string
 	payload?: any // Add a generic payload for now, can refine later
 	// Checkpoint warning message
@@ -213,6 +215,9 @@ export interface ExtensionMessage {
 	queuedMessages?: QueuedMessage[]
 	list?: string[] // For dismissedUpsells
 	organizationId?: string | null // For organizationSwitchResult
+	browserSessionMessages?: ClineMessage[] // For browser session panel updates
+	isBrowserSessionActive?: boolean // For browser session panel updates
+	stepIndex?: number // For browserSessionNavigate: the target step index to display
 }
 
 export type ExtensionState = Pick<
@@ -333,6 +338,8 @@ export type ExtensionState = Pick<
 	organizationAllowList: OrganizationAllowList
 	organizationSettingsVersion?: number
 
+	isBrowserSessionActive: boolean // Actual browser session state
+
 	autoCondenseContext: boolean
 	autoCondenseContextPercent: number
 	marketplaceItems?: MarketplaceItem[]
@@ -420,6 +427,7 @@ export const browserActions = [
 	"click",
 	"hover",
 	"type",
+	"press",
 	"scroll_down",
 	"scroll_up",
 	"resize",
@@ -433,6 +441,7 @@ export interface ClineSayBrowserAction {
 	coordinate?: string
 	size?: string
 	text?: string
+	executedCoordinate?: string
 }
 
 export type BrowserActionResult = {
@@ -440,6 +449,8 @@ export type BrowserActionResult = {
 	logs?: string
 	currentUrl?: string
 	currentMousePosition?: string
+	viewportWidth?: number
+	viewportHeight?: number
 }
 
 export interface ClineAskUseMcpServer {

+ 10 - 0
src/shared/WebviewMessage.ts

@@ -166,6 +166,13 @@ export interface WebviewMessage {
 		| "dismissUpsell"
 		| "getDismissedUpsells"
 		| "updateSettings"
+		| "allowedCommands"
+		| "deniedCommands"
+		| "killBrowserSession"
+		| "openBrowserSessionPanel"
+		| "showBrowserSessionPanelAtStep"
+		| "refreshBrowserSessionPanel"
+		| "browserPanelDidLaunch"
 	text?: string
 	editedMessageContent?: string
 	tab?: "settings" | "history" | "mcp" | "modes" | "chat" | "marketplace" | "cloud"
@@ -177,6 +184,9 @@ export interface WebviewMessage {
 	images?: string[]
 	bool?: boolean
 	value?: number
+	stepIndex?: number
+	isLaunchAction?: boolean
+	forceShow?: boolean
 	commands?: string[]
 	audioType?: AudioType
 	serverName?: string

+ 95 - 0
src/shared/browserUtils.ts

@@ -0,0 +1,95 @@
+/**
+ * Parses a coordinate string and scales it from image dimensions to viewport dimensions.
+ * The LLM examines the screenshot it receives (which may be downscaled by the API)
+ * and reports coordinates in format: "x,y@widthxheight" where widthxheight is what the LLM observed.
+ *
+ * Format: "x,y@widthxheight" (required; "x,y@width,height" is also accepted)
+ *
+ * @param coordinate Coordinate string including the observed image dimensions.
+ * @param viewportWidth Width of the real browser viewport in pixels.
+ * @param viewportHeight Height of the real browser viewport in pixels.
+ * @returns Scaled coordinate string "x,y" in viewport coordinates (rounded to integers).
+ * @throws Error if the format is invalid, the image dimensions are missing, or either dimension is zero.
+ */
+export function scaleCoordinate(coordinate: string, viewportWidth: number, viewportHeight: number): string {
+	// Parse coordinate with required image dimensions (accepts both 'x' and ',' as dimension separators)
+	const match = coordinate.match(/^\s*(\d+)\s*,\s*(\d+)\s*@\s*(\d+)\s*[x,]\s*(\d+)\s*$/)
+
+	if (!match) {
+		throw new Error(
+			`Invalid coordinate format: "${coordinate}". ` +
+				`Expected format: "x,y@widthxheight" (e.g., "450,300@1024x768")`,
+		)
+	}
+
+	const [, xStr, yStr, imgWidthStr, imgHeightStr] = match
+	const x = parseInt(xStr, 10)
+	const y = parseInt(yStr, 10)
+	const imgWidth = parseInt(imgWidthStr, 10)
+	const imgHeight = parseInt(imgHeightStr, 10)
+
+	// A zero-sized image would divide by zero and produce "NaN"/"Infinity" coordinates
+	if (imgWidth === 0 || imgHeight === 0) {
+		throw new Error(
+			`Invalid coordinate format: "${coordinate}". ` +
+				`Image dimensions must be greater than zero (e.g., "450,300@1024x768")`,
+		)
+	}
+
+	// Scale coordinates from image dimensions to viewport dimensions
+	const scaledX = Math.round((x / imgWidth) * viewportWidth)
+	const scaledY = Math.round((y / imgHeight) * viewportHeight)
+
+	return `${scaledX},${scaledY}`
+}
+
+// Display labels for well-known key names, keyed by lowercase name.
+// Built once at module load; a Map avoids matching inherited object keys such as "constructor".
+const PRETTY_KEY_LABELS: ReadonlyMap<string, string> = new Map([
+	["enter", "Enter"],
+	["tab", "Tab"],
+	["escape", "Esc"],
+	["esc", "Esc"],
+	["backspace", "Backspace"],
+	["space", "Space"],
+	["shift", "Shift"],
+	["control", "Ctrl"],
+	["ctrl", "Ctrl"],
+	["alt", "Alt"],
+	["meta", "Meta"],
+	["command", "Cmd"],
+	["cmd", "Cmd"],
+	["arrowup", "Arrow Up"],
+	["arrowdown", "Arrow Down"],
+	["arrowleft", "Arrow Left"],
+	["arrowright", "Arrow Right"],
+	["pageup", "Page Up"],
+	["pagedown", "Page Down"],
+	["home", "Home"],
+	["end", "End"],
+])
+
+/**
+ * Formats a key string into a more readable format (e.g., "Control+c" -> "Ctrl + C").
+ *
+ * Each "+"-separated part is resolved in this order:
+ * 1. A known key name (case-insensitive), e.g. "escape" -> "Esc".
+ * 2. A "KeyX" / "DigitN" code, reduced to the bare letter or digit.
+ * 3. Anything else: camelCase is split into words and the first letter capitalized.
+ *
+ * @param k Key or key combination; may be undefined or empty.
+ * @returns The formatted combination joined with " + ", or "" when `k` is empty.
+ */
+export function prettyKey(k?: string): string {
+	if (!k) return ""
+	return k
+		.split("+")
+		.map((part) => {
+			const p = part.trim()
+			const known = PRETTY_KEY_LABELS.get(p.toLowerCase())
+			if (known !== undefined) return known
+			const keyMatch = /^Key([A-Z])$/.exec(p)
+			if (keyMatch) return keyMatch[1].toUpperCase()
+			const digitMatch = /^Digit([0-9])$/.exec(p)
+			if (digitMatch) return digitMatch[1]
+			const spaced = p.replace(/([a-z])([A-Z])/g, "$1 $2")
+			return spaced.charAt(0).toUpperCase() + spaced.slice(1)
+		})
+		.join(" + ")
+}
+
+/**
+ * Lenient variant of scaleCoordinate.
+ *
+ * Returns "" for a missing coordinate. When scaling throws, falls back to the
+ * leading plain "x,y" pair if one can be parsed, otherwise returns the input unchanged.
+ */
+export function getViewportCoordinate(
+	coord: string | undefined,
+	viewportWidth: number,
+	viewportHeight: number,
+): string {
+	if (!coord) {
+		return ""
+	}
+
+	try {
+		return scaleCoordinate(coord, viewportWidth, viewportHeight)
+	} catch {
+		// Scaling rejected the input: salvage a bare "x,y" prefix when there is one
+		const plain = coord.match(/^\s*(\d+)\s*,\s*(\d+)/)
+		if (plain === null) {
+			return coord
+		}
+		return `${plain[1]},${plain[2]}`
+	}
+}

+ 12 - 0
webview-ui/browser-panel.html

@@ -0,0 +1,12 @@
+<!doctype html>
+<html lang="en">
+	<head>
+		<meta charset="UTF-8" />
+		<meta name="viewport" content="width=device-width, initial-scale=1.0" />
+		<title>Browser Session</title>
+	</head>
+	<body>
+		<div id="root"></div>
+		<script type="module" src="/src/browser-panel.tsx"></script>
+	</body>
+</html>

+ 12 - 0
webview-ui/src/browser-panel.tsx

@@ -0,0 +1,12 @@
+import { StrictMode } from "react"
+import { createRoot } from "react-dom/client"
+
+import "./index.css"
+import BrowserSessionPanel from "./components/browser-session/BrowserSessionPanel"
+import "../node_modules/@vscode/codicons/dist/codicon.css"
+
+createRoot(document.getElementById("root")!).render(
+	<StrictMode>
+		<BrowserSessionPanel />
+	</StrictMode>,
+)

+ 60 - 0
webview-ui/src/components/browser-session/BrowserPanelStateProvider.tsx

@@ -0,0 +1,60 @@
+import React, { createContext, useContext, useState, useEffect, useCallback } from "react"
+import { ExtensionMessage } from "@roo/ExtensionMessage"
+
+/** State shared with the browser session panel through React context. */
+interface BrowserPanelState {
+	/** Viewport size string; the provider defaults it to "900x600". */
+	browserViewportSize: string
+	/** Mirrors the `isBrowserSessionActive` flag carried by extension messages. */
+	isBrowserSessionActive: boolean
+	/** Language code taken from extension state; the provider defaults it to "en". */
+	language: string
+}
+
+// Defaults to undefined so useBrowserPanelState can detect use outside the provider
+const BrowserPanelStateContext = createContext<BrowserPanelState | undefined>(undefined)
+
+// Values used before the first extension message arrives, and whenever a "state"
+// message omits (or sends a falsy value for) the corresponding field
+const INITIAL_BROWSER_PANEL_STATE: BrowserPanelState = {
+	browserViewportSize: "900x600",
+	isBrowserSessionActive: false,
+	language: "en",
+}
+
+/**
+ * Provides BrowserPanelState to its children and keeps it in sync with the
+ * "message" events received on the window.
+ *
+ * - "state" messages overwrite all three fields, falling back to the defaults.
+ * - "browserSessionUpdate" messages overwrite only `isBrowserSessionActive`, and
+ *   only when the message actually carries that flag.
+ */
+export const BrowserPanelStateProvider: React.FC<{ children: React.ReactNode }> = ({ children }) => {
+	const [state, setState] = useState<BrowserPanelState>(INITIAL_BROWSER_PANEL_STATE)
+
+	const handleMessage = useCallback((event: MessageEvent) => {
+		const message: ExtensionMessage = event.data
+
+		if (message.type === "state") {
+			const incoming = message.state
+			if (!incoming) {
+				return
+			}
+			setState((prev) => ({
+				...prev,
+				browserViewportSize: incoming.browserViewportSize || INITIAL_BROWSER_PANEL_STATE.browserViewportSize,
+				isBrowserSessionActive:
+					incoming.isBrowserSessionActive || INITIAL_BROWSER_PANEL_STATE.isBrowserSessionActive,
+				language: incoming.language || INITIAL_BROWSER_PANEL_STATE.language,
+			}))
+			return
+		}
+
+		if (message.type === "browserSessionUpdate") {
+			const active = message.isBrowserSessionActive
+			if (active === undefined) {
+				return
+			}
+			setState((prev) => ({ ...prev, isBrowserSessionActive: active || false }))
+		}
+	}, [])
+
+	// Subscribe for the lifetime of the provider; unsubscribe on unmount
+	useEffect(() => {
+		window.addEventListener("message", handleMessage)
+		return () => {
+			window.removeEventListener("message", handleMessage)
+		}
+	}, [handleMessage])
+
+	return <BrowserPanelStateContext.Provider value={state}>{children}</BrowserPanelStateContext.Provider>
+}
+
+/**
+ * Reads the current BrowserPanelState from context.
+ *
+ * @returns The state supplied by the nearest BrowserPanelStateProvider.
+ * @throws Error when called outside a BrowserPanelStateProvider.
+ */
+export const useBrowserPanelState = () => {
+	const panelState = useContext(BrowserPanelStateContext)
+	if (panelState !== undefined) {
+		return panelState
+	}
+	throw new Error("useBrowserPanelState must be used within a BrowserPanelStateProvider")
+}

+ 102 - 0
webview-ui/src/components/browser-session/BrowserSessionPanel.tsx

@@ -0,0 +1,102 @@
+import React, { useEffect, useState } from "react"
+import { type ClineMessage } from "@roo-code/types"
+import BrowserSessionRow from "../chat/BrowserSessionRow"
+import { TooltipProvider } from "@src/components/ui/tooltip"
+import ErrorBoundary from "../ErrorBoundary"
+import TranslationProvider from "@src/i18n/TranslationContext"
+import { ExtensionMessage } from "@roo/ExtensionMessage"
+import { BrowserPanelStateProvider, useBrowserPanelState } from "./BrowserPanelStateProvider"
+import { vscode } from "@src/utils/vscode"
+import { ExtensionStateContextProvider } from "@/context/ExtensionStateContext"
+
+/** Local panel state: the messages of the browser session being displayed. */
+interface BrowserSessionPanelState {
+	messages: ClineMessage[]
+}
+
+/**
+ * Full-screen body of the browser session panel.
+ *
+ * Listens for window "message" events and renders a single BrowserSessionRow:
+ * - "browserSessionUpdate" replaces the displayed messages when the message carries them;
+ * - "browserSessionNavigate" records a non-negative numeric step index to navigate to.
+ */
+const BrowserSessionPanelContent: React.FC = () => {
+	const { browserViewportSize, isBrowserSessionActive } = useBrowserPanelState()
+	const [state, setState] = useState<BrowserSessionPanelState>({ messages: [] })
+	// Step requested by the most recent "browserSessionNavigate" message, if any
+	const [navigateToStepIndex, setNavigateToStepIndex] = useState<number | undefined>(undefined)
+	// Expansion flags keyed by message timestamp
+	const [expandedRows, setExpandedRows] = useState<Record<number, boolean>>({})
+
+	useEffect(() => {
+		const onExtensionMessage = (event: MessageEvent) => {
+			const message: ExtensionMessage = event.data
+
+			if (message.type === "browserSessionUpdate") {
+				const sessionMessages = message.browserSessionMessages
+				if (sessionMessages) {
+					setState((prev) => ({ ...prev, messages: sessionMessages || [] }))
+				}
+				return
+			}
+
+			if (message.type === "browserSessionNavigate") {
+				const requestedStep = message.stepIndex
+				if (typeof requestedStep === "number" && requestedStep >= 0) {
+					setNavigateToStepIndex(requestedStep)
+				}
+			}
+		}
+
+		window.addEventListener("message", onExtensionMessage)
+
+		return () => {
+			window.removeEventListener("message", onExtensionMessage)
+		}
+	}, [])
+
+	// Rows that have never been toggled count as collapsed
+	const isRowExpanded = (messageTs: number) => expandedRows[messageTs] ?? false
+
+	const toggleRowExpanded = (messageTs: number) => {
+		setExpandedRows((prev: Record<number, boolean>) => ({
+			...prev,
+			[messageTs]: !prev[messageTs],
+		}))
+	}
+
+	return (
+		<div className="fixed top-0 left-0 right-0 bottom-0 flex flex-col overflow-hidden bg-vscode-editor-background">
+			<BrowserSessionRow
+				messages={state.messages}
+				isLast={true}
+				lastModifiedMessage={state.messages.at(-1)}
+				isStreaming={false}
+				isExpanded={isRowExpanded}
+				onToggleExpand={toggleRowExpanded}
+				fullScreen={true}
+				browserViewportSizeProp={browserViewportSize}
+				isBrowserSessionActiveProp={isBrowserSessionActive}
+				navigateToPageIndex={navigateToStepIndex}
+			/>
+		</div>
+	)
+}
+
+/**
+ * Root component of the browser session panel webview.
+ *
+ * Posts "webviewDidLaunch" once on mount and wraps the panel content in the
+ * error boundary and context providers it is rendered under.
+ */
+const BrowserSessionPanel: React.FC = () => {
+	useEffect(() => {
+		// Announce the launch right away so the panel gets its initial state and is
+		// "ready" without needing a second click
+		try {
+			vscode.postMessage({ type: "webviewDidLaunch" })
+		} catch {
+			// Best effort: errors during the initial launch are ignored
+		}
+	}, [])
+
+	const panelContent = (
+		<BrowserPanelStateProvider>
+			<BrowserSessionPanelContent />
+		</BrowserPanelStateProvider>
+	)
+
+	return (
+		<ErrorBoundary>
+			<ExtensionStateContextProvider>
+				<TooltipProvider>
+					<TranslationProvider>{panelContent}</TranslationProvider>
+				</TooltipProvider>
+			</ExtensionStateContextProvider>
+		</ErrorBoundary>
+	)
+}
+
+export default BrowserSessionPanel

+ 184 - 0
webview-ui/src/components/chat/BrowserActionRow.tsx

@@ -0,0 +1,184 @@
+import { memo, useMemo, useEffect, useRef } from "react"
+import { ClineMessage } from "@roo-code/types"
+import { ClineSayBrowserAction } from "@roo/ExtensionMessage"
+import { vscode } from "@src/utils/vscode"
+import { getViewportCoordinate as getViewportCoordinateShared, prettyKey } from "@roo/browserUtils"
+import {
+	MousePointer as MousePointerIcon,
+	Keyboard,
+	ArrowDown,
+	ArrowUp,
+	Pointer,
+	Play,
+	Check,
+	Maximize2,
+} from "lucide-react"
+import { useExtensionState } from "@src/context/ExtensionStateContext"
+import { useTranslation } from "react-i18next"
+
+/** Props for BrowserActionRow. */
+interface BrowserActionRowProps {
+	/** Message whose `text` holds the JSON-encoded browser action to display. */
+	message: ClineMessage
+	/** Candidate result message; viewport dimensions are read from it only when its `say` is "browser_action_result". */
+	nextMessage?: ClineMessage
+	/** Position shown in the "index/total" label; the click handler subtracts 1 to get the panel step index. */
+	actionIndex?: number
+	/** Total shown in the "index/total" label. */
+	totalActions?: number
+}
+
+// Icon component for each known browser action. A Map is used so that arbitrary
+// action strings can never resolve to an inherited object property.
+const ACTION_ICONS = new Map<string, typeof Pointer>([
+	["click", MousePointerIcon],
+	["type", Keyboard],
+	["press", Keyboard],
+	["scroll_down", ArrowDown],
+	["scroll_up", ArrowUp],
+	["launch", Play],
+	["close", Check],
+	["resize", Maximize2],
+	["hover", Pointer],
+])
+
+/** Returns the icon element for a browser action; unknown actions get the pointer icon. */
+const getActionIcon = (action: string) => {
+	const Icon = ACTION_ICONS.get(action) ?? Pointer
+	return <Icon className="w-3.5 h-3.5 opacity-70" />
+}
+
+/**
+ * Single chat row describing one browser action (launch, click, type, ...).
+ *
+ * Renders a compact header with the action's position ("index/total"), an icon and a
+ * readable description. Activating the header (click, Enter or Space) asks the
+ * extension to show the Browser Session panel at this action's step. While a browser
+ * session is active, the row also asks the extension to show the panel after it mounts.
+ */
+const BrowserActionRow = memo(({ message, nextMessage, actionIndex, totalActions }: BrowserActionRowProps) => {
+	const { t } = useTranslation()
+	const { isBrowserSessionActive } = useExtensionState()
+	// Keeps the auto-open effect from posting more than once while this row stays mounted
+	const hasHandledAutoOpenRef = useRef(false)
+
+	// Parse this specific browser action; malformed JSON yields null
+	const browserAction = useMemo<ClineSayBrowserAction | null>(() => {
+		try {
+			return JSON.parse(message.text || "{}") as ClineSayBrowserAction
+		} catch {
+			return null
+		}
+	}, [message.text])
+
+	// Get viewport dimensions from the result message if available
+	const viewportDimensions = useMemo(() => {
+		if (!nextMessage || nextMessage.say !== "browser_action_result") return null
+		try {
+			const result = JSON.parse(nextMessage.text || "{}")
+			return {
+				width: result.viewportWidth,
+				height: result.viewportHeight,
+			}
+		} catch {
+			return null
+		}
+	}, [nextMessage])
+
+	// Format action display text
+	const actionText = useMemo(() => {
+		if (!browserAction) return "Browser action"
+
+		// Delegates to the shared helper; unknown viewport dimensions are passed as 0
+		const getViewportCoordinate = (coord?: string): string =>
+			getViewportCoordinateShared(coord, viewportDimensions?.width ?? 0, viewportDimensions?.height ?? 0)
+
+		switch (browserAction.action) {
+			case "launch":
+				return `Launched browser`
+			case "click":
+				return `Clicked at: ${browserAction.executedCoordinate || getViewportCoordinate(browserAction.coordinate)}`
+			case "type":
+				// A missing text must not be rendered as the literal "undefined"
+				return `Typed: ${browserAction.text ?? ""}`
+			case "press":
+				return `Pressed key: ${prettyKey(browserAction.text)}`
+			case "hover":
+				return `Hovered at: ${browserAction.executedCoordinate || getViewportCoordinate(browserAction.coordinate)}`
+			case "scroll_down":
+				return "Scrolled down"
+			case "scroll_up":
+				return "Scrolled up"
+			case "resize":
+				// A missing size must not be rendered as the literal "undefined"
+				return `Resized to: ${browserAction.size?.split(/[x,]/).join(" x ") ?? ""}`
+			case "close":
+				return "Closed browser"
+			default:
+				return browserAction.action
+		}
+	}, [browserAction, viewportDimensions])
+
+	// Auto-open Browser Session panel when:
+	// 1. This is a "launch" action (new browser session) - always opens and navigates to launch
+	// 2. Regular actions - only open panel if user hasn't manually closed it, let internal auto-advance logic handle step
+	// NOTE(review): the once-only guard lives in a ref, so it starts over if this row is
+	// unmounted and mounted again — confirm the extension tolerates repeated messages.
+	useEffect(() => {
+		if (!isBrowserSessionActive || hasHandledAutoOpenRef.current) {
+			return
+		}
+
+		if (browserAction?.action === "launch") {
+			// Launch action: navigate to step 0 (the launch)
+			vscode.postMessage({
+				type: "showBrowserSessionPanelAtStep",
+				stepIndex: 0,
+				isLaunchAction: true,
+			})
+		} else {
+			// Regular actions: just show panel, don't navigate
+			// BrowserSessionRow's internal auto-advance logic will handle jumping to new steps
+			// only if user is currently on the most recent step
+			vscode.postMessage({
+				type: "showBrowserSessionPanelAtStep",
+				isLaunchAction: false,
+			})
+		}
+		hasHandledAutoOpenRef.current = true
+	}, [isBrowserSessionActive, browserAction])
+
+	const headerStyle: React.CSSProperties = {
+		display: "flex",
+		alignItems: "center",
+		gap: "10px",
+		marginBottom: "10px",
+		wordBreak: "break-word",
+	}
+
+	// Panel steps are 0-based while actionIndex is shown 1-based; default to the first step
+	const openPanelAtStep = () => {
+		const idx = typeof actionIndex === "number" ? Math.max(0, actionIndex - 1) : 0
+		vscode.postMessage({ type: "showBrowserSessionPanelAtStep", stepIndex: idx, forceShow: true })
+	}
+
+	return (
+		<div className="px-[15px] py-[10px] pr-[6px]">
+			{/* Header with action description - activating it opens Browser Session panel at this step */}
+			<div
+				style={headerStyle}
+				className="cursor-pointer"
+				role="button"
+				tabIndex={0}
+				onClick={openPanelAtStep}
+				onKeyDown={(event) => {
+					// Mirror native button behavior so the header is usable without a mouse
+					if (event.key === "Enter" || event.key === " ") {
+						event.preventDefault()
+						openPanelAtStep()
+					}
+				}}>
+				<span
+					className="codicon codicon-globe text-vscode-testing-iconPassed shrink-0"
+					style={{ marginBottom: "-1.5px" }}
+				/>
+				<span style={{ fontWeight: "bold" }}>{t("chat:browser.actions.title")}</span>
+				{actionIndex !== undefined && totalActions !== undefined && (
+					<span style={{ fontWeight: "bold" }}>
+						{" "}
+						- {actionIndex}/{totalActions} -{" "}
+					</span>
+				)}
+				{browserAction && (
+					<>
+						<span className="shrink-0">{getActionIcon(browserAction.action)}</span>
+						<span className="flex-1 truncate">{actionText}</span>
+					</>
+				)}
+			</div>
+		</div>
+	)
+})
+
+BrowserActionRow.displayName = "BrowserActionRow"
+
+export default BrowserActionRow

Plik diff jest za duży
+ 776 - 179
webview-ui/src/components/chat/BrowserSessionRow.tsx


+ 34 - 0
webview-ui/src/components/chat/BrowserSessionStatusRow.tsx

@@ -0,0 +1,34 @@
+import { memo } from "react"
+import { Globe } from "lucide-react"
+import { ClineMessage } from "@roo-code/types"
+
+interface BrowserSessionStatusRowProps {
+	/** Status message; its `text` is rendered verbatim as the row label. */
+	message: ClineMessage
+}
+
+/**
+ * Chat row announcing a browser session status change.
+ *
+ * The row is styled as "opened" when the message text contains "opened" and as
+ * "closed" otherwise. The icon and the label share one VS Code theme color, so both
+ * follow the active theme instead of the icon using fixed hex colors.
+ */
+const BrowserSessionStatusRow = memo(({ message }: BrowserSessionStatusRowProps) => {
+	// NOTE(review): the state is inferred from the text itself, so it depends on the sender's wording
+	const isOpened = message.text?.includes("opened") ?? false
+	const statusColor = isOpened ? "var(--vscode-testing-iconPassed)" : "var(--vscode-descriptionForeground)"
+
+	return (
+		<div className="flex items-center gap-2 py-2 px-[15px] text-sm">
+			<Globe className="w-4 h-4 shrink-0" style={{ opacity: 0.7, color: statusColor }} />
+			<span style={{ color: statusColor, fontWeight: 500 }}>{message.text}</span>
+		</div>
+	)
+})
+
+BrowserSessionStatusRow.displayName = "BrowserSessionStatusRow"
+
+export default BrowserSessionStatusRow

+ 20 - 2
webview-ui/src/components/chat/ChatRow.tsx

@@ -160,6 +160,7 @@ export const ChatRowContent = ({
 	onSuggestionClick,
 	onFollowUpUnmount,
 	onBatchFileResponse,
+	editable,
 	isFollowUpAnswered,
 }: ChatRowContentProps) => {
 	const { t } = useTranslation()
@@ -536,11 +537,24 @@ export const ChatRowContent = ({
 			}
 			case "updateTodoList" as any: {
 				const todos = (tool as any).todos || []
-
 				// Get previous todos from the latest todos in the task context
 				const previousTodos = getPreviousTodos(clineMessages, message.ts)
 
-				return <TodoChangeDisplay previousTodos={previousTodos} newTodos={todos} />
+				return (
+					<>
+						<TodoChangeDisplay previousTodos={previousTodos} newTodos={todos} />
+						<UpdateTodoListToolBlock
+							todos={todos}
+							content={(tool as any).content}
+							onChange={(updatedTodos) => {
+								if (typeof vscode !== "undefined" && vscode?.postMessage) {
+									vscode.postMessage({ type: "updateTodoList", payload: { todos: updatedTodos } })
+								}
+							}}
+							editable={!!(editable && isLast)}
+						/>
+					</>
+				)
 			}
 			case "newFileCreated":
 				return (
@@ -1381,6 +1395,10 @@ export const ChatRowContent = ({
 							<ImageBlock imageUri={imageInfo.imageUri} imagePath={imageInfo.imagePath} />
 						</div>
 					)
+				case "browser_action":
+				case "browser_action_result":
+					// Handled by BrowserSessionRow; prevent raw JSON (action/result) from rendering here
+					return null
 				default:
 					return (
 						<>

+ 12 - 1
webview-ui/src/components/chat/ChatTextArea.tsx

@@ -51,6 +51,9 @@ interface ChatTextAreaProps {
 	// Edit mode props
 	isEditMode?: boolean
 	onCancel?: () => void
+	// Browser session status
+	isBrowserSessionActive?: boolean
+	showBrowserDockToggle?: boolean
 }
 
 export const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
@@ -71,6 +74,8 @@ export const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 			modeShortcutText,
 			isEditMode = false,
 			onCancel,
+			isBrowserSessionActive = false,
+			showBrowserDockToggle = false,
 		},
 		ref,
 	) => {
@@ -1236,7 +1241,7 @@ export const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 					</div>
 					<div
 						className={cn(
-							"flex flex-shrink-0 items-center gap-0.5",
+							"flex flex-shrink-0 items-center gap-0.5 h-5 leading-none",
 							!isEditMode && cloudUserInfo ? "" : "pr-2",
 						)}>
 						{isTtsPlaying && (
@@ -1261,6 +1266,12 @@ export const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 						)}
 						{!isEditMode ? <IndexingStatusBadge /> : null}
 						{!isEditMode && cloudUserInfo && <CloudAccountSwitcher />}
+						{/* keep props referenced after moving browser button */}
+						<div
+							className="hidden"
+							data-browser-session-active={isBrowserSessionActive}
+							data-show-browser-dock-toggle={showBrowserDockToggle}
+						/>
 					</div>
 				</div>
 			</div>

+ 86 - 125
webview-ui/src/components/chat/ChatView.tsx

@@ -1,5 +1,5 @@
 import React, { forwardRef, useCallback, useEffect, useImperativeHandle, useMemo, useRef, useState } from "react"
-import { useDeepCompareEffect, useEvent, useMount } from "react-use"
+import { useDeepCompareEffect, useEvent } from "react-use"
 import debounce from "debounce"
 import { Virtuoso, type VirtuosoHandle } from "react-virtuoso"
 import removeMd from "remove-markdown"
@@ -13,7 +13,7 @@ import { appendImages } from "@src/utils/imageUtils"
 
 import type { ClineAsk, ClineMessage } from "@roo-code/types"
 
-import { ClineSayBrowserAction, ClineSayTool, ExtensionMessage } from "@roo/ExtensionMessage"
+import { ClineSayTool, ExtensionMessage } from "@roo/ExtensionMessage"
 import { findLast } from "@roo/array"
 import { SuggestionItem } from "@roo-code/types"
 import { combineApiRequests } from "@roo/combineApiRequests"
@@ -37,7 +37,8 @@ import TelemetryBanner from "../common/TelemetryBanner"
 import VersionIndicator from "../common/VersionIndicator"
 import HistoryPreview from "../history/HistoryPreview"
 import Announcement from "./Announcement"
-import BrowserSessionRow from "./BrowserSessionRow"
+import BrowserActionRow from "./BrowserActionRow"
+import BrowserSessionStatusRow from "./BrowserSessionStatusRow"
 import ChatRow from "./ChatRow"
 import { ChatTextArea } from "./ChatTextArea"
 import TaskHeader from "./TaskHeader"
@@ -95,6 +96,7 @@ const ChatViewComponent: React.ForwardRefRenderFunction<ChatViewRef, ChatViewPro
 		soundVolume,
 		cloudIsAuthenticated,
 		messageQueue = [],
+		isBrowserSessionActive,
 	} = useExtensionState()
 
 	const messagesRef = useRef(messages)
@@ -808,9 +810,6 @@ const ChatViewComponent: React.ForwardRefRenderFunction<ChatViewRef, ChatViewPro
 
 	useEvent("message", handleMessage)
 
-	// NOTE: the VSCode window needs to be focused for this to work.
-	useMount(() => textAreaRef.current?.focus())
-
 	const visibleMessages = useMemo(() => {
 		// Pre-compute checkpoint hashes that have associated user messages for O(1) lookup
 		const userMessageCheckpointHashes = new Set<string>()
@@ -965,97 +964,54 @@ const ChatViewComponent: React.ForwardRefRenderFunction<ChatViewRef, ChatViewPro
 		setWasStreaming(isStreaming)
 	}, [isStreaming, lastMessage, wasStreaming, messages.length])
 
-	const isBrowserSessionMessage = (message: ClineMessage): boolean => {
-		// Which of visible messages are browser session messages, see above.
-		if (message.type === "ask") {
-			return ["browser_action_launch"].includes(message.ask!)
-		}
-
-		if (message.type === "say") {
-			return ["api_req_started", "text", "browser_action", "browser_action_result"].includes(message.say!)
-		}
-
-		return false
-	}
-
-	const groupedMessages = useMemo(() => {
-		const result: (ClineMessage | ClineMessage[])[] = []
-		let currentGroup: ClineMessage[] = []
-		let isInBrowserSession = false
-
-		const endBrowserSession = () => {
-			if (currentGroup.length > 0) {
-				result.push([...currentGroup])
-				currentGroup = []
-				isInBrowserSession = false
+	// Compute current browser session messages for the top banner (not grouped into chat stream)
+	// Find the FIRST browser session from the beginning to show ALL sessions
+	const browserSessionStartIndex = useMemo(() => {
+		for (let i = 0; i < messages.length; i++) {
+			if (messages[i].ask === "browser_action_launch") {
+				return i
 			}
 		}
+		return -1
+	}, [messages])
 
-		visibleMessages.forEach((message: ClineMessage) => {
-			if (message.ask === "browser_action_launch") {
-				// Complete existing browser session if any.
-				endBrowserSession()
-				// Start new.
-				isInBrowserSession = true
-				currentGroup.push(message)
-			} else if (isInBrowserSession) {
-				// End session if `api_req_started` is cancelled.
-
-				if (message.say === "api_req_started") {
-					// Get last `api_req_started` in currentGroup to check if
-					// it's cancelled. If it is then this api req is not part
-					// of the current browser session.
-					const lastApiReqStarted = [...currentGroup].reverse().find((m) => m.say === "api_req_started")
-
-					if (lastApiReqStarted?.text !== null && lastApiReqStarted?.text !== undefined) {
-						const info = JSON.parse(lastApiReqStarted.text)
-						const isCancelled = info.cancelReason !== null && info.cancelReason !== undefined
-
-						if (isCancelled) {
-							endBrowserSession()
-							result.push(message)
-							return
-						}
-					}
-				}
-
-				if (isBrowserSessionMessage(message)) {
-					currentGroup.push(message)
+	const _browserSessionMessages = useMemo<ClineMessage[]>(() => {
+		if (browserSessionStartIndex === -1) return []
+		return messages.slice(browserSessionStartIndex)
+	}, [browserSessionStartIndex, messages])
 
-					// Check if this is a close action
-					if (message.say === "browser_action") {
-						const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction
-						if (browserAction.action === "close") {
-							endBrowserSession()
-						}
-					}
-				} else {
-					// complete existing browser session if any
-					endBrowserSession()
-					result.push(message)
-				}
-			} else {
-				result.push(message)
-			}
-		})
+	// Show globe toggle only when in a task that has a browser session (active or inactive)
+	const showBrowserDockToggle = useMemo(
+		() => Boolean(task && (browserSessionStartIndex !== -1 || isBrowserSessionActive)),
+		[task, browserSessionStartIndex, isBrowserSessionActive],
+	)
 
-		// Handle case where browser session is the last group
-		if (currentGroup.length > 0) {
-			result.push([...currentGroup])
+	const isBrowserSessionMessage = useCallback((message: ClineMessage): boolean => {
+		// Only the launch ask should be hidden from chat (it's shown in the drawer header)
+		if (message.type === "ask" && message.ask === "browser_action_launch") {
+			return true
 		}
+		// browser_action_result messages are paired with browser_action and should not appear independently
+		if (message.type === "say" && message.say === "browser_action_result") {
+			return true
+		}
+		return false
+	}, [])
+
+	const groupedMessages = useMemo(() => {
+		// Only filter out the launch ask and result messages - browser actions appear in chat
+		const result: ClineMessage[] = visibleMessages.filter((msg) => !isBrowserSessionMessage(msg))
 
 		if (isCondensing) {
-			// Show indicator after clicking condense button
 			result.push({
 				type: "say",
 				say: "condense_context",
 				ts: Date.now(),
 				partial: true,
-			})
+			} as any)
 		}
-
 		return result
-	}, [isCondensing, visibleMessages])
+	}, [isCondensing, visibleMessages, isBrowserSessionMessage])
 
 	// scrolling
 
@@ -1204,34 +1160,37 @@ const ChatViewComponent: React.ForwardRefRenderFunction<ChatViewRef, ChatViewPro
 		vscode.postMessage({ type: "askResponse", askResponse: "objectResponse", text: JSON.stringify(response) })
 	}, [])
 
-	// Handler for when FollowUpSuggest component unmounts
-	const handleFollowUpUnmount = useCallback(() => {
-		// Mark that user has responded
-		userRespondedRef.current = true
-	}, [])
-
 	const itemContent = useCallback(
-		(index: number, messageOrGroup: ClineMessage | ClineMessage[]) => {
-			// browser session group
-			if (Array.isArray(messageOrGroup)) {
+		(index: number, messageOrGroup: ClineMessage) => {
+			const hasCheckpoint = modifiedMessages.some((message) => message.say === "checkpoint_saved")
+
+			// Check if this is a browser action message
+			if (messageOrGroup.type === "say" && messageOrGroup.say === "browser_action") {
+				// Find the corresponding result message by looking for the next browser_action_result after this action's timestamp
+				const nextMessage = modifiedMessages.find(
+					(m) => m.ts > messageOrGroup.ts && m.say === "browser_action_result",
+				)
+
+				// Calculate action index and total count
+				const browserActions = modifiedMessages.filter((m) => m.say === "browser_action")
+				const actionIndex = browserActions.findIndex((m) => m.ts === messageOrGroup.ts) + 1
+				const totalActions = browserActions.length
+
 				return (
-					<BrowserSessionRow
-						messages={messageOrGroup}
-						isLast={index === groupedMessages.length - 1}
-						lastModifiedMessage={modifiedMessages.at(-1)}
-						onHeightChange={handleRowHeightChange}
-						isStreaming={isStreaming}
-						isExpanded={(messageTs: number) => expandedRows[messageTs] ?? false}
-						onToggleExpand={(messageTs: number) => {
-							setExpandedRows((prev: Record<number, boolean>) => ({
-								...prev,
-								[messageTs]: !prev[messageTs],
-							}))
-						}}
+					<BrowserActionRow
+						key={messageOrGroup.ts}
+						message={messageOrGroup}
+						nextMessage={nextMessage}
+						actionIndex={actionIndex}
+						totalActions={totalActions}
 					/>
 				)
 			}
-			const hasCheckpoint = modifiedMessages.some((message) => message.say === "checkpoint_saved")
+
+			// Check if this is a browser session status message
+			if (messageOrGroup.type === "say" && messageOrGroup.say === "browser_session_status") {
+				return <BrowserSessionStatusRow key={messageOrGroup.ts} message={messageOrGroup} />
+			}
 
 			// regular message
 			return (
@@ -1246,7 +1205,6 @@ const ChatViewComponent: React.ForwardRefRenderFunction<ChatViewRef, ChatViewPro
 					isStreaming={isStreaming}
 					onSuggestionClick={handleSuggestionClickInRow} // This was already stabilized
 					onBatchFileResponse={handleBatchFileResponse}
-					onFollowUpUnmount={handleFollowUpUnmount}
 					isFollowUpAnswered={messageOrGroup.isAnswered === true || messageOrGroup.ts === currentFollowUpTs}
 					editable={
 						messageOrGroup.type === "ask" &&
@@ -1279,7 +1237,6 @@ const ChatViewComponent: React.ForwardRefRenderFunction<ChatViewRef, ChatViewPro
 			isStreaming,
 			handleSuggestionClickInRow,
 			handleBatchFileResponse,
-			handleFollowUpUnmount,
 			currentFollowUpTs,
 			alwaysAllowUpdateTodoList,
 			enableButtons,
@@ -1434,24 +1391,26 @@ const ChatViewComponent: React.ForwardRefRenderFunction<ChatViewRef, ChatViewPro
 
 			{task && (
 				<>
-					<div className="grow flex" ref={scrollContainerRef}>
-						<Virtuoso
-							ref={virtuosoRef}
-							key={task.ts}
-							className="scrollable grow overflow-y-scroll mb-1"
-							increaseViewportBy={{ top: 3_000, bottom: 1000 }}
-							data={groupedMessages}
-							itemContent={itemContent}
-							atBottomStateChange={(isAtBottom: boolean) => {
-								setIsAtBottom(isAtBottom)
-								if (isAtBottom) {
-									disableAutoScrollRef.current = false
-								}
-								setShowScrollToBottom(disableAutoScrollRef.current && !isAtBottom)
-							}}
-							atBottomThreshold={10}
-							initialTopMostItemIndex={groupedMessages.length - 1}
-						/>
+					<div className="grow flex flex-col min-h-0" ref={scrollContainerRef}>
+						<div className="flex-auto min-h-0">
+							<Virtuoso
+								ref={virtuosoRef}
+								key={task.ts}
+								className="h-full overflow-y-auto mb-1"
+								increaseViewportBy={{ top: 3_000, bottom: 1000 }}
+								data={groupedMessages}
+								itemContent={itemContent}
+								atBottomStateChange={(isAtBottom: boolean) => {
+									setIsAtBottom(isAtBottom)
+									if (isAtBottom) {
+										disableAutoScrollRef.current = false
+									}
+									setShowScrollToBottom(disableAutoScrollRef.current && !isAtBottom)
+								}}
+								atBottomThreshold={10}
+								initialTopMostItemIndex={groupedMessages.length - 1}
+							/>
+						</div>
 					</div>
 					{areButtonsVisible && (
 						<div
@@ -1573,6 +1532,8 @@ const ChatViewComponent: React.ForwardRefRenderFunction<ChatViewRef, ChatViewPro
 				mode={mode}
 				setMode={setMode}
 				modeShortcutText={modeShortcutText}
+				isBrowserSessionActive={!!isBrowserSessionActive}
+				showBrowserDockToggle={showBrowserDockToggle}
 			/>
 
 			{isProfileDisabled && (

+ 104 - 50
webview-ui/src/components/chat/TaskHeader.tsx

@@ -1,4 +1,4 @@
-import { memo, useEffect, useRef, useState } from "react"
+import { memo, useEffect, useRef, useState, useMemo } from "react"
 import { useTranslation } from "react-i18next"
 import { useCloudUpsell } from "@src/hooks/useCloudUpsell"
 import { CloudUpsellDialog } from "@src/components/cloud/CloudUpsellDialog"
@@ -10,7 +10,8 @@ import {
 	Coins,
 	HardDriveDownload,
 	HardDriveUpload,
-	FoldVerticalIcon,
+	FoldVertical,
+	Globe,
 } from "lucide-react"
 import prettyBytes from "pretty-bytes"
 
@@ -21,9 +22,10 @@ import { findLastIndex } from "@roo/array"
 
 import { formatLargeNumber } from "@src/utils/format"
 import { cn } from "@src/lib/utils"
-import { StandardTooltip } from "@src/components/ui"
+import { StandardTooltip, Button } from "@src/components/ui"
 import { useExtensionState } from "@src/context/ExtensionStateContext"
 import { useSelectedModel } from "@/components/ui/hooks/useSelectedModel"
+import { vscode } from "@src/utils/vscode"
 
 import Thumbnails from "../common/Thumbnails"
 
@@ -59,7 +61,7 @@ const TaskHeader = ({
 	todos,
 }: TaskHeaderProps) => {
 	const { t } = useTranslation()
-	const { apiConfiguration, currentTaskItem, clineMessages } = useExtensionState()
+	const { apiConfiguration, currentTaskItem, clineMessages, isBrowserSessionActive } = useExtensionState()
 	const { id: modelId, info: model } = useSelectedModel(apiConfiguration)
 	const [isTaskExpanded, setIsTaskExpanded] = useState(false)
 	const [showLongRunningTaskMessage, setShowLongRunningTaskMessage] = useState(false)
@@ -95,10 +97,22 @@ const TaskHeader = ({
 	const textRef = useRef<HTMLDivElement>(null)
 	const contextWindow = model?.contextWindow || 1
 
+	// Detect if this task had any browser session activity so we can show a grey globe when inactive
+	const browserSessionStartIndex = useMemo(() => {
+		const msgs = clineMessages || []
+		for (let i = 0; i < msgs.length; i++) {
+			const m = msgs[i] as any
+			if (m?.ask === "browser_action_launch") return i
+		}
+		return -1
+	}, [clineMessages])
+
+	const showBrowserGlobe = browserSessionStartIndex !== -1 || !!isBrowserSessionActive
+
 	const condenseButton = (
 		<LucideIconButton
 			title={t("chat:task.condenseContext")}
-			icon={FoldVerticalIcon}
+			icon={FoldVertical}
 			disabled={buttonsDisabled}
 			onClick={() => currentTaskItem && handleCondenseContext(currentTaskItem.id)}
 		/>
@@ -182,53 +196,93 @@ const TaskHeader = ({
 				</div>
 				{!isTaskExpanded && contextWindow > 0 && (
 					<div
-						className="flex items-center gap-2 text-sm text-muted-foreground/70"
+						className="flex items-center justify-between text-sm text-muted-foreground/70"
 						onClick={(e) => e.stopPropagation()}>
-						<Coins className="size-3 shrink-0" />
-						<StandardTooltip
-							content={
-								<div className="space-y-1">
-									<div>
-										{t("chat:tokenProgress.tokensUsed", {
-											used: formatLargeNumber(contextTokens || 0),
-											total: formatLargeNumber(contextWindow),
-										})}
-									</div>
-									{(() => {
-										const maxTokens = model
-											? getModelMaxOutputTokens({ modelId, model, settings: apiConfiguration })
-											: 0
-										const reservedForOutput = maxTokens || 0
-										const availableSpace = contextWindow - (contextTokens || 0) - reservedForOutput
+						<div className="flex items-center gap-2">
+							<Coins className="size-3 shrink-0" />
+							<StandardTooltip
+								content={
+									<div className="space-y-1">
+										<div>
+											{t("chat:tokenProgress.tokensUsed", {
+												used: formatLargeNumber(contextTokens || 0),
+												total: formatLargeNumber(contextWindow),
+											})}
+										</div>
+										{(() => {
+											const maxTokens = model
+												? getModelMaxOutputTokens({
+														modelId,
+														model,
+														settings: apiConfiguration,
+													})
+												: 0
+											const reservedForOutput = maxTokens || 0
+											const availableSpace =
+												contextWindow - (contextTokens || 0) - reservedForOutput
 
-										return (
-											<>
-												{reservedForOutput > 0 && (
-													<div>
-														{t("chat:tokenProgress.reservedForResponse", {
-															amount: formatLargeNumber(reservedForOutput),
-														})}
-													</div>
-												)}
-												{availableSpace > 0 && (
-													<div>
-														{t("chat:tokenProgress.availableSpace", {
-															amount: formatLargeNumber(availableSpace),
-														})}
-													</div>
-												)}
-											</>
-										)
-									})()}
-								</div>
-							}
-							side="top"
-							sideOffset={8}>
-							<span className="mr-1">
-								{formatLargeNumber(contextTokens || 0)} / {formatLargeNumber(contextWindow)}
-							</span>
-						</StandardTooltip>
-						{!!totalCost && <span>${totalCost.toFixed(2)}</span>}
+											return (
+												<>
+													{reservedForOutput > 0 && (
+														<div>
+															{t("chat:tokenProgress.reservedForResponse", {
+																amount: formatLargeNumber(reservedForOutput),
+															})}
+														</div>
+													)}
+													{availableSpace > 0 && (
+														<div>
+															{t("chat:tokenProgress.availableSpace", {
+																amount: formatLargeNumber(availableSpace),
+															})}
+														</div>
+													)}
+												</>
+											)
+										})()}
+									</div>
+								}
+								side="top"
+								sideOffset={8}>
+								<span className="mr-1">
+									{formatLargeNumber(contextTokens || 0)} / {formatLargeNumber(contextWindow)}
+								</span>
+							</StandardTooltip>
+							{!!totalCost && <span>${totalCost.toFixed(2)}</span>}
+						</div>
+						{showBrowserGlobe && (
+							<div className="flex items-center gap-1" onClick={(e) => e.stopPropagation()}>
+								<StandardTooltip content={t("chat:browser.session")}>
+									<Button
+										variant="ghost"
+										size="sm"
+										aria-label={t("chat:browser.session")}
+										onClick={() => vscode.postMessage({ type: "openBrowserSessionPanel" } as any)}
+										className={cn(
+											"relative h-5 w-5 p-0",
+											"text-vscode-foreground opacity-85",
+											"hover:opacity-100 hover:bg-[rgba(255,255,255,0.03)]",
+											"focus:outline-none focus-visible:ring-1 focus-visible:ring-vscode-focusBorder",
+										)}>
+										<Globe
+											className="w-4 h-4"
+											style={{
+												color: isBrowserSessionActive
+													? "#4ade80"
+													: "var(--vscode-descriptionForeground)",
+											}}
+										/>
+									</Button>
+								</StandardTooltip>
+								{isBrowserSessionActive && (
+									<span
+										className="text-sm font-medium"
+										style={{ color: "var(--vscode-testing-iconPassed)" }}>
+										Active
+									</span>
+								)}
+							</div>
+						)}
 					</div>
 				)}
 				{/* Expanded state: Show task text and images */}

+ 55 - 0
webview-ui/src/components/chat/__tests__/BrowserSessionRow.aspect-ratio.spec.tsx

@@ -0,0 +1,55 @@
+import { render, screen, fireEvent } from "@testing-library/react"
+import React from "react"
+import BrowserSessionRow from "../BrowserSessionRow"
+import { ExtensionStateContext } from "@src/context/ExtensionStateContext"
+import { TooltipProvider } from "@src/components/ui/tooltip"
+
+describe("BrowserSessionRow - screenshot area", () => { // Layout checks for the screenshot container rendered by BrowserSessionRow
+	const renderRow = (messages: any[]) => { // Helper: mounts the row with the given messages inside tooltip and extension-state providers
+		const mockExtState: any = {
+			// Ensure known viewport so expected aspect ratio is deterministic (600/900 = 66.67%)
+			browserViewportSize: "900x600",
+			isBrowserSessionActive: false,
+		}
+
+		return render( // returns the Testing Library render result; the test below does not use it
+			<TooltipProvider>
+				<ExtensionStateContext.Provider value={mockExtState}>
+					<BrowserSessionRow
+						messages={messages as any}
+						isExpanded={() => true}
+						onToggleExpand={() => {}}
+						lastModifiedMessage={undefined as any}
+						isLast={true}
+						onHeightChange={() => {}}
+						isStreaming={false}
+					/>
+				</ExtensionStateContext.Provider>
+			</TooltipProvider>,
+		)
+	}
+
+	it("reserves height while screenshot is loading (no layout collapse)", () => {
+		// Only a launch action, no corresponding browser_action_result yet (no screenshot)
+		const messages = [
+			{
+				ts: 1,
+				say: "browser_action",
+				text: JSON.stringify({ action: "launch", url: "http://localhost:3000" }),
+			},
+		]
+
+		renderRow(messages)
+
+		// Open the browser session drawer
+		const globe = screen.getByLabelText("Browser interaction") // the control is located by its accessible label
+		fireEvent.click(globe)
+
+		const container = screen.getByTestId("screenshot-container") as HTMLDivElement // cast so the inline style can be read
+		// padding-bottom should reflect aspect ratio (600/900 * 100) even without an image
+		const pb = parseFloat(container.style.paddingBottom || "0") // a missing inline value parses as 0 and fails the assertion below
+		expect(pb).toBeGreaterThan(0)
+		// Be tolerant of rounding
+		expect(Math.round(pb)).toBe(67)
+	})
+})

+ 42 - 0
webview-ui/src/components/chat/__tests__/BrowserSessionRow.disconnect-button.spec.tsx

@@ -0,0 +1,42 @@
+import React from "react"
+import { render, screen } from "@testing-library/react"
+import BrowserSessionRow from "../BrowserSessionRow"
+import { ExtensionStateContext } from "@src/context/ExtensionStateContext"
+import { TooltipProvider } from "@radix-ui/react-tooltip"
+
+describe("BrowserSessionRow - Disconnect session button", () => { // Checks that the "Disconnect session" control appears only while a session is active
+	const renderRow = (isActive: boolean) => { // Helper: mounts a row with no messages; isActive is passed as isBrowserSessionActive in the mocked state
+		const mockExtState: any = {
+			browserViewportSize: "900x600",
+			isBrowserSessionActive: isActive,
+		}
+
+		return render( // returns the Testing Library render result; the tests below query via `screen` instead
+			<TooltipProvider>
+				<ExtensionStateContext.Provider value={mockExtState}>
+					<BrowserSessionRow
+						messages={[] as any}
+						isExpanded={() => false}
+						onToggleExpand={() => {}}
+						lastModifiedMessage={undefined as any}
+						isLast={true}
+						onHeightChange={() => {}}
+						isStreaming={false}
+					/>
+				</ExtensionStateContext.Provider>
+			</TooltipProvider>,
+		)
+	}
+
+	it("shows the Disconnect session button when a session is active", () => {
+		renderRow(true)
+		const btn = screen.getByLabelText("Disconnect session") // getBy* throws if no element carries this label
+		expect(btn).toBeInTheDocument()
+	})
+
+	it("does not render the button when no session is active", () => {
+		renderRow(false)
+		const btn = screen.queryByLabelText("Disconnect session") // queryBy* yields null instead of throwing when nothing matches
+		expect(btn).toBeNull()
+	})
+})

+ 126 - 0
webview-ui/src/components/chat/__tests__/BrowserSessionRow.spec.tsx

@@ -0,0 +1,126 @@
+import React from "react"
+import { describe, it, expect, vi } from "vitest"
+import { render, screen } from "@testing-library/react"
+
+import BrowserSessionRow from "../BrowserSessionRow"
+
+// Mock ExtensionStateContext so BrowserSessionRow falls back to props
+vi.mock("@src/context/ExtensionStateContext", () => ({
+	useExtensionState: () => { // mocked hook always throws; presumably BrowserSessionRow catches this - verify against the component
+		throw new Error("No ExtensionStateContext in test environment")
+	},
+}))
+
+// Simplify i18n usage and provide initReactI18next for i18n setup
+vi.mock("react-i18next", () => ({
+	useTranslation: () => ({
+		t: (key: string) => key, // identity translation: t(key) yields the key itself
+	}),
+	initReactI18next: {
+		type: "3rdParty",
+		init: () => {}, // no-op initializer
+	},
+}))
+
+// Replace ProgressIndicator with a simple test marker
+vi.mock("../ProgressIndicator", () => ({
+	ProgressIndicator: () => <div data-testid="browser-session-spinner" />, // the tests in this file look the marker up by this test id
+}))
+
+const baseProps = { // Props shared by every render in this file; each test supplies only `messages`
+	isExpanded: () => false,
+	onToggleExpand: () => {},
+	lastModifiedMessage: undefined,
+	isLast: true,
+	onHeightChange: () => {},
+	isStreaming: false,
+}
+
+describe("BrowserSessionRow - action spinner", () => { // Spinner is expected only when the last browser_action in the fixture has no browser_action_result after it
+	it("does not show spinner when there are no browser actions", () => {
+		const messages = [ // fixture contains no browser_action entries at all
+			{
+				type: "say",
+				say: "task",
+				ts: 1,
+				text: "Task started",
+			} as any,
+		]
+
+		render(<BrowserSessionRow {...baseProps} messages={messages} />)
+
+		expect(screen.queryByTestId("browser-session-spinner")).toBeNull()
+	})
+
+	it("shows spinner while the latest browser action is still running", () => {
+		const messages = [ // click + result, then a scroll_down action with nothing after it
+			{
+				type: "say",
+				say: "task",
+				ts: 1,
+				text: "Task started",
+			} as any,
+			{
+				type: "say",
+				say: "browser_action",
+				ts: 2,
+				text: JSON.stringify({ action: "click" }),
+			} as any,
+			{
+				type: "say",
+				say: "browser_action_result",
+				ts: 3,
+				text: JSON.stringify({ currentUrl: "https://example.com" }),
+			} as any,
+			{
+				type: "say",
+				say: "browser_action",
+				ts: 4, // last entry: an action with no browser_action_result following it
+				text: JSON.stringify({ action: "scroll_down" }),
+			} as any,
+		]
+
+		render(<BrowserSessionRow {...baseProps} messages={messages} />)
+
+		expect(screen.getByTestId("browser-session-spinner")).toBeInTheDocument()
+	})
+
+	it("hides spinner once the latest browser action has a result", () => {
+		const messages = [ // same sequence as the previous test plus a trailing browser_action_result
+			{
+				type: "say",
+				say: "task",
+				ts: 1,
+				text: "Task started",
+			} as any,
+			{
+				type: "say",
+				say: "browser_action",
+				ts: 2,
+				text: JSON.stringify({ action: "click" }),
+			} as any,
+			{
+				type: "say",
+				say: "browser_action_result",
+				ts: 3,
+				text: JSON.stringify({ currentUrl: "https://example.com" }),
+			} as any,
+			{
+				type: "say",
+				say: "browser_action",
+				ts: 4,
+				text: JSON.stringify({ action: "scroll_down" }),
+			} as any,
+			{
+				type: "say",
+				say: "browser_action_result",
+				ts: 5, // result entry that follows the scroll_down action
+				text: JSON.stringify({ currentUrl: "https://example.com/page2" }),
+			} as any,
+		]
+
+		render(<BrowserSessionRow {...baseProps} messages={messages} />)
+
+		expect(screen.queryByTestId("browser-session-spinner")).toBeNull()
+	})
+})

+ 1 - 0
webview-ui/src/context/ExtensionStateContext.tsx

@@ -200,6 +200,7 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode
 		deniedCommands: [],
 		soundEnabled: false,
 		soundVolume: 0.5,
+		isBrowserSessionActive: false,
 		ttsEnabled: false,
 		ttsSpeed: 1.0,
 		diffEnabled: false,

+ 1 - 0
webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx

@@ -214,6 +214,7 @@ describe("mergeExtensionState", () => {
 			remoteControlEnabled: false,
 			taskSyncEnabled: false,
 			featureRoomoteControlEnabled: false,
+			isBrowserSessionActive: false,
 			checkpointTimeout: DEFAULT_CHECKPOINT_TIMEOUT_SECONDS, // Add the checkpoint timeout property
 		}
 

+ 4 - 1
webview-ui/src/i18n/locales/ca/chat.json

@@ -306,6 +306,7 @@
 		"socialLinks": "Uneix-te a nosaltres a <xLink>X</xLink>, <discordLink>Discord</discordLink>, o <redditLink>r/RooCode</redditLink> 🚀"
 	},
 	"browser": {
+		"session": "Sessió del navegador",
 		"rooWantsToUse": "Roo vol utilitzar el navegador",
 		"consoleLogs": "Registres de consola",
 		"noNewLogs": "(Cap registre nou)",
@@ -318,12 +319,14 @@
 		},
 		"sessionStarted": "Sessió de navegador iniciada",
 		"actions": {
-			"title": "Acció de navegació: ",
+			"title": "Acció del navegador: ",
 			"launch": "Iniciar navegador a {{url}}",
 			"click": "Clic ({{coordinate}})",
 			"type": "Escriure \"{{text}}\"",
+			"press": "Prem {{key}}",
 			"scrollDown": "Desplaçar avall",
 			"scrollUp": "Desplaçar amunt",
+			"hover": "Plana sobre ({{coordinate}})",
 			"close": "Tancar navegador"
 		}
 	},

+ 3 - 0
webview-ui/src/i18n/locales/de/chat.json

@@ -306,6 +306,7 @@
 		"socialLinks": "Folge uns auf <xLink>X</xLink>, <discordLink>Discord</discordLink> oder <redditLink>r/RooCode</redditLink> 🚀"
 	},
 	"browser": {
+		"session": "Browser-Sitzung",
 		"rooWantsToUse": "Roo möchte den Browser verwenden",
 		"consoleLogs": "Konsolenprotokolle",
 		"noNewLogs": "(Keine neuen Protokolle)",
@@ -322,8 +323,10 @@
 			"launch": "Browser starten auf {{url}}",
 			"click": "Klicken ({{coordinate}})",
 			"type": "Eingeben \"{{text}}\"",
+			"press": "{{key}} drücken",
 			"scrollDown": "Nach unten scrollen",
 			"scrollUp": "Nach oben scrollen",
+			"hover": "Hover ({{coordinate}})",
 			"close": "Browser schließen"
 		}
 	},

+ 4 - 1
webview-ui/src/i18n/locales/en/chat.json

@@ -321,6 +321,7 @@
 		"countdownDisplay": "{{count}}s"
 	},
 	"browser": {
+		"session": "Browser Session",
 		"rooWantsToUse": "Roo wants to use the browser",
 		"consoleLogs": "Console Logs",
 		"noNewLogs": "(No new logs)",
@@ -333,12 +334,14 @@
 		},
 		"sessionStarted": "Browser Session Started",
 		"actions": {
-			"title": "Browse Action: ",
+			"title": "Browser Action: ",
 			"launch": "Launch browser at {{url}}",
 			"click": "Click ({{coordinate}})",
 			"type": "Type \"{{text}}\"",
+			"press": "Press {{key}}",
 			"scrollDown": "Scroll down",
 			"scrollUp": "Scroll up",
+			"hover": "Hover ({{coordinate}})",
 			"close": "Close browser"
 		}
 	},

+ 4 - 1
webview-ui/src/i18n/locales/es/chat.json

@@ -306,6 +306,7 @@
 		"socialLinks": "Únete a nosotros en <xLink>X</xLink>, <discordLink>Discord</discordLink>, o <redditLink>r/RooCode</redditLink> 🚀"
 	},
 	"browser": {
+		"session": "Sesión del navegador",
 		"rooWantsToUse": "Roo quiere usar el navegador",
 		"consoleLogs": "Registros de la consola",
 		"noNewLogs": "(No hay nuevos registros)",
@@ -318,12 +319,14 @@
 		},
 		"sessionStarted": "Sesión de navegador iniciada",
 		"actions": {
-			"title": "Acción de navegación: ",
+			"title": "Acción del navegador: ",
 			"launch": "Iniciar navegador en {{url}}",
 			"click": "Clic ({{coordinate}})",
 			"type": "Escribir \"{{text}}\"",
+			"press": "Pulsar {{key}}",
 			"scrollDown": "Desplazar hacia abajo",
 			"scrollUp": "Desplazar hacia arriba",
+			"hover": "Flotar ({{coordinate}})",
 			"close": "Cerrar navegador"
 		}
 	},

+ 4 - 1
webview-ui/src/i18n/locales/fr/chat.json

@@ -306,6 +306,7 @@
 		"socialLinks": "Rejoins-nous sur <xLink>X</xLink>, <discordLink>Discord</discordLink>, ou <redditLink>r/RooCode</redditLink> 🚀"
 	},
 	"browser": {
+		"session": "Session du navigateur",
 		"rooWantsToUse": "Roo veut utiliser le navigateur",
 		"consoleLogs": "Journaux de console",
 		"noNewLogs": "(Pas de nouveaux journaux)",
@@ -318,12 +319,14 @@
 		},
 		"sessionStarted": "Session de navigateur démarrée",
 		"actions": {
-			"title": "Action de navigation : ",
+			"title": "Action du navigateur : ",
 			"launch": "Lancer le navigateur sur {{url}}",
 			"click": "Cliquer ({{coordinate}})",
 			"type": "Saisir \"{{text}}\"",
+			"press": "Appuyer sur {{key}}",
 			"scrollDown": "Défiler vers le bas",
 			"scrollUp": "Défiler vers le haut",
+			"hover": "Survoler ({{coordinate}})",
 			"close": "Fermer le navigateur"
 		}
 	},

+ 3 - 0
webview-ui/src/i18n/locales/hi/chat.json

@@ -306,6 +306,7 @@
 		"socialLinks": "<xLink>X</xLink>, <discordLink>Discord</discordLink>, या <redditLink>r/RooCode</redditLink> पर हमसे जुड़ें 🚀"
 	},
 	"browser": {
+		"session": "ब्राउज़र सत्र",
 		"rooWantsToUse": "Roo ब्राउज़र का उपयोग करना चाहता है",
 		"consoleLogs": "कंसोल लॉग",
 		"noNewLogs": "(कोई नया लॉग नहीं)",
@@ -322,8 +323,10 @@
 			"launch": "{{url}} पर ब्राउज़र लॉन्च करें",
 			"click": "क्लिक करें ({{coordinate}})",
 			"type": "टाइप करें \"{{text}}\"",
+			"press": "{{key}} दबाएँ",
 			"scrollDown": "नीचे स्क्रॉल करें",
 			"scrollUp": "ऊपर स्क्रॉल करें",
+			"hover": "होवर करें ({{coordinate}})",
 			"close": "ब्राउज़र बंद करें"
 		}
 	},

+ 4 - 1
webview-ui/src/i18n/locales/id/chat.json

@@ -327,6 +327,7 @@
 		"countdownDisplay": "{{count}}dtk"
 	},
 	"browser": {
+		"session": "Sesi Browser",
 		"rooWantsToUse": "Roo ingin menggunakan browser",
 		"consoleLogs": "Log Konsol",
 		"noNewLogs": "(Tidak ada log baru)",
@@ -339,12 +340,14 @@
 		},
 		"sessionStarted": "Sesi Browser Dimulai",
 		"actions": {
-			"title": "Aksi Browse: ",
+			"title": "Aksi Browser: ",
 			"launch": "Luncurkan browser di {{url}}",
 			"click": "Klik ({{coordinate}})",
 			"type": "Ketik \"{{text}}\"",
+			"press": "Tekan {{key}}",
 			"scrollDown": "Gulir ke bawah",
 			"scrollUp": "Gulir ke atas",
+			"hover": "Arahkan ({{coordinate}})",
 			"close": "Tutup browser"
 		}
 	},

+ 3 - 0
webview-ui/src/i18n/locales/it/chat.json

@@ -306,6 +306,7 @@
 		"socialLinks": "Unisciti a noi su <xLink>X</xLink>, <discordLink>Discord</discordLink>, o <redditLink>r/RooCode</redditLink> 🚀"
 	},
 	"browser": {
+		"session": "Sessione del browser",
 		"rooWantsToUse": "Roo vuole utilizzare il browser",
 		"consoleLogs": "Log della console",
 		"noNewLogs": "(Nessun nuovo log)",
@@ -322,8 +323,10 @@
 			"launch": "Avvia browser su {{url}}",
 			"click": "Clic ({{coordinate}})",
 			"type": "Digita \"{{text}}\"",
+			"press": "Premi {{key}}",
 			"scrollDown": "Scorri verso il basso",
 			"scrollUp": "Scorri verso l'alto",
+			"hover": "Passa il mouse ({{coordinate}})",
 			"close": "Chiudi browser"
 		}
 	},

+ 4 - 1
webview-ui/src/i18n/locales/ja/chat.json

@@ -306,6 +306,7 @@
 		"socialLinks": "<xLink>X</xLink>、<discordLink>Discord</discordLink>、または<redditLink>r/RooCode</redditLink>でフォローしてください 🚀"
 	},
 	"browser": {
+		"session": "ブラウザセッション",
 		"rooWantsToUse": "Rooはブラウザを使用したい",
 		"consoleLogs": "コンソールログ",
 		"noNewLogs": "(新しいログはありません)",
@@ -318,12 +319,14 @@
 		},
 		"sessionStarted": "ブラウザセッション開始",
 		"actions": {
-			"title": "ブラウザアクション: ",
+			"title": "ブラウザ操作: ",
 			"launch": "{{url}} でブラウザを起動",
 			"click": "クリック ({{coordinate}})",
 			"type": "入力 \"{{text}}\"",
+			"press": "{{key}}を押す",
 			"scrollDown": "下にスクロール",
 			"scrollUp": "上にスクロール",
+			"hover": "ホバー ({{coordinate}})",
 			"close": "ブラウザを閉じる"
 		}
 	},

+ 3 - 0
webview-ui/src/i18n/locales/ko/chat.json

@@ -306,6 +306,7 @@
 		"socialLinks": "<xLink>X</xLink>, <discordLink>Discord</discordLink>, 또는 <redditLink>r/RooCode</redditLink>에서 만나요 🚀"
 	},
 	"browser": {
+		"session": "브라우저 세션",
 		"rooWantsToUse": "Roo가 브라우저를 사용하고 싶어합니다",
 		"consoleLogs": "콘솔 로그",
 		"noNewLogs": "(새 로그 없음)",
@@ -322,8 +323,10 @@
 			"launch": "{{url}}에서 브라우저 실행",
 			"click": "클릭 ({{coordinate}})",
 			"type": "입력 \"{{text}}\"",
+			"press": "{{key}} 누르기",
 			"scrollDown": "아래로 스크롤",
 			"scrollUp": "위로 스크롤",
+			"hover": "가리키기 ({{coordinate}})",
 			"close": "브라우저 닫기"
 		}
 	},

+ 4 - 1
webview-ui/src/i18n/locales/nl/chat.json

@@ -306,6 +306,7 @@
 		"countdownDisplay": "{{count}}s"
 	},
 	"browser": {
+		"session": "Browsersessie",
 		"rooWantsToUse": "Roo wil de browser gebruiken",
 		"consoleLogs": "Console-logboeken",
 		"noNewLogs": "(Geen nieuwe logboeken)",
@@ -318,12 +319,14 @@
 		},
 		"sessionStarted": "Browsersessie gestart",
 		"actions": {
-			"title": "Browse-actie: ",
+			"title": "Browseractie: ",
 			"launch": "Browser starten op {{url}}",
 			"click": "Klik ({{coordinate}})",
 			"type": "Typ \"{{text}}\"",
+			"press": "Druk op {{key}}",
 			"scrollDown": "Scroll naar beneden",
 			"scrollUp": "Scroll naar boven",
+			"hover": "Zweven ({{coordinate}})",
 			"close": "Browser sluiten"
 		}
 	},

+ 3 - 0
webview-ui/src/i18n/locales/pl/chat.json

@@ -306,6 +306,7 @@
 		"socialLinks": "Dołącz do nas na <xLink>X</xLink>, <discordLink>Discord</discordLink>, lub <redditLink>r/RooCode</redditLink> 🚀"
 	},
 	"browser": {
+		"session": "Sesja przeglądarki",
 		"rooWantsToUse": "Roo chce użyć przeglądarki",
 		"consoleLogs": "Logi konsoli",
 		"noNewLogs": "(Brak nowych logów)",
@@ -322,8 +323,10 @@
 			"launch": "Uruchom przeglądarkę na {{url}}",
 			"click": "Kliknij ({{coordinate}})",
 			"type": "Wpisz \"{{text}}\"",
+			"press": "Naciśnij {{key}}",
 			"scrollDown": "Przewiń w dół",
 			"scrollUp": "Przewiń w górę",
+			"hover": "Najedź ({{coordinate}})",
 			"close": "Zamknij przeglądarkę"
 		}
 	},

+ 3 - 0
webview-ui/src/i18n/locales/pt-BR/chat.json

@@ -306,6 +306,7 @@
 		"socialLinks": "Junte-se a nós no <xLink>X</xLink>, <discordLink>Discord</discordLink>, ou <redditLink>r/RooCode</redditLink> 🚀"
 	},
 	"browser": {
+		"session": "Sessão do Navegador",
 		"rooWantsToUse": "Roo quer usar o navegador",
 		"consoleLogs": "Logs do console",
 		"noNewLogs": "(Sem novos logs)",
@@ -322,8 +323,10 @@
 			"launch": "Iniciar navegador em {{url}}",
 			"click": "Clique ({{coordinate}})",
 			"type": "Digitar \"{{text}}\"",
+			"press": "Pressione {{key}}",
 			"scrollDown": "Rolar para baixo",
 			"scrollUp": "Rolar para cima",
+			"hover": "Pairar ({{coordinate}})",
 			"close": "Fechar navegador"
 		}
 	},

+ 4 - 1
webview-ui/src/i18n/locales/ru/chat.json

@@ -307,6 +307,7 @@
 		"countdownDisplay": "{{count}}с"
 	},
 	"browser": {
+		"session": "Сессия браузера",
 		"rooWantsToUse": "Roo хочет использовать браузер",
 		"consoleLogs": "Логи консоли",
 		"noNewLogs": "(Новых логов нет)",
@@ -319,12 +320,14 @@
 		},
 		"sessionStarted": "Сессия браузера запущена",
 		"actions": {
-			"title": "Действие в браузере: ",
+			"title": "Действие браузера: ",
 			"launch": "Открыть браузер по адресу {{url}}",
 			"click": "Клик ({{coordinate}})",
 			"type": "Ввести \"{{text}}\"",
+			"press": "Нажать {{key}}",
 			"scrollDown": "Прокрутить вниз",
 			"scrollUp": "Прокрутить вверх",
+			"hover": "Навести ({{coordinate}})",
 			"close": "Закрыть браузер"
 		}
 	},

+ 4 - 1
webview-ui/src/i18n/locales/tr/chat.json

@@ -307,6 +307,7 @@
 		"socialLinks": "Bize <xLink>X</xLink>, <discordLink>Discord</discordLink>, veya <redditLink>r/RooCode</redditLink>'da katılın 🚀"
 	},
 	"browser": {
+		"session": "Tarayıcı Oturumu",
 		"rooWantsToUse": "Roo tarayıcıyı kullanmak istiyor",
 		"consoleLogs": "Konsol Kayıtları",
 		"noNewLogs": "(Yeni kayıt yok)",
@@ -319,12 +320,14 @@
 		},
 		"sessionStarted": "Tarayıcı Oturumu Başlatıldı",
 		"actions": {
-			"title": "Tarayıcı İşlemi: ",
+			"title": "Tarayıcı Eylemi: ",
 			"launch": "{{url}} adresinde tarayıcı başlat",
 			"click": "Tıkla ({{coordinate}})",
 			"type": "Yaz \"{{text}}\"",
+			"press": "{{key}} tuşuna bas",
 			"scrollDown": "Aşağı kaydır",
 			"scrollUp": "Yukarı kaydır",
+			"hover": "Üzerine gel ({{coordinate}})",
 			"close": "Tarayıcıyı kapat"
 		}
 	},

+ 3 - 0
webview-ui/src/i18n/locales/vi/chat.json

@@ -307,6 +307,7 @@
 		"socialLinks": "Tham gia với chúng tôi trên <xLink>X</xLink>, <discordLink>Discord</discordLink>, hoặc <redditLink>r/RooCode</redditLink> 🚀"
 	},
 	"browser": {
+		"session": "Phiên trình duyệt",
 		"rooWantsToUse": "Roo muốn sử dụng trình duyệt",
 		"consoleLogs": "Nhật ký bảng điều khiển",
 		"noNewLogs": "(Không có nhật ký mới)",
@@ -323,8 +324,10 @@
 			"launch": "Khởi chạy trình duyệt tại {{url}}",
 			"click": "Nhấp ({{coordinate}})",
 			"type": "Gõ \"{{text}}\"",
+			"press": "Nhấn {{key}}",
 			"scrollDown": "Cuộn xuống",
 			"scrollUp": "Cuộn lên",
+			"hover": "Di chuột ({{coordinate}})",
 			"close": "Đóng trình duyệt"
 		}
 	},

+ 3 - 0
webview-ui/src/i18n/locales/zh-CN/chat.json

@@ -307,6 +307,7 @@
 		"socialLinks": "在 <xLink>X</xLink>、<discordLink>Discord</discordLink> 或 <redditLink>r/RooCode</redditLink> 上关注我们 🚀"
 	},
 	"browser": {
+		"session": "浏览器会话",
 		"rooWantsToUse": "Roo想使用浏览器",
 		"consoleLogs": "控制台日志",
 		"noNewLogs": "(没有新日志)",
@@ -323,8 +324,10 @@
 			"launch": "访问 {{url}}",
 			"click": "点击 ({{coordinate}})",
 			"type": "输入 \"{{text}}\"",
+			"press": "按 {{key}}",
 			"scrollDown": "向下滚动",
 			"scrollUp": "向上滚动",
+			"hover": "悬停 ({{coordinate}})",
 			"close": "关闭浏览器"
 		}
 	},

+ 4 - 1
webview-ui/src/i18n/locales/zh-TW/chat.json

@@ -325,6 +325,7 @@
 		"countdownDisplay": "{{count}} 秒"
 	},
 	"browser": {
+		"session": "瀏覽器工作階段",
 		"rooWantsToUse": "Roo 想要使用瀏覽器",
 		"consoleLogs": "主控台記錄",
 		"noNewLogs": "(沒有新記錄)",
@@ -337,12 +338,14 @@
 		},
 		"sessionStarted": "瀏覽器工作階段已啟動",
 		"actions": {
-			"title": "瀏覽器動作",
+			"title": "瀏覽器動作",
 			"launch": "在 {{url}} 啟動瀏覽器",
 			"click": "點選 ({{coordinate}})",
 			"type": "輸入「{{text}}」",
+			"press": "按下 {{key}}",
 			"scrollDown": "向下捲動",
 			"scrollUp": "向上捲動",
+			"hover": "懸停 ({{coordinate}})",
 			"close": "關閉瀏覽器"
 		}
 	},

+ 4 - 0
webview-ui/vite.config.ts

@@ -101,6 +101,10 @@ export default defineConfig(({ mode }) => {
 			// Ensure source maps are properly included in the build
 			minify: mode === "production" ? "esbuild" : false,
 			rollupOptions: {
+				input: {
+					index: resolve(__dirname, "index.html"),
+					"browser-panel": resolve(__dirname, "browser-panel.html"),
+				},
 				output: {
 					entryFileNames: `assets/[name].js`,
 					chunkFileNames: (chunkInfo) => {

Niektóre pliki nie zostały wyświetlone z powodu dużej ilości zmienionych plików