Преглед на файлове

In-Editor Browser Improvements (#1601)

* Browser Automation Improvements
* Added multi-tab remote Chrome support
* Added support for hover
* Properly caching remote browser host in global state
* Cleanup functions

* Updated for changes after merge

* Added www. exception for common tabs

* Update src/core/webview/ClineProvider.ts

* Revert README changes

---------

Co-authored-by: Matt Rubens <[email protected]>
Afshawn Lotfi преди 9 месеца
родител
ревизия
cbe7075f4f

+ 1 - 0
.gitignore

@@ -1,3 +1,4 @@
+.pnpm-store
 dist
 dist
 out
 out
 out-*
 out-*

+ 2 - 1
scripts/generate-types.mts

@@ -3,7 +3,8 @@ import fs from "fs/promises"
 import { zodToTs, createTypeAlias, printNode } from "zod-to-ts"
 import { zodToTs, createTypeAlias, printNode } from "zod-to-ts"
 import { $ } from "execa"
 import { $ } from "execa"
 
 
-import { typeDefinitions } from "../src/schemas"
+import schemas from "../src/schemas"
+const { typeDefinitions } = schemas
 
 
 async function main() {
 async function main() {
 	const types: string[] = [
 	const types: string[] = [

+ 4 - 3
src/core/Cline.ts

@@ -2466,7 +2466,8 @@ export class Cline extends EventEmitter<ClineEvents> {
 								}
 								}
 								break
 								break
 							} else {
 							} else {
-								let browserActionResult: BrowserActionResult
+								// Initialize with empty object to avoid "used before assigned" errors
+								let browserActionResult: BrowserActionResult = {}
 								if (action === "launch") {
 								if (action === "launch") {
 									if (!url) {
 									if (!url) {
 										this.consecutiveMistakeCount++
 										this.consecutiveMistakeCount++
@@ -2552,9 +2553,9 @@ export class Cline extends EventEmitter<ClineEvents> {
 										pushToolResult(
 										pushToolResult(
 											formatResponse.toolResult(
 											formatResponse.toolResult(
 												`The browser action has been executed. The console logs and screenshot have been captured for your analysis.\n\nConsole logs:\n${
 												`The browser action has been executed. The console logs and screenshot have been captured for your analysis.\n\nConsole logs:\n${
-													browserActionResult.logs || "(No new logs)"
+													browserActionResult?.logs || "(No new logs)"
 												}\n\n(REMEMBER: if you need to proceed to using non-\`browser_action\` tools or launch a new browser, you MUST first close this browser. For example, if after analyzing the logs and screenshot you need to edit a file, you must first close the browser before you can use the write_to_file tool.)`,
 												}\n\n(REMEMBER: if you need to proceed to using non-\`browser_action\` tools or launch a new browser, you MUST first close this browser. For example, if after analyzing the logs and screenshot you need to edit a file, you must first close the browser before you can use the write_to_file tool.)`,
-												browserActionResult.screenshot ? [browserActionResult.screenshot] : [],
+												browserActionResult?.screenshot ? [browserActionResult.screenshot] : [],
 											),
 											),
 										)
 										)
 										break
 										break

+ 1 - 4
src/core/mentions/index.ts

@@ -22,10 +22,7 @@ export async function openMention(mention?: string, osInfo?: string): Promise<vo
 		return
 		return
 	}
 	}
 
 
-	if (
-		(osInfo !== "win32" && mention.startsWith("/")) ||
-		(osInfo === "win32" && mention.startsWith("\\"))
-	) {
+	if ((osInfo !== "win32" && mention.startsWith("/")) || (osInfo === "win32" && mention.startsWith("\\"))) {
 		const relPath = mention.slice(1)
 		const relPath = mention.slice(1)
 		let absPath = path.resolve(cwd, relPath)
 		let absPath = path.resolve(cwd, relPath)
 		if (absPath.includes(" ")) {
 		if (absPath.includes(" ")) {

+ 21 - 69
src/core/webview/ClineProvider.ts

@@ -50,7 +50,7 @@ import { McpHub } from "../../services/mcp/McpHub"
 import { McpServerManager } from "../../services/mcp/McpServerManager"
 import { McpServerManager } from "../../services/mcp/McpServerManager"
 import { ShadowCheckpointService } from "../../services/checkpoints/ShadowCheckpointService"
 import { ShadowCheckpointService } from "../../services/checkpoints/ShadowCheckpointService"
 import { BrowserSession } from "../../services/browser/BrowserSession"
 import { BrowserSession } from "../../services/browser/BrowserSession"
-import { discoverChromeInstances } from "../../services/browser/browserDiscovery"
+import { discoverChromeHostUrl, tryChromeHostUrl } from "../../services/browser/browserDiscovery"
 import { searchWorkspaceFiles } from "../../services/search/file-search"
 import { searchWorkspaceFiles } from "../../services/search/file-search"
 import { fileExistsAtPath } from "../../utils/fs"
 import { fileExistsAtPath } from "../../utils/fs"
 import { playSound, setSoundEnabled, setSoundVolume } from "../../utils/sound"
 import { playSound, setSoundEnabled, setSoundVolume } from "../../utils/sound"
@@ -1420,74 +1420,17 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
 						await this.postStateToWebview()
 						await this.postStateToWebview()
 						break
 						break
 					case "testBrowserConnection":
 					case "testBrowserConnection":
-						try {
-							const browserSession = new BrowserSession(this.context)
-							// If no text is provided, try auto-discovery
-							if (!message.text) {
-								try {
-									const discoveredHost = await discoverChromeInstances()
-									if (discoveredHost) {
-										// Test the connection to the discovered host
-										const result = await browserSession.testConnection(discoveredHost)
-										// Send the result back to the webview
-										await this.postMessageToWebview({
-											type: "browserConnectionResult",
-											success: result.success,
-											text: `Auto-discovered and tested connection to Chrome at ${discoveredHost}: ${result.message}`,
-											values: { endpoint: result.endpoint },
-										})
-									} else {
-										await this.postMessageToWebview({
-											type: "browserConnectionResult",
-											success: false,
-											text: "No Chrome instances found on the network. Make sure Chrome is running with remote debugging enabled (--remote-debugging-port=9222).",
-										})
-									}
-								} catch (error) {
-									await this.postMessageToWebview({
-										type: "browserConnectionResult",
-										success: false,
-										text: `Error during auto-discovery: ${error instanceof Error ? error.message : String(error)}`,
-									})
-								}
-							} else {
-								// Test the provided URL
-								const result = await browserSession.testConnection(message.text)
-
-								// Send the result back to the webview
-								await this.postMessageToWebview({
-									type: "browserConnectionResult",
-									success: result.success,
-									text: result.message,
-									values: { endpoint: result.endpoint },
-								})
-							}
-						} catch (error) {
-							await this.postMessageToWebview({
-								type: "browserConnectionResult",
-								success: false,
-								text: `Error testing connection: ${error instanceof Error ? error.message : String(error)}`,
-							})
-						}
-						break
-					case "discoverBrowser":
-						try {
-							const discoveredHost = await discoverChromeInstances()
-
-							if (discoveredHost) {
-								// Don't update the remoteBrowserHost state when auto-discovering
-								// This way we don't override the user's preference
-
-								// Test the connection to get the endpoint
-								const browserSession = new BrowserSession(this.context)
-								const result = await browserSession.testConnection(discoveredHost)
-
+						// If no text is provided, try auto-discovery
+						if (!message.text) {
+							// Auto-discover a reachable Chrome host URL via discoverChromeHostUrl
+							const chromeHostUrl = await discoverChromeHostUrl()
+							if (chromeHostUrl) {
 								// Send the result back to the webview
 								// Send the result back to the webview
 								await this.postMessageToWebview({
 								await this.postMessageToWebview({
 									type: "browserConnectionResult",
 									type: "browserConnectionResult",
-									success: true,
-									text: `Successfully discovered and connected to Chrome at ${discoveredHost}`,
-									values: { endpoint: result.endpoint },
+									success: !!chromeHostUrl,
+									text: `Auto-discovered and tested connection to Chrome: ${chromeHostUrl}`,
+									values: { endpoint: chromeHostUrl },
 								})
 								})
 							} else {
 							} else {
 								await this.postMessageToWebview({
 								await this.postMessageToWebview({
@@ -1496,11 +1439,17 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
 									text: "No Chrome instances found on the network. Make sure Chrome is running with remote debugging enabled (--remote-debugging-port=9222).",
 									text: "No Chrome instances found on the network. Make sure Chrome is running with remote debugging enabled (--remote-debugging-port=9222).",
 								})
 								})
 							}
 							}
-						} catch (error) {
+						} else {
+							// Test the provided URL
+							const customHostUrl = message.text
+							const hostIsValid = await tryChromeHostUrl(message.text)
+							// Send the result back to the webview
 							await this.postMessageToWebview({
 							await this.postMessageToWebview({
 								type: "browserConnectionResult",
 								type: "browserConnectionResult",
-								success: false,
-								text: `Error discovering browser: ${error instanceof Error ? error.message : String(error)}`,
+								success: hostIsValid,
+								text: hostIsValid
+									? `Successfully connected to Chrome: ${customHostUrl}`
+									: "Failed to connect to Chrome",
 							})
 							})
 						}
 						}
 						break
 						break
@@ -2602,6 +2551,7 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
 			screenshotQuality,
 			screenshotQuality,
 			remoteBrowserHost,
 			remoteBrowserHost,
 			remoteBrowserEnabled,
 			remoteBrowserEnabled,
+			cachedChromeHostUrl,
 			writeDelayMs,
 			writeDelayMs,
 			terminalOutputLineLimit,
 			terminalOutputLineLimit,
 			terminalShellIntegrationTimeout,
 			terminalShellIntegrationTimeout,
@@ -2670,6 +2620,7 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
 			screenshotQuality: screenshotQuality ?? 75,
 			screenshotQuality: screenshotQuality ?? 75,
 			remoteBrowserHost,
 			remoteBrowserHost,
 			remoteBrowserEnabled: remoteBrowserEnabled ?? false,
 			remoteBrowserEnabled: remoteBrowserEnabled ?? false,
+			cachedChromeHostUrl: cachedChromeHostUrl,
 			writeDelayMs: writeDelayMs ?? 1000,
 			writeDelayMs: writeDelayMs ?? 1000,
 			terminalOutputLineLimit: terminalOutputLineLimit ?? 500,
 			terminalOutputLineLimit: terminalOutputLineLimit ?? 500,
 			terminalShellIntegrationTimeout: terminalShellIntegrationTimeout ?? TERMINAL_SHELL_INTEGRATION_TIMEOUT,
 			terminalShellIntegrationTimeout: terminalShellIntegrationTimeout ?? TERMINAL_SHELL_INTEGRATION_TIMEOUT,
@@ -2755,6 +2706,7 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
 			screenshotQuality: stateValues.screenshotQuality ?? 75,
 			screenshotQuality: stateValues.screenshotQuality ?? 75,
 			remoteBrowserHost: stateValues.remoteBrowserHost,
 			remoteBrowserHost: stateValues.remoteBrowserHost,
 			remoteBrowserEnabled: stateValues.remoteBrowserEnabled ?? false,
 			remoteBrowserEnabled: stateValues.remoteBrowserEnabled ?? false,
+			cachedChromeHostUrl: stateValues.cachedChromeHostUrl as string | undefined,
 			fuzzyMatchThreshold: stateValues.fuzzyMatchThreshold ?? 1.0,
 			fuzzyMatchThreshold: stateValues.fuzzyMatchThreshold ?? 1.0,
 			writeDelayMs: stateValues.writeDelayMs ?? 1000,
 			writeDelayMs: stateValues.writeDelayMs ?? 1000,
 			terminalOutputLineLimit: stateValues.terminalOutputLineLimit ?? 500,
 			terminalOutputLineLimit: stateValues.terminalOutputLineLimit ?? 500,

+ 7 - 75
src/core/webview/__tests__/ClineProvider.test.ts

@@ -40,9 +40,12 @@ jest.mock("../../../services/browser/BrowserSession", () => ({
 
 
 // Mock browserDiscovery
 // Mock browserDiscovery
 jest.mock("../../../services/browser/browserDiscovery", () => ({
 jest.mock("../../../services/browser/browserDiscovery", () => ({
-	discoverChromeInstances: jest.fn().mockImplementation(async () => {
+	discoverChromeHostUrl: jest.fn().mockImplementation(async () => {
 		return "http://localhost:9222"
 		return "http://localhost:9222"
 	}),
 	}),
+	tryChromeHostUrl: jest.fn().mockImplementation(async (url) => {
+		return url === "http://localhost:9222"
+	}),
 }))
 }))
 
 
 jest.mock(
 jest.mock(
@@ -1916,9 +1919,9 @@ describe("ClineProvider", () => {
 				type: "testBrowserConnection",
 				type: "testBrowserConnection",
 			})
 			})
 
 
-			// Verify discoverChromeInstances was called
-			const { discoverChromeInstances } = require("../../../services/browser/browserDiscovery")
-			expect(discoverChromeInstances).toHaveBeenCalled()
+			// Verify discoverChromeHostUrl was called
+			const { discoverChromeHostUrl } = require("../../../services/browser/browserDiscovery")
+			expect(discoverChromeHostUrl).toHaveBeenCalled()
 
 
 			// Verify postMessage was called with success result
 			// Verify postMessage was called with success result
 			expect(mockPostMessage).toHaveBeenCalledWith(
 			expect(mockPostMessage).toHaveBeenCalledWith(
@@ -1929,77 +1932,6 @@ describe("ClineProvider", () => {
 				}),
 				}),
 			)
 			)
 		})
 		})
-
-		test("handles discoverBrowser message", async () => {
-			// Get the message handler
-			const messageHandler = (mockWebviewView.webview.onDidReceiveMessage as jest.Mock).mock.calls[0][0]
-
-			// Test browser discovery
-			await messageHandler({
-				type: "discoverBrowser",
-			})
-
-			// Verify discoverChromeInstances was called
-			const { discoverChromeInstances } = require("../../../services/browser/browserDiscovery")
-			expect(discoverChromeInstances).toHaveBeenCalled()
-
-			// Verify postMessage was called with success result
-			expect(mockPostMessage).toHaveBeenCalledWith(
-				expect.objectContaining({
-					type: "browserConnectionResult",
-					success: true,
-					text: expect.stringContaining("Successfully discovered and connected to Chrome"),
-				}),
-			)
-		})
-
-		test("handles errors during browser discovery", async () => {
-			// Mock discoverChromeInstances to throw an error
-			const { discoverChromeInstances } = require("../../../services/browser/browserDiscovery")
-			discoverChromeInstances.mockImplementationOnce(() => {
-				throw new Error("Discovery error")
-			})
-
-			// Get the message handler
-			const messageHandler = (mockWebviewView.webview.onDidReceiveMessage as jest.Mock).mock.calls[0][0]
-
-			// Test browser discovery with error
-			await messageHandler({
-				type: "discoverBrowser",
-			})
-
-			// Verify postMessage was called with error result
-			expect(mockPostMessage).toHaveBeenCalledWith(
-				expect.objectContaining({
-					type: "browserConnectionResult",
-					success: false,
-					text: expect.stringContaining("Error discovering browser"),
-				}),
-			)
-		})
-
-		test("handles case when no browsers are discovered", async () => {
-			// Mock discoverChromeInstances to return null (no browsers found)
-			const { discoverChromeInstances } = require("../../../services/browser/browserDiscovery")
-			discoverChromeInstances.mockImplementationOnce(() => null)
-
-			// Get the message handler
-			const messageHandler = (mockWebviewView.webview.onDidReceiveMessage as jest.Mock).mock.calls[0][0]
-
-			// Test browser discovery with no browsers found
-			await messageHandler({
-				type: "discoverBrowser",
-			})
-
-			// Verify postMessage was called with failure result
-			expect(mockPostMessage).toHaveBeenCalledWith(
-				expect.objectContaining({
-					type: "browserConnectionResult",
-					success: false,
-					text: expect.stringContaining("No Chrome instances found"),
-				}),
-			)
-		})
 	})
 	})
 })
 })
 
 

+ 1 - 0
src/exports/roo-code.d.ts

@@ -234,6 +234,7 @@ type GlobalSettings = {
 	screenshotQuality?: number | undefined
 	screenshotQuality?: number | undefined
 	remoteBrowserEnabled?: boolean | undefined
 	remoteBrowserEnabled?: boolean | undefined
 	remoteBrowserHost?: string | undefined
 	remoteBrowserHost?: string | undefined
+	cachedChromeHostUrl?: string | undefined
 	enableCheckpoints?: boolean | undefined
 	enableCheckpoints?: boolean | undefined
 	checkpointStorage?: ("task" | "workspace") | undefined
 	checkpointStorage?: ("task" | "workspace") | undefined
 	ttsEnabled?: boolean | undefined
 	ttsEnabled?: boolean | undefined

+ 1 - 0
src/exports/types.ts

@@ -237,6 +237,7 @@ type GlobalSettings = {
 	screenshotQuality?: number | undefined
 	screenshotQuality?: number | undefined
 	remoteBrowserEnabled?: boolean | undefined
 	remoteBrowserEnabled?: boolean | undefined
 	remoteBrowserHost?: string | undefined
 	remoteBrowserHost?: string | undefined
+	cachedChromeHostUrl?: string | undefined
 	enableCheckpoints?: boolean | undefined
 	enableCheckpoints?: boolean | undefined
 	checkpointStorage?: ("task" | "workspace") | undefined
 	checkpointStorage?: ("task" | "workspace") | undefined
 	ttsEnabled?: boolean | undefined
 	ttsEnabled?: boolean | undefined

+ 6 - 1
src/schemas/index.ts

@@ -513,6 +513,7 @@ export const globalSettingsSchema = z.object({
 	screenshotQuality: z.number().optional(),
 	screenshotQuality: z.number().optional(),
 	remoteBrowserEnabled: z.boolean().optional(),
 	remoteBrowserEnabled: z.boolean().optional(),
 	remoteBrowserHost: z.string().optional(),
 	remoteBrowserHost: z.string().optional(),
+	cachedChromeHostUrl: z.string().optional(),
 
 
 	enableCheckpoints: z.boolean().optional(),
 	enableCheckpoints: z.boolean().optional(),
 	checkpointStorage: checkpointStoragesSchema.optional(),
 	checkpointStorage: checkpointStoragesSchema.optional(),
@@ -618,6 +619,7 @@ const globalSettingsRecord: GlobalSettingsRecord = {
 	customModePrompts: undefined,
 	customModePrompts: undefined,
 	customSupportPrompts: undefined,
 	customSupportPrompts: undefined,
 	enhancementApiConfigId: undefined,
 	enhancementApiConfigId: undefined,
+	cachedChromeHostUrl: undefined,
 }
 }
 
 
 export const GLOBAL_SETTINGS_KEYS = Object.keys(globalSettingsRecord) as Keys<GlobalSettings>[]
 export const GLOBAL_SETTINGS_KEYS = Object.keys(globalSettingsRecord) as Keys<GlobalSettings>[]
@@ -791,7 +793,7 @@ export type TokenUsage = z.infer<typeof tokenUsageSchema>
  * TypeDefinition
  * TypeDefinition
  */
  */
 
 
-type TypeDefinition = {
+export type TypeDefinition = {
 	schema: z.ZodTypeAny
 	schema: z.ZodTypeAny
 	identifier: string
 	identifier: string
 }
 }
@@ -802,3 +804,6 @@ export const typeDefinitions: TypeDefinition[] = [
 	{ schema: clineMessageSchema, identifier: "ClineMessage" },
 	{ schema: clineMessageSchema, identifier: "ClineMessage" },
 	{ schema: tokenUsageSchema, identifier: "TokenUsage" },
 	{ schema: tokenUsageSchema, identifier: "TokenUsage" },
 ]
 ]
+
+// Also export as default for ESM compatibility
+export default { typeDefinitions }

+ 303 - 170
src/services/browser/BrowserSession.ts

@@ -9,7 +9,7 @@ import delay from "delay"
 import axios from "axios"
 import axios from "axios"
 import { fileExistsAtPath } from "../../utils/fs"
 import { fileExistsAtPath } from "../../utils/fs"
 import { BrowserActionResult } from "../../shared/ExtensionMessage"
 import { BrowserActionResult } from "../../shared/ExtensionMessage"
-import { discoverChromeInstances, testBrowserConnection } from "./browserDiscovery"
+import { discoverChromeHostUrl, tryChromeHostUrl } from "./browserDiscovery"
 
 
 interface PCRStats {
 interface PCRStats {
 	puppeteer: { launch: typeof launch }
 	puppeteer: { launch: typeof launch }
@@ -21,20 +21,12 @@ export class BrowserSession {
 	private browser?: Browser
 	private browser?: Browser
 	private page?: Page
 	private page?: Page
 	private currentMousePosition?: string
 	private currentMousePosition?: string
-	private cachedWebSocketEndpoint?: string
-	private lastConnectionAttempt: number = 0
+	private lastConnectionAttempt?: number
 
 
 	constructor(context: vscode.ExtensionContext) {
 	constructor(context: vscode.ExtensionContext) {
 		this.context = context
 		this.context = context
 	}
 	}
 
 
-	/**
-	 * Test connection to a remote browser
-	 */
-	async testConnection(host: string): Promise<{ success: boolean; message: string; endpoint?: string }> {
-		return testBrowserConnection(host)
-	}
-
 	private async ensureChromiumExists(): Promise<PCRStats> {
 	private async ensureChromiumExists(): Promise<PCRStats> {
 		const globalStoragePath = this.context?.globalStorageUri?.fsPath
 		const globalStoragePath = this.context?.globalStorageUri?.fsPath
 		if (!globalStoragePath) {
 		if (!globalStoragePath) {
@@ -56,162 +48,173 @@ export class BrowserSession {
 		return stats
 		return stats
 	}
 	}
 
 
-	async launchBrowser(): Promise<void> {
-		console.log("launch browser called")
-		if (this.browser) {
-			// throw new Error("Browser already launched")
-			await this.closeBrowser() // this may happen when the model launches a browser again after having used it already before
-		}
-
-		// Function to get viewport size
-		const getViewport = () => {
-			const size = (this.context.globalState.get("browserViewportSize") as string | undefined) || "900x600"
-			const [width, height] = size.split("x").map(Number)
-			return { width, height }
-		}
+	/**
+	 * Gets the viewport size from global state or returns default
+	 */
+	private getViewport() {
+		const size = (this.context.globalState.get("browserViewportSize") as string | undefined) || "900x600"
+		const [width, height] = size.split("x").map(Number)
+		return { width, height }
+	}
 
 
-		// Check if remote browser connection is enabled
-		const remoteBrowserEnabled = this.context.globalState.get("remoteBrowserEnabled") as boolean | undefined
+	/**
+	 * Launches a local browser instance
+	 */
+	private async launchLocalBrowser(): Promise<void> {
+		console.log("Launching local browser")
+		const stats = await this.ensureChromiumExists()
+		this.browser = await stats.puppeteer.launch({
+			args: [
+				"--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
+			],
+			executablePath: stats.executablePath,
+			defaultViewport: this.getViewport(),
+			// headless: false,
+		})
+	}
 
 
-		// If remote browser connection is not enabled, use local browser
-		if (!remoteBrowserEnabled) {
-			console.log("Remote browser connection is disabled, using local browser")
-			const stats = await this.ensureChromiumExists()
-			this.browser = await stats.puppeteer.launch({
-				args: [
-					"--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
-				],
-				executablePath: stats.executablePath,
-				defaultViewport: getViewport(),
-				// headless: false,
+	/**
+	 * Connects to a browser via its Chrome host URL (passed to connect() as browserURL) and caches the URL on success
+	 */
+	private async connectWithChromeHostUrl(chromeHostUrl: string): Promise<boolean> {
+		try {
+			this.browser = await connect({
+				browserURL: chromeHostUrl,
+				defaultViewport: this.getViewport(),
 			})
 			})
-			this.page = await this.browser?.newPage()
-			return
+
+			// Cache the successful endpoint
+			console.log(`Connected to remote browser at ${chromeHostUrl}`)
+			this.context.globalState.update("cachedChromeHostUrl", chromeHostUrl)
+			this.lastConnectionAttempt = Date.now()
+
+			return true
+		} catch (error) {
+			console.log(`Failed to connect using WebSocket endpoint: ${error}`)
+			return false
 		}
 		}
-		// Remote browser connection is enabled
+	}
+
+	/**
+	 * Attempts to connect to a remote browser: a recently cached host URL if present, otherwise the configured remoteBrowserHost, then auto-discovery
+	 * Returns true if connection was successful, false otherwise
+	 */
+	private async connectToRemoteBrowser(): Promise<boolean> {
 		let remoteBrowserHost = this.context.globalState.get("remoteBrowserHost") as string | undefined
 		let remoteBrowserHost = this.context.globalState.get("remoteBrowserHost") as string | undefined
-		let browserWSEndpoint: string | undefined = this.cachedWebSocketEndpoint
 		let reconnectionAttempted = false
 		let reconnectionAttempted = false
 
 
 		// Try to connect with cached endpoint first if it exists and is recent (less than 1 hour old)
 		// Try to connect with cached endpoint first if it exists and is recent (less than 1 hour old)
-		if (browserWSEndpoint && Date.now() - this.lastConnectionAttempt < 3600000) {
-			try {
-				console.log(`Attempting to connect using cached WebSocket endpoint: ${browserWSEndpoint}`)
-				this.browser = await connect({
-					browserWSEndpoint,
-					defaultViewport: getViewport(),
-				})
-				this.page = await this.browser?.newPage()
-				return
-			} catch (error) {
-				console.log(`Failed to connect using cached endpoint: ${error}`)
-				// Clear the cached endpoint since it's no longer valid
-				this.cachedWebSocketEndpoint = undefined
-				// User wants to give up after one reconnection attempt
-				if (remoteBrowserHost) {
-					reconnectionAttempted = true
-				}
+		const cachedChromeHostUrl = this.context.globalState.get("cachedChromeHostUrl") as string | undefined
+		if (cachedChromeHostUrl && this.lastConnectionAttempt && Date.now() - this.lastConnectionAttempt < 3_600_000) {
+			console.log(`Attempting to connect using cached Chrome Host Url: ${cachedChromeHostUrl}`)
+			if (await this.connectWithChromeHostUrl(cachedChromeHostUrl)) {
+				return true
+			}
+
+			console.log(`Failed to connect using cached Chrome Host Url: ${cachedChromeHostUrl}`)
+			// Clear the cached endpoint since it's no longer valid
+			this.context.globalState.update("cachedChromeHostUrl", undefined)
+
+			// User wants to give up after one reconnection attempt
+			if (remoteBrowserHost) {
+				reconnectionAttempted = true
 			}
 			}
 		}
 		}
 
 
 		// If user provided a remote browser host, try to connect to it
 		// If user provided a remote browser host, try to connect to it
-		if (remoteBrowserHost && !reconnectionAttempted) {
+		else if (remoteBrowserHost && !reconnectionAttempted) {
 			console.log(`Attempting to connect to remote browser at ${remoteBrowserHost}`)
 			console.log(`Attempting to connect to remote browser at ${remoteBrowserHost}`)
 			try {
 			try {
-				// Fetch the WebSocket endpoint from the Chrome DevTools Protocol
-				const versionUrl = `${remoteBrowserHost.replace(/\/$/, "")}/json/version`
-				console.log(`Fetching WebSocket endpoint from ${versionUrl}`)
+				const hostIsValid = await tryChromeHostUrl(remoteBrowserHost)
 
 
-				const response = await axios.get(versionUrl)
-				browserWSEndpoint = response.data.webSocketDebuggerUrl
-
-				if (!browserWSEndpoint) {
-					throw new Error("Could not find webSocketDebuggerUrl in the response")
+				if (!hostIsValid) {
+					throw new Error("Could not find chromeHostUrl in the response")
 				}
 				}
 
 
-				console.log(`Found WebSocket endpoint: ${browserWSEndpoint}`)
-
-				// Cache the successful endpoint
-				this.cachedWebSocketEndpoint = browserWSEndpoint
-				this.lastConnectionAttempt = Date.now()
+				console.log(`Found WebSocket endpoint: ${remoteBrowserHost}`)
 
 
-				this.browser = await connect({
-					browserWSEndpoint,
-					defaultViewport: getViewport(),
-				})
-				this.page = await this.browser?.newPage()
-				return
+				if (await this.connectWithChromeHostUrl(remoteBrowserHost)) {
+					return true
+				}
 			} catch (error) {
 			} catch (error) {
 				console.error(`Failed to connect to remote browser: ${error}`)
 				console.error(`Failed to connect to remote browser: ${error}`)
 				// Fall back to auto-discovery if remote connection fails
 				// Fall back to auto-discovery if remote connection fails
 			}
 			}
 		}
 		}
 
 
-		// Always try auto-discovery if no custom URL is specified or if connection failed
 		try {
 		try {
-			console.log("Attempting auto-discovery...")
-			const discoveredHost = await discoverChromeInstances()
-
-			if (discoveredHost) {
-				console.log(`Auto-discovered Chrome at ${discoveredHost}`)
+			console.log("Attempting browser auto-discovery...")
+			const chromeHostUrl = await discoverChromeHostUrl()
 
 
-				// Don't save the discovered host to global state to avoid overriding user preference
-				// We'll just use it for this session
-
-				// Try to connect to the discovered host
-				const testResult = await testBrowserConnection(discoveredHost)
-
-				if (testResult.success && testResult.endpoint) {
-					// Cache the successful endpoint
-					this.cachedWebSocketEndpoint = testResult.endpoint
-					this.lastConnectionAttempt = Date.now()
-
-					this.browser = await connect({
-						browserWSEndpoint: testResult.endpoint,
-						defaultViewport: getViewport(),
-					})
-					this.page = await this.browser?.newPage()
-					return
-				}
+			if (chromeHostUrl && (await this.connectWithChromeHostUrl(chromeHostUrl))) {
+				return true
 			}
 			}
 		} catch (error) {
 		} catch (error) {
 			console.error(`Auto-discovery failed: ${error}`)
 			console.error(`Auto-discovery failed: ${error}`)
 			// Fall back to local browser if auto-discovery fails
 			// Fall back to local browser if auto-discovery fails
 		}
 		}
 
 
-		// If all remote connection attempts fail, fall back to local browser
-		console.log("Falling back to local browser")
-		const stats = await this.ensureChromiumExists()
-		this.browser = await stats.puppeteer.launch({
-			args: [
-				"--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
-			],
-			executablePath: stats.executablePath,
-			defaultViewport: getViewport(),
-			// headless: false,
-		})
-		// (latest version of puppeteer does not add headless to user agent)
-		this.page = await this.browser?.newPage()
+		return false
+	}
+
+	async launchBrowser(): Promise<void> {
+		console.log("launch browser called")
+
+		// Check if remote browser connection is enabled
+		const remoteBrowserEnabled = this.context.globalState.get("remoteBrowserEnabled") as boolean | undefined
+
+		if (!remoteBrowserEnabled) {
+			console.log("Launching local browser")
+			if (this.browser) {
+				// throw new Error("Browser already launched")
+				await this.closeBrowser() // this may happen when the model launches a browser again after having used it already before
+			} else {
+				// If browser wasn't open, just reset the state
+				this.resetBrowserState()
+			}
+			await this.launchLocalBrowser()
+		} else {
+			console.log("Connecting to remote browser")
+			// Remote browser connection is enabled
+			const remoteConnected = await this.connectToRemoteBrowser()
+
+			// If all remote connection attempts fail, fall back to local browser
+			if (!remoteConnected) {
+				console.log("Falling back to local browser")
+				await this.launchLocalBrowser()
+			}
+		}
 	}
 	}
 
 
+	/**
+	 * Closes the browser and resets browser state
+	 */
 	async closeBrowser(): Promise<BrowserActionResult> {
 	async closeBrowser(): Promise<BrowserActionResult> {
 		if (this.browser || this.page) {
 		if (this.browser || this.page) {
 			console.log("closing browser...")
 			console.log("closing browser...")
 
 
-			const remoteBrowserEnabled = this.context.globalState.get("remoteBrowserEnabled") as string | undefined
+			const remoteBrowserEnabled = this.context.globalState.get("remoteBrowserEnabled") as boolean | undefined
 			if (remoteBrowserEnabled && this.browser) {
 			if (remoteBrowserEnabled && this.browser) {
 				await this.browser.disconnect().catch(() => {})
 				await this.browser.disconnect().catch(() => {})
 			} else {
 			} else {
 				await this.browser?.close().catch(() => {})
 				await this.browser?.close().catch(() => {})
+				this.resetBrowserState()
 			}
 			}
 
 
-			this.browser = undefined
-			this.page = undefined
-			this.currentMousePosition = undefined
+			// this.resetBrowserState()
 		}
 		}
 		return {}
 		return {}
 	}
 	}
 
 
+	/**
+	 * Resets all browser state variables
+	 */
+	private resetBrowserState(): void {
+		this.browser = undefined
+		this.page = undefined
+		this.currentMousePosition = undefined
+	}
+
 	async doAction(action: (page: Page) => Promise<void>): Promise<BrowserActionResult> {
 	async doAction(action: (page: Page) => Promise<void>): Promise<BrowserActionResult> {
 		if (!this.page) {
 		if (!this.page) {
 			throw new Error(
 			throw new Error(
@@ -297,13 +300,118 @@ export class BrowserSession {
 		}
 		}
 	}
 	}
 
 
-	async navigateToUrl(url: string): Promise<BrowserActionResult> {
-		return this.doAction(async (page) => {
-			// networkidle2 isn't good enough since page may take some time to load. we can assume locally running dev sites will reach networkidle0 in a reasonable amount of time
-			await page.goto(url, { timeout: 7_000, waitUntil: ["domcontentloaded", "networkidle2"] })
-			// await page.goto(url, { timeout: 10_000, waitUntil: "load" })
-			await this.waitTillHTMLStable(page) // in case the page is loading more resources
+	/**
+	 * Extract the root domain from a URL
+	 * e.g., http://localhost:3000/path -> localhost:3000
+	 * e.g., https://example.com/path -> example.com
+	 */
+	private getRootDomain(url: string): string {
+		try {
+			const urlObj = new URL(url)
+			// Remove www. prefix if present
+			return urlObj.host.replace(/^www\./, "")
+		} catch (error) {
+			// If URL parsing fails, return the original URL
+			return url
+		}
+	}
+
+	/**
+	 * Navigate to a URL with standard loading options
+	 */
+	private async navigatePageToUrl(page: Page, url: string): Promise<void> {
+		await page.goto(url, { timeout: 7_000, waitUntil: ["domcontentloaded", "networkidle2"] })
+		await this.waitTillHTMLStable(page)
+	}
+
+	/**
+	 * Creates a new tab and navigates to the specified URL
+	 */
+	private async createNewTab(url: string): Promise<BrowserActionResult> {
+		if (!this.browser) {
+			throw new Error("Browser is not launched")
+		}
+
+		// Create a new page
+		const newPage = await this.browser.newPage()
+
+		// Set the new page as the active page
+		this.page = newPage
+
+		// Navigate to the URL
+		const result = await this.doAction(async (page) => {
+			await this.navigatePageToUrl(page, url)
 		})
 		})
+
+		return result
+	}
+
+	async navigateToUrl(url: string): Promise<BrowserActionResult> {
+		if (!this.browser) {
+			throw new Error("Browser is not launched")
+		}
+		// Remove trailing slash for comparison
+		const normalizedNewUrl = url.replace(/\/$/, "")
+
+		// Extract the root domain from the URL
+		const rootDomain = this.getRootDomain(normalizedNewUrl)
+
+		// Get all current pages
+		const pages = await this.browser.pages()
+
+		// Try to find a page with the same root domain
+		let existingPage: Page | undefined
+
+		for (const page of pages) {
+			try {
+				const pageUrl = page.url()
+				if (pageUrl && this.getRootDomain(pageUrl) === rootDomain) {
+					existingPage = page
+					break
+				}
+			} catch (error) {
+				// Skip pages that might have been closed or have errors
+				console.log(`Error checking page URL: ${error}`)
+				continue
+			}
+		}
+
+		if (existingPage) {
+			// Tab with the same root domain exists, switch to it
+			console.log(`Tab with domain ${rootDomain} already exists, switching to it`)
+
+			// Update the active page
+			this.page = existingPage
+			existingPage.bringToFront()
+
+			// Navigate to the new URL if it's different
+			const currentUrl = existingPage.url().replace(/\/$/, "") // Remove trailing / if present
+			if (this.getRootDomain(currentUrl) === rootDomain && currentUrl !== normalizedNewUrl) {
+				console.log(`Navigating to new URL: ${normalizedNewUrl}`)
+				console.log(`Current URL: ${currentUrl}`)
+				console.log(`Root domain: ${this.getRootDomain(currentUrl)}`)
+				console.log(`New URL: ${normalizedNewUrl}`)
+				// Navigate to the new URL
+				return this.doAction(async (page) => {
+					await this.navigatePageToUrl(page, normalizedNewUrl)
+				})
+			} else {
+				console.log(`Tab with domain ${rootDomain} already exists, and URL is the same: ${normalizedNewUrl}`)
+				// URL is the same, just reload the page to ensure it's up to date
+				console.log(`Reloading page: ${normalizedNewUrl}`)
+				console.log(`Current URL: ${currentUrl}`)
+				console.log(`Root domain: ${this.getRootDomain(currentUrl)}`)
+				console.log(`New URL: ${normalizedNewUrl}`)
+				return this.doAction(async (page) => {
+					await page.reload({ timeout: 7_000, waitUntil: ["domcontentloaded", "networkidle2"] })
+					await this.waitTillHTMLStable(page)
+				})
+			}
+		} else {
+			// No tab with this root domain exists, create a new one
+			console.log(`No tab with domain ${rootDomain} exists, creating a new one`)
+			return this.createNewTab(normalizedNewUrl)
+		}
 	}
 	}
 
 
 	// page.goto { waitUntil: "networkidle0" } may not ever resolve, and not waiting could return page content too early before js has loaded
 	// page.goto { waitUntil: "networkidle0" } may not ever resolve, and not waiting could return page content too early before js has loaded
@@ -339,36 +447,50 @@ export class BrowserSession {
 		}
 		}
 	}
 	}
 
 
-	async click(coordinate: string): Promise<BrowserActionResult> {
+	/**
+	 * Handles mouse interaction with network activity monitoring
+	 */
+	private async handleMouseInteraction(
+		page: Page,
+		coordinate: string,
+		action: (x: number, y: number) => Promise<void>,
+	): Promise<void> {
 		const [x, y] = coordinate.split(",").map(Number)
 		const [x, y] = coordinate.split(",").map(Number)
-		return this.doAction(async (page) => {
-			// Set up network request monitoring
-			let hasNetworkActivity = false
-			const requestListener = () => {
-				hasNetworkActivity = true
-			}
-			page.on("request", requestListener)
-
-			// Perform the click
-			await page.mouse.click(x, y)
-			this.currentMousePosition = coordinate
-
-			// Small delay to check if click triggered any network activity
-			await delay(100)
-
-			if (hasNetworkActivity) {
-				// If we detected network activity, wait for navigation/loading
-				await page
-					.waitForNavigation({
-						waitUntil: ["domcontentloaded", "networkidle2"],
-						timeout: 7000,
-					})
-					.catch(() => {})
-				await this.waitTillHTMLStable(page)
-			}
 
 
-			// Clean up listener
-			page.off("request", requestListener)
+		// Set up network request monitoring
+		let hasNetworkActivity = false
+		const requestListener = () => {
+			hasNetworkActivity = true
+		}
+		page.on("request", requestListener)
+
+		// Perform the mouse action
+		await action(x, y)
+		this.currentMousePosition = coordinate
+
+		// Small delay to check if action triggered any network activity
+		await delay(100)
+
+		if (hasNetworkActivity) {
+			// If we detected network activity, wait for navigation/loading
+			await page
+				.waitForNavigation({
+					waitUntil: ["domcontentloaded", "networkidle2"],
+					timeout: 7000,
+				})
+				.catch(() => {})
+			await this.waitTillHTMLStable(page)
+		}
+
+		// Clean up listener
+		page.off("request", requestListener)
+	}
+
+	async click(coordinate: string): Promise<BrowserActionResult> {
+		return this.doAction(async (page) => {
+			await this.handleMouseInteraction(page, coordinate, async (x, y) => {
+				await page.mouse.click(x, y)
+			})
 		})
 		})
 	}
 	}
 
 
@@ -378,31 +500,42 @@ export class BrowserSession {
 		})
 		})
 	}
 	}
 
 
+	/**
+	 * Scrolls the page up or down by one viewport height (as reported by getViewport())
+	 */
+	private async scrollPage(page: Page, direction: "up" | "down"): Promise<void> {
+		const { height } = this.getViewport()
+		const scrollAmount = direction === "down" ? height : -height
+
+		await page.evaluate((scrollHeight) => {
+			window.scrollBy({
+				top: scrollHeight,
+				behavior: "auto",
+			})
+		}, scrollAmount)
+
+		await delay(300)
+	}
+
 	async scrollDown(): Promise<BrowserActionResult> {
 	async scrollDown(): Promise<BrowserActionResult> {
-		const size = ((await this.context.globalState.get("browserViewportSize")) as string | undefined) || "900x600"
-		const height = parseInt(size.split("x")[1])
 		return this.doAction(async (page) => {
 		return this.doAction(async (page) => {
-			await page.evaluate((scrollHeight) => {
-				window.scrollBy({
-					top: scrollHeight,
-					behavior: "auto",
-				})
-			}, height)
-			await delay(300)
+			await this.scrollPage(page, "down")
 		})
 		})
 	}
 	}
 
 
 	async scrollUp(): Promise<BrowserActionResult> {
 	async scrollUp(): Promise<BrowserActionResult> {
-		const size = ((await this.context.globalState.get("browserViewportSize")) as string | undefined) || "900x600"
-		const height = parseInt(size.split("x")[1])
 		return this.doAction(async (page) => {
 		return this.doAction(async (page) => {
-			await page.evaluate((scrollHeight) => {
-				window.scrollBy({
-					top: -scrollHeight,
-					behavior: "auto",
-				})
-			}, height)
-			await delay(300)
+			await this.scrollPage(page, "up")
+		})
+	}
+
+	async hover(coordinate: string): Promise<BrowserActionResult> {
+		return this.doAction(async (page) => {
+			await this.handleMouseInteraction(page, coordinate, async (x, y) => {
+				await page.mouse.move(x, y)
+				// Small delay to allow any hover effects to appear
+				await delay(300)
+			})
 		})
 		})
 	}
 	}
 }
 }

+ 50 - 114
src/services/browser/browserDiscovery.ts

@@ -1,7 +1,6 @@
-import * as vscode from "vscode"
-import * as os from "os"
 import * as net from "net"
 import * as net from "net"
 import axios from "axios"
 import axios from "axios"
+import * as dns from "dns"
 
 
 /**
 /**
  * Check if a port is open on a given host
  * Check if a port is open on a given host
@@ -43,46 +42,14 @@ export async function isPortOpen(host: string, port: number, timeout = 1000): Pr
 /**
 /**
  * Try to connect to Chrome at a specific IP address
  * Try to connect to Chrome at a specific IP address
  */
  */
-export async function tryConnect(ipAddress: string): Promise<{ endpoint: string; ip: string } | null> {
+export async function tryChromeHostUrl(chromeHostUrl: string): Promise<boolean> {
 	try {
 	try {
-		console.log(`Trying to connect to Chrome at: http://${ipAddress}:9222/json/version`)
-		const response = await axios.get(`http://${ipAddress}:9222/json/version`, { timeout: 1000 })
+		console.log(`Trying to connect to Chrome at: ${chromeHostUrl}/json/version`)
+		const response = await axios.get(`${chromeHostUrl}/json/version`, { timeout: 1000 })
 		const data = response.data
 		const data = response.data
-		return { endpoint: data.webSocketDebuggerUrl, ip: ipAddress }
+		return true
 	} catch (error) {
 	} catch (error) {
-		return null
-	}
-}
-
-/**
- * Execute a shell command and return stdout and stderr
- */
-export async function executeShellCommand(command: string): Promise<{ stdout: string; stderr: string }> {
-	return new Promise<{ stdout: string; stderr: string }>((resolve) => {
-		const cp = require("child_process")
-		cp.exec(command, (err: any, stdout: string, stderr: string) => {
-			resolve({ stdout, stderr })
-		})
-	})
-}
-
-/**
- * Get Docker gateway IP without UI feedback
- */
-export async function getDockerGatewayIP(): Promise<string | null> {
-	try {
-		if (process.platform === "linux") {
-			try {
-				const { stdout } = await executeShellCommand("ip route | grep default | awk '{print $3}'")
-				return stdout.trim()
-			} catch (error) {
-				console.log("Could not determine Docker gateway IP:", error)
-			}
-		}
-		return null
-	} catch (error) {
-		console.log("Could not determine Docker gateway IP:", error)
-		return null
+		return false
 	}
 	}
 }
 }
 
 
@@ -93,7 +60,6 @@ export async function getDockerHostIP(): Promise<string | null> {
 	try {
 	try {
 		// Try to resolve host.docker.internal (works on Docker Desktop)
 		// Try to resolve host.docker.internal (works on Docker Desktop)
 		return new Promise((resolve) => {
 		return new Promise((resolve) => {
-			const dns = require("dns")
 			dns.lookup("host.docker.internal", (err: any, address: string) => {
 			dns.lookup("host.docker.internal", (err: any, address: string) => {
 				if (err) {
 				if (err) {
 					resolve(null)
 					resolve(null)
@@ -111,7 +77,7 @@ export async function getDockerHostIP(): Promise<string | null> {
 /**
 /**
  * Scan a network range for Chrome debugging port
  * Scan a network range for Chrome debugging port
  */
  */
-export async function scanNetworkForChrome(baseIP: string): Promise<string | null> {
+export async function scanNetworkForChrome(baseIP: string, port: number): Promise<string | null> {
 	if (!baseIP || !baseIP.match(/^\d+\.\d+\.\d+\./)) {
 	if (!baseIP || !baseIP.match(/^\d+\.\d+\.\d+\./)) {
 		return null
 		return null
 	}
 	}
@@ -130,7 +96,7 @@ export async function scanNetworkForChrome(baseIP: string): Promise<string | nul
 
 
 	// Check priority IPs first
 	// Check priority IPs first
 	for (const ip of priorityIPs) {
 	for (const ip of priorityIPs) {
-		const isOpen = await isPortOpen(ip, 9222)
+		const isOpen = await isPortOpen(ip, port)
 		if (isOpen) {
 		if (isOpen) {
 			console.log(`Found Chrome debugging port open on ${ip}`)
 			console.log(`Found Chrome debugging port open on ${ip}`)
 			return ip
 			return ip
@@ -140,25 +106,11 @@ export async function scanNetworkForChrome(baseIP: string): Promise<string | nul
 	return null
 	return null
 }
 }
 
 
-/**
- * Discover Chrome instances on the network
- */
-export async function discoverChromeInstances(): Promise<string | null> {
+// Function to discover Chrome instances on the network
+const discoverChromeHosts = async (port: number): Promise<string | null> => {
 	// Get all network interfaces
 	// Get all network interfaces
-	const networkInterfaces = os.networkInterfaces()
 	const ipAddresses = []
 	const ipAddresses = []
 
 
-	// Always try localhost first
-	ipAddresses.push("localhost")
-	ipAddresses.push("127.0.0.1")
-
-	// Try to get Docker gateway IP (headless mode)
-	const gatewayIP = await getDockerGatewayIP()
-	if (gatewayIP) {
-		console.log("Found Docker gateway IP:", gatewayIP)
-		ipAddresses.push(gatewayIP)
-	}
-
 	// Try to get Docker host IP
 	// Try to get Docker host IP
 	const hostIP = await getDockerHostIP()
 	const hostIP = await getDockerHostIP()
 	if (hostIP) {
 	if (hostIP) {
@@ -166,44 +118,21 @@ export async function discoverChromeInstances(): Promise<string | null> {
 		ipAddresses.push(hostIP)
 		ipAddresses.push(hostIP)
 	}
 	}
 
 
-	// Add all local IP addresses from network interfaces
-	const localIPs: string[] = []
-	Object.values(networkInterfaces).forEach((interfaces) => {
-		if (!interfaces) return
-		interfaces.forEach((iface) => {
-			// Only consider IPv4 addresses
-			if (iface.family === "IPv4" || iface.family === (4 as any)) {
-				localIPs.push(iface.address)
-			}
-		})
-	})
-
-	// Add local IPs to the list
-	ipAddresses.push(...localIPs)
-
-	// Scan network for Chrome debugging port
-	for (const ip of localIPs) {
-		const chromeIP = await scanNetworkForChrome(ip)
-		if (chromeIP && !ipAddresses.includes(chromeIP)) {
-			console.log("Found potential Chrome host via network scan:", chromeIP)
-			ipAddresses.push(chromeIP)
-		}
-	}
-
 	// Remove duplicates
 	// Remove duplicates
 	const uniqueIPs = [...new Set(ipAddresses)]
 	const uniqueIPs = [...new Set(ipAddresses)]
 	console.log("IP Addresses to try:", uniqueIPs)
 	console.log("IP Addresses to try:", uniqueIPs)
 
 
 	// Try connecting to each IP address
 	// Try connecting to each IP address
 	for (const ip of uniqueIPs) {
 	for (const ip of uniqueIPs) {
-		const connection = await tryConnect(ip)
-		if (connection) {
-			console.log(`Successfully connected to Chrome at: ${connection.ip}`)
+		const hostEndpoint = `http://${ip}:${port}`
+
+		const hostIsValid = await tryChromeHostUrl(hostEndpoint)
+		if (hostIsValid) {
 			// Store the successful IP for future use
 			// Store the successful IP for future use
-			console.log(`✅ Found Chrome at ${connection.ip} - You can hardcode this IP if needed`)
+			console.log(`✅ Found Chrome at ${hostEndpoint}`)
 
 
 			// Return the host URL and endpoint
 			// Return the host URL and endpoint
-			return `http://${connection.ip}:9222`
+			return hostEndpoint
 		}
 		}
 	}
 	}
 
 
@@ -211,36 +140,43 @@ export async function discoverChromeInstances(): Promise<string | null> {
 }
 }
 
 
 /**
 /**
- * Test connection to a remote browser
+ * Find the host URL of a reachable Chrome remote debugging endpoint.
+ * First tries localhost and 127.0.0.1, then attempts auto-discovery if needed.
+ * A host counts as reachable when a GET of `<host>/json/version` succeeds.
+ * @param port Browser debugging port (default: 9222)
+ * @returns Host URL (e.g. http://localhost:9222) if connection is successful, null otherwise
  */
  */
-export async function testBrowserConnection(
-	host: string,
-): Promise<{ success: boolean; message: string; endpoint?: string }> {
-	try {
-		// Fetch the WebSocket endpoint from the Chrome DevTools Protocol
-		const versionUrl = `${host.replace(/\/$/, "")}/json/version`
-		console.log(`Testing connection to ${versionUrl}`)
-
-		const response = await axios.get(versionUrl, { timeout: 3000 })
-		const browserWSEndpoint = response.data.webSocketDebuggerUrl
-
-		if (!browserWSEndpoint) {
-			return {
-				success: false,
-				message: "Could not find webSocketDebuggerUrl in the response",
-			}
+export async function discoverChromeHostUrl(port: number = 9222): Promise<string | null> {
+	// First try specific hosts
+	const hostsToTry = [`http://localhost:${port}`, `http://127.0.0.1:${port}`]
+
+	// Try each host directly first
+	for (const hostUrl of hostsToTry) {
+		console.log(`Trying to connect to: ${hostUrl}`)
+		try {
+			const hostIsValid = await tryChromeHostUrl(hostUrl)
+			if (hostIsValid) return hostUrl
+		} catch (error) {
+			console.log(`Failed to connect to ${hostUrl}: ${error instanceof Error ? error.message : error}`)
 		}
 		}
+	}
 
 
-		return {
-			success: true,
-			message: "Successfully connected to Chrome browser",
-			endpoint: browserWSEndpoint,
-		}
-	} catch (error) {
-		console.error(`Failed to connect to remote browser: ${error}`)
-		return {
-			success: false,
-			message: `Failed to connect: ${error instanceof Error ? error.message : String(error)}`,
+	// If direct connections failed, attempt auto-discovery
+	console.log("Direct connections failed. Attempting auto-discovery...")
+
+	const discoveredHostUrl = await discoverChromeHosts(port)
+	if (discoveredHostUrl) {
+		console.log(`Trying to connect to discovered host: ${discoveredHostUrl}`)
+		try {
+			const hostIsValid = await tryChromeHostUrl(discoveredHostUrl)
+			if (hostIsValid) return discoveredHostUrl
+			console.log(`Failed to connect to discovered host ${discoveredHostUrl}`)
+		} catch (error) {
+			console.log(`Error connecting to discovered host: ${error instanceof Error ? error.message : error}`)
 		}
 		}
+	} else {
+		console.log("No browser instances discovered on network")
 	}
 	}
+
+	return null
 }
 }

+ 1 - 1
src/shared/ExtensionMessage.ts

@@ -233,7 +233,7 @@ export interface ClineSayTool {
 }
 }
 
 
 // Must keep in sync with system prompt.
 // Must keep in sync with system prompt.
-export const browserActions = ["launch", "click", "type", "scroll_down", "scroll_up", "close"] as const
+export const browserActions = ["launch", "click", "hover", "type", "scroll_down", "scroll_up", "close"] as const
 
 
 export type BrowserAction = (typeof browserActions)[number]
 export type BrowserAction = (typeof browserActions)[number]
 
 

+ 0 - 1
src/shared/WebviewMessage.ts

@@ -115,7 +115,6 @@ export interface WebviewMessage {
 		| "telemetrySetting"
 		| "telemetrySetting"
 		| "showRooIgnoredFiles"
 		| "showRooIgnoredFiles"
 		| "testBrowserConnection"
 		| "testBrowserConnection"
-		| "discoverBrowser"
 		| "browserConnectionResult"
 		| "browserConnectionResult"
 		| "remoteBrowserEnabled"
 		| "remoteBrowserEnabled"
 		| "language"
 		| "language"

+ 2 - 3
webview-ui/src/components/chat/ChatTextArea.tsx

@@ -70,12 +70,11 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 			currentApiConfigName,
 			currentApiConfigName,
 			listApiConfigMeta,
 			listApiConfigMeta,
 			customModes,
 			customModes,
-			cwd, 
+			cwd,
 			osInfo,
 			osInfo,
 			pinnedApiConfigs,
 			pinnedApiConfigs,
 			togglePinnedApiConfig,
 			togglePinnedApiConfig,
-		} =
-			useExtensionState()
+		} = useExtensionState()
 
 
 		// Find the ID and display text for the currently selected API configuration
 		// Find the ID and display text for the currently selected API configuration
 		const { currentConfigId, displayName } = useMemo(() => {
 		const { currentConfigId, displayName } = useMemo(() => {

+ 10 - 28
webview-ui/src/components/settings/BrowserSettings.tsx

@@ -1,14 +1,14 @@
-import { HTMLAttributes, useState, useEffect, useMemo } from "react"
 import { VSCodeButton, VSCodeCheckbox, VSCodeTextField } from "@vscode/webview-ui-toolkit/react"
 import { VSCodeButton, VSCodeCheckbox, VSCodeTextField } from "@vscode/webview-ui-toolkit/react"
 import { SquareMousePointer } from "lucide-react"
 import { SquareMousePointer } from "lucide-react"
+import { HTMLAttributes, useEffect, useMemo, useState } from "react"
 
 
-import { vscode } from "@/utils/vscode"
-import { useAppTranslation } from "@/i18n/TranslationContext"
 import { Select, SelectContent, SelectGroup, SelectItem, SelectTrigger, SelectValue, Slider } from "@/components/ui"
 import { Select, SelectContent, SelectGroup, SelectItem, SelectTrigger, SelectValue, Slider } from "@/components/ui"
+import { useAppTranslation } from "@/i18n/TranslationContext"
+import { vscode } from "@/utils/vscode"
 
 
-import { SetCachedStateField } from "./types"
-import { SectionHeader } from "./SectionHeader"
 import { Section } from "./Section"
 import { Section } from "./Section"
+import { SectionHeader } from "./SectionHeader"
+import { SetCachedStateField } from "./types"
 
 
 type BrowserSettingsProps = HTMLAttributes<HTMLDivElement> & {
 type BrowserSettingsProps = HTMLAttributes<HTMLDivElement> & {
 	browserToolEnabled?: boolean
 	browserToolEnabled?: boolean
@@ -37,7 +37,7 @@ export const BrowserSettings = ({
 	const { t } = useAppTranslation()
 	const { t } = useAppTranslation()
 
 
 	const [testingConnection, setTestingConnection] = useState(false)
 	const [testingConnection, setTestingConnection] = useState(false)
-	const [testResult, setTestResult] = useState<{ success: boolean; message: string } | null>(null)
+	const [testResult, setTestResult] = useState<{ success: boolean; text: string } | null>(null)
 	const [discovering, setDiscovering] = useState(false)
 	const [discovering, setDiscovering] = useState(false)
 
 
 	// We don't need a local state for useRemoteBrowser since we're using the
 	// We don't need a local state for useRemoteBrowser since we're using the
@@ -50,7 +50,7 @@ export const BrowserSettings = ({
 			const message = event.data
 			const message = event.data
 
 
 			if (message.type === "browserConnectionResult") {
 			if (message.type === "browserConnectionResult") {
-				setTestResult({ success: message.success, message: message.text })
+				setTestResult({ success: message.success, text: message.text })
 				setTestingConnection(false)
 				setTestingConnection(false)
 				setDiscovering(false)
 				setDiscovering(false)
 			}
 			}
@@ -73,28 +73,12 @@ export const BrowserSettings = ({
 		} catch (error) {
 		} catch (error) {
 			setTestResult({
 			setTestResult({
 				success: false,
 				success: false,
-				message: `Error: ${error instanceof Error ? error.message : String(error)}`,
+				text: `Error: ${error instanceof Error ? error.message : String(error)}`,
 			})
 			})
 			setTestingConnection(false)
 			setTestingConnection(false)
 		}
 		}
 	}
 	}
 
 
-	const discoverBrowser = async () => {
-		setDiscovering(true)
-		setTestResult(null)
-
-		try {
-			// Send a message to the extension to discover Chrome instances.
-			vscode.postMessage({ type: "discoverBrowser" })
-		} catch (error) {
-			setTestResult({
-				success: false,
-				message: `Error: ${error instanceof Error ? error.message : String(error)}`,
-			})
-			setDiscovering(false)
-		}
-	}
-
 	const options = useMemo(
 	const options = useMemo(
 		() => [
 		() => [
 			{
 			{
@@ -206,9 +190,7 @@ export const BrowserSettings = ({
 										placeholder={t("settings:browser.remote.urlPlaceholder")}
 										placeholder={t("settings:browser.remote.urlPlaceholder")}
 										style={{ flexGrow: 1 }}
 										style={{ flexGrow: 1 }}
 									/>
 									/>
-									<VSCodeButton
-										disabled={testingConnection}
-										onClick={remoteBrowserHost ? testConnection : discoverBrowser}>
+									<VSCodeButton disabled={testingConnection} onClick={testConnection}>
 										{testingConnection || discovering
 										{testingConnection || discovering
 											? t("settings:browser.remote.testingButton")
 											? t("settings:browser.remote.testingButton")
 											: t("settings:browser.remote.testButton")}
 											: t("settings:browser.remote.testButton")}
@@ -221,7 +203,7 @@ export const BrowserSettings = ({
 												? "bg-green-800/20 text-green-400"
 												? "bg-green-800/20 text-green-400"
 												: "bg-red-800/20 text-red-400"
 												: "bg-red-800/20 text-red-400"
 										}`}>
 										}`}>
-										{testResult.message}
+										{testResult.text}
 									</div>
 									</div>
 								)}
 								)}
 								<div className="text-vscode-descriptionForeground text-sm mt-1">
 								<div className="text-vscode-descriptionForeground text-sm mt-1">