BrowserSessionRow.tsx 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572
  1. import deepEqual from "fast-deep-equal"
  2. import React, { memo, useEffect, useMemo, useRef, useState } from "react"
  3. import { useSize } from "react-use"
  4. import { useExtensionState } from "../../context/ExtensionStateContext"
  5. import {
  6. BrowserAction,
  7. BrowserActionResult,
  8. ClineMessage,
  9. ClineSayBrowserAction,
  10. } from "../../../../src/shared/ExtensionMessage"
  11. import { vscode } from "../../utils/vscode"
  12. import CodeBlock, { CODE_BLOCK_BG_COLOR } from "../common/CodeBlock"
  13. import { ChatRowContent, ProgressIndicator } from "./ChatRow"
  14. import { VSCodeButton } from "@vscode/webview-ui-toolkit/react"
  15. interface BrowserSessionRowProps {
  16. messages: ClineMessage[]
  17. isExpanded: (messageTs: number) => boolean
  18. onToggleExpand: (messageTs: number) => void
  19. lastModifiedMessage?: ClineMessage
  20. isLast: boolean
  21. onHeightChange: (isTaller: boolean) => void
  22. isStreaming: boolean
  23. }
  24. const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
  25. const { messages, isLast, onHeightChange, lastModifiedMessage } = props
  26. const prevHeightRef = useRef(0)
  27. const [maxActionHeight, setMaxActionHeight] = useState(0)
  28. const [consoleLogsExpanded, setConsoleLogsExpanded] = useState(false)
  29. const { browserViewportSize = "900x600" } = useExtensionState()
  30. const [viewportWidth, viewportHeight] = browserViewportSize.split("x").map(Number)
  31. const aspectRatio = ((viewportHeight / viewportWidth) * 100).toFixed(2)
  32. const defaultMousePosition = `${Math.round(viewportWidth / 2)},${Math.round(viewportHeight / 2)}`
  33. const isLastApiReqInterrupted = useMemo(() => {
  34. // Check if last api_req_started is cancelled
  35. const lastApiReqStarted = [...messages].reverse().find((m) => m.say === "api_req_started")
  36. if (lastApiReqStarted?.text) {
  37. const info = JSON.parse(lastApiReqStarted.text) as { cancelReason: string | null }
  38. if (info && info.cancelReason !== null) {
  39. return true
  40. }
  41. }
  42. const lastApiReqFailed = isLast && lastModifiedMessage?.ask === "api_req_failed"
  43. if (lastApiReqFailed) {
  44. return true
  45. }
  46. return false
  47. }, [messages, lastModifiedMessage, isLast])
  48. const isBrowsing = useMemo(() => {
  49. return isLast && messages.some((m) => m.say === "browser_action_result") && !isLastApiReqInterrupted // after user approves, browser_action_result with "" is sent to indicate that the session has started
  50. }, [isLast, messages, isLastApiReqInterrupted])
  51. // Organize messages into pages with current state and next action
  52. const pages = useMemo(() => {
  53. const result: {
  54. currentState: {
  55. url?: string
  56. screenshot?: string
  57. mousePosition?: string
  58. consoleLogs?: string
  59. messages: ClineMessage[] // messages up to and including the result
  60. }
  61. nextAction?: {
  62. messages: ClineMessage[] // messages leading to next result
  63. }
  64. }[] = []
  65. let currentStateMessages: ClineMessage[] = []
  66. let nextActionMessages: ClineMessage[] = []
  67. messages.forEach((message) => {
  68. if (message.ask === "browser_action_launch") {
  69. // Start first page
  70. currentStateMessages = [message]
  71. } else if (message.say === "browser_action_result") {
  72. if (message.text === "") {
  73. // first browser_action_result is an empty string that signals that session has started
  74. return
  75. }
  76. // Complete current state
  77. currentStateMessages.push(message)
  78. const resultData = JSON.parse(message.text || "{}") as BrowserActionResult
  79. // Add page with current state and previous next actions
  80. result.push({
  81. currentState: {
  82. url: resultData.currentUrl,
  83. screenshot: resultData.screenshot,
  84. mousePosition: resultData.currentMousePosition,
  85. consoleLogs: resultData.logs,
  86. messages: [...currentStateMessages],
  87. },
  88. nextAction:
  89. nextActionMessages.length > 0
  90. ? {
  91. messages: [...nextActionMessages],
  92. }
  93. : undefined,
  94. })
  95. // Reset for next page
  96. currentStateMessages = []
  97. nextActionMessages = []
  98. } else if (
  99. message.say === "api_req_started" ||
  100. message.say === "text" ||
  101. message.say === "browser_action"
  102. ) {
  103. // These messages lead to the next result, so they should always go in nextActionMessages
  104. nextActionMessages.push(message)
  105. } else {
  106. // Any other message types
  107. currentStateMessages.push(message)
  108. }
  109. })
  110. // Add incomplete page if exists
  111. if (currentStateMessages.length > 0 || nextActionMessages.length > 0) {
  112. result.push({
  113. currentState: {
  114. messages: [...currentStateMessages],
  115. },
  116. nextAction:
  117. nextActionMessages.length > 0
  118. ? {
  119. messages: [...nextActionMessages],
  120. }
  121. : undefined,
  122. })
  123. }
  124. return result
  125. }, [messages])
  126. // Auto-advance to latest page
  127. const [currentPageIndex, setCurrentPageIndex] = useState(0)
  128. useEffect(() => {
  129. setCurrentPageIndex(pages.length - 1)
  130. }, [pages.length])
  131. // Get initial URL from launch message
  132. const initialUrl = useMemo(() => {
  133. const launchMessage = messages.find((m) => m.ask === "browser_action_launch")
  134. return launchMessage?.text || ""
  135. }, [messages])
  136. // Find the latest available URL and screenshot
  137. const latestState = useMemo(() => {
  138. for (let i = pages.length - 1; i >= 0; i--) {
  139. const page = pages[i]
  140. if (page.currentState.url || page.currentState.screenshot) {
  141. return {
  142. url: page.currentState.url,
  143. mousePosition: page.currentState.mousePosition,
  144. consoleLogs: page.currentState.consoleLogs,
  145. screenshot: page.currentState.screenshot,
  146. }
  147. }
  148. }
  149. return { url: undefined, mousePosition: undefined, consoleLogs: undefined, screenshot: undefined }
  150. }, [pages])
  151. const currentPage = pages[currentPageIndex]
  152. const isLastPage = currentPageIndex === pages.length - 1
  153. // Use latest state if we're on the last page and don't have a state yet
  154. const displayState = isLastPage
  155. ? {
  156. url: currentPage?.currentState.url || latestState.url || initialUrl,
  157. mousePosition:
  158. currentPage?.currentState.mousePosition || latestState.mousePosition || defaultMousePosition,
  159. consoleLogs: currentPage?.currentState.consoleLogs,
  160. screenshot: currentPage?.currentState.screenshot || latestState.screenshot,
  161. }
  162. : {
  163. url: currentPage?.currentState.url || initialUrl,
  164. mousePosition: currentPage?.currentState.mousePosition || defaultMousePosition,
  165. consoleLogs: currentPage?.currentState.consoleLogs,
  166. screenshot: currentPage?.currentState.screenshot,
  167. }
  168. const [actionContent, { height: actionHeight }] = useSize(
  169. <div>
  170. {currentPage?.nextAction?.messages.map((message) => (
  171. <BrowserSessionRowContent
  172. key={message.ts}
  173. {...props}
  174. message={message}
  175. setMaxActionHeight={setMaxActionHeight}
  176. />
  177. ))}
  178. {!isBrowsing && messages.some((m) => m.say === "browser_action_result") && currentPageIndex === 0 && (
  179. <BrowserActionBox action={"launch"} text={initialUrl} />
  180. )}
  181. </div>,
  182. )
  183. useEffect(() => {
  184. if (actionHeight === 0 || actionHeight === Infinity) {
  185. return
  186. }
  187. if (actionHeight > maxActionHeight) {
  188. setMaxActionHeight(actionHeight)
  189. }
  190. }, [actionHeight, maxActionHeight])
  191. // Track latest click coordinate
  192. const latestClickPosition = useMemo(() => {
  193. if (!isBrowsing) return undefined
  194. // Look through current page's next actions for the latest browser_action
  195. const actions = currentPage?.nextAction?.messages || []
  196. for (let i = actions.length - 1; i >= 0; i--) {
  197. const message = actions[i]
  198. if (message.say === "browser_action") {
  199. const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction
  200. if (browserAction.action === "click" && browserAction.coordinate) {
  201. return browserAction.coordinate
  202. }
  203. }
  204. }
  205. return undefined
  206. }, [isBrowsing, currentPage?.nextAction?.messages])
  207. // Use latest click position while browsing, otherwise use display state
  208. const mousePosition = isBrowsing
  209. ? latestClickPosition || displayState.mousePosition
  210. : displayState.mousePosition || defaultMousePosition
  211. const [browserSessionRow, { height: rowHeight }] = useSize(
  212. <div style={{ padding: "10px 6px 10px 15px", marginBottom: -10 }}>
  213. <div style={{ display: "flex", alignItems: "center", gap: "10px", marginBottom: "10px" }}>
  214. {isBrowsing ? (
  215. <ProgressIndicator />
  216. ) : (
  217. <span
  218. className={`codicon codicon-inspect`}
  219. style={{ color: "var(--vscode-foreground)", marginBottom: "-1.5px" }}></span>
  220. )}
  221. <span style={{ fontWeight: "bold" }}>
  222. <>Roo wants to use the browser:</>
  223. </span>
  224. </div>
  225. <div
  226. style={{
  227. borderRadius: 3,
  228. border: "1px solid var(--vscode-editorGroup-border)",
  229. overflow: "hidden",
  230. backgroundColor: CODE_BLOCK_BG_COLOR,
  231. marginBottom: 10,
  232. }}>
  233. {/* URL Bar */}
  234. <div
  235. style={{
  236. margin: "5px auto",
  237. width: "calc(100% - 10px)",
  238. boxSizing: "border-box", // includes padding in width calculation
  239. backgroundColor: "var(--vscode-input-background)",
  240. border: "1px solid var(--vscode-input-border)",
  241. borderRadius: "4px",
  242. padding: "3px 5px",
  243. display: "flex",
  244. alignItems: "center",
  245. justifyContent: "center",
  246. color: displayState.url
  247. ? "var(--vscode-input-foreground)"
  248. : "var(--vscode-descriptionForeground)",
  249. fontSize: "12px",
  250. }}>
  251. <div
  252. style={{
  253. textOverflow: "ellipsis",
  254. overflow: "hidden",
  255. whiteSpace: "nowrap",
  256. width: "100%",
  257. textAlign: "center",
  258. }}>
  259. {displayState.url || "http"}
  260. </div>
  261. </div>
  262. {/* Screenshot Area */}
  263. <div
  264. data-testid="screenshot-container"
  265. style={{
  266. width: "100%",
  267. paddingBottom: `${aspectRatio}%`, // height/width ratio
  268. position: "relative",
  269. backgroundColor: "var(--vscode-input-background)",
  270. }}>
  271. {displayState.screenshot ? (
  272. <img
  273. src={displayState.screenshot}
  274. alt="Browser screenshot"
  275. style={{
  276. position: "absolute",
  277. top: 0,
  278. left: 0,
  279. width: "100%",
  280. height: "100%",
  281. objectFit: "contain",
  282. cursor: "pointer",
  283. }}
  284. onClick={() =>
  285. vscode.postMessage({
  286. type: "openImage",
  287. text: displayState.screenshot,
  288. })
  289. }
  290. />
  291. ) : (
  292. <div
  293. style={{
  294. position: "absolute",
  295. top: "50%",
  296. left: "50%",
  297. transform: "translate(-50%, -50%)",
  298. }}>
  299. <span
  300. className="codicon codicon-globe"
  301. style={{ fontSize: "80px", color: "var(--vscode-descriptionForeground)" }}
  302. />
  303. </div>
  304. )}
  305. {displayState.mousePosition && (
  306. <BrowserCursor
  307. style={{
  308. position: "absolute",
  309. top: `${(parseInt(mousePosition.split(",")[1]) / viewportHeight) * 100}%`,
  310. left: `${(parseInt(mousePosition.split(",")[0]) / viewportWidth) * 100}%`,
  311. transition: "top 0.3s ease-out, left 0.3s ease-out",
  312. }}
  313. />
  314. )}
  315. </div>
  316. <div style={{ width: "100%" }}>
  317. <div
  318. onClick={() => {
  319. setConsoleLogsExpanded(!consoleLogsExpanded)
  320. }}
  321. style={{
  322. display: "flex",
  323. alignItems: "center",
  324. gap: "4px",
  325. width: "100%",
  326. justifyContent: "flex-start",
  327. cursor: "pointer",
  328. padding: `9px 8px ${consoleLogsExpanded ? 0 : 8}px 8px`,
  329. }}>
  330. <span className={`codicon codicon-chevron-${consoleLogsExpanded ? "down" : "right"}`}></span>
  331. <span style={{ fontSize: "0.8em" }}>Console Logs</span>
  332. </div>
  333. {consoleLogsExpanded && (
  334. <CodeBlock source={`${"```"}shell\n${displayState.consoleLogs || "(No new logs)"}\n${"```"}`} />
  335. )}
  336. </div>
  337. </div>
  338. {/* Action content with min height */}
  339. <div style={{ minHeight: maxActionHeight }}>{actionContent}</div>
  340. {/* Pagination moved to bottom */}
  341. {pages.length > 1 && (
  342. <div
  343. style={{
  344. display: "flex",
  345. justifyContent: "space-between",
  346. alignItems: "center",
  347. padding: "8px 0px",
  348. marginTop: "15px",
  349. borderTop: "1px solid var(--vscode-editorGroup-border)",
  350. }}>
  351. <div>
  352. Step {currentPageIndex + 1} of {pages.length}
  353. </div>
  354. <div style={{ display: "flex", gap: "4px" }}>
  355. <VSCodeButton
  356. disabled={currentPageIndex === 0 || isBrowsing}
  357. onClick={() => setCurrentPageIndex((i) => i - 1)}>
  358. Previous
  359. </VSCodeButton>
  360. <VSCodeButton
  361. disabled={currentPageIndex === pages.length - 1 || isBrowsing}
  362. onClick={() => setCurrentPageIndex((i) => i + 1)}>
  363. Next
  364. </VSCodeButton>
  365. </div>
  366. </div>
  367. )}
  368. </div>,
  369. )
  370. // Height change effect
  371. useEffect(() => {
  372. const isInitialRender = prevHeightRef.current === 0
  373. if (isLast && rowHeight !== 0 && rowHeight !== Infinity && rowHeight !== prevHeightRef.current) {
  374. if (!isInitialRender) {
  375. onHeightChange(rowHeight > prevHeightRef.current)
  376. }
  377. prevHeightRef.current = rowHeight
  378. }
  379. }, [rowHeight, isLast, onHeightChange])
  380. return browserSessionRow
  381. }, deepEqual)
  382. interface BrowserSessionRowContentProps extends Omit<BrowserSessionRowProps, "messages"> {
  383. message: ClineMessage
  384. setMaxActionHeight: (height: number) => void
  385. isStreaming: boolean
  386. }
  387. const BrowserSessionRowContent = ({
  388. message,
  389. isExpanded,
  390. onToggleExpand,
  391. lastModifiedMessage,
  392. isLast,
  393. setMaxActionHeight,
  394. isStreaming,
  395. }: BrowserSessionRowContentProps) => {
  396. const headerStyle: React.CSSProperties = {
  397. display: "flex",
  398. alignItems: "center",
  399. gap: "10px",
  400. marginBottom: "10px",
  401. }
  402. switch (message.type) {
  403. case "say":
  404. switch (message.say) {
  405. case "api_req_started":
  406. case "text":
  407. return (
  408. <div style={{ padding: "10px 0 10px 0" }}>
  409. <ChatRowContent
  410. message={message}
  411. isExpanded={isExpanded(message.ts)}
  412. onToggleExpand={() => {
  413. if (message.say === "api_req_started") {
  414. setMaxActionHeight(0)
  415. }
  416. onToggleExpand(message.ts)
  417. }}
  418. lastModifiedMessage={lastModifiedMessage}
  419. isLast={isLast}
  420. isStreaming={isStreaming}
  421. />
  422. </div>
  423. )
  424. case "browser_action":
  425. const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction
  426. return (
  427. <BrowserActionBox
  428. action={browserAction.action}
  429. coordinate={browserAction.coordinate}
  430. text={browserAction.text}
  431. />
  432. )
  433. default:
  434. return null
  435. }
  436. case "ask":
  437. switch (message.ask) {
  438. case "browser_action_launch":
  439. return (
  440. <>
  441. <div style={headerStyle}>
  442. <span style={{ fontWeight: "bold" }}>Browser Session Started</span>
  443. </div>
  444. <div
  445. style={{
  446. borderRadius: 3,
  447. border: "1px solid var(--vscode-editorGroup-border)",
  448. overflow: "hidden",
  449. backgroundColor: CODE_BLOCK_BG_COLOR,
  450. }}>
  451. <CodeBlock source={`${"```"}shell\n${message.text}\n${"```"}`} forceWrap={true} />
  452. </div>
  453. </>
  454. )
  455. default:
  456. return null
  457. }
  458. }
  459. }
  460. const BrowserActionBox = ({
  461. action,
  462. coordinate,
  463. text,
  464. }: {
  465. action: BrowserAction
  466. coordinate?: string
  467. text?: string
  468. }) => {
  469. const getBrowserActionText = (action: BrowserAction, coordinate?: string, text?: string) => {
  470. switch (action) {
  471. case "launch":
  472. return `Launch browser at ${text}`
  473. case "click":
  474. return `Click (${coordinate?.replace(",", ", ")})`
  475. case "type":
  476. return `Type "${text}"`
  477. case "scroll_down":
  478. return "Scroll down"
  479. case "scroll_up":
  480. return "Scroll up"
  481. case "close":
  482. return "Close browser"
  483. default:
  484. return action
  485. }
  486. }
  487. return (
  488. <div style={{ padding: "10px 0 0 0" }}>
  489. <div
  490. style={{
  491. borderRadius: 3,
  492. backgroundColor: CODE_BLOCK_BG_COLOR,
  493. overflow: "hidden",
  494. border: "1px solid var(--vscode-editorGroup-border)",
  495. }}>
  496. <div
  497. style={{
  498. display: "flex",
  499. alignItems: "center",
  500. padding: "9px 10px",
  501. }}>
  502. <span
  503. style={{
  504. whiteSpace: "normal",
  505. wordBreak: "break-word",
  506. }}>
  507. <span style={{ fontWeight: 500 }}>Browse Action: </span>
  508. {getBrowserActionText(action, coordinate, text)}
  509. </span>
  510. </div>
  511. </div>
  512. </div>
  513. )
  514. }
  515. const BrowserCursor: React.FC<{ style?: React.CSSProperties }> = ({ style }) => {
  516. // (can't use svgs in vsc extensions)
  517. const cursorBase64 =
  518. ""
  519. return (
  520. <img
  521. src={cursorBase64}
  522. style={{
  523. width: "17px",
  524. height: "22px",
  525. ...style,
  526. }}
  527. alt="cursor"
  528. aria-label="cursor"
  529. />
  530. )
  531. }
  532. export default BrowserSessionRow