BrowserSessionRow.tsx 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565
  1. import deepEqual from "fast-deep-equal"
  2. import React, { memo, useEffect, useMemo, useRef, useState } from "react"
  3. import { useSize } from "react-use"
  4. import { useExtensionState } from "../../context/ExtensionStateContext"
  5. import {
  6. BrowserAction,
  7. BrowserActionResult,
  8. ClineMessage,
  9. ClineSayBrowserAction,
  10. } from "../../../../src/shared/ExtensionMessage"
  11. import { vscode } from "../../utils/vscode"
  12. import CodeBlock, { CODE_BLOCK_BG_COLOR } from "../common/CodeBlock"
  13. import { ChatRowContent, ProgressIndicator } from "./ChatRow"
  14. import { VSCodeButton } from "@vscode/webview-ui-toolkit/react"
  /** Props accepted by {@link BrowserSessionRow}. */
  interface BrowserSessionRowProps {
    /** Messages of one browser session; the row groups them into pages. */
    messages: ClineMessage[]
    /** Called with a message's `ts`; the result is forwarded as that message's expanded state. */
    isExpanded: (messageTs: number) => boolean
    /** Called with a message's `ts` when that message's row is toggled. */
    onToggleExpand: (messageTs: number) => void
    /**
     * Forwarded to nested chat rows. When `isLast` is true and this message's
     * `ask` is "api_req_failed", the session is treated as interrupted.
     */
    lastModifiedMessage?: ClineMessage
    /** Gates the "browsing" state and height reporting (presumably: this is the last row in the chat - confirm with caller). */
    isLast: boolean
    /**
     * Called after the measured row height changes while `isLast` is true
     * (never for the first measurement); the argument is true when the row grew.
     */
    onHeightChange: (isTaller: boolean) => void
  }
  /**
   * Parses `text` as JSON and returns the value only when it is a non-null object.
   *
   * Message payloads are parsed during render, so a malformed or non-object
   * payload must degrade to "no data" rather than throw and break the row.
   *
   * @param text Raw JSON text; may be undefined or empty.
   * @returns The parsed object, or an empty object when `text` is missing,
   *          is not valid JSON, or does not encode an object.
   */
  function parseJsonObject<T extends object>(text: string | undefined): Partial<T> {
    if (!text) {
      return {}
    }
    try {
      const parsed: unknown = JSON.parse(text)
      return typeof parsed === "object" && parsed !== null ? (parsed as Partial<T>) : {}
    } catch {
      return {}
    }
  }

  /**
   * Renders one browser session as a paginated "browser window": URL bar,
   * screenshot with a cursor overlay, collapsible console logs, the action
   * messages that lead to the next result, and Previous/Next pagination.
   *
   * Messages are grouped into pages. A page is closed by each non-empty
   * `browser_action_result`; the messages that led to that result
   * (`api_req_started`, `text`, `browser_action`) are shown as the page's
   * "next action". The view jumps to the newest page whenever the page
   * count changes.
   *
   * The component is memoized with a deep-equality props comparison.
   */
  const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
    const { messages, isLast, onHeightChange, lastModifiedMessage } = props
    const prevHeightRef = useRef(0)
    const [maxActionHeight, setMaxActionHeight] = useState(0)
    const [consoleLogsExpanded, setConsoleLogsExpanded] = useState(false)

    const { browserViewportSize = "900x600" } = useExtensionState()
    const [parsedWidth, parsedHeight] = browserViewportSize.split("x").map(Number)
    // A malformed size string yields NaN/undefined; fall back to the default
    // 900x600 so the aspect ratio and cursor percentages stay valid CSS.
    const viewportWidth = parsedWidth > 0 ? parsedWidth : 900
    const viewportHeight = parsedHeight > 0 ? parsedHeight : 600
    const aspectRatio = ((viewportHeight / viewportWidth) * 100).toFixed(2)
    const defaultMousePosition = `${Math.round(viewportWidth / 2)},${Math.round(viewportHeight / 2)}`

    const isLastApiReqInterrupted = useMemo(() => {
      // Check if last api_req_started is cancelled
      const lastApiReqStarted = [...messages].reverse().find((m) => m.say === "api_req_started")
      const info = parseJsonObject<{ cancelReason?: unknown }>(lastApiReqStarted?.text)
      if (info.cancelReason != null) {
        return true
      }
      return isLast && lastModifiedMessage?.ask === "api_req_failed"
    }, [messages, lastModifiedMessage, isLast])

    const isBrowsing = useMemo(() => {
      // after user approves, browser_action_result with "" is sent to indicate that the session has started
      return isLast && messages.some((m) => m.say === "browser_action_result") && !isLastApiReqInterrupted
    }, [isLast, messages, isLastApiReqInterrupted])

    // Organize messages into pages with current state and next action
    const pages = useMemo(() => {
      const result: {
        currentState: {
          url?: string
          screenshot?: string
          mousePosition?: string
          consoleLogs?: string
          messages: ClineMessage[] // messages up to and including the result
        }
        nextAction?: {
          messages: ClineMessage[] // messages leading to next result
        }
      }[] = []

      let currentStateMessages: ClineMessage[] = []
      let nextActionMessages: ClineMessage[] = []

      for (const message of messages) {
        if (message.ask === "browser_action_launch") {
          // Start first page
          currentStateMessages = [message]
        } else if (message.say === "browser_action_result") {
          if (message.text === "") {
            // first browser_action_result is an empty string that signals that session has started
            continue
          }
          // Complete current state
          currentStateMessages.push(message)
          const resultData = parseJsonObject<BrowserActionResult>(message.text)

          // Add page with current state and previous next actions
          result.push({
            currentState: {
              url: resultData.currentUrl,
              screenshot: resultData.screenshot,
              mousePosition: resultData.currentMousePosition,
              consoleLogs: resultData.logs,
              messages: [...currentStateMessages],
            },
            nextAction: nextActionMessages.length > 0 ? { messages: [...nextActionMessages] } : undefined,
          })

          // Reset for next page
          currentStateMessages = []
          nextActionMessages = []
        } else if (
          message.say === "api_req_started" ||
          message.say === "text" ||
          message.say === "browser_action"
        ) {
          // These messages lead to the next result, so they should always go in nextActionMessages
          nextActionMessages.push(message)
        } else {
          // Any other message types
          currentStateMessages.push(message)
        }
      }

      // Add incomplete page if exists
      if (currentStateMessages.length > 0 || nextActionMessages.length > 0) {
        result.push({
          currentState: {
            messages: [...currentStateMessages],
          },
          nextAction: nextActionMessages.length > 0 ? { messages: [...nextActionMessages] } : undefined,
        })
      }

      return result
    }, [messages])

    // Auto-advance to latest page
    const [currentPageIndex, setCurrentPageIndex] = useState(0)
    useEffect(() => {
      setCurrentPageIndex(pages.length - 1)
    }, [pages.length])

    // Get initial URL from launch message
    const initialUrl = useMemo(() => {
      const launchMessage = messages.find((m) => m.ask === "browser_action_launch")
      return launchMessage?.text || ""
    }, [messages])

    // Find the latest available URL and screenshot
    const latestState = useMemo(() => {
      for (let i = pages.length - 1; i >= 0; i--) {
        const page = pages[i]
        if (page.currentState.url || page.currentState.screenshot) {
          return {
            url: page.currentState.url,
            mousePosition: page.currentState.mousePosition,
            consoleLogs: page.currentState.consoleLogs,
            screenshot: page.currentState.screenshot,
          }
        }
      }
      return { url: undefined, mousePosition: undefined, consoleLogs: undefined, screenshot: undefined }
    }, [pages])

    const currentPage = pages[currentPageIndex]
    const isLastPage = currentPageIndex === pages.length - 1

    // Use latest state if we're on the last page and don't have a state yet
    const displayState = isLastPage
      ? {
          url: currentPage?.currentState.url || latestState.url || initialUrl,
          mousePosition:
            currentPage?.currentState.mousePosition || latestState.mousePosition || defaultMousePosition,
          consoleLogs: currentPage?.currentState.consoleLogs,
          screenshot: currentPage?.currentState.screenshot || latestState.screenshot,
        }
      : {
          url: currentPage?.currentState.url || initialUrl,
          mousePosition: currentPage?.currentState.mousePosition || defaultMousePosition,
          consoleLogs: currentPage?.currentState.consoleLogs,
          screenshot: currentPage?.currentState.screenshot,
        }

    const [actionContent, { height: actionHeight }] = useSize(
      <div>
        {currentPage?.nextAction?.messages.map((message) => (
          <BrowserSessionRowContent
            key={message.ts}
            {...props}
            message={message}
            setMaxActionHeight={setMaxActionHeight}
          />
        ))}
        {!isBrowsing && messages.some((m) => m.say === "browser_action_result") && currentPageIndex === 0 && (
          <BrowserActionBox action={"launch"} text={initialUrl} />
        )}
      </div>,
    )

    // Remember the tallest action area seen so far; it is used as a minHeight
    // below so the row does not shrink when paging to a shorter action.
    useEffect(() => {
      if (actionHeight === 0 || actionHeight === Infinity) {
        return
      }
      if (actionHeight > maxActionHeight) {
        setMaxActionHeight(actionHeight)
      }
    }, [actionHeight, maxActionHeight])

    // Track latest click coordinate
    const latestClickPosition = useMemo(() => {
      if (!isBrowsing) return undefined

      // Look through current page's next actions for the latest browser_action
      const actions = currentPage?.nextAction?.messages || []
      for (let i = actions.length - 1; i >= 0; i--) {
        const message = actions[i]
        if (message.say === "browser_action") {
          const browserAction = parseJsonObject<ClineSayBrowserAction>(message.text)
          if (browserAction.action === "click" && browserAction.coordinate) {
            return browserAction.coordinate
          }
        }
      }
      return undefined
    }, [isBrowsing, currentPage?.nextAction?.messages])

    // Use latest click position while browsing, otherwise use display state
    const mousePosition = isBrowsing
      ? latestClickPosition || displayState.mousePosition
      : displayState.mousePosition || defaultMousePosition
    // "x,y" in viewport pixels; converted to percentages of the viewport below.
    const [cursorX, cursorY] = mousePosition.split(",").map((part) => parseInt(part, 10))

    const [browserSessionRow, { height: rowHeight }] = useSize(
      <div style={{ padding: "10px 6px 10px 15px", marginBottom: -10 }}>
        <div style={{ display: "flex", alignItems: "center", gap: "10px", marginBottom: "10px" }}>
          {isBrowsing ? (
            <ProgressIndicator />
          ) : (
            <span
              className={`codicon codicon-inspect`}
              style={{ color: "var(--vscode-foreground)", marginBottom: "-1.5px" }}></span>
          )}
          <span style={{ fontWeight: "bold" }}>
            <>Cline wants to use the browser:</>
          </span>
        </div>
        <div
          style={{
            borderRadius: 3,
            border: "1px solid var(--vscode-editorGroup-border)",
            overflow: "hidden",
            backgroundColor: CODE_BLOCK_BG_COLOR,
            marginBottom: 10,
          }}>
          {/* URL Bar */}
          <div
            style={{
              margin: "5px auto",
              width: "calc(100% - 10px)",
              boxSizing: "border-box", // includes padding in width calculation
              backgroundColor: "var(--vscode-input-background)",
              border: "1px solid var(--vscode-input-border)",
              borderRadius: "4px",
              padding: "3px 5px",
              display: "flex",
              alignItems: "center",
              justifyContent: "center",
              color: displayState.url
                ? "var(--vscode-input-foreground)"
                : "var(--vscode-descriptionForeground)",
              fontSize: "12px",
            }}>
            <div
              style={{
                textOverflow: "ellipsis",
                overflow: "hidden",
                whiteSpace: "nowrap",
                width: "100%",
                textAlign: "center",
              }}>
              {displayState.url || "http"}
            </div>
          </div>

          {/* Screenshot Area */}
          <div
            data-testid="screenshot-container"
            style={{
              width: "100%",
              paddingBottom: `${aspectRatio}%`, // height/width ratio
              position: "relative",
              backgroundColor: "var(--vscode-input-background)",
            }}>
            {displayState.screenshot ? (
              <img
                src={displayState.screenshot}
                alt="Browser screenshot"
                style={{
                  position: "absolute",
                  top: 0,
                  left: 0,
                  width: "100%",
                  height: "100%",
                  objectFit: "contain",
                  cursor: "pointer",
                }}
                onClick={() =>
                  vscode.postMessage({
                    type: "openImage",
                    text: displayState.screenshot,
                  })
                }
              />
            ) : (
              <div
                style={{
                  position: "absolute",
                  top: "50%",
                  left: "50%",
                  transform: "translate(-50%, -50%)",
                }}>
                <span
                  className="codicon codicon-globe"
                  style={{ fontSize: "80px", color: "var(--vscode-descriptionForeground)" }}
                />
              </div>
            )}
            {mousePosition && (
              <BrowserCursor
                style={{
                  position: "absolute",
                  top: `${(cursorY / viewportHeight) * 100}%`,
                  left: `${(cursorX / viewportWidth) * 100}%`,
                  transition: "top 0.3s ease-out, left 0.3s ease-out",
                }}
              />
            )}
          </div>

          <div style={{ width: "100%" }}>
            <div
              onClick={() => {
                setConsoleLogsExpanded(!consoleLogsExpanded)
              }}
              style={{
                display: "flex",
                alignItems: "center",
                gap: "4px",
                width: "100%",
                justifyContent: "flex-start",
                cursor: "pointer",
                padding: `9px 8px ${consoleLogsExpanded ? 0 : 8}px 8px`,
              }}>
              <span className={`codicon codicon-chevron-${consoleLogsExpanded ? "down" : "right"}`}></span>
              <span style={{ fontSize: "0.8em" }}>Console Logs</span>
            </div>
            {consoleLogsExpanded && (
              <CodeBlock source={`${"```"}shell\n${displayState.consoleLogs || "(No new logs)"}\n${"```"}`} />
            )}
          </div>
        </div>

        {/* Action content with min height */}
        <div style={{ minHeight: maxActionHeight }}>{actionContent}</div>

        {/* Pagination moved to bottom */}
        {pages.length > 1 && (
          <div
            style={{
              display: "flex",
              justifyContent: "space-between",
              alignItems: "center",
              padding: "8px 0px",
              marginTop: "15px",
              borderTop: "1px solid var(--vscode-editorGroup-border)",
            }}>
            <div>
              Step {currentPageIndex + 1} of {pages.length}
            </div>
            <div style={{ display: "flex", gap: "4px" }}>
              <VSCodeButton
                disabled={currentPageIndex === 0 || isBrowsing}
                onClick={() => setCurrentPageIndex((i) => i - 1)}>
                Previous
              </VSCodeButton>
              <VSCodeButton
                disabled={currentPageIndex === pages.length - 1 || isBrowsing}
                onClick={() => setCurrentPageIndex((i) => i + 1)}>
                Next
              </VSCodeButton>
            </div>
          </div>
        )}
      </div>,
    )

    // Height change effect: report growth/shrinkage of the measured row while
    // it is the last row, skipping the very first measurement.
    useEffect(() => {
      const isInitialRender = prevHeightRef.current === 0
      if (isLast && rowHeight !== 0 && rowHeight !== Infinity && rowHeight !== prevHeightRef.current) {
        if (!isInitialRender) {
          onHeightChange(rowHeight > prevHeightRef.current)
        }
        prevHeightRef.current = rowHeight
      }
    }, [rowHeight, isLast, onHeightChange])

    return browserSessionRow
  }, deepEqual)
  /**
   * Props for {@link BrowserSessionRowContent}: everything the session row
   * receives except the `messages` list, plus the single message to render.
   */
  interface BrowserSessionRowContentProps extends Omit<BrowserSessionRowProps, "messages"> {
    /** The one message this content row renders. */
    message: ClineMessage
    /** Setter for the parent's remembered action-area height; called with 0 when an `api_req_started` row is toggled. */
    setMaxActionHeight: (height: number) => void
  }
  /**
   * Renders a single message inside a browser session page.
   *
   * - `say: "api_req_started" | "text"` is delegated to `ChatRowContent`.
   * - `say: "browser_action"` is rendered as a `BrowserActionBox`.
   * - `ask: "browser_action_launch"` is rendered as a "Browser Session Started"
   *   header followed by the message text in a code block.
   * - Anything else renders nothing.
   *
   * @returns The rendered element, or `null` when the message has no visual
   *          representation or its `browser_action` payload is not a JSON object.
   */
  const BrowserSessionRowContent = ({
    message,
    isExpanded,
    onToggleExpand,
    lastModifiedMessage,
    isLast,
    setMaxActionHeight,
  }: BrowserSessionRowContentProps) => {
    const headerStyle: React.CSSProperties = {
      display: "flex",
      alignItems: "center",
      gap: "10px",
      marginBottom: "10px",
    }

    switch (message.type) {
      case "say":
        switch (message.say) {
          case "api_req_started":
          case "text":
            return (
              <div style={{ padding: "10px 0 10px 0" }}>
                <ChatRowContent
                  message={message}
                  isExpanded={isExpanded(message.ts)}
                  onToggleExpand={() => {
                    // Toggling an API request row resets the parent's remembered action height.
                    if (message.say === "api_req_started") {
                      setMaxActionHeight(0)
                    }
                    onToggleExpand(message.ts)
                  }}
                  lastModifiedMessage={lastModifiedMessage}
                  isLast={isLast}
                />
              </div>
            )

          // Braces give the declarations below their own scope instead of
          // leaking them into the sibling case clauses.
          case "browser_action": {
            let browserAction: ClineSayBrowserAction
            try {
              browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction
            } catch {
              // Malformed payload: render nothing rather than throwing during render.
              return null
            }
            if (browserAction == null || typeof browserAction !== "object") {
              return null
            }
            return (
              <BrowserActionBox
                action={browserAction.action}
                coordinate={browserAction.coordinate}
                text={browserAction.text}
              />
            )
          }

          default:
            return null
        }

      case "ask":
        switch (message.ask) {
          case "browser_action_launch":
            return (
              <>
                <div style={headerStyle}>
                  <span style={{ fontWeight: "bold" }}>Browser Session Started</span>
                </div>
                <div
                  style={{
                    borderRadius: 3,
                    border: "1px solid var(--vscode-editorGroup-border)",
                    overflow: "hidden",
                    backgroundColor: CODE_BLOCK_BG_COLOR,
                  }}>
                  <CodeBlock source={`${"```"}shell\n${message.text}\n${"```"}`} forceWrap={true} />
                </div>
              </>
            )

          default:
            return null
        }

      // Always return a renderable value, even for an unexpected message type.
      default:
        return null
    }
  }
  /**
   * Builds the human-readable label for a browser action.
   *
   * Missing `coordinate` / `text` values are omitted from the label instead of
   * being interpolated as the literal string "undefined".
   *
   * @param action Browser action identifier.
   * @param coordinate "x,y" click position; only used for "click".
   * @param text URL for "launch", typed text for "type".
   * @returns The label; unrecognized actions are returned as-is.
   */
  function getBrowserActionText(action: BrowserAction, coordinate?: string, text?: string): string {
    switch (action) {
      case "launch":
        return text ? `Launch browser at ${text}` : "Launch browser"
      case "click":
        return coordinate ? `Click (${coordinate.replace(",", ", ")})` : "Click"
      case "type":
        return `Type "${text ?? ""}"`
      case "scroll_down":
        return "Scroll down"
      case "scroll_up":
        return "Scroll up"
      case "close":
        return "Close browser"
      default:
        return action
    }
  }

  /**
   * Bordered box showing a single browser action as "Browse Action: <label>".
   */
  const BrowserActionBox = ({
    action,
    coordinate,
    text,
  }: {
    action: BrowserAction
    coordinate?: string
    text?: string
  }) => {
    return (
      <div style={{ padding: "10px 0 0 0" }}>
        <div
          style={{
            borderRadius: 3,
            backgroundColor: CODE_BLOCK_BG_COLOR,
            overflow: "hidden",
            border: "1px solid var(--vscode-editorGroup-border)",
          }}>
          <div
            style={{
              display: "flex",
              alignItems: "center",
              padding: "9px 10px",
            }}>
            <span
              style={{
                whiteSpace: "normal",
                wordBreak: "break-word",
              }}>
              <span style={{ fontWeight: 500 }}>Browse Action: </span>
              {getBrowserActionText(action, coordinate, text)}
            </span>
          </div>
        </div>
      </div>
    )
  }
  /**
   * Cursor image, 17px by 22px by default; any caller-supplied `style`
   * entries are merged on top of (and can override) the default size.
   */
  const BrowserCursor: React.FC<{ style?: React.CSSProperties }> = (props) => {
    // (can't use svgs in vsc extensions)
    // NOTE(review): the image source is an empty string here - confirm the
    // embedded image data was not lost.
    const cursorBase64 = ""
    const mergedStyle: React.CSSProperties = { width: "17px", height: "22px", ...props.style }

    return <img src={cursorBase64} style={mergedStyle} alt="cursor" aria-label="cursor" />
  }
  525. export default BrowserSessionRow