agent-state.ts 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. /**
  2. * Agent Loop State Detection
  3. *
  4. * This module provides the core logic for detecting the current state of the
  5. * Roo Code agent loop. The state is determined by analyzing the clineMessages
  6. * array, specifically the last message's type and properties.
  7. *
  8. * Key insight: The agent loop stops whenever a message with `type: "ask"` arrives,
  9. * and the specific `ask` value determines what kind of response the agent is waiting for.
  10. */
  11. import { ClineMessage, ClineAsk, isIdleAsk, isResumableAsk, isInteractiveAsk, isNonBlockingAsk } from "@roo-code/types"
  12. // =============================================================================
  13. // Agent Loop State Enum
  14. // =============================================================================
  /**
   * High-level states of the agent loop.
   *
   * Transitions:
   * ```
   * NO_TASK (initial)  --newTask-->  RUNNING
   * RUNNING            ---------->   STREAMING | WAITING_FOR_INPUT | IDLE
   * STREAMING          --done---->   RUNNING
   * WAITING_FOR_INPUT  --approved->  RUNNING
   * IDLE               --newTask-->  RUNNING
   * RESUMABLE          --resumed-->  RUNNING
   * ```
   */
  export enum AgentLoopState {
    /**
     * Nothing is active: either no task has been started yet, or the
     * previous task has been cleared.
     */
    NO_TASK = "no_task",

    /**
     * The agent is working. Reported when the last message is an
     * informational "say", or a non-blocking ask (command_output).
     *
     * While in this state the agent may be executing tools, reasoning, or
     * sitting between two API calls.
     */
    RUNNING = "running",

    /**
     * A response is being streamed. Reported when the last message has
     * `partial === true`, or when the latest `api_req_started` message
     * carries no `cost` in its text field yet.
     *
     * A streaming agent must NOT be treated as "waiting".
     */
    STREAMING = "streaming",

    /**
     * The agent is blocked until the user approves, rejects, or answers.
     * Covers tool (file operation) approvals, command execution permission,
     * browser action permission, MCP server permission, and follow-up
     * questions.
     */
    WAITING_FOR_INPUT = "waiting_for_input",

    /**
     * The task has stopped in an idle/terminal condition:
     * - completed successfully (completion_result)
     * - API request failed (api_req_failed)
     * - too many errors (mistake_limit_reached)
     * - auto-approval limit reached
     * - a completed task waiting to be resumed
     *
     * The user can retry or start a new task from here.
     */
    IDLE = "idle",

    /**
     * The task is paused and may be resumed, e.g. after the user navigated
     * away from it or the extension restarted mid-task. The user can resume
     * or abandon it.
     */
    RESUMABLE = "resumable",
  }
  101. // =============================================================================
  102. // Detailed State Info
  103. // =============================================================================
  /**
   * The response the user can (or must) give in the current state.
   */
  export type RequiredAction =
    // Nothing to do: the agent is running or streaming.
    | "none"
    // Approve or reject a request (tool, command, browser, mcp).
    | "approve"
    // Answer a follow-up question (followup).
    | "answer"
    // Retry the request or start a new task (api_req_failed).
    | "retry_or_new_task"
    // Proceed anyway or start a new task (mistake_limit).
    | "proceed_or_new_task"
    // Start a new task after a finished one (completion_result).
    | "start_task"
    // Resume the paused task or abandon it (resume_task).
    | "resume_or_abandon"
    // Start a new task (resume_completed_task, no_task).
    | "start_new_task"
    // Let the command continue or abort it (command_output).
    | "continue_or_abort"
  /**
   * Snapshot describing the agent loop at one point in time.
   * Carries enough detail to drive UI rendering or programmatic decisions.
   */
  export interface AgentStateInfo {
    /** High-level loop state. */
    state: AgentLoopState

    /** True when the agent is blocked on user input or a user decision. */
    isWaitingForInput: boolean

    /** True while the agent loop is actively processing. */
    isRunning: boolean

    /** True while content is being streamed. */
    isStreaming: boolean

    /** Ask type the agent is waiting on; undefined when it is not waiting on an ask. */
    currentAsk?: ClineAsk

    /** Action the user should or can take next. */
    requiredAction: RequiredAction

    /** Timestamp of the last message, handy for change tracking. */
    lastMessageTs?: number

    /** The complete last message, for callers that need more detail. */
    lastMessage?: ClineMessage

    /** Human-readable summary of the current state. */
    description: string
  }
  141. // =============================================================================
  142. // State Detection Functions
  143. // =============================================================================
  /**
   * Shape of the JSON stored in the `text` field of an `api_req_started`
   * message. The presence of `cost` is what marks the request as finished.
   */
  export interface ApiReqStartedText {
    /** Left undefined while the request is streaming; set once it completes. */
    cost?: number
    /** Optional counters reported with the request (meaning defined by the message producer). */
    tokensIn?: number
    tokensOut?: number
    cacheWrites?: number
    cacheReads?: number
  }
  /**
   * Report whether the most recent API request is still in flight (streaming).
   *
   * Walks `messages` from newest to oldest and decides based on the first
   * `api_req_started` message it meets:
   * - no such message exists             -> `false`
   * - the message has no text            -> `true` (nothing reported yet)
   * - the text parses to a JSON object   -> `true` only while `cost` is undefined
   * - the text is anything else (invalid JSON, `null`, a primitive, an array)
   *                                      -> `false`
   *
   * Malformed payloads are uniformly treated as "not in progress" so that a
   * corrupt message cannot leave callers stuck in a streaming state.
   *
   * @param messages - Message history to inspect; missing entries are skipped.
   * @returns `true` when the latest API request has not reported a cost yet.
   */
  function isApiRequestInProgress(messages: ClineMessage[]): boolean {
    // Newest first: only the latest api_req_started message matters.
    for (let i = messages.length - 1; i >= 0; i--) {
      const message = messages[i]
      if (!message || message.say !== "api_req_started") {
        continue
      }

      if (!message.text) {
        // No text yet means the request has not reported anything.
        return true
      }

      let payload: unknown
      try {
        payload = JSON.parse(message.text)
      } catch {
        // Unparseable text - assume not in progress.
        return false
      }

      // JSON.parse also succeeds for null, primitives and arrays; none of those
      // can carry a cost, so treat them like a parse failure instead of
      // misreading the absent property as "still streaming".
      if (typeof payload !== "object" || payload === null || Array.isArray(payload)) {
        return false
      }

      // cost is undefined while streaming, defined when complete.
      return (payload as { cost?: unknown }).cost === undefined
    }

    return false
  }
  /** Ask types that call for a user action other than "none". */
  const REQUIRED_ACTION_BY_ASK: ReadonlyMap<ClineAsk, RequiredAction> = new Map<ClineAsk, RequiredAction>([
    ["followup", "answer"],
    ["command", "approve"],
    ["tool", "approve"],
    ["browser_action_launch", "approve"],
    ["use_mcp_server", "approve"],
    ["command_output", "continue_or_abort"],
    ["api_req_failed", "retry_or_new_task"],
    ["mistake_limit_reached", "proceed_or_new_task"],
    ["completion_result", "start_task"],
    ["resume_task", "resume_or_abandon"],
    ["resume_completed_task", "start_new_task"],
    ["auto_approval_max_req_reached", "start_new_task"],
  ])

  /**
   * Translate an ask type into the action expected from the user.
   *
   * @param ask - The ask type of the message the agent stopped on.
   * @returns The matching action, or "none" for ask types without an entry.
   */
  function getRequiredAction(ask: ClineAsk): RequiredAction {
    const action = REQUIRED_ACTION_BY_ASK.get(ask)
    return action === undefined ? "none" : action
  }
  /** Ask-specific wording used while the agent is waiting for input. */
  const INTERACTIVE_ASK_DESCRIPTIONS: ReadonlyMap<ClineAsk, string> = new Map<ClineAsk, string>([
    ["followup", "Agent is asking a follow-up question. Please provide an answer."],
    ["command", "Agent wants to execute a command. Approve or reject."],
    ["tool", "Agent wants to perform a file operation. Approve or reject."],
    ["browser_action_launch", "Agent wants to use the browser. Approve or reject."],
    ["use_mcp_server", "Agent wants to use an MCP server. Approve or reject."],
  ])

  /** Ask-specific wording used while the task is idle. */
  const IDLE_ASK_DESCRIPTIONS: ReadonlyMap<ClineAsk, string> = new Map<ClineAsk, string>([
    ["completion_result", "Task completed successfully. You can provide feedback or start a new task."],
    ["api_req_failed", "API request failed. You can retry or start a new task."],
    ["mistake_limit_reached", "Too many errors encountered. You can proceed anyway or start a new task."],
    ["auto_approval_max_req_reached", "Auto-approval limit reached. Manual approval required."],
    ["resume_completed_task", "Previously completed task. Start a new task to continue."],
  ])

  /**
   * Build the human-readable sentence for a state.
   *
   * @param state - The agent loop state to describe.
   * @param ask - Optional ask type; only consulted for WAITING_FOR_INPUT and
   *   IDLE, where it selects a more specific sentence when one exists.
   * @returns The description, or "Unknown state." for an unrecognised state.
   */
  function getStateDescription(state: AgentLoopState, ask?: ClineAsk): string {
    if (state === AgentLoopState.NO_TASK) {
      return "No active task. Ready to start a new task."
    }
    if (state === AgentLoopState.RUNNING) {
      return "Agent is actively processing."
    }
    if (state === AgentLoopState.STREAMING) {
      return "Agent is streaming a response."
    }
    if (state === AgentLoopState.WAITING_FOR_INPUT) {
      const specific = ask === undefined ? undefined : INTERACTIVE_ASK_DESCRIPTIONS.get(ask)
      return specific === undefined ? "Agent is waiting for user input." : specific
    }
    if (state === AgentLoopState.IDLE) {
      const specific = ask === undefined ? undefined : IDLE_ASK_DESCRIPTIONS.get(ask)
      return specific === undefined ? "Task is idle." : specific
    }
    if (state === AgentLoopState.RESUMABLE) {
      return "Task is paused. You can resume or start a new task."
    }
    return "Unknown state."
  }
  /**
   * Derive the agent loop state from the clineMessages array.
   *
   * Only the last message decides the outcome, checked in this order:
   * 1. no messages (or no last message)      -> NO_TASK
   * 2. last message is partial               -> STREAMING
   * 3. last message is an ask:
   *    - non-blocking ask                    -> RUNNING (can be interrupted)
   *    - idle ask                            -> IDLE
   *    - resumable ask                       -> RESUMABLE
   *    - interactive ask                     -> WAITING_FOR_INPUT
   * 4. latest API request has no cost yet    -> STREAMING
   * 5. anything else                         -> RUNNING
   *
   * @param messages - The clineMessages array from extension state
   * @returns Detailed state information
   */
  export function detectAgentState(messages: ClineMessage[]): AgentStateInfo {
    const tail = messages && messages.length > 0 ? messages[messages.length - 1] : undefined

    // Covers both "no messages" and an unexpectedly missing last entry.
    if (!tail) {
      return {
        state: AgentLoopState.NO_TASK,
        isWaitingForInput: false,
        isRunning: false,
        isStreaming: false,
        requiredAction: "start_new_task",
        description: getStateDescription(AgentLoopState.NO_TASK),
      }
    }

    const ask = tail.ask
    const ts = tail.ts

    // A partial last message is the primary streaming signal.
    if (tail.partial === true) {
      return {
        state: AgentLoopState.STREAMING,
        isWaitingForInput: false,
        isRunning: true,
        isStreaming: true,
        currentAsk: ask,
        requiredAction: "none",
        lastMessageTs: ts,
        lastMessage: tail,
        description: getStateDescription(AgentLoopState.STREAMING),
      }
    }

    if (tail.type === "ask" && ask) {
      // Non-blocking asks (command_output): still running, but interruptible.
      if (isNonBlockingAsk(ask)) {
        return {
          state: AgentLoopState.RUNNING,
          isWaitingForInput: false,
          isRunning: true,
          isStreaming: false,
          currentAsk: ask,
          requiredAction: "continue_or_abort",
          lastMessageTs: ts,
          lastMessage: tail,
          description: "Command is running. You can continue or abort.",
        }
      }

      // Idle, resumable and interactive asks all stop the loop and hand
      // control to the user; they differ only in the reported state.
      let stoppedState: AgentLoopState | undefined
      if (isIdleAsk(ask)) {
        stoppedState = AgentLoopState.IDLE
      } else if (isResumableAsk(ask)) {
        stoppedState = AgentLoopState.RESUMABLE
      } else if (isInteractiveAsk(ask)) {
        stoppedState = AgentLoopState.WAITING_FOR_INPUT
      }

      if (stoppedState !== undefined) {
        return {
          state: stoppedState,
          isWaitingForInput: true,
          isRunning: false,
          isStreaming: false,
          currentAsk: ask,
          requiredAction: getRequiredAction(ask),
          lastMessageTs: ts,
          lastMessage: tail,
          description: getStateDescription(stoppedState, ask),
        }
      }
    }

    // Everything else is active work; an unfinished API request means streaming.
    const streaming = isApiRequestInProgress(messages)
    const activeState = streaming ? AgentLoopState.STREAMING : AgentLoopState.RUNNING
    return {
      state: activeState,
      isWaitingForInput: false,
      isRunning: true,
      isStreaming: streaming,
      requiredAction: "none",
      lastMessageTs: ts,
      lastMessage: tail,
      description: getStateDescription(activeState),
    }
  }
  /**
   * Convenience check: does the agent currently need something from the user?
   *
   * Shortcut for callers that only care whether user action is required and
   * do not need the full state details.
   */
  export function isAgentWaitingForInput(messages: ClineMessage[]): boolean {
    const { isWaitingForInput } = detectAgentState(messages)
    return isWaitingForInput
  }
  /**
   * Convenience check: is the agent actively working, i.e. running and not
   * waiting on the user?
   */
  export function isAgentRunning(messages: ClineMessage[]): boolean {
    const { isRunning, isWaitingForInput } = detectAgentState(messages)
    return isRunning && !isWaitingForInput
  }
  /**
   * Convenience check: is content being streamed right now?
   */
  export function isContentStreaming(messages: ClineMessage[]): boolean {
    const { isStreaming } = detectAgentState(messages)
    return isStreaming
  }