// followup-completion-ask-response.ts
  import { runStreamCase, StreamEvent } from "../lib/stream-harness"
  /** Prompt sent with the initial `start` command; asks for a one-token answer and task completion. */
  const START_PROMPT = 'Answer this question and finish: What is 1+1? Reply with only "2", then complete the task.'

  /**
   * Prompt sent with the follow-up `message` command once the first task has reported its result.
   * The scenario later looks for the substring "3+3" in the echoed user turn, so keep it in this text.
   */
  const FOLLOWUP_PROMPT = 'Different question now: what is 3+3? Reply with only "6".'
  /**
   * Runs the "follow-up after completion" stream scenario.
   *
   * Flow, as driven by the events passed to `onEvent`:
   *   1. On the first `system`/`init` event, send a `start` command with START_PROMPT.
   *   2. When the final `result` for the start request arrives, send a `message`
   *      command with FOLLOWUP_PROMPT (once).
   *   3. When the final `result` for the follow-up request arrives, validate everything
   *      observed so far, log the PASS lines, and send a `shutdown` command (once).
   *
   * Validation performed in step 3 (each failure throws an `Error`):
   *   - the follow-up result is non-empty;
   *   - the `message`/`done` control event for the follow-up carried code "responded";
   *   - no `tool_result` for the follow-up contained "<user_message>";
   *   - a `user` event for the follow-up containing "3+3" was seen;
   *   - exactly one `start` ack was seen, and none after the follow-up was sent.
   *
   * Any `control`/`error` event throws immediately, regardless of which request it belongs to.
   *
   * @returns A promise that settles when `runStreamCase` settles.
   * @throws Error from inside `onEvent` on any failed check (presumably surfaced by the
   *   harness as a rejection of `runStreamCase` — confirm against the harness).
   */
  async function main(): Promise<void> {
    // Each id has its own prefix, so they stay distinct even if Date.now() returns the same value.
    const startRequestId = `start-${Date.now()}`
    const followupRequestId = `message-${Date.now()}`
    const shutdownRequestId = `shutdown-${Date.now()}`

    // One-shot guards so each command is sent at most once.
    let initSeen = false
    let sentFollowup = false
    let sentShutdown = false

    // Observations accumulated across events and checked when the follow-up result arrives.
    let startAckCount = 0
    let sawStartControlAfterFollowup = false
    let followupDoneCode: string | undefined
    let sawFollowupUserTurn = false
    let sawMisroutedToolResult = false
    let followupResult = ""

    await runStreamCase({
      onEvent(event: StreamEvent, context) {
        // Step 1: kick off the initial task on the first init event only.
        if (event.type === "system" && event.subtype === "init" && !initSeen) {
          initSeen = true
          context.sendCommand({
            command: "start",
            requestId: startRequestId,
            prompt: START_PROMPT,
          })
          return
        }

        // Any control-level error fails the scenario outright.
        if (event.type === "control" && event.subtype === "error") {
          throw new Error(
            `received control error for requestId=${event.requestId ?? "unknown"} command=${event.command ?? "unknown"} code=${event.code ?? "unknown"} content=${event.content ?? ""}`,
          )
        }

        // Count start acks; one arriving after the follow-up was sent means the
        // follow-up was treated as a new task.
        if (event.type === "control" && event.command === "start" && event.subtype === "ack") {
          startAckCount += 1
          if (sentFollowup) {
            sawStartControlAfterFollowup = true
          }
          return
        }

        // Remember how the harness says the follow-up message was handled.
        if (
          event.type === "control" &&
          event.command === "message" &&
          event.subtype === "done" &&
          event.requestId === followupRequestId
        ) {
          followupDoneCode = event.code
          return
        }

        // A tool_result carrying "<user_message>" for the follow-up is the misrouting being guarded against.
        if (
          event.type === "tool_result" &&
          event.requestId === followupRequestId &&
          typeof event.content === "string" &&
          event.content.includes("<user_message>")
        ) {
          sawMisroutedToolResult = true
          return
        }

        // The follow-up should surface as a user turn echoing the prompt text.
        if (event.type === "user" && event.requestId === followupRequestId) {
          // Sticky like the other saw* flags: a later user event without "3+3"
          // must not erase an earlier positive observation.
          if (typeof event.content === "string" && event.content.includes("3+3")) {
            sawFollowupUserTurn = true
          }
          return
        }

        // Step 2: the first task finished, so send the follow-up exactly once.
        if (event.type === "result" && event.done === true && event.requestId === startRequestId && !sentFollowup) {
          context.sendCommand({
            command: "message",
            requestId: followupRequestId,
            prompt: FOLLOWUP_PROMPT,
          })
          sentFollowup = true
          return
        }

        // Everything below only applies to the final result of the follow-up request.
        if (event.type !== "result" || event.done !== true || event.requestId !== followupRequestId) {
          return
        }

        // Step 3: validate what was observed.
        followupResult = event.content ?? ""
        if (followupResult.trim().length === 0) {
          throw new Error("follow-up produced an empty result")
        }
        if (followupDoneCode !== "responded") {
          throw new Error(
            `follow-up message was not routed as ask response; code="${followupDoneCode ?? "none"}"`,
          )
        }
        if (sawMisroutedToolResult) {
          throw new Error("follow-up message was misrouted into tool_result (<user_message>), old bug reproduced")
        }
        if (!sawFollowupUserTurn) {
          throw new Error("follow-up did not appear as a normal user turn in stream output")
        }
        if (sawStartControlAfterFollowup) {
          throw new Error("unexpected start control event after follow-up; message should not trigger a new task")
        }
        if (startAckCount !== 1) {
          throw new Error(`expected exactly one start ack event, saw ${startAckCount}`)
        }

        console.log(`[PASS] follow-up control code: "${followupDoneCode}"`)
        console.log(`[PASS] follow-up user turn observed: ${sawFollowupUserTurn}`)
        console.log(`[PASS] follow-up result: "${followupResult}"`)

        // All checks passed; send the shutdown command once.
        if (!sentShutdown) {
          context.sendCommand({
            command: "shutdown",
            requestId: shutdownRequestId,
          })
          sentShutdown = true
        }
      },

      // Builds a single-line summary of the observed state for the harness's timeout message.
      onTimeoutMessage() {
        return [
          "timed out waiting for completion ask-response follow-up validation",
          `initSeen=${initSeen}`,
          `sentFollowup=${sentFollowup}`,
          `startAckCount=${startAckCount}`,
          `followupDoneCode=${followupDoneCode ?? "none"}`,
          `sawFollowupUserTurn=${sawFollowupUserTurn}`,
          `sawMisroutedToolResult=${sawMisroutedToolResult}`,
          `haveFollowupResult=${Boolean(followupResult)}`,
        ].join(" ")
      },
    })
  }
  // Script entry point: run the scenario and turn any rejection into a
  // "[FAIL] ..." line on stderr plus a non-zero exit code.
  main().catch((error: unknown) => {
    // Rejections are not guaranteed to be Error instances, so narrow before reading `.message`.
    console.error(`[FAIL] ${error instanceof Error ? error.message : String(error)}`)
    process.exit(1)
  })