// followup-completion-ask-response.ts
  1. import { runStreamCase, StreamEvent } from "../lib/stream-harness"
  /** Prompt sent with the initial `start` command; asks for a one-token answer and task completion. */
  const START_PROMPT =
    "Answer this question and finish: What is 1+1? " +
    'Reply with only "2", then complete the task.'

  /** Prompt sent with the follow-up `message` command once the first task has produced its result. */
  const FOLLOWUP_PROMPT =
    "Different question now: what is 3+3? " +
    'Reply with only "6".'
  /**
   * Runs the "follow-up after completion" stream scenario and validates how the
   * follow-up message is routed.
   *
   * Flow, as driven by the `onEvent` callback:
   * 1. On the first `system`/`init` event, send a `start` command with START_PROMPT.
   * 2. When the `start` request reports a finished `result`, send a `message`
   *    command with FOLLOWUP_PROMPT.
   * 3. When the follow-up request reports a finished `result`, run every
   *    validation below and, if all pass, log the outcome and send `shutdown`.
   *
   * @throws Error from inside `onEvent` when a control error event arrives or a
   *   validation fails. NOTE(review): presumably `runStreamCase` surfaces this as
   *   a rejection of the awaited promise — confirm against the harness.
   */
  async function main(): Promise<void> {
    // Each id carries a distinct prefix, so they differ even when Date.now() repeats.
    const startRequestId = `start-${Date.now()}`
    const followupRequestId = `message-${Date.now()}`
    const shutdownRequestId = `shutdown-${Date.now()}`

    // One-shot guards so each command is sent at most once.
    let initSeen = false
    let sentFollowup = false
    let sentShutdown = false

    // Observations collected while streaming; validated when the follow-up result arrives.
    let startAckCount = 0
    let sawStartControlAfterFollowup = false
    let followupDoneCode: string | undefined
    let sawFollowupUserTurn = false
    let sawMisroutedToolResult = false
    let sawQueueEventForFollowupRequest = false
    let followupResult = ""

    await runStreamCase({
      onEvent(event: StreamEvent, context) {
        // First init event: kick off the initial task.
        if (event.type === "system" && event.subtype === "init" && !initSeen) {
          initSeen = true
          context.sendCommand({
            command: "start",
            requestId: startRequestId,
            prompt: START_PROMPT,
          })
          return
        }

        // Any control error fails the case immediately.
        if (event.type === "control" && event.subtype === "error") {
          throw new Error(
            `received control error for requestId=${event.requestId ?? "unknown"} command=${event.command ?? "unknown"} code=${event.code ?? "unknown"} content=${event.content ?? ""}`,
          )
        }

        // Count start acks and note any that arrive after the follow-up was sent.
        if (event.type === "control" && event.command === "start" && event.subtype === "ack") {
          startAckCount += 1
          if (sentFollowup) {
            sawStartControlAfterFollowup = true
          }
          return
        }

        // Record the code reported when the follow-up message command completes.
        if (
          event.type === "control" &&
          event.command === "message" &&
          event.subtype === "done" &&
          event.requestId === followupRequestId
        ) {
          followupDoneCode = event.code
          return
        }

        if (event.type === "queue" && event.requestId === followupRequestId) {
          sawQueueEventForFollowupRequest = true
          return
        }

        // A tool_result wrapping the follow-up in <user_message> is the misrouting being guarded against.
        if (
          event.type === "tool_result" &&
          event.requestId === followupRequestId &&
          typeof event.content === "string" &&
          event.content.includes("<user_message>")
        ) {
          sawMisroutedToolResult = true
          return
        }

        if (event.type === "user" && event.requestId === followupRequestId) {
          // Sticky, like the other observation flags: once the follow-up prompt has
          // been seen as a user turn, a later user event for the same request that
          // lacks "3+3" must not reset it to false.
          if (typeof event.content === "string" && event.content.includes("3+3")) {
            sawFollowupUserTurn = true
          }
          return
        }

        // Initial task finished: send the follow-up exactly once.
        if (event.type === "result" && event.done === true && event.requestId === startRequestId && !sentFollowup) {
          context.sendCommand({
            command: "message",
            requestId: followupRequestId,
            prompt: FOLLOWUP_PROMPT,
          })
          sentFollowup = true
          return
        }

        // Everything below runs only for the follow-up's finished result.
        if (event.type !== "result" || event.done !== true || event.requestId !== followupRequestId) {
          return
        }

        followupResult = event.content ?? ""
        if (followupResult.trim().length === 0) {
          throw new Error("follow-up produced an empty result")
        }
        if (followupDoneCode !== "responded") {
          throw new Error(
            `follow-up message was not routed as ask response; code="${followupDoneCode ?? "none"}"`,
          )
        }
        if (sawMisroutedToolResult) {
          throw new Error("follow-up message was misrouted into tool_result (<user_message>), old bug reproduced")
        }
        if (sawQueueEventForFollowupRequest) {
          throw new Error("follow-up message produced queue events despite responded routing")
        }
        if (!sawFollowupUserTurn) {
          throw new Error("follow-up did not appear as a normal user turn in stream output")
        }
        if (sawStartControlAfterFollowup) {
          throw new Error("unexpected start control event after follow-up; message should not trigger a new task")
        }
        if (startAckCount !== 1) {
          throw new Error(`expected exactly one start ack event, saw ${startAckCount}`)
        }

        console.log(`[PASS] follow-up control code: "${followupDoneCode}"`)
        console.log(`[PASS] follow-up user turn observed: ${sawFollowupUserTurn}`)
        console.log(`[PASS] follow-up result: "${followupResult}"`)

        if (!sentShutdown) {
          context.sendCommand({
            command: "shutdown",
            requestId: shutdownRequestId,
          })
          sentShutdown = true
        }
      },
      onTimeoutMessage() {
        // Snapshot of all progress flags so a timeout shows how far the scenario got,
        // including whether validation already passed and shutdown was requested.
        return [
          "timed out waiting for completion ask-response follow-up validation",
          `initSeen=${initSeen}`,
          `sentFollowup=${sentFollowup}`,
          `startAckCount=${startAckCount}`,
          `followupDoneCode=${followupDoneCode ?? "none"}`,
          `sawFollowupUserTurn=${sawFollowupUserTurn}`,
          `sawMisroutedToolResult=${sawMisroutedToolResult}`,
          `sawQueueEventForFollowupRequest=${sawQueueEventForFollowupRequest}`,
          `haveFollowupResult=${Boolean(followupResult)}`,
          `sawStartControlAfterFollowup=${sawStartControlAfterFollowup}`,
          `sentShutdown=${sentShutdown}`,
        ].join(" ")
      },
    })
  }
  // Script entry point: run the scenario; on any rejection, print a [FAIL] line
  // and exit with status 1.
  main().catch((reason) => {
    const detail = reason instanceof Error ? reason.message : String(reason)
    console.error(`[FAIL] ${detail}`)
    process.exit(1)
  })