// followup-during-streaming.ts

  1. import { runStreamCase, StreamEvent } from "../lib/stream-harness"
  2. const START_PROMPT = 'Answer this question and finish: What is 1+1? Reply with only "2", then complete the task.'
  3. const FOLLOWUP_PROMPT = 'Different question now: what is 3+3? Reply with only "6".'
  4. function looksLikeAttemptCompletionToolUse(event: StreamEvent): boolean {
  5. if (event.type !== "tool_use") {
  6. return false
  7. }
  8. if (event.tool_use?.name === "attempt_completion") {
  9. return true
  10. }
  11. const content = event.content ?? ""
  12. return content.includes('"tool":"attempt_completion"') || content.includes('"name":"attempt_completion"')
  13. }
  14. function validateFollowupAnswer(text: string): void {
  15. const normalized = text.toLowerCase()
  16. const hasSix = /\b6\b/.test(normalized) || normalized.includes("six")
  17. if (!hasSix) {
  18. throw new Error(`follow-up result did not answer follow-up prompt; result="${text}"`)
  19. }
  20. }
  21. async function main() {
  22. const startRequestId = `start-${Date.now()}`
  23. const followupRequestId = `message-${Date.now()}`
  24. const shutdownRequestId = `shutdown-${Date.now()}`
  25. let initSeen = false
  26. let sentFollowup = false
  27. let sentShutdown = false
  28. let sawAttemptCompletion = false
  29. let sawFollowupUserTurn = false
  30. let sawMisroutedToolResult = false
  31. let followupResult = ""
  32. let sawFirstAssistantChunkForStart = false
  33. await runStreamCase({
  34. onEvent(event: StreamEvent, context) {
  35. if (event.type === "system" && event.subtype === "init" && !initSeen) {
  36. initSeen = true
  37. context.sendCommand({
  38. command: "start",
  39. requestId: startRequestId,
  40. prompt: START_PROMPT,
  41. })
  42. return
  43. }
  44. if (event.type === "control" && event.subtype === "error") {
  45. throw new Error(
  46. `received control error for requestId=${event.requestId ?? "unknown"} command=${event.command ?? "unknown"} code=${event.code ?? "unknown"} content=${event.content ?? ""}`,
  47. )
  48. }
  49. if (!sawAttemptCompletion && looksLikeAttemptCompletionToolUse(event)) {
  50. sawAttemptCompletion = true
  51. if (!sentFollowup) {
  52. context.sendCommand({
  53. command: "message",
  54. requestId: followupRequestId,
  55. prompt: FOLLOWUP_PROMPT,
  56. })
  57. sentFollowup = true
  58. }
  59. return
  60. }
  61. if (
  62. event.type === "assistant" &&
  63. event.requestId === startRequestId &&
  64. event.done !== true &&
  65. !sawFirstAssistantChunkForStart
  66. ) {
  67. sawFirstAssistantChunkForStart = true
  68. if (!sentFollowup) {
  69. context.sendCommand({
  70. command: "message",
  71. requestId: followupRequestId,
  72. prompt: FOLLOWUP_PROMPT,
  73. })
  74. sentFollowup = true
  75. }
  76. return
  77. }
  78. if (
  79. event.type === "tool_result" &&
  80. event.requestId === followupRequestId &&
  81. typeof event.content === "string" &&
  82. event.content.includes("<user_message>")
  83. ) {
  84. sawMisroutedToolResult = true
  85. return
  86. }
  87. if (event.type === "user" && event.requestId === followupRequestId) {
  88. sawFollowupUserTurn = typeof event.content === "string" && event.content.includes("3+3")
  89. return
  90. }
  91. if (event.type === "result" && event.done === true && event.requestId === startRequestId && !sentFollowup) {
  92. context.sendCommand({
  93. command: "message",
  94. requestId: followupRequestId,
  95. prompt: FOLLOWUP_PROMPT,
  96. })
  97. sentFollowup = true
  98. return
  99. }
  100. if (event.type !== "result" || event.done !== true || event.requestId !== followupRequestId) {
  101. return
  102. }
  103. followupResult = event.content ?? ""
  104. validateFollowupAnswer(followupResult)
  105. if (sawMisroutedToolResult) {
  106. throw new Error("follow-up message was misrouted into tool_result (<user_message>), old bug reproduced")
  107. }
  108. if (!sawFollowupUserTurn) {
  109. throw new Error("follow-up did not appear as a normal user turn in stream output")
  110. }
  111. console.log(`[PASS] saw attempt_completion tool use: ${sawAttemptCompletion}`)
  112. console.log(`[PASS] saw start assistant chunk before follow-up: ${sawFirstAssistantChunkForStart}`)
  113. console.log(`[PASS] follow-up user turn observed: ${sawFollowupUserTurn}`)
  114. console.log(`[PASS] follow-up result: "${followupResult}"`)
  115. if (!sentShutdown) {
  116. context.sendCommand({
  117. command: "shutdown",
  118. requestId: shutdownRequestId,
  119. })
  120. sentShutdown = true
  121. }
  122. },
  123. onTimeoutMessage() {
  124. return [
  125. "timed out waiting for follow-up validation",
  126. `initSeen=${initSeen}`,
  127. `sentFollowup=${sentFollowup}`,
  128. `sawAttemptCompletion=${sawAttemptCompletion}`,
  129. `sawFirstAssistantChunkForStart=${sawFirstAssistantChunkForStart}`,
  130. `sawFollowupUserTurn=${sawFollowupUserTurn}`,
  131. `sawMisroutedToolResult=${sawMisroutedToolResult}`,
  132. `haveFollowupResult=${Boolean(followupResult)}`,
  133. ].join(" ")
  134. },
  135. })
  136. }
  137. main().catch((error) => {
  138. console.error(`[FAIL] ${error instanceof Error ? error.message : String(error)}`)
  139. process.exit(1)
  140. })