// followup-after-completion.ts
  1. import { runStreamCase, StreamEvent } from "../lib/stream-harness"
  2. const FIRST_PROMPT = `What is 1+1? Reply with only "2".`
  3. const FOLLOWUP_PROMPT = `Different question now: what is 3+3? Reply with only "6".`
  4. function parseEventContent(text: string | undefined): string {
  5. return typeof text === "string" ? text : ""
  6. }
  7. function validateFollowupAnswer(text: string): void {
  8. const normalized = text.toLowerCase()
  9. const containsExpected = /\b6\b/.test(normalized) || normalized.includes("six")
  10. const containsOldAnswer = /\b1\+1\b/.test(normalized) || /\b2\b/.test(normalized)
  11. const containsQuestionReference = normalized.includes("3+3")
  12. if (!containsExpected) {
  13. throw new Error(`follow-up result did not answer the follow-up question; result="${text}"`)
  14. }
  15. if (!containsQuestionReference && containsOldAnswer && !containsExpected) {
  16. throw new Error(`follow-up result appears anchored to first question; result="${text}"`)
  17. }
  18. }
  19. async function main() {
  20. const startRequestId = `start-${Date.now()}`
  21. const followupRequestId = `message-${Date.now()}`
  22. const shutdownRequestId = `shutdown-${Date.now()}`
  23. let initSeen = false
  24. let sentFollowup = false
  25. let sentShutdown = false
  26. let firstResult = ""
  27. let followupResult = ""
  28. await runStreamCase({
  29. onEvent(event: StreamEvent, context) {
  30. if (event.type === "system" && event.subtype === "init" && !initSeen) {
  31. initSeen = true
  32. context.sendCommand({
  33. command: "start",
  34. requestId: startRequestId,
  35. prompt: FIRST_PROMPT,
  36. })
  37. return
  38. }
  39. if (event.type === "control" && event.subtype === "error") {
  40. throw new Error(
  41. `received control error for requestId=${event.requestId ?? "unknown"} command=${event.command ?? "unknown"} code=${event.code ?? "unknown"} content=${event.content ?? ""}`,
  42. )
  43. }
  44. if (event.type !== "result" || event.done !== true) {
  45. return
  46. }
  47. if (event.requestId === startRequestId) {
  48. firstResult = parseEventContent(event.content)
  49. if (!/\b2\b/.test(firstResult)) {
  50. throw new Error(`first result did not answer first prompt; result="${firstResult}"`)
  51. }
  52. if (!sentFollowup) {
  53. context.sendCommand({
  54. command: "message",
  55. requestId: followupRequestId,
  56. prompt: FOLLOWUP_PROMPT,
  57. })
  58. sentFollowup = true
  59. }
  60. return
  61. }
  62. if (event.requestId !== followupRequestId) {
  63. return
  64. }
  65. followupResult = parseEventContent(event.content)
  66. validateFollowupAnswer(followupResult)
  67. console.log(`[PASS] first result="${firstResult}"`)
  68. console.log(`[PASS] follow-up result="${followupResult}"`)
  69. if (!sentShutdown) {
  70. context.sendCommand({
  71. command: "shutdown",
  72. requestId: shutdownRequestId,
  73. })
  74. sentShutdown = true
  75. }
  76. },
  77. onTimeoutMessage() {
  78. return `timed out waiting for completion (initSeen=${initSeen}, sentFollowup=${sentFollowup}, firstResult=${Boolean(firstResult)}, followupResult=${Boolean(followupResult)})`
  79. },
  80. })
  81. }
  82. main().catch((error) => {
  83. console.error(`[FAIL] ${error instanceof Error ? error.message : String(error)}`)
  84. process.exit(1)
  85. })