// structured-output-integration.test.ts
  1. import { describe, expect, test } from "bun:test"
  2. import path from "path"
  3. import { Session } from "../../src/session"
  4. import { SessionPrompt } from "../../src/session/prompt"
  5. import { Log } from "../../src/util/log"
  6. import { Instance } from "../../src/project/instance"
  7. import { MessageV2 } from "../../src/session/message-v2"
  // Root of the package under test: two directory levels above this file's folder.
  const projectRoot = path.join(__dirname, "..", "..")

  // Initialise the logger with `print: false` before any test runs.
  Log.init({ print: false })

  // True when an Anthropic API key is present in the environment; the tests
  // registered with `test.skipIf(!hasApiKey)` are skipped otherwise.
  const hasApiKey = Boolean(process.env.ANTHROPIC_API_KEY)
  /**
   * Test helper: runs `fn` through `Instance.provide` with `projectRoot` as the
   * instance directory.
   *
   * @param fn - Async callback to execute within the Instance context.
   * @returns Whatever `Instance.provide` yields for `fn`.
   */
  async function withInstance<T>(fn: () => Promise<T>): Promise<T> {
    const scope = { directory: projectRoot, fn }
    return Instance.provide(scope)
  }
  19. describe("StructuredOutput Integration", () => {
  // Live-API check (skipped without an API key): a flat JSON schema request
  // should come back as a parsed object on the assistant message.
  test.skipIf(!hasApiKey)(
    "produces structured output with simple schema",
    async () => {
      await withInstance(async () => {
        const created = await Session.create({ title: "Structured Output Test" })

        const { info } = await SessionPrompt.prompt({
          sessionID: created.id,
          parts: [{ type: "text", text: "What is 2 + 2? Provide a simple answer." }],
          format: {
            type: "json_schema",
            // Only `answer` is required by the schema; `explanation` is optional.
            schema: {
              type: "object",
              properties: {
                answer: { type: "number", description: "The numerical answer" },
                explanation: { type: "string", description: "Brief explanation" },
              },
              required: ["answer"],
            },
            retryCount: 0,
          },
        })

        // Structured output is only carried by assistant messages: assert the
        // role, then narrow the message union for the type checker.
        expect(info.role).toBe("assistant")
        if (info.role !== "assistant") return

        expect(info.structured).toBeDefined()
        expect(typeof info.structured).toBe("object")

        const payload = info.structured as any
        expect(payload.answer).toBe(4)

        // The reply must not carry an error.
        expect(info.error).toBeUndefined()

        // No cleanup on purpose: removing the session would race with the
        // background SessionSummary.summarize.
      })
    },
    60000,
  )
  // Live-API check (skipped without an API key): a schema with a nested object
  // and an optional array should come back as a parsed object on the assistant
  // message.
  test.skipIf(!hasApiKey)(
    "produces structured output with nested objects",
    async () => {
      await withInstance(async () => {
        const session = await Session.create({ title: "Nested Schema Test" })

        const result = await SessionPrompt.prompt({
          sessionID: session.id,
          parts: [
            {
              type: "text",
              text: "Tell me about Anthropic company in a structured format.",
            },
          ],
          format: {
            type: "json_schema",
            schema: {
              type: "object",
              properties: {
                company: {
                  type: "object",
                  properties: {
                    name: { type: "string" },
                    founded: { type: "number" },
                  },
                  required: ["name", "founded"],
                },
                products: {
                  type: "array",
                  items: { type: "string" },
                },
              },
              // `products` is intentionally optional; only `company` must be present.
              required: ["company"],
            },
            retryCount: 0,
          },
        })

        // Verify structured output was captured (only on assistant messages).
        expect(result.info.role).toBe("assistant")
        if (result.info.role === "assistant") {
          expect(result.info.structured).toBeDefined()

          // Shape mirrors the schema requested above, instead of an untyped `any`.
          const output = result.info.structured as {
            company: { name: string; founded: number }
            products?: string[]
          }

          expect(output.company).toBeDefined()
          // The name is free text produced by the model (e.g. "Anthropic PBC"),
          // so assert on the substring rather than an exact match to avoid
          // spurious failures.
          expect(output.company.name).toContain("Anthropic")
          expect(typeof output.company.founded).toBe("number")

          // `products` is not required by the schema; validate it only when present.
          if (output.products) {
            expect(Array.isArray(output.products)).toBe(true)
          }

          // Verify no error was set.
          expect(result.info.error).toBeUndefined()
        }

        // No cleanup on purpose: removing the session would race with the
        // background SessionSummary.summarize.
      })
    },
    60000,
  )
  // Live-API check (skipped without an API key): with `format.type` set to
  // "text" the assistant message carries no structured payload.
  test.skipIf(!hasApiKey)(
    "works with text outputFormat (default)",
    async () => {
      await withInstance(async () => {
        const { id: sessionID } = await Session.create({ title: "Text Output Test" })

        const reply = await SessionPrompt.prompt({
          sessionID,
          parts: [{ type: "text", text: "Say hello." }],
          format: { type: "text" },
        })
        const { info, parts } = reply

        // Text mode: neither a structured result nor an error is expected.
        expect(info.role).toBe("assistant")
        if (info.role === "assistant") {
          expect(info.structured).toBeUndefined()
          expect(info.error).toBeUndefined()
        }

        // The response must contain at least one part.
        expect(parts.length).toBeGreaterThan(0)

        // No cleanup on purpose: removing the session would race with the
        // background SessionSummary.summarize.
      })
    },
    60000,
  )
  // Live-API check (skipped without an API key): the `format` passed to the
  // prompt should be readable back from the stored user message.
  test.skipIf(!hasApiKey)(
    "stores outputFormat on user message",
    async () => {
      await withInstance(async () => {
        const { id: sessionID } = await Session.create({ title: "OutputFormat Storage Test" })

        await SessionPrompt.prompt({
          sessionID,
          parts: [{ type: "text", text: "What is 1 + 1?" }],
          format: {
            type: "json_schema",
            schema: {
              type: "object",
              properties: {
                result: { type: "number" },
              },
              required: ["result"],
            },
            retryCount: 3,
          },
        })

        // Load the session history and pick the first message authored by the user.
        const history = await Session.messages({ sessionID })
        const fromUser = history.find((entry) => entry.info.role === "user")

        // The stored user message must carry the format that was requested.
        expect(fromUser).toBeDefined()
        if (fromUser?.info.role === "user") {
          const { format } = fromUser.info
          expect(format).toBeDefined()
          expect(format?.type).toBe("json_schema")
          // Narrow to the json_schema variant before reading `retryCount`.
          if (format?.type === "json_schema") {
            expect(format.retryCount).toBe(3)
          }
        }

        // No cleanup on purpose: removing the session would race with the
        // background SessionSummary.summarize.
      })
    },
    60000,
  )
  // Offline unit test: checks the name and data exposed by
  // MessageV2.StructuredOutputError, both directly and through toObject().
  test("unit test: StructuredOutputError is properly structured", () => {
    const retries = 3
    const failure = new MessageV2.StructuredOutputError({
      message: "Failed to produce valid structured output after 3 attempts",
      retries,
    })

    // Fields read straight off the error instance.
    expect(failure.name).toBe("StructuredOutputError")
    expect(failure.data.message).toContain("3 attempts")
    expect(failure.data.retries).toBe(retries)

    // The same fields must be present on the object form.
    const serialized = failure.toObject()
    expect(serialized.name).toBe("StructuredOutputError")
    expect(serialized.data.retries).toBe(retries)
  })
  204. })