json-schema.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. import type { z as z4 } from "zod/v4"
  2. import { z } from "zod"
  3. /**
  4. * Re-export Zod v4's JSONSchema type for convenience
  5. */
  6. export type JsonSchema = z4.core.JSONSchema.JSONSchema
  7. /**
  8. * Set of format values supported by OpenAI's Structured Outputs (strict mode).
  9. * Unsupported format values will be stripped during schema normalization.
  10. * @see https://platform.openai.com/docs/guides/structured-outputs#supported-schemas
  11. */
  12. const OPENAI_SUPPORTED_FORMATS = new Set([
  13. "date-time",
  14. "time",
  15. "date",
  16. "duration",
  17. "email",
  18. "hostname",
  19. "ipv4",
  20. "ipv6",
  21. "uuid",
  22. ])
  23. /**
  24. * Array-specific JSON Schema properties that must be nested inside array type variants
  25. * when converting to anyOf format (JSON Schema draft 2020-12).
  26. */
  27. const ARRAY_SPECIFIC_PROPERTIES = ["items", "minItems", "maxItems", "uniqueItems"] as const
  28. /**
  29. * Applies array-specific properties from source to target object.
  30. * Only copies properties that are defined in the source.
  31. */
  32. function applyArrayProperties(
  33. target: Record<string, unknown>,
  34. source: Record<string, unknown>,
  35. ): Record<string, unknown> {
  36. for (const prop of ARRAY_SPECIFIC_PROPERTIES) {
  37. if (source[prop] !== undefined) {
  38. target[prop] = source[prop]
  39. }
  40. }
  41. return target
  42. }
  43. /**
  44. * Zod schema for JSON Schema primitive types
  45. */
  46. const JsonSchemaPrimitiveTypeSchema = z.enum(["string", "number", "integer", "boolean", "null"])
  47. /**
  48. * All valid JSON Schema type values including object and array
  49. */
  50. const JsonSchemaTypeSchema = z.union([JsonSchemaPrimitiveTypeSchema, z.literal("object"), z.literal("array")])
  51. /**
  52. * Zod schema for JSON Schema enum values
  53. */
  54. const JsonSchemaEnumValueSchema = z.union([z.string(), z.number(), z.boolean(), z.null()])
  55. /**
  56. * Zod schema that validates tool input JSON Schema and sets `additionalProperties: false` by default.
  57. * Uses recursive parsing so the default applies to all nested schemas automatically.
  58. *
  59. * This is required by some API providers (e.g., OpenAI) for strict function calling.
  60. *
  61. * @example
  62. * ```typescript
  63. * // Validates and applies defaults in one pass - throws on invalid
  64. * const validatedSchema = ToolInputSchema.parse(schema)
  65. *
  66. * // Or use safeParse for error handling
  67. * const result = ToolInputSchema.safeParse(schema)
  68. * if (result.success) {
  69. * // result.data has additionalProperties: false by default
  70. * }
  71. * ```
  72. */
  73. export const ToolInputSchema: z.ZodType<JsonSchema> = z.lazy(() =>
  74. z
  75. .object({
  76. type: JsonSchemaTypeSchema.optional(),
  77. properties: z.record(z.string(), ToolInputSchema).optional(),
  78. items: z.union([ToolInputSchema, z.array(ToolInputSchema)]).optional(),
  79. required: z.array(z.string()).optional(),
  80. additionalProperties: z.union([z.boolean(), ToolInputSchema]).default(false),
  81. description: z.string().optional(),
  82. default: z.unknown().optional(),
  83. enum: z.array(JsonSchemaEnumValueSchema).optional(),
  84. const: JsonSchemaEnumValueSchema.optional(),
  85. anyOf: z.array(ToolInputSchema).optional(),
  86. oneOf: z.array(ToolInputSchema).optional(),
  87. allOf: z.array(ToolInputSchema).optional(),
  88. $ref: z.string().optional(),
  89. minimum: z.number().optional(),
  90. maximum: z.number().optional(),
  91. minLength: z.number().optional(),
  92. maxLength: z.number().optional(),
  93. pattern: z.string().optional(),
  94. minItems: z.number().optional(),
  95. maxItems: z.number().optional(),
  96. uniqueItems: z.boolean().optional(),
  97. })
  98. .passthrough(),
  99. )
  100. /**
  101. * Schema for type field that accepts both single types and array types (draft-07 nullable syntax).
  102. * Array types like ["string", "null"] are transformed to anyOf format for 2020-12 compliance.
  103. */
  104. const TypeFieldSchema = z.union([JsonSchemaTypeSchema, z.array(JsonSchemaTypeSchema)])
  105. /**
  106. * Internal Zod schema that normalizes tool input JSON Schema to be compliant with JSON Schema draft 2020-12.
  107. *
  108. * This schema performs three key transformations:
  109. * 1. Sets `additionalProperties: false` by default (required by OpenAI strict mode)
  110. * 2. Converts deprecated `type: ["T", "null"]` array syntax to `anyOf` format
  111. * (required by Claude on Bedrock which enforces JSON Schema draft 2020-12)
  112. * 3. Strips unsupported `format` values (e.g., "uri") for OpenAI Structured Outputs compatibility
  113. *
  114. * Uses recursive parsing so transformations apply to all nested schemas automatically.
  115. */
  116. const NormalizedToolSchemaInternal: z.ZodType<Record<string, unknown>, z.ZodTypeDef, Record<string, unknown>> = z.lazy(
  117. () =>
  118. z
  119. .object({
  120. // Accept both single type and array of types, transform array to anyOf
  121. type: TypeFieldSchema.optional(),
  122. properties: z.record(z.string(), NormalizedToolSchemaInternal).optional(),
  123. items: z.union([NormalizedToolSchemaInternal, z.array(NormalizedToolSchemaInternal)]).optional(),
  124. required: z.array(z.string()).optional(),
  125. // Don't set default here - we'll handle it conditionally in the transform
  126. additionalProperties: z.union([z.boolean(), NormalizedToolSchemaInternal]).optional(),
  127. description: z.string().optional(),
  128. default: z.unknown().optional(),
  129. enum: z.array(JsonSchemaEnumValueSchema).optional(),
  130. const: JsonSchemaEnumValueSchema.optional(),
  131. anyOf: z.array(NormalizedToolSchemaInternal).optional(),
  132. oneOf: z.array(NormalizedToolSchemaInternal).optional(),
  133. allOf: z.array(NormalizedToolSchemaInternal).optional(),
  134. $ref: z.string().optional(),
  135. minimum: z.number().optional(),
  136. maximum: z.number().optional(),
  137. minLength: z.number().optional(),
  138. maxLength: z.number().optional(),
  139. pattern: z.string().optional(),
  140. minItems: z.number().optional(),
  141. maxItems: z.number().optional(),
  142. uniqueItems: z.boolean().optional(),
  143. // Format field - unsupported values will be stripped in transform
  144. format: z.string().optional(),
  145. })
  146. .passthrough()
  147. .transform((schema) => {
  148. const {
  149. type,
  150. required,
  151. properties,
  152. additionalProperties,
  153. format,
  154. items,
  155. minItems,
  156. maxItems,
  157. uniqueItems,
  158. ...rest
  159. } = schema
  160. const result: Record<string, unknown> = { ...rest }
  161. // Determine if this schema represents an object type
  162. const isObjectType =
  163. type === "object" || (Array.isArray(type) && type.includes("object")) || properties !== undefined
  164. // Collect array-specific properties for potential use in type handling
  165. const arrayProps = { items, minItems, maxItems, uniqueItems }
  166. // If type is an array, convert to anyOf format (JSON Schema 2020-12)
  167. // Array-specific properties must be moved inside the array variant
  168. if (Array.isArray(type)) {
  169. result.anyOf = type.map((t) => {
  170. if (t === "array") {
  171. return applyArrayProperties({ type: t }, arrayProps)
  172. }
  173. return { type: t }
  174. })
  175. } else if (type !== undefined) {
  176. result.type = type
  177. // For single "array" type, preserve array-specific properties at root
  178. if (type === "array") {
  179. applyArrayProperties(result, arrayProps)
  180. }
  181. }
  182. // Strip unsupported format values for OpenAI compatibility
  183. // Only include format if it's a supported value
  184. if (format && OPENAI_SUPPORTED_FORMATS.has(format)) {
  185. result.format = format
  186. }
  187. // Handle properties and required for strict mode
  188. if (properties) {
  189. result.properties = properties
  190. if (required) {
  191. const propertyKeys = Object.keys(properties)
  192. const filteredRequired = required.filter((key) => propertyKeys.includes(key))
  193. if (filteredRequired.length > 0) {
  194. result.required = filteredRequired
  195. }
  196. }
  197. } else if (result.type === "object" || (Array.isArray(type) && type.includes("object"))) {
  198. // For type: "object" without properties, add empty properties
  199. // This is required by OpenAI strict mode
  200. result.properties = {}
  201. }
  202. // Only add additionalProperties for object-type schemas
  203. // Adding it to primitive types (string, number, etc.) is invalid JSON Schema
  204. if (isObjectType) {
  205. // For strict mode compatibility, we MUST set additionalProperties to false
  206. // Even if the original schema had {} (any) or true, we force false because
  207. // OpenAI/OpenRouter strict mode rejects schemas with additionalProperties != false
  208. // The original schema intent (allowing arbitrary properties) is incompatible with strict mode
  209. result.additionalProperties = false
  210. }
  211. // For non-object types, don't include additionalProperties at all
  212. return result
  213. }),
  214. )
  215. /**
  216. * Flattens a schema with top-level anyOf/oneOf/allOf to a simple object schema.
  217. * This is needed because some providers (OpenRouter, Claude) don't support
  218. * schema composition keywords at the top level of tool input schemas.
  219. *
  220. * @param schema - The schema to flatten
  221. * @returns A flattened schema without top-level composition keywords
  222. */
  223. function flattenTopLevelComposition(schema: Record<string, unknown>): Record<string, unknown> {
  224. const { anyOf, oneOf, allOf, ...rest } = schema
  225. // If no top-level composition keywords, return as-is
  226. if (!anyOf && !oneOf && !allOf) {
  227. return schema
  228. }
  229. // Get the composition array to process (prefer anyOf, then oneOf, then allOf)
  230. const compositionArray = (anyOf || oneOf || allOf) as Record<string, unknown>[] | undefined
  231. if (!compositionArray || !Array.isArray(compositionArray) || compositionArray.length === 0) {
  232. return schema
  233. }
  234. // Find the first non-null object type variant to use as the base
  235. // This preserves the most information while making the schema compatible
  236. const objectVariant = compositionArray.find(
  237. (variant) =>
  238. typeof variant === "object" &&
  239. variant !== null &&
  240. (variant.type === "object" || variant.properties !== undefined),
  241. )
  242. if (objectVariant) {
  243. // Merge remaining properties with the object variant
  244. return { ...rest, ...objectVariant }
  245. }
  246. // If no object variant found, create a generic object schema
  247. // This is a fallback that allows any object structure
  248. return {
  249. type: "object",
  250. additionalProperties: false,
  251. ...rest,
  252. }
  253. }
  254. /**
  255. * Normalizes a tool input JSON Schema to be compliant with JSON Schema draft 2020-12.
  256. *
  257. * This function performs four key transformations:
  258. * 1. Sets `additionalProperties: false` by default (required by OpenAI strict mode)
  259. * 2. Converts deprecated `type: ["T", "null"]` array syntax to `anyOf` format
  260. * (required by Claude on Bedrock which enforces JSON Schema draft 2020-12)
  261. * 3. Strips unsupported `format` values (e.g., "uri") for OpenAI Structured Outputs compatibility
  262. * 4. Flattens top-level anyOf/oneOf/allOf (required by OpenRouter/Claude which don't support
  263. * schema composition keywords at the top level)
  264. *
  265. * Uses recursive parsing so transformations apply to all nested schemas automatically.
  266. *
  267. * @param schema - The JSON Schema to normalize
  268. * @returns A normalized schema object that is JSON Schema draft 2020-12 compliant
  269. */
  270. export function normalizeToolSchema(schema: Record<string, unknown>): Record<string, unknown> {
  271. if (typeof schema !== "object" || schema === null) {
  272. return schema
  273. }
  274. // First, flatten any top-level composition keywords before normalizing
  275. const flattenedSchema = flattenTopLevelComposition(schema)
  276. const result = NormalizedToolSchemaInternal.safeParse(flattenedSchema)
  277. return result.success ? result.data : flattenedSchema
  278. }