// api.ts

import {
	type ModelInfo,
	type ProviderSettings,
	type DynamicProvider,
	type LocalProvider,
	ANTHROPIC_DEFAULT_MAX_TOKENS,
	CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS,
	isDynamicProvider,
	isLocalProvider,
} from "@roo-code/types"

// ApiHandlerOptions

// Extend ProviderSettings (minus apiProvider) with handler-specific toggles.
export type ApiHandlerOptions = Omit<ProviderSettings, "apiProvider"> & {
	/**
	 * When true and using OpenAI Responses API models that support reasoning summaries,
	 * include reasoning.summary: "auto" so the API returns summaries (we already parse
	 * and surface them). Defaults to true; set to false to disable summaries.
	 */
	enableResponsesReasoningSummary?: boolean

	/**
	 * Optional override for Ollama's num_ctx parameter.
	 * When set, this value will be used in Ollama chat requests.
	 * When undefined, Ollama will use the model's default num_ctx from the Modelfile.
	 */
	ollamaNumCtx?: number
}
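
// Shape sketch (illustrative, not part of the module): what a caller might pass
// for an Ollama-backed handler. This assumes the remaining ProviderSettings
// fields are optional, as the two handler-specific toggles above are.
//
//   const options: ApiHandlerOptions = {
//       enableResponsesReasoningSummary: false, // opt out of Responses API reasoning summaries
//       ollamaNumCtx: 8192, // pin num_ctx instead of deferring to the Modelfile default
//   }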

// RouterName

export type RouterName = DynamicProvider | LocalProvider

export const isRouterName = (value: string): value is RouterName =>
	isDynamicProvider(value) || isLocalProvider(value)

export function toRouterName(value?: string): RouterName {
	if (value && isRouterName(value)) {
		return value
	}

	throw new Error(`Invalid router name: ${value}`)
}

// RouterModels

export type ModelRecord = Record<string, ModelInfo>

export type RouterModels = Record<RouterName, ModelRecord>
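
// Usage sketch (the `modelsByRouter` value and the raw string are hypothetical,
// shown only to illustrate the narrowing these helpers provide):
//
//   declare const modelsByRouter: RouterModels
//   const raw: string = "openrouter"
//   if (isRouterName(raw)) {
//       const models: ModelRecord = modelsByRouter[raw] // raw narrowed to RouterName
//   }
//   toRouterName("not-a-router") // throws: Invalid router name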

// Reasoning

export const shouldUseReasoningBudget = ({
	model,
	settings,
}: {
	model: ModelInfo
	settings?: ProviderSettings
}): boolean => !!model.requiredReasoningBudget || (!!model.supportsReasoningBudget && !!settings?.enableReasoningEffort)
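
// Illustrative checks (the model literals are assumptions cast to ModelInfo):
// a model that merely *supports* a budget only opts in when the user enables
// reasoning effort, while a model that *requires* one always opts in.
//
//   shouldUseReasoningBudget({ model: { requiredReasoningBudget: true } as ModelInfo }) // true
//   shouldUseReasoningBudget({ model: { supportsReasoningBudget: true } as ModelInfo }) // false
//   shouldUseReasoningBudget({
//       model: { supportsReasoningBudget: true } as ModelInfo,
//       settings: { enableReasoningEffort: true } as ProviderSettings,
//   }) // true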

export const shouldUseReasoningEffort = ({
	model,
	settings,
}: {
	model: ModelInfo
	settings?: ProviderSettings
}): boolean => {
	// Explicit off switch
	if (settings?.enableReasoningEffort === false) return false

	// Selected effort from settings or model default
	const selectedEffort = (settings?.reasoningEffort ?? (model as any).reasoningEffort) as
		| "disable"
		| "none"
		| "minimal"
		| "low"
		| "medium"
		| "high"
		| undefined

	// "disable" explicitly omits reasoning
	if (selectedEffort === "disable") return false

	const cap = model.supportsReasoningEffort as unknown

	// Capability array: use only if selected is included (treat "none"/"minimal" as valid)
	if (Array.isArray(cap)) {
		return !!selectedEffort && (cap as ReadonlyArray<string>).includes(selectedEffort as string)
	}

	// Boolean capability: true → require a selected effort
	if (model.supportsReasoningEffort === true) {
		return !!selectedEffort
	}

	// Not explicitly supported: only allow when the model itself defines a default effort.
	// Ignore settings-only selections when capability is absent/false.
	const modelDefaultEffort = (model as any).reasoningEffort as
		| "none"
		| "minimal"
		| "low"
		| "medium"
		| "high"
		| undefined

	return !!modelDefaultEffort
}
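
// Worked example of the three capability paths (illustrative only; the literal
// shapes are assumptions cast to ModelInfo/ProviderSettings, not real models):
//
//   const model = { supportsReasoningEffort: ["low", "high"] } as unknown as ModelInfo
//   shouldUseReasoningEffort({ model, settings: { reasoningEffort: "high" } as ProviderSettings })   // true: in the capability array
//   shouldUseReasoningEffort({ model, settings: { reasoningEffort: "medium" } as ProviderSettings }) // false: not in the array
//   shouldUseReasoningEffort({ model: {} as ModelInfo })                                             // false: no capability, no model default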

export const DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS = 16_384
export const DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS = 8_192
export const GEMINI_25_PRO_MIN_THINKING_TOKENS = 128

// Max Tokens

export const getModelMaxOutputTokens = ({
	modelId,
	model,
	settings,
	format,
}: {
	modelId: string
	model: ModelInfo
	settings?: ProviderSettings
	format?: "anthropic" | "openai" | "gemini" | "openrouter"
}): number | undefined => {
	// Check for Claude Code specific max output tokens setting
	if (settings?.apiProvider === "claude-code") {
		return settings.claudeCodeMaxOutputTokens || CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS
	}

	if (shouldUseReasoningBudget({ model, settings })) {
		return settings?.modelMaxTokens || DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS
	}

	const isAnthropicContext =
		modelId.includes("claude") ||
		format === "anthropic" ||
		(format === "openrouter" && modelId.startsWith("anthropic/"))

	// For "Hybrid" reasoning models, discard the model's actual maxTokens for Anthropic contexts
	if (model.supportsReasoningBudget && isAnthropicContext) {
		return ANTHROPIC_DEFAULT_MAX_TOKENS
	}

	// For Anthropic contexts, always ensure a maxTokens value is set
	if (isAnthropicContext && (!model.maxTokens || model.maxTokens === 0)) {
		return ANTHROPIC_DEFAULT_MAX_TOKENS
	}

	// If model has explicit maxTokens, clamp it to 20% of the context window.
	// Exception: GPT-5 models should use their exact configured max output tokens.
	if (model.maxTokens) {
		// Check if this is a GPT-5 model (case-insensitive)
		const isGpt5Model = modelId.toLowerCase().includes("gpt-5")

		// GPT-5 models bypass the 20% cap and use their full configured max tokens
		if (isGpt5Model) {
			return model.maxTokens
		}

		// All other models are clamped to 20% of context window
		return Math.min(model.maxTokens, Math.ceil(model.contextWindow * 0.2))
	}

	// For non-Anthropic formats without explicit maxTokens, return undefined
	if (format) {
		return undefined
	}

	// Default fallback
	return ANTHROPIC_DEFAULT_MAX_TOKENS
}
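
// Worked example of the clamping rule (values are made up for illustration):
// a model with maxTokens 64_000 and contextWindow 200_000 is capped at
// Math.ceil(200_000 * 0.2) = 40_000, unless its id contains "gpt-5", in which
// case the configured 64_000 is returned unchanged.
//
//   const model = { maxTokens: 64_000, contextWindow: 200_000 } as ModelInfo
//   getModelMaxOutputTokens({ modelId: "some-model", model }) // 40_000
//   getModelMaxOutputTokens({ modelId: "gpt-5-mini", model }) // 64_000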

// GetModelsOptions

// Allow callers to always pass apiKey/baseUrl without excess property errors,
// while still enforcing required fields per provider where applicable.
type CommonFetchParams = {
	apiKey?: string
	baseUrl?: string
}

// Exhaustive, value-level map for all dynamic providers.
// If a new dynamic provider is added in packages/types, this will fail to compile
// until a corresponding entry is added here.
const dynamicProviderExtras = {
	openrouter: {} as {}, // eslint-disable-line @typescript-eslint/no-empty-object-type
	"vercel-ai-gateway": {} as {}, // eslint-disable-line @typescript-eslint/no-empty-object-type
	huggingface: {} as {}, // eslint-disable-line @typescript-eslint/no-empty-object-type
	litellm: {} as { apiKey: string; baseUrl: string },
	deepinfra: {} as { apiKey?: string; baseUrl?: string },
	"io-intelligence": {} as { apiKey: string },
	requesty: {} as { apiKey?: string; baseUrl?: string },
	unbound: {} as { apiKey?: string },
	ollama: {} as {}, // eslint-disable-line @typescript-eslint/no-empty-object-type
	lmstudio: {} as {}, // eslint-disable-line @typescript-eslint/no-empty-object-type
	roo: {} as { apiKey?: string; baseUrl?: string },
	chutes: {} as { apiKey?: string },
} as const satisfies Record<RouterName, object>

// Build the dynamic options union from the map, intersected with CommonFetchParams
// so extra fields are always allowed while required ones are enforced.
export type GetModelsOptions = {
	[P in keyof typeof dynamicProviderExtras]: ({ provider: P } & (typeof dynamicProviderExtras)[P]) & CommonFetchParams
}[RouterName]
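
// How the union behaves in practice (sketch; the provider entries mirror the
// map above, and the literal values are placeholders):
//
//   const ok: GetModelsOptions = { provider: "litellm", apiKey: "k", baseUrl: "http://localhost:4000" }
//   const alsoOk: GetModelsOptions = { provider: "openrouter", apiKey: "optional-extra" } // CommonFetchParams allows this
//   // const bad: GetModelsOptions = { provider: "litellm" } // compile error: apiKey and baseUrl are required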