// api.ts — API provider identifiers, handler options, and static model metadata.
  1. import * as vscode from "vscode"
/**
 * Identifier of every supported LLM API provider. Each literal selects the
 * corresponding API handler implementation at runtime.
 */
export type ApiProvider =
	| "anthropic"
	| "glama"
	| "openrouter"
	| "bedrock"
	| "vertex"
	| "openai"
	| "ollama"
	| "lmstudio"
	| "gemini"
	| "openai-native"
	| "deepseek"
	| "vscode-lm"
	| "mistral"
	| "unbound"
	| "requesty"
/**
 * Provider-specific configuration accepted by the API handlers.
 * All fields are optional; each handler reads only the subset relevant to
 * its provider.
 */
export interface ApiHandlerOptions {
	apiModelId?: string
	apiKey?: string // anthropic
	anthropicBaseUrl?: string
	// VS Code Language Model API
	vsCodeLmModelSelector?: vscode.LanguageModelChatSelector
	// Glama
	glamaModelId?: string
	glamaModelInfo?: ModelInfo
	glamaApiKey?: string
	// OpenRouter
	openRouterApiKey?: string
	openRouterModelId?: string
	openRouterModelInfo?: ModelInfo
	openRouterBaseUrl?: string
	// AWS Bedrock
	awsAccessKey?: string
	awsSecretKey?: string
	awsSessionToken?: string
	awsRegion?: string
	awsUseCrossRegionInference?: boolean
	awsUsePromptCache?: boolean
	// NOTE(review): lowercase "prompt" is inconsistent with the other aws* keys,
	// but renaming would break existing callers/persisted configs — kept as-is.
	awspromptCacheId?: string
	awsProfile?: string
	awsUseProfile?: boolean
	// GCP Vertex AI
	vertexProjectId?: string
	vertexRegion?: string
	// OpenAI-compatible endpoints
	openAiBaseUrl?: string
	openAiApiKey?: string
	openAiModelId?: string
	openAiCustomModelInfo?: ModelInfo
	openAiUseAzure?: boolean
	// Ollama (local)
	ollamaModelId?: string
	ollamaBaseUrl?: string
	// LM Studio (local)
	lmStudioModelId?: string
	lmStudioBaseUrl?: string
	// Google Gemini
	geminiApiKey?: string
	// OpenAI first-party API
	openAiNativeApiKey?: string
	// Mistral
	mistralApiKey?: string
	mistralCodestralUrl?: string // New option for Codestral URL
	// Azure OpenAI
	azureApiVersion?: string
	openRouterUseMiddleOutTransform?: boolean
	openAiStreamingEnabled?: boolean
	setAzureApiVersion?: boolean
	// DeepSeek
	deepSeekBaseUrl?: string
	deepSeekApiKey?: string
	includeMaxTokens?: boolean
	// Unbound Security gateway
	unboundApiKey?: string
	unboundModelId?: string
	unboundModelInfo?: ModelInfo
	// Requesty gateway
	requestyApiKey?: string
	requestyModelId?: string
	requestyModelInfo?: ModelInfo
	// Sampling temperature override applied across providers
	modelTemperature?: number
}
/** Complete API configuration: handler options plus the selected provider. */
export type ApiConfiguration = ApiHandlerOptions & {
	apiProvider?: ApiProvider
	id?: string // stable unique identifier
}
// Models

/**
 * Static metadata describing a model's capabilities and pricing.
 * Prices are in USD per million tokens (see the per-model comments below).
 */
export interface ModelInfo {
	maxTokens?: number // maximum output tokens per response
	contextWindow: number
	supportsImages?: boolean
	supportsComputerUse?: boolean
	supportsPromptCache: boolean // this value is hardcoded for now
	inputPrice?: number
	outputPrice?: number
	cacheWritesPrice?: number
	cacheReadsPrice?: number
	description?: string
	reasoningEffort?: "low" | "medium" | "high"
}
// Anthropic
// https://docs.anthropic.com/en/docs/about-claude/models

export type AnthropicModelId = keyof typeof anthropicModels
export const anthropicDefaultModelId: AnthropicModelId = "claude-3-5-sonnet-20241022"

// Models available via the Anthropic first-party API, keyed by model ID.
// All entries support prompt caching; prices are USD per million tokens.
export const anthropicModels = {
	"claude-3-5-sonnet-20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0, // $3 per million input tokens
		outputPrice: 15.0, // $15 per million output tokens
		cacheWritesPrice: 3.75, // $3.75 per million tokens
		cacheReadsPrice: 0.3, // $0.30 per million tokens
	},
	"claude-3-5-haiku-20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.0,
		outputPrice: 5.0,
		cacheWritesPrice: 1.25,
		cacheReadsPrice: 0.1,
	},
	"claude-3-opus-20240229": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 15.0,
		outputPrice: 75.0,
		cacheWritesPrice: 18.75,
		cacheReadsPrice: 1.5,
	},
	"claude-3-haiku-20240307": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.25,
		outputPrice: 1.25,
		cacheWritesPrice: 0.3,
		cacheReadsPrice: 0.03,
	},
} as const satisfies Record<string, ModelInfo> // as const assertion makes the object deeply readonly
// AWS Bedrock
// https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html

/**
 * One content item of a Bedrock conversation message. Which optional fields
 * are populated depends on `type`.
 */
export interface MessageContent {
	type: "text" | "image" | "video" | "tool_use" | "tool_result"
	text?: string
	// Inline image payload (type === "image")
	source?: {
		type: "base64"
		data: string | Uint8Array // string for Anthropic, Uint8Array for Bedrock
		media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp"
	}
	// Video specific fields
	format?: string
	s3Location?: {
		uri: string
		bucketOwner?: string
	}
	// Tool use and result fields
	// NOTE(review): `any` here disables checking on tool payloads; `unknown`
	// would be safer but changes the interface for existing readers — kept.
	toolUseId?: string
	name?: string
	input?: any
	output?: any // Used for tool_result type
}
  156. export type BedrockModelId = keyof typeof bedrockModels
  157. export const bedrockDefaultModelId: BedrockModelId = "anthropic.claude-3-5-sonnet-20241022-v2:0"
  158. export const bedrockModels = {
  159. "amazon.nova-pro-v1:0": {
  160. maxTokens: 5000,
  161. contextWindow: 300_000,
  162. supportsImages: true,
  163. supportsComputerUse: false,
  164. supportsPromptCache: false,
  165. inputPrice: 0.8,
  166. outputPrice: 3.2,
  167. cacheWritesPrice: 0.8, // per million tokens
  168. cacheReadsPrice: 0.2, // per million tokens
  169. },
  170. "amazon.nova-lite-v1:0": {
  171. maxTokens: 5000,
  172. contextWindow: 300_000,
  173. supportsImages: true,
  174. supportsComputerUse: false,
  175. supportsPromptCache: false,
  176. inputPrice: 0.06,
  177. outputPrice: 0.024,
  178. cacheWritesPrice: 0.06, // per million tokens
  179. cacheReadsPrice: 0.015, // per million tokens
  180. },
  181. "amazon.nova-micro-v1:0": {
  182. maxTokens: 5000,
  183. contextWindow: 128_000,
  184. supportsImages: false,
  185. supportsComputerUse: false,
  186. supportsPromptCache: false,
  187. inputPrice: 0.035,
  188. outputPrice: 0.14,
  189. cacheWritesPrice: 0.035, // per million tokens
  190. cacheReadsPrice: 0.00875, // per million tokens
  191. },
  192. "anthropic.claude-3-5-sonnet-20241022-v2:0": {
  193. maxTokens: 8192,
  194. contextWindow: 200_000,
  195. supportsImages: true,
  196. supportsComputerUse: true,
  197. supportsPromptCache: false,
  198. inputPrice: 3.0,
  199. outputPrice: 15.0,
  200. cacheWritesPrice: 3.75, // per million tokens
  201. cacheReadsPrice: 0.3, // per million tokens
  202. },
  203. "anthropic.claude-3-5-haiku-20241022-v1:0": {
  204. maxTokens: 8192,
  205. contextWindow: 200_000,
  206. supportsImages: false,
  207. supportsPromptCache: false,
  208. inputPrice: 1.0,
  209. outputPrice: 5.0,
  210. cacheWritesPrice: 1.0,
  211. cacheReadsPrice: 0.08,
  212. },
  213. "anthropic.claude-3-5-sonnet-20240620-v1:0": {
  214. maxTokens: 8192,
  215. contextWindow: 200_000,
  216. supportsImages: true,
  217. supportsPromptCache: false,
  218. inputPrice: 3.0,
  219. outputPrice: 15.0,
  220. },
  221. "anthropic.claude-3-opus-20240229-v1:0": {
  222. maxTokens: 4096,
  223. contextWindow: 200_000,
  224. supportsImages: true,
  225. supportsPromptCache: false,
  226. inputPrice: 15.0,
  227. outputPrice: 75.0,
  228. },
  229. "anthropic.claude-3-sonnet-20240229-v1:0": {
  230. maxTokens: 4096,
  231. contextWindow: 200_000,
  232. supportsImages: true,
  233. supportsPromptCache: false,
  234. inputPrice: 3.0,
  235. outputPrice: 15.0,
  236. },
  237. "anthropic.claude-3-haiku-20240307-v1:0": {
  238. maxTokens: 4096,
  239. contextWindow: 200_000,
  240. supportsImages: true,
  241. supportsPromptCache: false,
  242. inputPrice: 0.25,
  243. outputPrice: 1.25,
  244. },
  245. "meta.llama3-3-70b-instruct-v1:0": {
  246. maxTokens: 8192,
  247. contextWindow: 128_000,
  248. supportsImages: false,
  249. supportsComputerUse: false,
  250. supportsPromptCache: false,
  251. inputPrice: 0.72,
  252. outputPrice: 0.72,
  253. },
  254. "meta.llama3-2-90b-instruct-v1:0": {
  255. maxTokens: 8192,
  256. contextWindow: 128_000,
  257. supportsImages: true,
  258. supportsComputerUse: false,
  259. supportsPromptCache: false,
  260. inputPrice: 0.72,
  261. outputPrice: 0.72,
  262. },
  263. "meta.llama3-2-11b-instruct-v1:0": {
  264. maxTokens: 8192,
  265. contextWindow: 128_000,
  266. supportsImages: true,
  267. supportsComputerUse: false,
  268. supportsPromptCache: false,
  269. inputPrice: 0.16,
  270. outputPrice: 0.16,
  271. },
  272. "meta.llama3-2-3b-instruct-v1:0": {
  273. maxTokens: 8192,
  274. contextWindow: 128_000,
  275. supportsImages: false,
  276. supportsComputerUse: false,
  277. supportsPromptCache: false,
  278. inputPrice: 0.15,
  279. outputPrice: 0.15,
  280. },
  281. "meta.llama3-2-1b-instruct-v1:0": {
  282. maxTokens: 8192,
  283. contextWindow: 128_000,
  284. supportsImages: false,
  285. supportsComputerUse: false,
  286. supportsPromptCache: false,
  287. inputPrice: 0.1,
  288. outputPrice: 0.1,
  289. },
  290. "meta.llama3-1-405b-instruct-v1:0": {
  291. maxTokens: 8192,
  292. contextWindow: 128_000,
  293. supportsImages: false,
  294. supportsComputerUse: false,
  295. supportsPromptCache: false,
  296. inputPrice: 2.4,
  297. outputPrice: 2.4,
  298. },
  299. "meta.llama3-1-70b-instruct-v1:0": {
  300. maxTokens: 8192,
  301. contextWindow: 128_000,
  302. supportsImages: false,
  303. supportsComputerUse: false,
  304. supportsPromptCache: false,
  305. inputPrice: 0.72,
  306. outputPrice: 0.72,
  307. },
  308. "meta.llama3-1-8b-instruct-v1:0": {
  309. maxTokens: 8192,
  310. contextWindow: 8_000,
  311. supportsImages: false,
  312. supportsComputerUse: false,
  313. supportsPromptCache: false,
  314. inputPrice: 0.22,
  315. outputPrice: 0.22,
  316. },
  317. "meta.llama3-70b-instruct-v1:0": {
  318. maxTokens: 2048,
  319. contextWindow: 8_000,
  320. supportsImages: false,
  321. supportsComputerUse: false,
  322. supportsPromptCache: false,
  323. inputPrice: 2.65,
  324. outputPrice: 3.5,
  325. },
  326. "meta.llama3-8b-instruct-v1:0": {
  327. maxTokens: 2048,
  328. contextWindow: 4_000,
  329. supportsImages: false,
  330. supportsComputerUse: false,
  331. supportsPromptCache: false,
  332. inputPrice: 0.3,
  333. outputPrice: 0.6,
  334. },
  335. } as const satisfies Record<string, ModelInfo>
// Glama
// https://glama.ai/models
export const glamaDefaultModelId = "anthropic/claude-3-5-sonnet"

// Static metadata for the default Glama model (Claude 3.5 Sonnet pricing/capabilities).
export const glamaDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
	description:
		"The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._",
}
// Requesty
// Static metadata for the default Requesty model.
// NOTE(review): this description (including the OpenRouter-specific wording) is
// byte-identical to the Glama/OpenRouter defaults — presumably copy-pasted; confirm intended.
export const requestyDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
	description:
		"The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._",
}
export const requestyDefaultModelId = "anthropic/claude-3-5-sonnet"
// OpenRouter
// https://openrouter.ai/models?order=newest&supported_parameters=tools
export const openRouterDefaultModelId = "anthropic/claude-3.5-sonnet:beta" // will always exist in openRouterModels

// Static metadata for the default OpenRouter model (Claude 3.5 Sonnet, self-moderated endpoint).
export const openRouterDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
	description:
		"The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._",
}
// Vertex AI
// https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude

export type VertexModelId = keyof typeof vertexModels
export const vertexDefaultModelId: VertexModelId = "claude-3-5-sonnet-v2@20241022"

// Anthropic Claude models served through GCP Vertex AI, keyed by Vertex model ID.
// Prompt caching is disabled for every entry here; prices are USD per million tokens.
export const vertexModels = {
	"claude-3-5-sonnet-v2@20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: false,
		inputPrice: 3.0,
		outputPrice: 15.0,
	},
	"claude-3-5-sonnet@20240620": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 3.0,
		outputPrice: 15.0,
	},
	"claude-3-5-haiku@20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.0,
		outputPrice: 5.0,
	},
	"claude-3-opus@20240229": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 15.0,
		outputPrice: 75.0,
	},
	"claude-3-haiku@20240307": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.25,
		outputPrice: 1.25,
	},
} as const satisfies Record<string, ModelInfo>
// Fallback metadata for user-configured OpenAI-compatible endpoints whose real
// model info is unknown.
// NOTE(review): maxTokens -1 looks like a "no explicit limit" sentinel — confirm
// how the OpenAI handler interprets it.
export const openAiModelInfoSaneDefaults: ModelInfo = {
	maxTokens: -1,
	contextWindow: 128_000,
	supportsImages: true,
	supportsPromptCache: false,
	inputPrice: 0,
	outputPrice: 0,
}
// Fallback metadata for Requesty models whose real model info is unknown;
// values mirror openAiModelInfoSaneDefaults (same -1 maxTokens sentinel).
export const requestyModelInfoSaneDefaults: ModelInfo = {
	maxTokens: -1,
	contextWindow: 128_000,
	supportsImages: true,
	supportsPromptCache: false,
	inputPrice: 0,
	outputPrice: 0,
}
// Gemini
// https://ai.google.dev/gemini-api/docs/models/gemini

export type GeminiModelId = keyof typeof geminiModels
export const geminiDefaultModelId: GeminiModelId = "gemini-2.0-flash-001"

// Google Gemini models, keyed by model ID.
// NOTE(review): all prices are 0 — presumably these experimental/preview models
// were free at the time this table was written; verify before relying on cost math.
export const geminiModels = {
	"gemini-2.0-flash-001": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-lite-preview-02-05": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-pro-exp-02-05": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-thinking-exp-01-21": {
		maxTokens: 65_536,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-thinking-exp-1219": {
		maxTokens: 8192,
		contextWindow: 32_767,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-exp": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-002": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-exp-0827": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-8b-exp-0827": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-pro-002": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-pro-exp-0827": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-exp-1206": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
} as const satisfies Record<string, ModelInfo>
  547. // OpenAI Native
  548. // https://openai.com/api/pricing/
  549. export type OpenAiNativeModelId = keyof typeof openAiNativeModels
  550. export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4o"
  551. export const openAiNativeModels = {
  552. // don't support tool use yet
  553. "o3-mini": {
  554. maxTokens: 100_000,
  555. contextWindow: 200_000,
  556. supportsImages: false,
  557. supportsPromptCache: false,
  558. inputPrice: 1.1,
  559. outputPrice: 4.4,
  560. reasoningEffort: "medium",
  561. },
  562. "o3-mini-high": {
  563. maxTokens: 100_000,
  564. contextWindow: 200_000,
  565. supportsImages: false,
  566. supportsPromptCache: false,
  567. inputPrice: 1.1,
  568. outputPrice: 4.4,
  569. reasoningEffort: "high",
  570. },
  571. "o3-mini-low": {
  572. maxTokens: 100_000,
  573. contextWindow: 200_000,
  574. supportsImages: false,
  575. supportsPromptCache: false,
  576. inputPrice: 1.1,
  577. outputPrice: 4.4,
  578. reasoningEffort: "low",
  579. },
  580. o1: {
  581. maxTokens: 100_000,
  582. contextWindow: 200_000,
  583. supportsImages: true,
  584. supportsPromptCache: false,
  585. inputPrice: 15,
  586. outputPrice: 60,
  587. },
  588. "o1-preview": {
  589. maxTokens: 32_768,
  590. contextWindow: 128_000,
  591. supportsImages: true,
  592. supportsPromptCache: false,
  593. inputPrice: 15,
  594. outputPrice: 60,
  595. },
  596. "o1-mini": {
  597. maxTokens: 65_536,
  598. contextWindow: 128_000,
  599. supportsImages: true,
  600. supportsPromptCache: false,
  601. inputPrice: 1.1,
  602. outputPrice: 4.4,
  603. },
  604. "gpt-4o": {
  605. maxTokens: 4_096,
  606. contextWindow: 128_000,
  607. supportsImages: true,
  608. supportsPromptCache: false,
  609. inputPrice: 2.5,
  610. outputPrice: 10,
  611. },
  612. "gpt-4o-mini": {
  613. maxTokens: 16_384,
  614. contextWindow: 128_000,
  615. supportsImages: true,
  616. supportsPromptCache: false,
  617. inputPrice: 0.15,
  618. outputPrice: 0.6,
  619. },
  620. } as const satisfies Record<string, ModelInfo>
  621. // DeepSeek
  622. // https://platform.deepseek.com/docs/api
  623. export type DeepSeekModelId = keyof typeof deepSeekModels
  624. export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"
  625. export const deepSeekModels = {
  626. "deepseek-chat": {
  627. maxTokens: 8192,
  628. contextWindow: 64_000,
  629. supportsImages: false,
  630. supportsPromptCache: false,
  631. inputPrice: 0.014, // $0.014 per million tokens
  632. outputPrice: 0.28, // $0.28 per million tokens
  633. description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`,
  634. },
  635. "deepseek-reasoner": {
  636. maxTokens: 8192,
  637. contextWindow: 64_000,
  638. supportsImages: false,
  639. supportsPromptCache: false,
  640. inputPrice: 0.55, // $0.55 per million tokens
  641. outputPrice: 2.19, // $2.19 per million tokens
  642. description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.`,
  643. },
  644. } as const satisfies Record<string, ModelInfo>
// Azure OpenAI
// https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs
// Default `api-version` used when the user has not configured azureApiVersion.
export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"
// Mistral
// https://docs.mistral.ai/getting-started/models/models_overview/

export type MistralModelId = keyof typeof mistralModels
export const mistralDefaultModelId: MistralModelId = "codestral-latest"

// Mistral first-party models, keyed by model ID. Prices are USD per million tokens.
// NOTE(review): maxTokens equals contextWindow for every entry, which is unusual
// (maxTokens normally bounds output only) — confirm this is intended.
export const mistralModels = {
	"codestral-latest": {
		maxTokens: 256_000,
		contextWindow: 256_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.3,
		outputPrice: 0.9,
	},
	"mistral-large-latest": {
		maxTokens: 131_000,
		contextWindow: 131_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 6.0,
	},
	"ministral-8b-latest": {
		maxTokens: 131_000,
		contextWindow: 131_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.1,
		outputPrice: 0.1,
	},
	"ministral-3b-latest": {
		maxTokens: 131_000,
		contextWindow: 131_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.04,
		outputPrice: 0.04,
	},
	"mistral-small-latest": {
		maxTokens: 32_000,
		contextWindow: 32_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.2,
		outputPrice: 0.6,
	},
	"pixtral-large-latest": {
		maxTokens: 131_000,
		contextWindow: 131_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 6.0,
	},
} as const satisfies Record<string, ModelInfo>
// Unbound Security
export const unboundDefaultModelId = "anthropic/claude-3-5-sonnet-20241022"

// Static metadata for the default Unbound gateway model (Claude 3.5 Sonnet pricing).
export const unboundDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
}