// api.ts
  1. import * as vscode from "vscode"
// Union of every LLM provider backend this extension can route requests to.
// Each literal selects the corresponding API handler implementation.
export type ApiProvider =
	| "anthropic"
	| "glama"
	| "openrouter"
	| "bedrock"
	| "vertex"
	| "openai"
	| "ollama"
	| "lmstudio"
	| "gemini"
	| "openai-native"
	| "deepseek"
	| "vscode-lm"
	| "mistral"
	| "unbound"
// Per-provider connection and configuration options. Every field is optional;
// only the subset relevant to the selected ApiProvider is consumed.
export interface ApiHandlerOptions {
	apiModelId?: string
	apiKey?: string // anthropic
	anthropicBaseUrl?: string
	// VS Code Language Model API (vscode-lm provider)
	vsCodeLmModelSelector?: vscode.LanguageModelChatSelector
	// Glama
	glamaModelId?: string
	glamaModelInfo?: ModelInfo
	glamaApiKey?: string
	// OpenRouter
	openRouterApiKey?: string
	openRouterModelId?: string
	openRouterModelInfo?: ModelInfo
	openRouterBaseUrl?: string
	// AWS Bedrock credentials / region
	awsAccessKey?: string
	awsSecretKey?: string
	awsSessionToken?: string
	awsRegion?: string
	awsUseCrossRegionInference?: boolean
	awsUsePromptCache?: boolean
	// NOTE(review): casing is inconsistent with the other aws* fields
	// (expected awsPromptCacheId); renaming would break existing callers and
	// persisted configurations, so the name is kept as-is.
	awspromptCacheId?: string
	awsProfile?: string
	awsUseProfile?: boolean
	// GCP Vertex AI
	vertexProjectId?: string
	vertexRegion?: string
	// OpenAI-compatible endpoints (including Azure OpenAI)
	openAiBaseUrl?: string
	openAiApiKey?: string
	openAiModelId?: string
	openAiCustomModelInfo?: ModelInfo
	openAiUseAzure?: boolean
	// Ollama (local)
	ollamaModelId?: string
	ollamaBaseUrl?: string
	// LM Studio (local)
	lmStudioModelId?: string
	lmStudioBaseUrl?: string
	geminiApiKey?: string
	openAiNativeApiKey?: string
	mistralApiKey?: string
	azureApiVersion?: string
	openRouterUseMiddleOutTransform?: boolean
	openAiStreamingEnabled?: boolean
	setAzureApiVersion?: boolean
	// DeepSeek
	deepSeekBaseUrl?: string
	deepSeekApiKey?: string
	includeMaxTokens?: boolean
	// Unbound Security
	unboundApiKey?: string
	unboundModelId?: string
}
// A complete, persistable API configuration: all provider-specific options
// plus the provider selector itself.
export type ApiConfiguration = ApiHandlerOptions & {
	apiProvider?: ApiProvider
	id?: string // stable unique identifier
}
// Models

// Capability and pricing metadata for a single model.
// All prices are expressed in USD per million tokens.
export interface ModelInfo {
	maxTokens?: number // max output tokens per response
	contextWindow: number // total tokens (input + output) the model can attend to
	supportsImages?: boolean
	supportsComputerUse?: boolean
	supportsPromptCache: boolean // this value is hardcoded for now
	inputPrice?: number // USD per 1M input tokens
	outputPrice?: number // USD per 1M output tokens
	cacheWritesPrice?: number // USD per 1M tokens written to the prompt cache
	cacheReadsPrice?: number // USD per 1M tokens read from the prompt cache
	description?: string
	reasoningEffort?: "low" | "medium" | "high" // only meaningful for reasoning models
}
// Anthropic
// https://docs.anthropic.com/en/docs/about-claude/models
export type AnthropicModelId = keyof typeof anthropicModels
export const anthropicDefaultModelId: AnthropicModelId = "claude-3-5-sonnet-20241022"
// Static catalog of Anthropic models exposed by this extension,
// keyed by the Anthropic API model identifier.
export const anthropicModels = {
	"claude-3-5-sonnet-20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0, // $3 per million input tokens
		outputPrice: 15.0, // $15 per million output tokens
		cacheWritesPrice: 3.75, // $3.75 per million tokens
		cacheReadsPrice: 0.3, // $0.30 per million tokens
	},
	"claude-3-5-haiku-20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.0,
		outputPrice: 5.0,
		cacheWritesPrice: 1.25,
		cacheReadsPrice: 0.1,
	},
	"claude-3-opus-20240229": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 15.0,
		outputPrice: 75.0,
		cacheWritesPrice: 18.75,
		cacheReadsPrice: 1.5,
	},
	"claude-3-haiku-20240307": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.25,
		outputPrice: 1.25,
		cacheWritesPrice: 0.3,
		cacheReadsPrice: 0.03,
	},
} as const satisfies Record<string, ModelInfo> // as const assertion makes the object deeply readonly
// AWS Bedrock
// https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html

// One content part of a conversation message. Which optional fields are
// populated depends on `type` (e.g. `text` for "text", `source` for "image",
// `toolUseId`/`name`/`input` for "tool_use").
export interface MessageContent {
	type: "text" | "image" | "video" | "tool_use" | "tool_result"
	text?: string
	source?: {
		type: "base64"
		data: string | Uint8Array // string for Anthropic, Uint8Array for Bedrock
		media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp"
	}
	// Video specific fields
	format?: string
	s3Location?: {
		uri: string
		bucketOwner?: string
	}
	// Tool use and result fields
	toolUseId?: string
	name?: string
	input?: any
	output?: any // Used for tool_result type
}
  149. export type BedrockModelId = keyof typeof bedrockModels
  150. export const bedrockDefaultModelId: BedrockModelId = "anthropic.claude-3-5-sonnet-20241022-v2:0"
  151. export const bedrockModels = {
  152. "amazon.nova-pro-v1:0": {
  153. maxTokens: 5000,
  154. contextWindow: 300_000,
  155. supportsImages: true,
  156. supportsComputerUse: false,
  157. supportsPromptCache: false,
  158. inputPrice: 0.8,
  159. outputPrice: 3.2,
  160. cacheWritesPrice: 0.8, // per million tokens
  161. cacheReadsPrice: 0.2, // per million tokens
  162. },
  163. "amazon.nova-lite-v1:0": {
  164. maxTokens: 5000,
  165. contextWindow: 300_000,
  166. supportsImages: true,
  167. supportsComputerUse: false,
  168. supportsPromptCache: false,
  169. inputPrice: 0.06,
  170. outputPrice: 0.024,
  171. cacheWritesPrice: 0.06, // per million tokens
  172. cacheReadsPrice: 0.015, // per million tokens
  173. },
  174. "amazon.nova-micro-v1:0": {
  175. maxTokens: 5000,
  176. contextWindow: 128_000,
  177. supportsImages: false,
  178. supportsComputerUse: false,
  179. supportsPromptCache: false,
  180. inputPrice: 0.035,
  181. outputPrice: 0.14,
  182. cacheWritesPrice: 0.035, // per million tokens
  183. cacheReadsPrice: 0.00875, // per million tokens
  184. },
  185. "anthropic.claude-3-5-sonnet-20241022-v2:0": {
  186. maxTokens: 8192,
  187. contextWindow: 200_000,
  188. supportsImages: true,
  189. supportsComputerUse: true,
  190. supportsPromptCache: false,
  191. inputPrice: 3.0,
  192. outputPrice: 15.0,
  193. cacheWritesPrice: 3.75, // per million tokens
  194. cacheReadsPrice: 0.3, // per million tokens
  195. },
  196. "anthropic.claude-3-5-haiku-20241022-v1:0": {
  197. maxTokens: 8192,
  198. contextWindow: 200_000,
  199. supportsImages: false,
  200. supportsPromptCache: false,
  201. inputPrice: 1.0,
  202. outputPrice: 5.0,
  203. cacheWritesPrice: 1.0,
  204. cacheReadsPrice: 0.08,
  205. },
  206. "anthropic.claude-3-5-sonnet-20240620-v1:0": {
  207. maxTokens: 8192,
  208. contextWindow: 200_000,
  209. supportsImages: true,
  210. supportsPromptCache: false,
  211. inputPrice: 3.0,
  212. outputPrice: 15.0,
  213. },
  214. "anthropic.claude-3-opus-20240229-v1:0": {
  215. maxTokens: 4096,
  216. contextWindow: 200_000,
  217. supportsImages: true,
  218. supportsPromptCache: false,
  219. inputPrice: 15.0,
  220. outputPrice: 75.0,
  221. },
  222. "anthropic.claude-3-sonnet-20240229-v1:0": {
  223. maxTokens: 4096,
  224. contextWindow: 200_000,
  225. supportsImages: true,
  226. supportsPromptCache: false,
  227. inputPrice: 3.0,
  228. outputPrice: 15.0,
  229. },
  230. "anthropic.claude-3-haiku-20240307-v1:0": {
  231. maxTokens: 4096,
  232. contextWindow: 200_000,
  233. supportsImages: true,
  234. supportsPromptCache: false,
  235. inputPrice: 0.25,
  236. outputPrice: 1.25,
  237. },
  238. "meta.llama3-3-70b-instruct-v1:0": {
  239. maxTokens: 8192,
  240. contextWindow: 128_000,
  241. supportsImages: false,
  242. supportsComputerUse: false,
  243. supportsPromptCache: false,
  244. inputPrice: 0.72,
  245. outputPrice: 0.72,
  246. },
  247. "meta.llama3-2-90b-instruct-v1:0": {
  248. maxTokens: 8192,
  249. contextWindow: 128_000,
  250. supportsImages: true,
  251. supportsComputerUse: false,
  252. supportsPromptCache: false,
  253. inputPrice: 0.72,
  254. outputPrice: 0.72,
  255. },
  256. "meta.llama3-2-11b-instruct-v1:0": {
  257. maxTokens: 8192,
  258. contextWindow: 128_000,
  259. supportsImages: true,
  260. supportsComputerUse: false,
  261. supportsPromptCache: false,
  262. inputPrice: 0.16,
  263. outputPrice: 0.16,
  264. },
  265. "meta.llama3-2-3b-instruct-v1:0": {
  266. maxTokens: 8192,
  267. contextWindow: 128_000,
  268. supportsImages: false,
  269. supportsComputerUse: false,
  270. supportsPromptCache: false,
  271. inputPrice: 0.15,
  272. outputPrice: 0.15,
  273. },
  274. "meta.llama3-2-1b-instruct-v1:0": {
  275. maxTokens: 8192,
  276. contextWindow: 128_000,
  277. supportsImages: false,
  278. supportsComputerUse: false,
  279. supportsPromptCache: false,
  280. inputPrice: 0.1,
  281. outputPrice: 0.1,
  282. },
  283. "meta.llama3-1-405b-instruct-v1:0": {
  284. maxTokens: 8192,
  285. contextWindow: 128_000,
  286. supportsImages: false,
  287. supportsComputerUse: false,
  288. supportsPromptCache: false,
  289. inputPrice: 2.4,
  290. outputPrice: 2.4,
  291. },
  292. "meta.llama3-1-70b-instruct-v1:0": {
  293. maxTokens: 8192,
  294. contextWindow: 128_000,
  295. supportsImages: false,
  296. supportsComputerUse: false,
  297. supportsPromptCache: false,
  298. inputPrice: 0.72,
  299. outputPrice: 0.72,
  300. },
  301. "meta.llama3-1-8b-instruct-v1:0": {
  302. maxTokens: 8192,
  303. contextWindow: 8_000,
  304. supportsImages: false,
  305. supportsComputerUse: false,
  306. supportsPromptCache: false,
  307. inputPrice: 0.22,
  308. outputPrice: 0.22,
  309. },
  310. "meta.llama3-70b-instruct-v1:0": {
  311. maxTokens: 2048,
  312. contextWindow: 8_000,
  313. supportsImages: false,
  314. supportsComputerUse: false,
  315. supportsPromptCache: false,
  316. inputPrice: 2.65,
  317. outputPrice: 3.5,
  318. },
  319. "meta.llama3-8b-instruct-v1:0": {
  320. maxTokens: 2048,
  321. contextWindow: 4_000,
  322. supportsImages: false,
  323. supportsComputerUse: false,
  324. supportsPromptCache: false,
  325. inputPrice: 0.3,
  326. outputPrice: 0.6,
  327. },
  328. } as const satisfies Record<string, ModelInfo>
// Glama
// https://glama.ai/models

// Default Glama model and its metadata. Glama models are fetched dynamically,
// so only the default is hardcoded here (hence no GlamaModelId key type).
export const glamaDefaultModelId = "anthropic/claude-3-5-sonnet"
export const glamaDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
	description:
		"The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._",
}
// OpenRouter
// https://openrouter.ai/models?order=newest&supported_parameters=tools

// Default OpenRouter model and its metadata. OpenRouter models are fetched
// dynamically, so only the default is hardcoded here.
export const openRouterDefaultModelId = "anthropic/claude-3.5-sonnet:beta" // will always exist in openRouterModels
export const openRouterDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
	description:
		"The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._",
}
// Vertex AI
// https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude

// Static catalog of Claude models available through GCP Vertex AI,
// keyed by the Vertex model identifier (name@version).
export type VertexModelId = keyof typeof vertexModels
export const vertexDefaultModelId: VertexModelId = "claude-3-5-sonnet-v2@20241022"
export const vertexModels = {
	"claude-3-5-sonnet-v2@20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: false,
		inputPrice: 3.0,
		outputPrice: 15.0,
	},
	"claude-3-5-sonnet@20240620": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 3.0,
		outputPrice: 15.0,
	},
	"claude-3-5-haiku@20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.0,
		outputPrice: 5.0,
	},
	"claude-3-opus@20240229": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 15.0,
		outputPrice: 75.0,
	},
	"claude-3-haiku@20240307": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.25,
		outputPrice: 1.25,
	},
} as const satisfies Record<string, ModelInfo>
// Fallback metadata for OpenAI-compatible endpoints whose models are unknown.
// maxTokens: -1 presumably means "no explicit limit / let the server decide" — verify against consumers.
export const openAiModelInfoSaneDefaults: ModelInfo = {
	maxTokens: -1,
	contextWindow: 128_000,
	supportsImages: true,
	supportsPromptCache: false,
	inputPrice: 0,
	outputPrice: 0,
}
// Gemini
// https://ai.google.dev/gemini-api/docs/models/gemini

// Static catalog of Google Gemini models, keyed by the Gemini API model name.
// Prices are 0 for these entries — presumably free experimental-tier models; verify against current Gemini pricing.
export type GeminiModelId = keyof typeof geminiModels
export const geminiDefaultModelId: GeminiModelId = "gemini-2.0-flash-thinking-exp-01-21"
export const geminiModels = {
	"gemini-2.0-flash-thinking-exp-01-21": {
		maxTokens: 65_536,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-thinking-exp-1219": {
		maxTokens: 8192,
		contextWindow: 32_767,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-exp": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-002": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-exp-0827": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-8b-exp-0827": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-pro-002": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-pro-exp-0827": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-exp-1206": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
} as const satisfies Record<string, ModelInfo>
// OpenAI Native
// https://openai.com/api/pricing/

// Static catalog of first-party OpenAI models, keyed by the API model name.
// The "o3-mini-high"/"o3-mini-low" entries are pseudo-ids: same model,
// different reasoningEffort setting.
export type OpenAiNativeModelId = keyof typeof openAiNativeModels
export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4o"
export const openAiNativeModels = {
	// don't support tool use yet
	"o3-mini": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.1,
		outputPrice: 4.4,
		reasoningEffort: "medium",
	},
	"o3-mini-high": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.1,
		outputPrice: 4.4,
		reasoningEffort: "high",
	},
	"o3-mini-low": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.1,
		outputPrice: 4.4,
		reasoningEffort: "low",
	},
	o1: {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 15,
		outputPrice: 60,
	},
	"o1-preview": {
		maxTokens: 32_768,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 15,
		outputPrice: 60,
	},
	// NOTE(review): supportsImages: true looks doubtful for o1-mini — the o1-mini
	// API did not accept image input; confirm before relying on this flag.
	"o1-mini": {
		maxTokens: 65_536,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 1.1,
		outputPrice: 4.4,
	},
	"gpt-4o": {
		maxTokens: 4_096,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 5,
		outputPrice: 15,
	},
	"gpt-4o-mini": {
		maxTokens: 16_384,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.15,
		outputPrice: 0.6,
	},
} as const satisfies Record<string, ModelInfo>
// DeepSeek
// https://platform.deepseek.com/docs/api

// Static catalog of DeepSeek models, keyed by the DeepSeek API model name.
export type DeepSeekModelId = keyof typeof deepSeekModels
export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"
export const deepSeekModels = {
	"deepseek-chat": {
		maxTokens: 8192,
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.014, // $0.014 per million tokens
		outputPrice: 0.28, // $0.28 per million tokens
		description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`,
	},
	"deepseek-reasoner": {
		maxTokens: 8192,
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.55, // $0.55 per million tokens
		outputPrice: 2.19, // $2.19 per million tokens
		description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.`,
	},
} as const satisfies Record<string, ModelInfo>
// Azure OpenAI
// https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs

// Default `api-version` query parameter used when openAiUseAzure is set
// and no azureApiVersion is configured.
export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"
// Mistral
// https://docs.mistral.ai/getting-started/models/models_overview/

// Static catalog of Mistral models, keyed by the Mistral API model name.
export type MistralModelId = keyof typeof mistralModels
export const mistralDefaultModelId: MistralModelId = "codestral-latest"
export const mistralModels = {
	"codestral-latest": {
		maxTokens: 32_768,
		contextWindow: 256_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.3,
		outputPrice: 0.9,
	},
} as const satisfies Record<string, ModelInfo>
  610. // Unbound Security
  611. export type UnboundModelId = keyof typeof unboundModels
  612. export const unboundDefaultModelId = "openai/gpt-4o"
  613. export const unboundModels = {
  614. "anthropic/claude-3-5-sonnet-20241022": anthropicModels["claude-3-5-sonnet-20241022"],
  615. "openai/gpt-4o": openAiNativeModels["gpt-4o"],
  616. "deepseek/deepseek-chat": deepSeekModels["deepseek-chat"],
  617. "deepseek/deepseek-reasoner": deepSeekModels["deepseek-reasoner"],
  618. "mistral/codestral-latest": mistralModels["codestral-latest"],
  619. } as const satisfies Record<string, ModelInfo>