// api.ts — API provider identifiers, configuration options, and per-provider model metadata tables.
// VS Code extension API — used only for the LanguageModelChatSelector type below.
import * as vscode from "vscode"
  2. export type ApiProvider =
  3. | "anthropic"
  4. | "glama"
  5. | "openrouter"
  6. | "bedrock"
  7. | "vertex"
  8. | "openai"
  9. | "ollama"
  10. | "lmstudio"
  11. | "gemini"
  12. | "openai-native"
  13. | "deepseek"
  14. | "vscode-lm"
  15. | "mistral"
  16. | "unbound"
  17. | "requesty"
/**
 * Union of all per-provider settings. Every field is optional so one object
 * can hold the configuration for any provider; which fields are actually
 * read depends on the selected ApiProvider.
 */
export interface ApiHandlerOptions {
	apiModelId?: string // model id for providers without a dedicated *ModelId field
	// --- Anthropic ---
	apiKey?: string // anthropic
	anthropicBaseUrl?: string
	anthropicThinking?: number // thinking token budget (see THINKING_BUDGET)
	// --- VS Code Language Model API ---
	vsCodeLmModelSelector?: vscode.LanguageModelChatSelector
	// --- Glama ---
	glamaModelId?: string
	glamaModelInfo?: ModelInfo
	glamaApiKey?: string
	// --- OpenRouter ---
	openRouterApiKey?: string
	openRouterModelId?: string
	openRouterModelInfo?: ModelInfo
	openRouterBaseUrl?: string
	// --- AWS Bedrock ---
	awsAccessKey?: string
	awsSecretKey?: string
	awsSessionToken?: string
	awsRegion?: string
	awsUseCrossRegionInference?: boolean
	awsUsePromptCache?: boolean
	// NOTE(review): lowercase "p" is inconsistent with awsUsePromptCache above,
	// but renaming would break callers and previously persisted configs.
	awspromptCacheId?: string
	awsProfile?: string
	awsUseProfile?: boolean
	// --- GCP Vertex AI ---
	vertexProjectId?: string
	vertexRegion?: string
	// --- OpenAI-compatible endpoints ---
	openAiBaseUrl?: string
	openAiApiKey?: string
	openAiModelId?: string
	openAiCustomModelInfo?: ModelInfo
	openAiUseAzure?: boolean
	// --- Ollama ---
	ollamaModelId?: string
	ollamaBaseUrl?: string
	// --- LM Studio ---
	lmStudioModelId?: string
	lmStudioBaseUrl?: string
	// --- Gemini ---
	geminiApiKey?: string
	// --- OpenAI (native API) ---
	openAiNativeApiKey?: string
	// --- Mistral ---
	mistralApiKey?: string
	mistralCodestralUrl?: string // New option for Codestral URL
	// --- Azure OpenAI ---
	azureApiVersion?: string
	// --- Misc provider toggles ---
	openRouterUseMiddleOutTransform?: boolean
	openAiStreamingEnabled?: boolean
	setAzureApiVersion?: boolean
	// --- DeepSeek ---
	deepSeekBaseUrl?: string
	deepSeekApiKey?: string
	includeMaxTokens?: boolean
	// --- Unbound ---
	unboundApiKey?: string
	unboundModelId?: string
	unboundModelInfo?: ModelInfo
	// --- Requesty ---
	requestyApiKey?: string
	requestyModelId?: string
	requestyModelInfo?: ModelInfo
	// --- Shared ---
	modelTemperature?: number
}
/** A complete API configuration: the chosen provider plus that provider's options. */
export type ApiConfiguration = ApiHandlerOptions & {
	apiProvider?: ApiProvider
	id?: string // stable unique identifier
}
// Models

/**
 * Static metadata for a single model: limits, capabilities, and pricing.
 * Per the comments in the model tables below, all prices are USD per
 * million tokens.
 */
export interface ModelInfo {
	maxTokens?: number // maximum output tokens per request
	contextWindow: number
	supportsImages?: boolean
	supportsComputerUse?: boolean
	supportsPromptCache: boolean // this value is hardcoded for now
	inputPrice?: number // USD per million input tokens
	outputPrice?: number // USD per million output tokens
	cacheWritesPrice?: number // USD per million tokens written to the prompt cache
	cacheReadsPrice?: number // USD per million tokens read from the prompt cache
	description?: string
	reasoningEffort?: "low" | "medium" | "high" // reasoning-effort variants (see openAiNativeModels)
	thinking?: boolean // model supports an extended-thinking budget (see THINKING_BUDGET)
}
  89. export const THINKING_BUDGET = {
  90. step: 1024,
  91. min: 1024,
  92. default: 8 * 1024,
  93. }
// Anthropic
// https://docs.anthropic.com/en/docs/about-claude/models

export type AnthropicModelId = keyof typeof anthropicModels

export const anthropicDefaultModelId: AnthropicModelId = "claude-3-7-sonnet-20250219"

/** Models served directly by the Anthropic API. Prices are USD per million tokens. */
export const anthropicModels = {
	"claude-3-7-sonnet-20250219": {
		maxTokens: 64_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0, // $3 per million input tokens
		outputPrice: 15.0, // $15 per million output tokens
		cacheWritesPrice: 3.75, // $3.75 per million tokens
		cacheReadsPrice: 0.3, // $0.30 per million tokens
		thinking: true, // supports the extended-thinking budget (see THINKING_BUDGET)
	},
	"claude-3-5-sonnet-20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0, // $3 per million input tokens
		outputPrice: 15.0, // $15 per million output tokens
		cacheWritesPrice: 3.75, // $3.75 per million tokens
		cacheReadsPrice: 0.3, // $0.30 per million tokens
	},
	"claude-3-5-haiku-20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.0,
		outputPrice: 5.0,
		cacheWritesPrice: 1.25,
		cacheReadsPrice: 0.1,
	},
	"claude-3-opus-20240229": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 15.0,
		outputPrice: 75.0,
		cacheWritesPrice: 18.75,
		cacheReadsPrice: 1.5,
	},
	"claude-3-haiku-20240307": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.25,
		outputPrice: 1.25,
		cacheWritesPrice: 0.3,
		cacheReadsPrice: 0.03,
	},
} as const satisfies Record<string, ModelInfo> // as const assertion makes the object deeply readonly
// AWS Bedrock
// https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html

/**
 * One content part of a message sent to Anthropic/Bedrock. Only the fields
 * relevant to the given `type` are populated; the rest stay undefined.
 */
export interface MessageContent {
	type: "text" | "image" | "video" | "tool_use" | "tool_result"
	text?: string
	// Image payload
	source?: {
		type: "base64"
		data: string | Uint8Array // string for Anthropic, Uint8Array for Bedrock
		media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp"
	}
	// Video specific fields
	format?: string
	s3Location?: {
		uri: string
		bucketOwner?: string
	}
	// Tool use and result fields
	toolUseId?: string
	name?: string
	input?: any // tool input payload; shape depends on the tool
	output?: any // Used for tool_result type
}
  175. export type BedrockModelId = keyof typeof bedrockModels
  176. export const bedrockDefaultModelId: BedrockModelId = "anthropic.claude-3-7-sonnet-20250219-v1:0"
  177. export const bedrockModels = {
  178. "amazon.nova-pro-v1:0": {
  179. maxTokens: 5000,
  180. contextWindow: 300_000,
  181. supportsImages: true,
  182. supportsComputerUse: false,
  183. supportsPromptCache: false,
  184. inputPrice: 0.8,
  185. outputPrice: 3.2,
  186. cacheWritesPrice: 0.8, // per million tokens
  187. cacheReadsPrice: 0.2, // per million tokens
  188. },
  189. "amazon.nova-lite-v1:0": {
  190. maxTokens: 5000,
  191. contextWindow: 300_000,
  192. supportsImages: true,
  193. supportsComputerUse: false,
  194. supportsPromptCache: false,
  195. inputPrice: 0.06,
  196. outputPrice: 0.024,
  197. cacheWritesPrice: 0.06, // per million tokens
  198. cacheReadsPrice: 0.015, // per million tokens
  199. },
  200. "amazon.nova-micro-v1:0": {
  201. maxTokens: 5000,
  202. contextWindow: 128_000,
  203. supportsImages: false,
  204. supportsComputerUse: false,
  205. supportsPromptCache: false,
  206. inputPrice: 0.035,
  207. outputPrice: 0.14,
  208. cacheWritesPrice: 0.035, // per million tokens
  209. cacheReadsPrice: 0.00875, // per million tokens
  210. },
  211. "anthropic.claude-3-7-sonnet-20250219-v1:0": {
  212. maxTokens: 8192,
  213. contextWindow: 200_000,
  214. supportsImages: true,
  215. supportsComputerUse: true,
  216. supportsPromptCache: true,
  217. inputPrice: 3.0,
  218. outputPrice: 15.0,
  219. cacheWritesPrice: 3.75,
  220. cacheReadsPrice: 0.3,
  221. },
  222. "anthropic.claude-3-5-sonnet-20241022-v2:0": {
  223. maxTokens: 8192,
  224. contextWindow: 200_000,
  225. supportsImages: true,
  226. supportsComputerUse: true,
  227. supportsPromptCache: false,
  228. inputPrice: 3.0,
  229. outputPrice: 15.0,
  230. cacheWritesPrice: 3.75,
  231. cacheReadsPrice: 0.3,
  232. },
  233. "anthropic.claude-3-5-haiku-20241022-v1:0": {
  234. maxTokens: 8192,
  235. contextWindow: 200_000,
  236. supportsImages: false,
  237. supportsPromptCache: false,
  238. inputPrice: 1.0,
  239. outputPrice: 5.0,
  240. cacheWritesPrice: 1.0,
  241. cacheReadsPrice: 0.08,
  242. },
  243. "anthropic.claude-3-5-sonnet-20240620-v1:0": {
  244. maxTokens: 8192,
  245. contextWindow: 200_000,
  246. supportsImages: true,
  247. supportsPromptCache: false,
  248. inputPrice: 3.0,
  249. outputPrice: 15.0,
  250. },
  251. "anthropic.claude-3-opus-20240229-v1:0": {
  252. maxTokens: 4096,
  253. contextWindow: 200_000,
  254. supportsImages: true,
  255. supportsPromptCache: false,
  256. inputPrice: 15.0,
  257. outputPrice: 75.0,
  258. },
  259. "anthropic.claude-3-sonnet-20240229-v1:0": {
  260. maxTokens: 4096,
  261. contextWindow: 200_000,
  262. supportsImages: true,
  263. supportsPromptCache: false,
  264. inputPrice: 3.0,
  265. outputPrice: 15.0,
  266. },
  267. "anthropic.claude-3-haiku-20240307-v1:0": {
  268. maxTokens: 4096,
  269. contextWindow: 200_000,
  270. supportsImages: true,
  271. supportsPromptCache: false,
  272. inputPrice: 0.25,
  273. outputPrice: 1.25,
  274. },
  275. "meta.llama3-3-70b-instruct-v1:0": {
  276. maxTokens: 8192,
  277. contextWindow: 128_000,
  278. supportsImages: false,
  279. supportsComputerUse: false,
  280. supportsPromptCache: false,
  281. inputPrice: 0.72,
  282. outputPrice: 0.72,
  283. },
  284. "meta.llama3-2-90b-instruct-v1:0": {
  285. maxTokens: 8192,
  286. contextWindow: 128_000,
  287. supportsImages: true,
  288. supportsComputerUse: false,
  289. supportsPromptCache: false,
  290. inputPrice: 0.72,
  291. outputPrice: 0.72,
  292. },
  293. "meta.llama3-2-11b-instruct-v1:0": {
  294. maxTokens: 8192,
  295. contextWindow: 128_000,
  296. supportsImages: true,
  297. supportsComputerUse: false,
  298. supportsPromptCache: false,
  299. inputPrice: 0.16,
  300. outputPrice: 0.16,
  301. },
  302. "meta.llama3-2-3b-instruct-v1:0": {
  303. maxTokens: 8192,
  304. contextWindow: 128_000,
  305. supportsImages: false,
  306. supportsComputerUse: false,
  307. supportsPromptCache: false,
  308. inputPrice: 0.15,
  309. outputPrice: 0.15,
  310. },
  311. "meta.llama3-2-1b-instruct-v1:0": {
  312. maxTokens: 8192,
  313. contextWindow: 128_000,
  314. supportsImages: false,
  315. supportsComputerUse: false,
  316. supportsPromptCache: false,
  317. inputPrice: 0.1,
  318. outputPrice: 0.1,
  319. },
  320. "meta.llama3-1-405b-instruct-v1:0": {
  321. maxTokens: 8192,
  322. contextWindow: 128_000,
  323. supportsImages: false,
  324. supportsComputerUse: false,
  325. supportsPromptCache: false,
  326. inputPrice: 2.4,
  327. outputPrice: 2.4,
  328. },
  329. "meta.llama3-1-70b-instruct-v1:0": {
  330. maxTokens: 8192,
  331. contextWindow: 128_000,
  332. supportsImages: false,
  333. supportsComputerUse: false,
  334. supportsPromptCache: false,
  335. inputPrice: 0.72,
  336. outputPrice: 0.72,
  337. },
  338. "meta.llama3-1-8b-instruct-v1:0": {
  339. maxTokens: 8192,
  340. contextWindow: 8_000,
  341. supportsImages: false,
  342. supportsComputerUse: false,
  343. supportsPromptCache: false,
  344. inputPrice: 0.22,
  345. outputPrice: 0.22,
  346. },
  347. "meta.llama3-70b-instruct-v1:0": {
  348. maxTokens: 2048,
  349. contextWindow: 8_000,
  350. supportsImages: false,
  351. supportsComputerUse: false,
  352. supportsPromptCache: false,
  353. inputPrice: 2.65,
  354. outputPrice: 3.5,
  355. },
  356. "meta.llama3-8b-instruct-v1:0": {
  357. maxTokens: 2048,
  358. contextWindow: 4_000,
  359. supportsImages: false,
  360. supportsComputerUse: false,
  361. supportsPromptCache: false,
  362. inputPrice: 0.3,
  363. outputPrice: 0.6,
  364. },
  365. } as const satisfies Record<string, ModelInfo>
// Glama
// https://glama.ai/models

export const glamaDefaultModelId = "anthropic/claude-3-7-sonnet"

/** Fallback metadata for the default Glama model (Claude 3.7 Sonnet). Prices are USD per million tokens. */
export const glamaDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
	description:
		"Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
}
// Requesty
// https://requesty.ai/router-2

export const requestyDefaultModelId = "anthropic/claude-3-7-sonnet-latest"

/** Fallback metadata for the default Requesty model (Claude 3.7 Sonnet). Prices are USD per million tokens. */
export const requestyDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
	description:
		"Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
}
// OpenRouter
// https://openrouter.ai/models?order=newest&supported_parameters=tools

export const openRouterDefaultModelId = "anthropic/claude-3.7-sonnet"

/** Fallback metadata for the default OpenRouter model (Claude 3.7 Sonnet). Prices are USD per million tokens. */
export const openRouterDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
	description:
		"Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
}
// Vertex AI
// https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude

export type VertexModelId = keyof typeof vertexModels

export const vertexDefaultModelId: VertexModelId = "claude-3-7-sonnet@20250219"

/**
 * Anthropic models served through GCP Vertex AI. Prices are USD per million
 * tokens. Note: supportsPromptCache is false for every Vertex entry, unlike
 * the direct-Anthropic table.
 */
export const vertexModels = {
	"claude-3-7-sonnet@20250219": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: false,
		inputPrice: 3.0,
		outputPrice: 15.0,
	},
	"claude-3-5-sonnet-v2@20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: false,
		inputPrice: 3.0,
		outputPrice: 15.0,
	},
	"claude-3-5-sonnet@20240620": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 3.0,
		outputPrice: 15.0,
	},
	"claude-3-5-haiku@20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.0,
		outputPrice: 5.0,
	},
	"claude-3-opus@20240229": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 15.0,
		outputPrice: 75.0,
	},
	"claude-3-haiku@20240307": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.25,
		outputPrice: 1.25,
	},
} as const satisfies Record<string, ModelInfo>
  470. export const openAiModelInfoSaneDefaults: ModelInfo = {
  471. maxTokens: -1,
  472. contextWindow: 128_000,
  473. supportsImages: true,
  474. supportsPromptCache: false,
  475. inputPrice: 0,
  476. outputPrice: 0,
  477. }
  478. export const requestyModelInfoSaneDefaults: ModelInfo = {
  479. maxTokens: -1,
  480. contextWindow: 128_000,
  481. supportsImages: true,
  482. supportsPromptCache: false,
  483. inputPrice: 0,
  484. outputPrice: 0,
  485. }
// Gemini
// https://ai.google.dev/gemini-api/docs/models/gemini

export type GeminiModelId = keyof typeof geminiModels

export const geminiDefaultModelId: GeminiModelId = "gemini-2.0-flash-001"

/**
 * Google Gemini models. All entries are priced at 0 here
 * (NOTE(review): presumably free-tier/experimental rates — confirm).
 */
export const geminiModels = {
	"gemini-2.0-flash-001": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-lite-preview-02-05": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-pro-exp-02-05": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-thinking-exp-01-21": {
		maxTokens: 65_536,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-thinking-exp-1219": {
		maxTokens: 8192,
		contextWindow: 32_767,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-exp": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-002": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-exp-0827": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-8b-exp-0827": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-pro-002": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-pro-exp-0827": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-exp-1206": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
} as const satisfies Record<string, ModelInfo>
// OpenAI Native
// https://openai.com/api/pricing/

export type OpenAiNativeModelId = keyof typeof openAiNativeModels

export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4o"

/**
 * Models served directly by OpenAI. Prices are USD per million tokens.
 * NOTE(review): "o3-mini-high"/"o3-mini-low" are not OpenAI model ids — the
 * three o3-mini entries share identical limits/prices and differ only in
 * reasoningEffort, so they appear to be UI variants of one model; confirm.
 */
export const openAiNativeModels = {
	// don't support tool use yet
	"o3-mini": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.1,
		outputPrice: 4.4,
		reasoningEffort: "medium",
	},
	"o3-mini-high": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.1,
		outputPrice: 4.4,
		reasoningEffort: "high",
	},
	"o3-mini-low": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.1,
		outputPrice: 4.4,
		reasoningEffort: "low",
	},
	o1: {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 15,
		outputPrice: 60,
	},
	// NOTE(review): o1-preview and o1-mini are marked supportsImages: true —
	// verify against OpenAI's model docs; the API did not accept image input
	// for these at launch.
	"o1-preview": {
		maxTokens: 32_768,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 15,
		outputPrice: 60,
	},
	"o1-mini": {
		maxTokens: 65_536,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 1.1,
		outputPrice: 4.4,
	},
	"gpt-4o": {
		maxTokens: 4_096,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 2.5,
		outputPrice: 10,
	},
	"gpt-4o-mini": {
		maxTokens: 16_384,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.15,
		outputPrice: 0.6,
	},
} as const satisfies Record<string, ModelInfo>
// DeepSeek
// https://platform.deepseek.com/docs/api

export type DeepSeekModelId = keyof typeof deepSeekModels

export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"

/** DeepSeek models. Prices are USD per million tokens. */
export const deepSeekModels = {
	"deepseek-chat": {
		maxTokens: 8192,
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: false,
		// NOTE(review): $0.014/M matches DeepSeek's cache-hit input rate; the
		// standard (cache-miss) input price is $0.14/M — confirm which was intended.
		inputPrice: 0.014, // $0.014 per million tokens
		outputPrice: 0.28, // $0.28 per million tokens
		description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`,
	},
	"deepseek-reasoner": {
		maxTokens: 8192,
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.55, // $0.55 per million tokens
		outputPrice: 2.19, // $2.19 per million tokens
		description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.`,
	},
} as const satisfies Record<string, ModelInfo>
// Azure OpenAI
// https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs

// API version used for Azure OpenAI when ApiHandlerOptions.azureApiVersion is not set.
export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"
// Mistral
// https://docs.mistral.ai/getting-started/models/models_overview/

export type MistralModelId = keyof typeof mistralModels

export const mistralDefaultModelId: MistralModelId = "codestral-latest"

/**
 * Mistral models. Prices are USD per million tokens.
 * NOTE(review): every entry sets maxTokens equal to contextWindow — confirm
 * that is intended rather than a separate output cap.
 */
export const mistralModels = {
	"codestral-latest": {
		maxTokens: 256_000,
		contextWindow: 256_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.3,
		outputPrice: 0.9,
	},
	"mistral-large-latest": {
		maxTokens: 131_000,
		contextWindow: 131_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 6.0,
	},
	"ministral-8b-latest": {
		maxTokens: 131_000,
		contextWindow: 131_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.1,
		outputPrice: 0.1,
	},
	"ministral-3b-latest": {
		maxTokens: 131_000,
		contextWindow: 131_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.04,
		outputPrice: 0.04,
	},
	"mistral-small-latest": {
		maxTokens: 32_000,
		contextWindow: 32_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.2,
		outputPrice: 0.6,
	},
	"pixtral-large-latest": {
		maxTokens: 131_000,
		contextWindow: 131_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 6.0,
	},
} as const satisfies Record<string, ModelInfo>
// Unbound Security

export const unboundDefaultModelId = "anthropic/claude-3-5-sonnet-20241022"

/** Fallback metadata for the default Unbound model (Claude 3.5 Sonnet). Prices are USD per million tokens. */
export const unboundDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
}