// api.ts
  1. import * as vscode from "vscode"
  2. export type ApiProvider =
  3. | "anthropic"
  4. | "glama"
  5. | "openrouter"
  6. | "bedrock"
  7. | "vertex"
  8. | "openai"
  9. | "ollama"
  10. | "lmstudio"
  11. | "gemini"
  12. | "openai-native"
  13. | "deepseek"
  14. | "vscode-lm"
  15. | "mistral"
  16. | "unbound"
  17. | "requesty"
  18. export interface ApiHandlerOptions {
  19. apiModelId?: string
  20. apiKey?: string // anthropic
  21. anthropicBaseUrl?: string
  22. vsCodeLmModelSelector?: vscode.LanguageModelChatSelector
  23. glamaModelId?: string
  24. glamaModelInfo?: ModelInfo
  25. glamaApiKey?: string
  26. openRouterApiKey?: string
  27. openRouterModelId?: string
  28. openRouterModelInfo?: ModelInfo
  29. openRouterBaseUrl?: string
  30. awsAccessKey?: string
  31. awsSecretKey?: string
  32. awsSessionToken?: string
  33. awsRegion?: string
  34. awsUseCrossRegionInference?: boolean
  35. awsUsePromptCache?: boolean
  36. awspromptCacheId?: string
  37. awsProfile?: string
  38. awsUseProfile?: boolean
  39. vertexProjectId?: string
  40. vertexRegion?: string
  41. openAiBaseUrl?: string
  42. openAiApiKey?: string
  43. openAiModelId?: string
  44. openAiCustomModelInfo?: ModelInfo
  45. openAiUseAzure?: boolean
  46. ollamaModelId?: string
  47. ollamaBaseUrl?: string
  48. lmStudioModelId?: string
  49. lmStudioBaseUrl?: string
  50. geminiApiKey?: string
  51. openAiNativeApiKey?: string
  52. mistralApiKey?: string
  53. azureApiVersion?: string
  54. openRouterUseMiddleOutTransform?: boolean
  55. openAiStreamingEnabled?: boolean
  56. setAzureApiVersion?: boolean
  57. deepSeekBaseUrl?: string
  58. deepSeekApiKey?: string
  59. includeMaxTokens?: boolean
  60. unboundApiKey?: string
  61. unboundModelId?: string
  62. unboundModelInfo?: ModelInfo
  63. requestyApiKey?: string
  64. requestyModelId?: string
  65. requestyModelInfo?: ModelInfo
  66. modelTemperature?: number
  67. }
  68. export type ApiConfiguration = ApiHandlerOptions & {
  69. apiProvider?: ApiProvider
  70. id?: string // stable unique identifier
  71. }
  72. // Models
  73. export interface ModelInfo {
  74. maxTokens?: number
  75. contextWindow: number
  76. supportsImages?: boolean
  77. supportsComputerUse?: boolean
  78. supportsPromptCache: boolean // this value is hardcoded for now
  79. inputPrice?: number
  80. outputPrice?: number
  81. cacheWritesPrice?: number
  82. cacheReadsPrice?: number
  83. description?: string
  84. reasoningEffort?: "low" | "medium" | "high"
  85. }
  86. // Anthropic
  87. // https://docs.anthropic.com/en/docs/about-claude/models
  88. export type AnthropicModelId = keyof typeof anthropicModels
  89. export const anthropicDefaultModelId: AnthropicModelId = "claude-3-5-sonnet-20241022"
  90. export const anthropicModels = {
  91. "claude-3-5-sonnet-20241022": {
  92. maxTokens: 8192,
  93. contextWindow: 200_000,
  94. supportsImages: true,
  95. supportsComputerUse: true,
  96. supportsPromptCache: true,
  97. inputPrice: 3.0, // $3 per million input tokens
  98. outputPrice: 15.0, // $15 per million output tokens
  99. cacheWritesPrice: 3.75, // $3.75 per million tokens
  100. cacheReadsPrice: 0.3, // $0.30 per million tokens
  101. },
  102. "claude-3-5-haiku-20241022": {
  103. maxTokens: 8192,
  104. contextWindow: 200_000,
  105. supportsImages: false,
  106. supportsPromptCache: true,
  107. inputPrice: 1.0,
  108. outputPrice: 5.0,
  109. cacheWritesPrice: 1.25,
  110. cacheReadsPrice: 0.1,
  111. },
  112. "claude-3-opus-20240229": {
  113. maxTokens: 4096,
  114. contextWindow: 200_000,
  115. supportsImages: true,
  116. supportsPromptCache: true,
  117. inputPrice: 15.0,
  118. outputPrice: 75.0,
  119. cacheWritesPrice: 18.75,
  120. cacheReadsPrice: 1.5,
  121. },
  122. "claude-3-haiku-20240307": {
  123. maxTokens: 4096,
  124. contextWindow: 200_000,
  125. supportsImages: true,
  126. supportsPromptCache: true,
  127. inputPrice: 0.25,
  128. outputPrice: 1.25,
  129. cacheWritesPrice: 0.3,
  130. cacheReadsPrice: 0.03,
  131. },
  132. } as const satisfies Record<string, ModelInfo> // as const assertion makes the object deeply readonly
  133. // AWS Bedrock
  134. // https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html
  135. export interface MessageContent {
  136. type: "text" | "image" | "video" | "tool_use" | "tool_result"
  137. text?: string
  138. source?: {
  139. type: "base64"
  140. data: string | Uint8Array // string for Anthropic, Uint8Array for Bedrock
  141. media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp"
  142. }
  143. // Video specific fields
  144. format?: string
  145. s3Location?: {
  146. uri: string
  147. bucketOwner?: string
  148. }
  149. // Tool use and result fields
  150. toolUseId?: string
  151. name?: string
  152. input?: any
  153. output?: any // Used for tool_result type
  154. }
  155. export type BedrockModelId = keyof typeof bedrockModels
  156. export const bedrockDefaultModelId: BedrockModelId = "anthropic.claude-3-5-sonnet-20241022-v2:0"
  157. export const bedrockModels = {
  158. "amazon.nova-pro-v1:0": {
  159. maxTokens: 5000,
  160. contextWindow: 300_000,
  161. supportsImages: true,
  162. supportsComputerUse: false,
  163. supportsPromptCache: false,
  164. inputPrice: 0.8,
  165. outputPrice: 3.2,
  166. cacheWritesPrice: 0.8, // per million tokens
  167. cacheReadsPrice: 0.2, // per million tokens
  168. },
  169. "amazon.nova-lite-v1:0": {
  170. maxTokens: 5000,
  171. contextWindow: 300_000,
  172. supportsImages: true,
  173. supportsComputerUse: false,
  174. supportsPromptCache: false,
  175. inputPrice: 0.06,
  176. outputPrice: 0.024,
  177. cacheWritesPrice: 0.06, // per million tokens
  178. cacheReadsPrice: 0.015, // per million tokens
  179. },
  180. "amazon.nova-micro-v1:0": {
  181. maxTokens: 5000,
  182. contextWindow: 128_000,
  183. supportsImages: false,
  184. supportsComputerUse: false,
  185. supportsPromptCache: false,
  186. inputPrice: 0.035,
  187. outputPrice: 0.14,
  188. cacheWritesPrice: 0.035, // per million tokens
  189. cacheReadsPrice: 0.00875, // per million tokens
  190. },
  191. "anthropic.claude-3-5-sonnet-20241022-v2:0": {
  192. maxTokens: 8192,
  193. contextWindow: 200_000,
  194. supportsImages: true,
  195. supportsComputerUse: true,
  196. supportsPromptCache: false,
  197. inputPrice: 3.0,
  198. outputPrice: 15.0,
  199. cacheWritesPrice: 3.75, // per million tokens
  200. cacheReadsPrice: 0.3, // per million tokens
  201. },
  202. "anthropic.claude-3-5-haiku-20241022-v1:0": {
  203. maxTokens: 8192,
  204. contextWindow: 200_000,
  205. supportsImages: false,
  206. supportsPromptCache: false,
  207. inputPrice: 1.0,
  208. outputPrice: 5.0,
  209. cacheWritesPrice: 1.0,
  210. cacheReadsPrice: 0.08,
  211. },
  212. "anthropic.claude-3-5-sonnet-20240620-v1:0": {
  213. maxTokens: 8192,
  214. contextWindow: 200_000,
  215. supportsImages: true,
  216. supportsPromptCache: false,
  217. inputPrice: 3.0,
  218. outputPrice: 15.0,
  219. },
  220. "anthropic.claude-3-opus-20240229-v1:0": {
  221. maxTokens: 4096,
  222. contextWindow: 200_000,
  223. supportsImages: true,
  224. supportsPromptCache: false,
  225. inputPrice: 15.0,
  226. outputPrice: 75.0,
  227. },
  228. "anthropic.claude-3-sonnet-20240229-v1:0": {
  229. maxTokens: 4096,
  230. contextWindow: 200_000,
  231. supportsImages: true,
  232. supportsPromptCache: false,
  233. inputPrice: 3.0,
  234. outputPrice: 15.0,
  235. },
  236. "anthropic.claude-3-haiku-20240307-v1:0": {
  237. maxTokens: 4096,
  238. contextWindow: 200_000,
  239. supportsImages: true,
  240. supportsPromptCache: false,
  241. inputPrice: 0.25,
  242. outputPrice: 1.25,
  243. },
  244. "meta.llama3-3-70b-instruct-v1:0": {
  245. maxTokens: 8192,
  246. contextWindow: 128_000,
  247. supportsImages: false,
  248. supportsComputerUse: false,
  249. supportsPromptCache: false,
  250. inputPrice: 0.72,
  251. outputPrice: 0.72,
  252. },
  253. "meta.llama3-2-90b-instruct-v1:0": {
  254. maxTokens: 8192,
  255. contextWindow: 128_000,
  256. supportsImages: true,
  257. supportsComputerUse: false,
  258. supportsPromptCache: false,
  259. inputPrice: 0.72,
  260. outputPrice: 0.72,
  261. },
  262. "meta.llama3-2-11b-instruct-v1:0": {
  263. maxTokens: 8192,
  264. contextWindow: 128_000,
  265. supportsImages: true,
  266. supportsComputerUse: false,
  267. supportsPromptCache: false,
  268. inputPrice: 0.16,
  269. outputPrice: 0.16,
  270. },
  271. "meta.llama3-2-3b-instruct-v1:0": {
  272. maxTokens: 8192,
  273. contextWindow: 128_000,
  274. supportsImages: false,
  275. supportsComputerUse: false,
  276. supportsPromptCache: false,
  277. inputPrice: 0.15,
  278. outputPrice: 0.15,
  279. },
  280. "meta.llama3-2-1b-instruct-v1:0": {
  281. maxTokens: 8192,
  282. contextWindow: 128_000,
  283. supportsImages: false,
  284. supportsComputerUse: false,
  285. supportsPromptCache: false,
  286. inputPrice: 0.1,
  287. outputPrice: 0.1,
  288. },
  289. "meta.llama3-1-405b-instruct-v1:0": {
  290. maxTokens: 8192,
  291. contextWindow: 128_000,
  292. supportsImages: false,
  293. supportsComputerUse: false,
  294. supportsPromptCache: false,
  295. inputPrice: 2.4,
  296. outputPrice: 2.4,
  297. },
  298. "meta.llama3-1-70b-instruct-v1:0": {
  299. maxTokens: 8192,
  300. contextWindow: 128_000,
  301. supportsImages: false,
  302. supportsComputerUse: false,
  303. supportsPromptCache: false,
  304. inputPrice: 0.72,
  305. outputPrice: 0.72,
  306. },
  307. "meta.llama3-1-8b-instruct-v1:0": {
  308. maxTokens: 8192,
  309. contextWindow: 8_000,
  310. supportsImages: false,
  311. supportsComputerUse: false,
  312. supportsPromptCache: false,
  313. inputPrice: 0.22,
  314. outputPrice: 0.22,
  315. },
  316. "meta.llama3-70b-instruct-v1:0": {
  317. maxTokens: 2048,
  318. contextWindow: 8_000,
  319. supportsImages: false,
  320. supportsComputerUse: false,
  321. supportsPromptCache: false,
  322. inputPrice: 2.65,
  323. outputPrice: 3.5,
  324. },
  325. "meta.llama3-8b-instruct-v1:0": {
  326. maxTokens: 2048,
  327. contextWindow: 4_000,
  328. supportsImages: false,
  329. supportsComputerUse: false,
  330. supportsPromptCache: false,
  331. inputPrice: 0.3,
  332. outputPrice: 0.6,
  333. },
  334. } as const satisfies Record<string, ModelInfo>
  335. // Glama
  336. // https://glama.ai/models
  337. export const glamaDefaultModelId = "anthropic/claude-3-5-sonnet"
  338. export const glamaDefaultModelInfo: ModelInfo = {
  339. maxTokens: 8192,
  340. contextWindow: 200_000,
  341. supportsImages: true,
  342. supportsComputerUse: true,
  343. supportsPromptCache: true,
  344. inputPrice: 3.0,
  345. outputPrice: 15.0,
  346. cacheWritesPrice: 3.75,
  347. cacheReadsPrice: 0.3,
  348. description:
  349. "The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._",
  350. }
  351. export const requestyDefaultModelInfo: ModelInfo = {
  352. maxTokens: 8192,
  353. contextWindow: 200_000,
  354. supportsImages: true,
  355. supportsComputerUse: true,
  356. supportsPromptCache: true,
  357. inputPrice: 3.0,
  358. outputPrice: 15.0,
  359. cacheWritesPrice: 3.75,
  360. cacheReadsPrice: 0.3,
  361. description:
  362. "The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._",
  363. }
  364. export const requestyDefaultModelId = "anthropic/claude-3-5-sonnet"
  365. // OpenRouter
  366. // https://openrouter.ai/models?order=newest&supported_parameters=tools
  367. export const openRouterDefaultModelId = "anthropic/claude-3.5-sonnet:beta" // will always exist in openRouterModels
  368. export const openRouterDefaultModelInfo: ModelInfo = {
  369. maxTokens: 8192,
  370. contextWindow: 200_000,
  371. supportsImages: true,
  372. supportsComputerUse: true,
  373. supportsPromptCache: true,
  374. inputPrice: 3.0,
  375. outputPrice: 15.0,
  376. cacheWritesPrice: 3.75,
  377. cacheReadsPrice: 0.3,
  378. description:
  379. "The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._",
  380. }
  381. // Vertex AI
  382. // https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude
  383. export type VertexModelId = keyof typeof vertexModels
  384. export const vertexDefaultModelId: VertexModelId = "claude-3-5-sonnet-v2@20241022"
  385. export const vertexModels = {
  386. "claude-3-5-sonnet-v2@20241022": {
  387. maxTokens: 8192,
  388. contextWindow: 200_000,
  389. supportsImages: true,
  390. supportsComputerUse: true,
  391. supportsPromptCache: false,
  392. inputPrice: 3.0,
  393. outputPrice: 15.0,
  394. },
  395. "claude-3-5-sonnet@20240620": {
  396. maxTokens: 8192,
  397. contextWindow: 200_000,
  398. supportsImages: true,
  399. supportsPromptCache: false,
  400. inputPrice: 3.0,
  401. outputPrice: 15.0,
  402. },
  403. "claude-3-5-haiku@20241022": {
  404. maxTokens: 8192,
  405. contextWindow: 200_000,
  406. supportsImages: false,
  407. supportsPromptCache: false,
  408. inputPrice: 1.0,
  409. outputPrice: 5.0,
  410. },
  411. "claude-3-opus@20240229": {
  412. maxTokens: 4096,
  413. contextWindow: 200_000,
  414. supportsImages: true,
  415. supportsPromptCache: false,
  416. inputPrice: 15.0,
  417. outputPrice: 75.0,
  418. },
  419. "claude-3-haiku@20240307": {
  420. maxTokens: 4096,
  421. contextWindow: 200_000,
  422. supportsImages: true,
  423. supportsPromptCache: false,
  424. inputPrice: 0.25,
  425. outputPrice: 1.25,
  426. },
  427. } as const satisfies Record<string, ModelInfo>
  428. export const openAiModelInfoSaneDefaults: ModelInfo = {
  429. maxTokens: -1,
  430. contextWindow: 128_000,
  431. supportsImages: true,
  432. supportsPromptCache: false,
  433. inputPrice: 0,
  434. outputPrice: 0,
  435. }
  436. export const requestyModelInfoSaneDefaults: ModelInfo = {
  437. maxTokens: -1,
  438. contextWindow: 128_000,
  439. supportsImages: true,
  440. supportsPromptCache: false,
  441. inputPrice: 0,
  442. outputPrice: 0,
  443. }
  444. // Gemini
  445. // https://ai.google.dev/gemini-api/docs/models/gemini
  446. export type GeminiModelId = keyof typeof geminiModels
  447. export const geminiDefaultModelId: GeminiModelId = "gemini-2.0-flash-001"
  448. export const geminiModels = {
  449. "gemini-2.0-flash-001": {
  450. maxTokens: 8192,
  451. contextWindow: 1_048_576,
  452. supportsImages: true,
  453. supportsPromptCache: false,
  454. inputPrice: 0,
  455. outputPrice: 0,
  456. },
  457. "gemini-2.0-flash-lite-preview-02-05": {
  458. maxTokens: 8192,
  459. contextWindow: 1_048_576,
  460. supportsImages: true,
  461. supportsPromptCache: false,
  462. inputPrice: 0,
  463. outputPrice: 0,
  464. },
  465. "gemini-2.0-pro-exp-02-05": {
  466. maxTokens: 8192,
  467. contextWindow: 2_097_152,
  468. supportsImages: true,
  469. supportsPromptCache: false,
  470. inputPrice: 0,
  471. outputPrice: 0,
  472. },
  473. "gemini-2.0-flash-thinking-exp-01-21": {
  474. maxTokens: 65_536,
  475. contextWindow: 1_048_576,
  476. supportsImages: true,
  477. supportsPromptCache: false,
  478. inputPrice: 0,
  479. outputPrice: 0,
  480. },
  481. "gemini-2.0-flash-thinking-exp-1219": {
  482. maxTokens: 8192,
  483. contextWindow: 32_767,
  484. supportsImages: true,
  485. supportsPromptCache: false,
  486. inputPrice: 0,
  487. outputPrice: 0,
  488. },
  489. "gemini-2.0-flash-exp": {
  490. maxTokens: 8192,
  491. contextWindow: 1_048_576,
  492. supportsImages: true,
  493. supportsPromptCache: false,
  494. inputPrice: 0,
  495. outputPrice: 0,
  496. },
  497. "gemini-1.5-flash-002": {
  498. maxTokens: 8192,
  499. contextWindow: 1_048_576,
  500. supportsImages: true,
  501. supportsPromptCache: false,
  502. inputPrice: 0,
  503. outputPrice: 0,
  504. },
  505. "gemini-1.5-flash-exp-0827": {
  506. maxTokens: 8192,
  507. contextWindow: 1_048_576,
  508. supportsImages: true,
  509. supportsPromptCache: false,
  510. inputPrice: 0,
  511. outputPrice: 0,
  512. },
  513. "gemini-1.5-flash-8b-exp-0827": {
  514. maxTokens: 8192,
  515. contextWindow: 1_048_576,
  516. supportsImages: true,
  517. supportsPromptCache: false,
  518. inputPrice: 0,
  519. outputPrice: 0,
  520. },
  521. "gemini-1.5-pro-002": {
  522. maxTokens: 8192,
  523. contextWindow: 2_097_152,
  524. supportsImages: true,
  525. supportsPromptCache: false,
  526. inputPrice: 0,
  527. outputPrice: 0,
  528. },
  529. "gemini-1.5-pro-exp-0827": {
  530. maxTokens: 8192,
  531. contextWindow: 2_097_152,
  532. supportsImages: true,
  533. supportsPromptCache: false,
  534. inputPrice: 0,
  535. outputPrice: 0,
  536. },
  537. "gemini-exp-1206": {
  538. maxTokens: 8192,
  539. contextWindow: 2_097_152,
  540. supportsImages: true,
  541. supportsPromptCache: false,
  542. inputPrice: 0,
  543. outputPrice: 0,
  544. },
  545. } as const satisfies Record<string, ModelInfo>
  546. // OpenAI Native
  547. // https://openai.com/api/pricing/
  548. export type OpenAiNativeModelId = keyof typeof openAiNativeModels
  549. export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4o"
  550. export const openAiNativeModels = {
  551. // don't support tool use yet
  552. "o3-mini": {
  553. maxTokens: 100_000,
  554. contextWindow: 200_000,
  555. supportsImages: false,
  556. supportsPromptCache: false,
  557. inputPrice: 1.1,
  558. outputPrice: 4.4,
  559. reasoningEffort: "medium",
  560. },
  561. "o3-mini-high": {
  562. maxTokens: 100_000,
  563. contextWindow: 200_000,
  564. supportsImages: false,
  565. supportsPromptCache: false,
  566. inputPrice: 1.1,
  567. outputPrice: 4.4,
  568. reasoningEffort: "high",
  569. },
  570. "o3-mini-low": {
  571. maxTokens: 100_000,
  572. contextWindow: 200_000,
  573. supportsImages: false,
  574. supportsPromptCache: false,
  575. inputPrice: 1.1,
  576. outputPrice: 4.4,
  577. reasoningEffort: "low",
  578. },
  579. o1: {
  580. maxTokens: 100_000,
  581. contextWindow: 200_000,
  582. supportsImages: true,
  583. supportsPromptCache: false,
  584. inputPrice: 15,
  585. outputPrice: 60,
  586. },
  587. "o1-preview": {
  588. maxTokens: 32_768,
  589. contextWindow: 128_000,
  590. supportsImages: true,
  591. supportsPromptCache: false,
  592. inputPrice: 15,
  593. outputPrice: 60,
  594. },
  595. "o1-mini": {
  596. maxTokens: 65_536,
  597. contextWindow: 128_000,
  598. supportsImages: true,
  599. supportsPromptCache: false,
  600. inputPrice: 1.1,
  601. outputPrice: 4.4,
  602. },
  603. "gpt-4o": {
  604. maxTokens: 4_096,
  605. contextWindow: 128_000,
  606. supportsImages: true,
  607. supportsPromptCache: false,
  608. inputPrice: 5,
  609. outputPrice: 15,
  610. },
  611. "gpt-4o-mini": {
  612. maxTokens: 16_384,
  613. contextWindow: 128_000,
  614. supportsImages: true,
  615. supportsPromptCache: false,
  616. inputPrice: 0.15,
  617. outputPrice: 0.6,
  618. },
  619. } as const satisfies Record<string, ModelInfo>
  620. // DeepSeek
  621. // https://platform.deepseek.com/docs/api
  622. export type DeepSeekModelId = keyof typeof deepSeekModels
  623. export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"
  624. export const deepSeekModels = {
  625. "deepseek-chat": {
  626. maxTokens: 8192,
  627. contextWindow: 64_000,
  628. supportsImages: false,
  629. supportsPromptCache: false,
  630. inputPrice: 0.014, // $0.014 per million tokens
  631. outputPrice: 0.28, // $0.28 per million tokens
  632. description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`,
  633. },
  634. "deepseek-reasoner": {
  635. maxTokens: 8192,
  636. contextWindow: 64_000,
  637. supportsImages: false,
  638. supportsPromptCache: false,
  639. inputPrice: 0.55, // $0.55 per million tokens
  640. outputPrice: 2.19, // $2.19 per million tokens
  641. description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.`,
  642. },
  643. } as const satisfies Record<string, ModelInfo>
  644. // Azure OpenAI
  645. // https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
  646. // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs
  647. export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"
  648. // Mistral
  649. // https://docs.mistral.ai/getting-started/models/models_overview/
  650. export type MistralModelId = keyof typeof mistralModels
  651. export const mistralDefaultModelId: MistralModelId = "codestral-latest"
  652. export const mistralModels = {
  653. "codestral-latest": {
  654. maxTokens: 32_768,
  655. contextWindow: 256_000,
  656. supportsImages: false,
  657. supportsPromptCache: false,
  658. inputPrice: 0.3,
  659. outputPrice: 0.9,
  660. },
  661. } as const satisfies Record<string, ModelInfo>
  662. // Unbound Security
  663. export const unboundDefaultModelId = "anthropic/claude-3-5-sonnet-20241022"
  664. export const unboundDefaultModelInfo: ModelInfo = {
  665. maxTokens: 8192,
  666. contextWindow: 200_000,
  667. supportsImages: true,
  668. supportsPromptCache: true,
  669. inputPrice: 3.0,
  670. outputPrice: 15.0,
  671. cacheWritesPrice: 3.75,
  672. cacheReadsPrice: 0.3,
  673. }