// api.ts — static model metadata (context windows, pricing, capability flags) for each supported provider.
  1. import { ModelInfo, ProviderName, ProviderSettings } from "../schemas"
  2. export type { ModelInfo, ProviderName as ApiProvider }
  3. export type ApiHandlerOptions = Omit<ProviderSettings, "apiProvider" | "id">
  4. export type ApiConfiguration = ProviderSettings
  5. // Anthropic
  6. // https://docs.anthropic.com/en/docs/about-claude/models
  7. export type AnthropicModelId = keyof typeof anthropicModels
  8. export const anthropicDefaultModelId: AnthropicModelId = "claude-3-7-sonnet-20250219"
  9. export const anthropicModels = {
  10. "claude-3-7-sonnet-20250219:thinking": {
  11. maxTokens: 128_000,
  12. contextWindow: 200_000,
  13. supportsImages: true,
  14. supportsComputerUse: true,
  15. supportsPromptCache: true,
  16. inputPrice: 3.0, // $3 per million input tokens
  17. outputPrice: 15.0, // $15 per million output tokens
  18. cacheWritesPrice: 3.75, // $3.75 per million tokens
  19. cacheReadsPrice: 0.3, // $0.30 per million tokens
  20. thinking: true,
  21. },
  22. "claude-3-7-sonnet-20250219": {
  23. maxTokens: 8192,
  24. contextWindow: 200_000,
  25. supportsImages: true,
  26. supportsComputerUse: true,
  27. supportsPromptCache: true,
  28. inputPrice: 3.0, // $3 per million input tokens
  29. outputPrice: 15.0, // $15 per million output tokens
  30. cacheWritesPrice: 3.75, // $3.75 per million tokens
  31. cacheReadsPrice: 0.3, // $0.30 per million tokens
  32. thinking: false,
  33. },
  34. "claude-3-5-sonnet-20241022": {
  35. maxTokens: 8192,
  36. contextWindow: 200_000,
  37. supportsImages: true,
  38. supportsComputerUse: true,
  39. supportsPromptCache: true,
  40. inputPrice: 3.0, // $3 per million input tokens
  41. outputPrice: 15.0, // $15 per million output tokens
  42. cacheWritesPrice: 3.75, // $3.75 per million tokens
  43. cacheReadsPrice: 0.3, // $0.30 per million tokens
  44. },
  45. "claude-3-5-haiku-20241022": {
  46. maxTokens: 8192,
  47. contextWindow: 200_000,
  48. supportsImages: false,
  49. supportsPromptCache: true,
  50. inputPrice: 1.0,
  51. outputPrice: 5.0,
  52. cacheWritesPrice: 1.25,
  53. cacheReadsPrice: 0.1,
  54. },
  55. "claude-3-opus-20240229": {
  56. maxTokens: 4096,
  57. contextWindow: 200_000,
  58. supportsImages: true,
  59. supportsPromptCache: true,
  60. inputPrice: 15.0,
  61. outputPrice: 75.0,
  62. cacheWritesPrice: 18.75,
  63. cacheReadsPrice: 1.5,
  64. },
  65. "claude-3-haiku-20240307": {
  66. maxTokens: 4096,
  67. contextWindow: 200_000,
  68. supportsImages: true,
  69. supportsPromptCache: true,
  70. inputPrice: 0.25,
  71. outputPrice: 1.25,
  72. cacheWritesPrice: 0.3,
  73. cacheReadsPrice: 0.03,
  74. },
  75. } as const satisfies Record<string, ModelInfo> // as const assertion makes the object deeply readonly
  76. // Amazon Bedrock
  77. // https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html
  78. export interface MessageContent {
  79. type: "text" | "image" | "video" | "tool_use" | "tool_result"
  80. text?: string
  81. source?: {
  82. type: "base64"
  83. data: string | Uint8Array // string for Anthropic, Uint8Array for Bedrock
  84. media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp"
  85. }
  86. // Video specific fields
  87. format?: string
  88. s3Location?: {
  89. uri: string
  90. bucketOwner?: string
  91. }
  92. // Tool use and result fields
  93. toolUseId?: string
  94. name?: string
  95. input?: any
  96. output?: any // Used for tool_result type
  97. }
  98. export type BedrockModelId = keyof typeof bedrockModels
  99. export const bedrockDefaultModelId: BedrockModelId = "anthropic.claude-3-7-sonnet-20250219-v1:0"
  100. export const bedrockDefaultPromptRouterModelId: BedrockModelId = "anthropic.claude-3-sonnet-20240229-v1:0"
  101. // March, 12 2025 - updated prices to match US-West-2 list price shown at https://aws.amazon.com/bedrock/pricing/
  102. // including older models that are part of the default prompt routers AWS enabled for GA of the promot router feature
  103. export const bedrockModels = {
  104. "amazon.nova-pro-v1:0": {
  105. maxTokens: 5000,
  106. contextWindow: 300_000,
  107. supportsImages: true,
  108. supportsComputerUse: false,
  109. supportsPromptCache: true,
  110. inputPrice: 0.8,
  111. outputPrice: 3.2,
  112. cacheWritesPrice: 0.8, // per million tokens
  113. cacheReadsPrice: 0.2, // per million tokens
  114. minTokensPerCachePoint: 1,
  115. maxCachePoints: 1,
  116. cachableFields: ["system"],
  117. },
  118. "amazon.nova-pro-latency-optimized-v1:0": {
  119. maxTokens: 5000,
  120. contextWindow: 300_000,
  121. supportsImages: true,
  122. supportsComputerUse: false,
  123. supportsPromptCache: false,
  124. inputPrice: 1.0,
  125. outputPrice: 4.0,
  126. cacheWritesPrice: 1.0, // per million tokens
  127. cacheReadsPrice: 0.25, // per million tokens
  128. description: "Amazon Nova Pro with latency optimized inference",
  129. },
  130. "amazon.nova-lite-v1:0": {
  131. maxTokens: 5000,
  132. contextWindow: 300_000,
  133. supportsImages: true,
  134. supportsComputerUse: false,
  135. supportsPromptCache: true,
  136. inputPrice: 0.06,
  137. outputPrice: 0.24,
  138. cacheWritesPrice: 0.06, // per million tokens
  139. cacheReadsPrice: 0.015, // per million tokens
  140. minTokensPerCachePoint: 1,
  141. maxCachePoints: 1,
  142. cachableFields: ["system"],
  143. },
  144. "amazon.nova-micro-v1:0": {
  145. maxTokens: 5000,
  146. contextWindow: 128_000,
  147. supportsImages: false,
  148. supportsComputerUse: false,
  149. supportsPromptCache: true,
  150. inputPrice: 0.035,
  151. outputPrice: 0.14,
  152. cacheWritesPrice: 0.035, // per million tokens
  153. cacheReadsPrice: 0.00875, // per million tokens
  154. minTokensPerCachePoint: 1,
  155. maxCachePoints: 1,
  156. cachableFields: ["system"],
  157. },
  158. "anthropic.claude-3-7-sonnet-20250219-v1:0": {
  159. maxTokens: 8192,
  160. contextWindow: 200_000,
  161. supportsImages: true,
  162. supportsComputerUse: true,
  163. supportsPromptCache: true,
  164. inputPrice: 3.0,
  165. outputPrice: 15.0,
  166. cacheWritesPrice: 3.75,
  167. cacheReadsPrice: 0.3,
  168. minTokensPerCachePoint: 1024,
  169. maxCachePoints: 4,
  170. cachableFields: ["system", "messages", "tools"],
  171. },
  172. "anthropic.claude-3-5-sonnet-20241022-v2:0": {
  173. maxTokens: 8192,
  174. contextWindow: 200_000,
  175. supportsImages: true,
  176. supportsComputerUse: true,
  177. supportsPromptCache: true,
  178. inputPrice: 3.0,
  179. outputPrice: 15.0,
  180. cacheWritesPrice: 3.75,
  181. cacheReadsPrice: 0.3,
  182. minTokensPerCachePoint: 1024,
  183. maxCachePoints: 4,
  184. cachableFields: ["system", "messages", "tools"],
  185. },
  186. "anthropic.claude-3-5-haiku-20241022-v1:0": {
  187. maxTokens: 8192,
  188. contextWindow: 200_000,
  189. supportsImages: false,
  190. supportsPromptCache: true,
  191. inputPrice: 0.8,
  192. outputPrice: 4.0,
  193. cacheWritesPrice: 1.0,
  194. cacheReadsPrice: 0.08,
  195. minTokensPerCachePoint: 2048,
  196. maxCachePoints: 4,
  197. cachableFields: ["system", "messages", "tools"],
  198. },
  199. "anthropic.claude-3-5-sonnet-20240620-v1:0": {
  200. maxTokens: 8192,
  201. contextWindow: 200_000,
  202. supportsImages: true,
  203. supportsPromptCache: false,
  204. inputPrice: 3.0,
  205. outputPrice: 15.0,
  206. },
  207. "anthropic.claude-3-opus-20240229-v1:0": {
  208. maxTokens: 4096,
  209. contextWindow: 200_000,
  210. supportsImages: true,
  211. supportsPromptCache: false,
  212. inputPrice: 15.0,
  213. outputPrice: 75.0,
  214. },
  215. "anthropic.claude-3-sonnet-20240229-v1:0": {
  216. maxTokens: 4096,
  217. contextWindow: 200_000,
  218. supportsImages: true,
  219. supportsPromptCache: false,
  220. inputPrice: 3.0,
  221. outputPrice: 15.0,
  222. },
  223. "anthropic.claude-3-haiku-20240307-v1:0": {
  224. maxTokens: 4096,
  225. contextWindow: 200_000,
  226. supportsImages: true,
  227. supportsPromptCache: false,
  228. inputPrice: 0.25,
  229. outputPrice: 1.25,
  230. },
  231. "anthropic.claude-2-1-v1:0": {
  232. maxTokens: 4096,
  233. contextWindow: 100_000,
  234. supportsImages: false,
  235. supportsPromptCache: false,
  236. inputPrice: 8.0,
  237. outputPrice: 24.0,
  238. description: "Claude 2.1",
  239. },
  240. "anthropic.claude-2-0-v1:0": {
  241. maxTokens: 4096,
  242. contextWindow: 100_000,
  243. supportsImages: false,
  244. supportsPromptCache: false,
  245. inputPrice: 8.0,
  246. outputPrice: 24.0,
  247. description: "Claude 2.0",
  248. },
  249. "anthropic.claude-instant-v1:0": {
  250. maxTokens: 4096,
  251. contextWindow: 100_000,
  252. supportsImages: false,
  253. supportsPromptCache: false,
  254. inputPrice: 0.8,
  255. outputPrice: 2.4,
  256. description: "Claude Instant",
  257. },
  258. "deepseek.r1-v1:0": {
  259. maxTokens: 32_768,
  260. contextWindow: 128_000,
  261. supportsImages: false,
  262. supportsPromptCache: false,
  263. inputPrice: 1.35,
  264. outputPrice: 5.4,
  265. },
  266. "meta.llama3-3-70b-instruct-v1:0": {
  267. maxTokens: 8192,
  268. contextWindow: 128_000,
  269. supportsImages: false,
  270. supportsComputerUse: false,
  271. supportsPromptCache: false,
  272. inputPrice: 0.72,
  273. outputPrice: 0.72,
  274. description: "Llama 3.3 Instruct (70B)",
  275. },
  276. "meta.llama3-2-90b-instruct-v1:0": {
  277. maxTokens: 8192,
  278. contextWindow: 128_000,
  279. supportsImages: true,
  280. supportsComputerUse: false,
  281. supportsPromptCache: false,
  282. inputPrice: 0.72,
  283. outputPrice: 0.72,
  284. description: "Llama 3.2 Instruct (90B)",
  285. },
  286. "meta.llama3-2-11b-instruct-v1:0": {
  287. maxTokens: 8192,
  288. contextWindow: 128_000,
  289. supportsImages: true,
  290. supportsComputerUse: false,
  291. supportsPromptCache: false,
  292. inputPrice: 0.16,
  293. outputPrice: 0.16,
  294. description: "Llama 3.2 Instruct (11B)",
  295. },
  296. "meta.llama3-2-3b-instruct-v1:0": {
  297. maxTokens: 8192,
  298. contextWindow: 128_000,
  299. supportsImages: false,
  300. supportsComputerUse: false,
  301. supportsPromptCache: false,
  302. inputPrice: 0.15,
  303. outputPrice: 0.15,
  304. description: "Llama 3.2 Instruct (3B)",
  305. },
  306. "meta.llama3-2-1b-instruct-v1:0": {
  307. maxTokens: 8192,
  308. contextWindow: 128_000,
  309. supportsImages: false,
  310. supportsComputerUse: false,
  311. supportsPromptCache: false,
  312. inputPrice: 0.1,
  313. outputPrice: 0.1,
  314. description: "Llama 3.2 Instruct (1B)",
  315. },
  316. "meta.llama3-1-405b-instruct-v1:0": {
  317. maxTokens: 8192,
  318. contextWindow: 128_000,
  319. supportsImages: false,
  320. supportsComputerUse: false,
  321. supportsPromptCache: false,
  322. inputPrice: 2.4,
  323. outputPrice: 2.4,
  324. description: "Llama 3.1 Instruct (405B)",
  325. },
  326. "meta.llama3-1-70b-instruct-v1:0": {
  327. maxTokens: 8192,
  328. contextWindow: 128_000,
  329. supportsImages: false,
  330. supportsComputerUse: false,
  331. supportsPromptCache: false,
  332. inputPrice: 0.72,
  333. outputPrice: 0.72,
  334. description: "Llama 3.1 Instruct (70B)",
  335. },
  336. "meta.llama3-1-70b-instruct-latency-optimized-v1:0": {
  337. maxTokens: 8192,
  338. contextWindow: 128_000,
  339. supportsImages: false,
  340. supportsComputerUse: false,
  341. supportsPromptCache: false,
  342. inputPrice: 0.9,
  343. outputPrice: 0.9,
  344. description: "Llama 3.1 Instruct (70B) (w/ latency optimized inference)",
  345. },
  346. "meta.llama3-1-8b-instruct-v1:0": {
  347. maxTokens: 8192,
  348. contextWindow: 8_000,
  349. supportsImages: false,
  350. supportsComputerUse: false,
  351. supportsPromptCache: false,
  352. inputPrice: 0.22,
  353. outputPrice: 0.22,
  354. description: "Llama 3.1 Instruct (8B)",
  355. },
  356. "meta.llama3-70b-instruct-v1:0": {
  357. maxTokens: 2048,
  358. contextWindow: 8_000,
  359. supportsImages: false,
  360. supportsComputerUse: false,
  361. supportsPromptCache: false,
  362. inputPrice: 2.65,
  363. outputPrice: 3.5,
  364. },
  365. "meta.llama3-8b-instruct-v1:0": {
  366. maxTokens: 2048,
  367. contextWindow: 4_000,
  368. supportsImages: false,
  369. supportsComputerUse: false,
  370. supportsPromptCache: false,
  371. inputPrice: 0.3,
  372. outputPrice: 0.6,
  373. },
  374. "amazon.titan-text-lite-v1:0": {
  375. maxTokens: 4096,
  376. contextWindow: 8_000,
  377. supportsImages: false,
  378. supportsComputerUse: false,
  379. supportsPromptCache: false,
  380. inputPrice: 0.15,
  381. outputPrice: 0.2,
  382. description: "Amazon Titan Text Lite",
  383. },
  384. "amazon.titan-text-express-v1:0": {
  385. maxTokens: 4096,
  386. contextWindow: 8_000,
  387. supportsImages: false,
  388. supportsComputerUse: false,
  389. supportsPromptCache: false,
  390. inputPrice: 0.2,
  391. outputPrice: 0.6,
  392. description: "Amazon Titan Text Express",
  393. },
  394. "amazon.titan-text-embeddings-v1:0": {
  395. maxTokens: 8192,
  396. contextWindow: 8_000,
  397. supportsImages: false,
  398. supportsComputerUse: false,
  399. supportsPromptCache: false,
  400. inputPrice: 0.1,
  401. description: "Amazon Titan Text Embeddings",
  402. },
  403. "amazon.titan-text-embeddings-v2:0": {
  404. maxTokens: 8192,
  405. contextWindow: 8_000,
  406. supportsImages: false,
  407. supportsComputerUse: false,
  408. supportsPromptCache: false,
  409. inputPrice: 0.02,
  410. description: "Amazon Titan Text Embeddings V2",
  411. },
  412. } as const satisfies Record<string, ModelInfo>
  413. // Glama
  414. // https://glama.ai/models
  415. export const glamaDefaultModelId = "anthropic/claude-3-7-sonnet"
  416. export const glamaDefaultModelInfo: ModelInfo = {
  417. maxTokens: 8192,
  418. contextWindow: 200_000,
  419. supportsImages: true,
  420. supportsComputerUse: true,
  421. supportsPromptCache: true,
  422. inputPrice: 3.0,
  423. outputPrice: 15.0,
  424. cacheWritesPrice: 3.75,
  425. cacheReadsPrice: 0.3,
  426. description:
  427. "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
  428. }
  429. // Requesty
  430. // https://requesty.ai/router-2
  431. export const requestyDefaultModelId = "anthropic/claude-3-7-sonnet-latest"
  432. export const requestyDefaultModelInfo: ModelInfo = {
  433. maxTokens: 8192,
  434. contextWindow: 200_000,
  435. supportsImages: true,
  436. supportsComputerUse: true,
  437. supportsPromptCache: true,
  438. inputPrice: 3.0,
  439. outputPrice: 15.0,
  440. cacheWritesPrice: 3.75,
  441. cacheReadsPrice: 0.3,
  442. description:
  443. "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
  444. }
  445. // OpenRouter
  446. // https://openrouter.ai/models?order=newest&supported_parameters=tools
  447. export const openRouterDefaultModelId = "anthropic/claude-3.7-sonnet"
  448. export const openRouterDefaultModelInfo: ModelInfo = {
  449. maxTokens: 8192,
  450. contextWindow: 200_000,
  451. supportsImages: true,
  452. supportsComputerUse: true,
  453. supportsPromptCache: true,
  454. inputPrice: 3.0,
  455. outputPrice: 15.0,
  456. cacheWritesPrice: 3.75,
  457. cacheReadsPrice: 0.3,
  458. description:
  459. "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
  460. }
  461. // Vertex AI
  462. // https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude
  463. export type VertexModelId = keyof typeof vertexModels
  464. export const vertexDefaultModelId: VertexModelId = "claude-3-7-sonnet@20250219"
  465. export const vertexModels = {
  466. "gemini-2.5-flash-preview-04-17:thinking": {
  467. maxTokens: 65_535,
  468. contextWindow: 1_048_576,
  469. supportsImages: true,
  470. supportsPromptCache: false,
  471. inputPrice: 0.15,
  472. outputPrice: 3.5,
  473. thinking: true,
  474. maxThinkingTokens: 24_576,
  475. },
  476. "gemini-2.5-flash-preview-04-17": {
  477. maxTokens: 65_535,
  478. contextWindow: 1_048_576,
  479. supportsImages: true,
  480. supportsPromptCache: false,
  481. inputPrice: 0.15,
  482. outputPrice: 0.6,
  483. thinking: false,
  484. },
  485. "gemini-2.5-pro-preview-03-25": {
  486. maxTokens: 65_535,
  487. contextWindow: 1_048_576,
  488. supportsImages: true,
  489. supportsPromptCache: true,
  490. isPromptCacheOptional: true,
  491. inputPrice: 2.5,
  492. outputPrice: 15,
  493. },
  494. "gemini-2.5-pro-exp-03-25": {
  495. maxTokens: 65_535,
  496. contextWindow: 1_048_576,
  497. supportsImages: true,
  498. supportsPromptCache: false,
  499. inputPrice: 0,
  500. outputPrice: 0,
  501. },
  502. "gemini-2.0-pro-exp-02-05": {
  503. maxTokens: 8192,
  504. contextWindow: 2_097_152,
  505. supportsImages: true,
  506. supportsPromptCache: false,
  507. inputPrice: 0,
  508. outputPrice: 0,
  509. },
  510. "gemini-2.0-flash-001": {
  511. maxTokens: 8192,
  512. contextWindow: 1_048_576,
  513. supportsImages: true,
  514. supportsPromptCache: true,
  515. isPromptCacheOptional: true,
  516. inputPrice: 0.15,
  517. outputPrice: 0.6,
  518. },
  519. "gemini-2.0-flash-lite-001": {
  520. maxTokens: 8192,
  521. contextWindow: 1_048_576,
  522. supportsImages: true,
  523. supportsPromptCache: false,
  524. inputPrice: 0.075,
  525. outputPrice: 0.3,
  526. },
  527. "gemini-2.0-flash-thinking-exp-01-21": {
  528. maxTokens: 8192,
  529. contextWindow: 32_768,
  530. supportsImages: true,
  531. supportsPromptCache: false,
  532. inputPrice: 0,
  533. outputPrice: 0,
  534. },
  535. "gemini-1.5-flash-002": {
  536. maxTokens: 8192,
  537. contextWindow: 1_048_576,
  538. supportsImages: true,
  539. supportsPromptCache: true,
  540. isPromptCacheOptional: true,
  541. inputPrice: 0.075,
  542. outputPrice: 0.3,
  543. },
  544. "gemini-1.5-pro-002": {
  545. maxTokens: 8192,
  546. contextWindow: 2_097_152,
  547. supportsImages: true,
  548. supportsPromptCache: false,
  549. inputPrice: 1.25,
  550. outputPrice: 5,
  551. },
  552. "claude-3-7-sonnet@20250219:thinking": {
  553. maxTokens: 64_000,
  554. contextWindow: 200_000,
  555. supportsImages: true,
  556. supportsComputerUse: true,
  557. supportsPromptCache: true,
  558. inputPrice: 3.0,
  559. outputPrice: 15.0,
  560. cacheWritesPrice: 3.75,
  561. cacheReadsPrice: 0.3,
  562. thinking: true,
  563. },
  564. "claude-3-7-sonnet@20250219": {
  565. maxTokens: 8192,
  566. contextWindow: 200_000,
  567. supportsImages: true,
  568. supportsComputerUse: true,
  569. supportsPromptCache: true,
  570. inputPrice: 3.0,
  571. outputPrice: 15.0,
  572. cacheWritesPrice: 3.75,
  573. cacheReadsPrice: 0.3,
  574. thinking: false,
  575. },
  576. "claude-3-5-sonnet-v2@20241022": {
  577. maxTokens: 8192,
  578. contextWindow: 200_000,
  579. supportsImages: true,
  580. supportsComputerUse: true,
  581. supportsPromptCache: true,
  582. inputPrice: 3.0,
  583. outputPrice: 15.0,
  584. cacheWritesPrice: 3.75,
  585. cacheReadsPrice: 0.3,
  586. },
  587. "claude-3-5-sonnet@20240620": {
  588. maxTokens: 8192,
  589. contextWindow: 200_000,
  590. supportsImages: true,
  591. supportsPromptCache: true,
  592. inputPrice: 3.0,
  593. outputPrice: 15.0,
  594. cacheWritesPrice: 3.75,
  595. cacheReadsPrice: 0.3,
  596. },
  597. "claude-3-5-haiku@20241022": {
  598. maxTokens: 8192,
  599. contextWindow: 200_000,
  600. supportsImages: false,
  601. supportsPromptCache: true,
  602. inputPrice: 1.0,
  603. outputPrice: 5.0,
  604. cacheWritesPrice: 1.25,
  605. cacheReadsPrice: 0.1,
  606. },
  607. "claude-3-opus@20240229": {
  608. maxTokens: 4096,
  609. contextWindow: 200_000,
  610. supportsImages: true,
  611. supportsPromptCache: true,
  612. inputPrice: 15.0,
  613. outputPrice: 75.0,
  614. cacheWritesPrice: 18.75,
  615. cacheReadsPrice: 1.5,
  616. },
  617. "claude-3-haiku@20240307": {
  618. maxTokens: 4096,
  619. contextWindow: 200_000,
  620. supportsImages: true,
  621. supportsPromptCache: true,
  622. inputPrice: 0.25,
  623. outputPrice: 1.25,
  624. cacheWritesPrice: 0.3,
  625. cacheReadsPrice: 0.03,
  626. },
  627. } as const satisfies Record<string, ModelInfo>
  628. export const openAiModelInfoSaneDefaults: ModelInfo = {
  629. maxTokens: -1,
  630. contextWindow: 128_000,
  631. supportsImages: true,
  632. supportsPromptCache: false,
  633. inputPrice: 0,
  634. outputPrice: 0,
  635. }
  636. // Gemini
  637. // https://ai.google.dev/gemini-api/docs/models/gemini
  638. export type GeminiModelId = keyof typeof geminiModels
  639. export const geminiDefaultModelId: GeminiModelId = "gemini-2.0-flash-001"
  640. export const geminiModels = {
  641. "gemini-2.5-flash-preview-04-17:thinking": {
  642. maxTokens: 65_535,
  643. contextWindow: 1_048_576,
  644. supportsImages: true,
  645. supportsPromptCache: false,
  646. inputPrice: 0.15,
  647. outputPrice: 3.5,
  648. thinking: true,
  649. maxThinkingTokens: 24_576,
  650. },
  651. "gemini-2.5-flash-preview-04-17": {
  652. maxTokens: 65_535,
  653. contextWindow: 1_048_576,
  654. supportsImages: true,
  655. supportsPromptCache: false,
  656. inputPrice: 0.15,
  657. outputPrice: 0.6,
  658. thinking: false,
  659. },
  660. "gemini-2.5-pro-exp-03-25": {
  661. maxTokens: 65_535,
  662. contextWindow: 1_048_576,
  663. supportsImages: true,
  664. supportsPromptCache: false,
  665. inputPrice: 0,
  666. outputPrice: 0,
  667. },
  668. "gemini-2.5-pro-preview-03-25": {
  669. maxTokens: 65_535,
  670. contextWindow: 1_048_576,
  671. supportsImages: true,
  672. supportsPromptCache: true,
  673. isPromptCacheOptional: true,
  674. inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
  675. outputPrice: 15,
  676. cacheReadsPrice: 0.625,
  677. cacheWritesPrice: 4.5,
  678. tiers: [
  679. {
  680. contextWindow: 200_000,
  681. inputPrice: 1.25,
  682. outputPrice: 10,
  683. cacheReadsPrice: 0.31,
  684. },
  685. {
  686. contextWindow: Infinity,
  687. inputPrice: 2.5,
  688. outputPrice: 15,
  689. cacheReadsPrice: 0.625,
  690. },
  691. ],
  692. },
  693. "gemini-2.0-flash-001": {
  694. maxTokens: 8192,
  695. contextWindow: 1_048_576,
  696. supportsImages: true,
  697. supportsPromptCache: true,
  698. isPromptCacheOptional: true,
  699. inputPrice: 0.1,
  700. outputPrice: 0.4,
  701. cacheReadsPrice: 0.025,
  702. cacheWritesPrice: 1.0,
  703. },
  704. "gemini-2.0-flash-lite-preview-02-05": {
  705. maxTokens: 8192,
  706. contextWindow: 1_048_576,
  707. supportsImages: true,
  708. supportsPromptCache: false,
  709. inputPrice: 0,
  710. outputPrice: 0,
  711. },
  712. "gemini-2.0-pro-exp-02-05": {
  713. maxTokens: 8192,
  714. contextWindow: 2_097_152,
  715. supportsImages: true,
  716. supportsPromptCache: false,
  717. inputPrice: 0,
  718. outputPrice: 0,
  719. },
  720. "gemini-2.0-flash-thinking-exp-01-21": {
  721. maxTokens: 65_536,
  722. contextWindow: 1_048_576,
  723. supportsImages: true,
  724. supportsPromptCache: false,
  725. inputPrice: 0,
  726. outputPrice: 0,
  727. },
  728. "gemini-2.0-flash-thinking-exp-1219": {
  729. maxTokens: 8192,
  730. contextWindow: 32_767,
  731. supportsImages: true,
  732. supportsPromptCache: false,
  733. inputPrice: 0,
  734. outputPrice: 0,
  735. },
  736. "gemini-2.0-flash-exp": {
  737. maxTokens: 8192,
  738. contextWindow: 1_048_576,
  739. supportsImages: true,
  740. supportsPromptCache: false,
  741. inputPrice: 0,
  742. outputPrice: 0,
  743. },
  744. "gemini-1.5-flash-002": {
  745. maxTokens: 8192,
  746. contextWindow: 1_048_576,
  747. supportsImages: true,
  748. supportsPromptCache: true,
  749. isPromptCacheOptional: true,
  750. inputPrice: 0.15, // This is the pricing for prompts above 128k tokens.
  751. outputPrice: 0.6,
  752. cacheReadsPrice: 0.0375,
  753. cacheWritesPrice: 1.0,
  754. tiers: [
  755. {
  756. contextWindow: 128_000,
  757. inputPrice: 0.075,
  758. outputPrice: 0.3,
  759. cacheReadsPrice: 0.01875,
  760. },
  761. {
  762. contextWindow: Infinity,
  763. inputPrice: 0.15,
  764. outputPrice: 0.6,
  765. cacheReadsPrice: 0.0375,
  766. },
  767. ],
  768. },
  769. "gemini-1.5-flash-exp-0827": {
  770. maxTokens: 8192,
  771. contextWindow: 1_048_576,
  772. supportsImages: true,
  773. supportsPromptCache: false,
  774. inputPrice: 0,
  775. outputPrice: 0,
  776. },
  777. "gemini-1.5-flash-8b-exp-0827": {
  778. maxTokens: 8192,
  779. contextWindow: 1_048_576,
  780. supportsImages: true,
  781. supportsPromptCache: false,
  782. inputPrice: 0,
  783. outputPrice: 0,
  784. },
  785. "gemini-1.5-pro-002": {
  786. maxTokens: 8192,
  787. contextWindow: 2_097_152,
  788. supportsImages: true,
  789. supportsPromptCache: false,
  790. inputPrice: 0,
  791. outputPrice: 0,
  792. },
  793. "gemini-1.5-pro-exp-0827": {
  794. maxTokens: 8192,
  795. contextWindow: 2_097_152,
  796. supportsImages: true,
  797. supportsPromptCache: false,
  798. inputPrice: 0,
  799. outputPrice: 0,
  800. },
  801. "gemini-exp-1206": {
  802. maxTokens: 8192,
  803. contextWindow: 2_097_152,
  804. supportsImages: true,
  805. supportsPromptCache: false,
  806. inputPrice: 0,
  807. outputPrice: 0,
  808. },
  809. } as const satisfies Record<string, ModelInfo>
// OpenAI Native
// https://openai.com/api/pricing/
export type OpenAiNativeModelId = keyof typeof openAiNativeModels

// Model used when the user has not explicitly selected an OpenAI model.
export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4.1"

// Static catalog of models served directly by the OpenAI API.
// Prices are presumably USD per million tokens, matching the unit spelled out
// in the deepSeekModels comments — confirm against the pricing page above.
// Each o-series model appears three times ("-high"/"-low" suffixed variants)
// so that the model id alone determines the `reasoningEffort` value.
export const openAiNativeModels = {
	"gpt-4.1": {
		maxTokens: 32_768,
		contextWindow: 1_047_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 2,
		outputPrice: 8,
		cacheReadsPrice: 0.5,
	},
	"gpt-4.1-mini": {
		maxTokens: 32_768,
		contextWindow: 1_047_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.4,
		outputPrice: 1.6,
		cacheReadsPrice: 0.1,
	},
	"gpt-4.1-nano": {
		maxTokens: 32_768,
		contextWindow: 1_047_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.1,
		outputPrice: 0.4,
		cacheReadsPrice: 0.025,
	},
	o3: {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 10.0,
		outputPrice: 40.0,
		cacheReadsPrice: 2.5,
		reasoningEffort: "medium",
	},
	"o3-high": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 10.0,
		outputPrice: 40.0,
		cacheReadsPrice: 2.5,
		reasoningEffort: "high",
	},
	"o3-low": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 10.0,
		outputPrice: 40.0,
		cacheReadsPrice: 2.5,
		reasoningEffort: "low",
	},
	"o4-mini": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		cacheReadsPrice: 0.275,
		reasoningEffort: "medium",
	},
	"o4-mini-high": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		cacheReadsPrice: 0.275,
		reasoningEffort: "high",
	},
	"o4-mini-low": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		cacheReadsPrice: 0.275,
		reasoningEffort: "low",
	},
	"o3-mini": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		cacheReadsPrice: 0.55,
		reasoningEffort: "medium",
	},
	"o3-mini-high": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		cacheReadsPrice: 0.55,
		reasoningEffort: "high",
	},
	"o3-mini-low": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		cacheReadsPrice: 0.55,
		reasoningEffort: "low",
	},
	o1: {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 15,
		outputPrice: 60,
		cacheReadsPrice: 7.5,
	},
	// NOTE(review): o1-preview and o1-mini are listed with supportsImages: true —
	// confirm against the OpenAI API docs; the o1-mini API has historically been
	// text-only.
	"o1-preview": {
		maxTokens: 32_768,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 15,
		outputPrice: 60,
		cacheReadsPrice: 7.5,
	},
	"o1-mini": {
		maxTokens: 65_536,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		cacheReadsPrice: 0.55,
	},
	"gpt-4.5-preview": {
		maxTokens: 16_384,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 75,
		outputPrice: 150,
		cacheReadsPrice: 37.5,
	},
	"gpt-4o": {
		maxTokens: 16_384,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 2.5,
		outputPrice: 10,
		cacheReadsPrice: 1.25,
	},
	"gpt-4o-mini": {
		maxTokens: 16_384,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.15,
		outputPrice: 0.6,
		cacheReadsPrice: 0.075,
	},
} as const satisfies Record<string, ModelInfo>
// DeepSeek
// https://platform.deepseek.com/docs/api
export type DeepSeekModelId = keyof typeof deepSeekModels

// Model used when the user has not explicitly selected a DeepSeek model.
export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"

// Static catalog of DeepSeek models. DeepSeek bills cache misses at the input
// price (hence cacheWritesPrice === inputPrice in both entries) and cache hits
// at a discounted rate.
export const deepSeekModels = {
	"deepseek-chat": {
		maxTokens: 8192,
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0.27, // $0.27 per million tokens (cache miss)
		outputPrice: 1.1, // $1.10 per million tokens
		cacheWritesPrice: 0.27, // $0.27 per million tokens (cache miss)
		cacheReadsPrice: 0.07, // $0.07 per million tokens (cache hit).
		description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`,
	},
	"deepseek-reasoner": {
		maxTokens: 8192,
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0.55, // $0.55 per million tokens (cache miss)
		outputPrice: 2.19, // $2.19 per million tokens
		cacheWritesPrice: 0.55, // $0.55 per million tokens (cache miss)
		cacheReadsPrice: 0.14, // $0.14 per million tokens (cache hit)
		description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 32K tokens.`,
	},
} as const satisfies Record<string, ModelInfo>
// Azure OpenAI
// https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs
// Default Azure OpenAI API version string; see the deprecation schedule linked
// above when bumping this.
export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"
// Mistral
// https://docs.mistral.ai/getting-started/models/models_overview/
export type MistralModelId = keyof typeof mistralModels

// Model used when the user has not explicitly selected a Mistral model.
export const mistralDefaultModelId: MistralModelId = "codestral-latest"

// Static catalog of Mistral models. No entry supports prompt caching and only
// pixtral-large-latest accepts images.
// NOTE(review): maxTokens equals contextWindow in every entry here, unlike the
// other provider tables in this file — confirm that is intentional.
export const mistralModels = {
	"codestral-latest": {
		maxTokens: 256_000,
		contextWindow: 256_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.3,
		outputPrice: 0.9,
	},
	"mistral-large-latest": {
		maxTokens: 131_000,
		contextWindow: 131_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 6.0,
	},
	"ministral-8b-latest": {
		maxTokens: 131_000,
		contextWindow: 131_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.1,
		outputPrice: 0.1,
	},
	"ministral-3b-latest": {
		maxTokens: 131_000,
		contextWindow: 131_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.04,
		outputPrice: 0.04,
	},
	"mistral-small-latest": {
		maxTokens: 32_000,
		contextWindow: 32_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.2,
		outputPrice: 0.6,
	},
	"pixtral-large-latest": {
		maxTokens: 131_000,
		contextWindow: 131_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 6.0,
	},
} as const satisfies Record<string, ModelInfo>
// Unbound Security
// https://www.unboundsecurity.ai/ai-gateway
export const unboundDefaultModelId = "anthropic/claude-3-7-sonnet-20250219"

// Hard-coded ModelInfo for the default Unbound model above (a Claude 3.7
// Sonnet build, per the model id).
export const unboundDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
}
// xAI
// https://docs.x.ai/docs/api-reference
export type XAIModelId = keyof typeof xaiModels

// Model used when the user has not explicitly selected an xAI model.
export const xaiDefaultModelId: XAIModelId = "grok-3-beta"

// Static catalog of xAI Grok models. No entry supports prompt caching; only
// the vision variants accept images. Context windows in this table: 131072
// for text models, 32768 for the grok-2-vision family, 8192 for
// grok-vision-beta.
export const xaiModels = {
	"grok-3-beta": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 3.0,
		outputPrice: 15.0,
		description: "xAI's Grok-3 beta model with 131K context window",
	},
	"grok-3-fast-beta": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 5.0,
		outputPrice: 25.0,
		description: "xAI's Grok-3 fast beta model with 131K context window",
	},
	"grok-3-mini-beta": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.3,
		outputPrice: 0.5,
		description: "xAI's Grok-3 mini beta model with 131K context window",
	},
	"grok-3-mini-fast-beta": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.6,
		outputPrice: 4.0,
		description: "xAI's Grok-3 mini fast beta model with 131K context window",
	},
	"grok-2-latest": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 10.0,
		description: "xAI's Grok-2 model - latest version with 131K context window",
	},
	"grok-2": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 10.0,
		description: "xAI's Grok-2 model with 131K context window",
	},
	"grok-2-1212": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 10.0,
		description: "xAI's Grok-2 model (version 1212) with 131K context window",
	},
	"grok-2-vision-latest": {
		maxTokens: 8192,
		contextWindow: 32768,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 10.0,
		description: "xAI's Grok-2 Vision model - latest version with image support and 32K context window",
	},
	"grok-2-vision": {
		maxTokens: 8192,
		contextWindow: 32768,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 10.0,
		description: "xAI's Grok-2 Vision model with image support and 32K context window",
	},
	"grok-2-vision-1212": {
		maxTokens: 8192,
		contextWindow: 32768,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 10.0,
		description: "xAI's Grok-2 Vision model (version 1212) with image support and 32K context window",
	},
	"grok-vision-beta": {
		maxTokens: 8192,
		contextWindow: 8192,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 5.0,
		outputPrice: 15.0,
		description: "xAI's Grok Vision Beta model with image support and 8K context window",
	},
	"grok-beta": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 5.0,
		outputPrice: 15.0,
		description: "xAI's Grok Beta model (legacy) with 131K context window",
	},
} as const satisfies Record<string, ModelInfo>
export type VscodeLlmModelId = keyof typeof vscodeLlmModels

// Model used when the user has not explicitly selected a VS Code LM model.
export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-3.5-sonnet"

// Static catalog of models exposed through the VS Code Language Model API.
// Entries extend ModelInfo with the VS Code-specific fields `family`,
// `version`, `name`, `supportsToolCalling` and `maxInputTokens` (the
// `satisfies` clause below enforces this shape).
// All prices are zero in this table, and `maxInputTokens` equals
// `contextWindow` in every entry. Unlike the other provider tables, these
// entries carry no `maxTokens` field.
export const vscodeLlmModels = {
	"gpt-3.5-turbo": {
		contextWindow: 12114,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "gpt-3.5-turbo",
		version: "gpt-3.5-turbo-0613",
		name: "GPT 3.5 Turbo",
		supportsToolCalling: true,
		maxInputTokens: 12114,
	},
	"gpt-4o-mini": {
		contextWindow: 12115,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "gpt-4o-mini",
		version: "gpt-4o-mini-2024-07-18",
		name: "GPT-4o mini",
		supportsToolCalling: true,
		maxInputTokens: 12115,
	},
	"gpt-4": {
		contextWindow: 28501,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "gpt-4",
		version: "gpt-4-0613",
		name: "GPT 4",
		supportsToolCalling: true,
		maxInputTokens: 28501,
	},
	"gpt-4-0125-preview": {
		contextWindow: 63826,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "gpt-4-turbo",
		version: "gpt-4-0125-preview",
		name: "GPT 4 Turbo",
		supportsToolCalling: true,
		maxInputTokens: 63826,
	},
	"gpt-4o": {
		contextWindow: 63827,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "gpt-4o",
		version: "gpt-4o-2024-11-20",
		name: "GPT-4o",
		supportsToolCalling: true,
		maxInputTokens: 63827,
	},
	o1: {
		contextWindow: 19827,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "o1-ga",
		version: "o1-2024-12-17",
		name: "o1 (Preview)",
		supportsToolCalling: true,
		maxInputTokens: 19827,
	},
	"o3-mini": {
		contextWindow: 63827,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "o3-mini",
		version: "o3-mini-2025-01-31",
		name: "o3-mini",
		supportsToolCalling: true,
		maxInputTokens: 63827,
	},
	"claude-3.5-sonnet": {
		contextWindow: 81638,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "claude-3.5-sonnet",
		version: "claude-3.5-sonnet",
		name: "Claude 3.5 Sonnet",
		supportsToolCalling: true,
		maxInputTokens: 81638,
	},
	"claude-3.7-sonnet": {
		contextWindow: 89827,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "claude-3.7-sonnet",
		version: "claude-3.7-sonnet",
		name: "Claude 3.7 Sonnet",
		supportsToolCalling: true,
		maxInputTokens: 89827,
	},
	// The only entry with `thinking: true`; also the only Claude entry without
	// tool calling.
	"claude-3.7-sonnet-thought": {
		contextWindow: 89827,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "claude-3.7-sonnet-thought",
		version: "claude-3.7-sonnet-thought",
		name: "Claude 3.7 Sonnet Thinking",
		supportsToolCalling: false,
		maxInputTokens: 89827,
		thinking: true,
	},
	"gemini-2.0-flash-001": {
		contextWindow: 127827,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "gemini-2.0-flash",
		version: "gemini-2.0-flash-001",
		name: "Gemini 2.0 Flash",
		supportsToolCalling: false,
		maxInputTokens: 127827,
	},
	"gemini-2.5-pro": {
		contextWindow: 63830,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "gemini-2.5-pro",
		version: "gemini-2.5-pro-preview-03-25",
		name: "Gemini 2.5 Pro (Preview)",
		supportsToolCalling: true,
		maxInputTokens: 63830,
	},
	"o4-mini": {
		contextWindow: 111446,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "o4-mini",
		version: "o4-mini-2025-04-16",
		name: "o4-mini (Preview)",
		supportsToolCalling: true,
		maxInputTokens: 111446,
	},
	"gpt-4.1": {
		contextWindow: 111446,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "gpt-4.1",
		version: "gpt-4.1-2025-04-14",
		name: "GPT-4.1 (Preview)",
		supportsToolCalling: true,
		maxInputTokens: 111446,
	},
} as const satisfies Record<
	string,
	ModelInfo & {
		family: string
		version: string
		name: string
		supportsToolCalling: boolean
		maxInputTokens: number
	}
>
/**
 * Constants
 */

// These models support reasoning efforts.
// Note: the set mixes OpenRouter-style ids ("x-ai/...") and bare xAI ids, so
// membership checks work regardless of which provider路id form the caller has.
export const REASONING_MODELS = new Set(["x-ai/grok-3-mini-beta", "grok-3-mini-beta", "grok-3-mini-fast-beta"])

// These models support prompt caching.
export const PROMPT_CACHING_MODELS = new Set([
	"anthropic/claude-3-haiku",
	"anthropic/claude-3-haiku:beta",
	"anthropic/claude-3-opus",
	"anthropic/claude-3-opus:beta",
	"anthropic/claude-3-sonnet",
	"anthropic/claude-3-sonnet:beta",
	"anthropic/claude-3.5-haiku",
	"anthropic/claude-3.5-haiku-20241022",
	"anthropic/claude-3.5-haiku-20241022:beta",
	"anthropic/claude-3.5-haiku:beta",
	"anthropic/claude-3.5-sonnet",
	"anthropic/claude-3.5-sonnet-20240620",
	"anthropic/claude-3.5-sonnet-20240620:beta",
	"anthropic/claude-3.5-sonnet:beta",
	"anthropic/claude-3.7-sonnet",
	"anthropic/claude-3.7-sonnet:beta",
	"anthropic/claude-3.7-sonnet:thinking",
	"google/gemini-2.5-pro-preview-03-25",
	"google/gemini-2.0-flash-001",
	"google/gemini-flash-1.5",
	"google/gemini-flash-1.5-8b",
])

// These models don't have prompt caching enabled by default (you can turn it on
// in settings).
// Every id here is also present in PROMPT_CACHING_MODELS; keep the two sets in
// sync when adding entries.
export const OPTIONAL_PROMPT_CACHING_MODELS = new Set([
	"google/gemini-2.5-pro-preview-03-25",
	"google/gemini-2.0-flash-001",
	"google/gemini-flash-1.5",
	"google/gemini-flash-1.5-8b",
])

// Models that support the "computer use" capability.
// https://www.anthropic.com/news/3-5-models-and-computer-use
export const COMPUTER_USE_MODELS = new Set([
	"anthropic/claude-3.5-sonnet",
	"anthropic/claude-3.5-sonnet:beta",
	"anthropic/claude-3.7-sonnet",
	"anthropic/claude-3.7-sonnet:beta",
	"anthropic/claude-3.7-sonnet:thinking",
])
  1427. const routerNames = ["openrouter", "requesty", "glama", "unbound"] as const
  1428. export type RouterName = (typeof routerNames)[number]
  1429. export const isRouterName = (value: string): value is RouterName => routerNames.includes(value as RouterName)
// Map of model id -> model metadata, as returned by a single router's listing.
export type ModelRecord = Record<string, ModelInfo>

// Per-router model catalogs, keyed by the router identifiers in `routerNames`.
export type RouterModels = Record<RouterName, ModelRecord>