api.ts 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966
import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "../api/providers/constants"
import { ModelInfo, ProviderName, ProviderSettings } from "../schemas"

// Re-export the schema types so consumers can import them from this module.
export type { ModelInfo, ProviderName, ProviderSettings }

// Options passed to an API handler: everything in ProviderSettings except the
// `apiProvider` discriminator (the handler is already provider-specific).
export type ApiHandlerOptions = Omit<ProviderSettings, "apiProvider">
// Anthropic
// https://docs.anthropic.com/en/docs/about-claude/models

// Union of the model IDs declared in `anthropicModels` below.
export type AnthropicModelId = keyof typeof anthropicModels
export const anthropicDefaultModelId: AnthropicModelId = "claude-sonnet-4-20250514"

// Static capability/pricing table for first-party Anthropic API models.
// All prices are USD per million tokens.
export const anthropicModels = {
	"claude-sonnet-4-20250514": {
		maxTokens: 64_000, // Overridden to 8k if `enableReasoningEffort` is false.
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0, // $3 per million input tokens
		outputPrice: 15.0, // $15 per million output tokens
		cacheWritesPrice: 3.75, // $3.75 per million tokens
		cacheReadsPrice: 0.3, // $0.30 per million tokens
		supportsReasoningBudget: true,
	},
	"claude-opus-4-20250514": {
		maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false.
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 15.0, // $15 per million input tokens
		outputPrice: 75.0, // $75 per million output tokens
		cacheWritesPrice: 18.75, // $18.75 per million tokens
		cacheReadsPrice: 1.5, // $1.50 per million tokens
		supportsReasoningBudget: true,
	},
	// Virtual ":thinking" variant of 3.7 Sonnet — same underlying model with
	// extended reasoning forced on (`requiredReasoningBudget`).
	"claude-3-7-sonnet-20250219:thinking": {
		maxTokens: 128_000, // Unlocked by passing `beta` flag to the model. Otherwise, it's 64k.
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0, // $3 per million input tokens
		outputPrice: 15.0, // $15 per million output tokens
		cacheWritesPrice: 3.75, // $3.75 per million tokens
		cacheReadsPrice: 0.3, // $0.30 per million tokens
		supportsReasoningBudget: true,
		requiredReasoningBudget: true,
	},
	"claude-3-7-sonnet-20250219": {
		maxTokens: 8192, // Since we already have a `:thinking` virtual model we aren't setting `supportsReasoningBudget: true` here.
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0, // $3 per million input tokens
		outputPrice: 15.0, // $15 per million output tokens
		cacheWritesPrice: 3.75, // $3.75 per million tokens
		cacheReadsPrice: 0.3, // $0.30 per million tokens
	},
	"claude-3-5-sonnet-20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0, // $3 per million input tokens
		outputPrice: 15.0, // $15 per million output tokens
		cacheWritesPrice: 3.75, // $3.75 per million tokens
		cacheReadsPrice: 0.3, // $0.30 per million tokens
	},
	"claude-3-5-haiku-20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.0,
		outputPrice: 5.0,
		cacheWritesPrice: 1.25,
		cacheReadsPrice: 0.1,
	},
	"claude-3-opus-20240229": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 15.0,
		outputPrice: 75.0,
		cacheWritesPrice: 18.75,
		cacheReadsPrice: 1.5,
	},
	"claude-3-haiku-20240307": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.25,
		outputPrice: 1.25,
		cacheWritesPrice: 0.3,
		cacheReadsPrice: 0.03,
	},
} as const satisfies Record<string, ModelInfo> // as const assertion makes the object deeply readonly
// Amazon Bedrock
// https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html

/**
 * One content item inside a conversation message.
 * Only the fields relevant to the given `type` are expected to be populated;
 * the remaining optional fields stay undefined.
 */
export interface MessageContent {
	type: "text" | "image" | "video" | "tool_use" | "tool_result"
	// Populated for "text" items.
	text?: string
	// Populated for inline "image" items.
	source?: {
		type: "base64"
		data: string | Uint8Array // string for Anthropic, Uint8Array for Bedrock
		media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp"
	}
	// Video specific fields
	format?: string
	// Reference to a video stored in S3 rather than sent inline.
	s3Location?: {
		uri: string
		bucketOwner?: string
	}
	// Tool use and result fields
	toolUseId?: string
	name?: string
	input?: any // NOTE(review): shape is tool-defined; `unknown` would be safer but changes the public interface — confirm callers first
	output?: any // Used for tool_result type
}
// Union of the model IDs declared in `bedrockModels` below.
export type BedrockModelId = keyof typeof bedrockModels
export const bedrockDefaultModelId: BedrockModelId = "anthropic.claude-sonnet-4-20250514-v1:0"
// Model assumed when a prompt-router response doesn't identify which routed model answered.
export const bedrockDefaultPromptRouterModelId: BedrockModelId = "anthropic.claude-3-sonnet-20240229-v1:0"

// March, 12 2025 - updated prices to match US-West-2 list price shown at https://aws.amazon.com/bedrock/pricing/
// including older models that are part of the default prompt routers AWS enabled for GA of the prompt router feature
// Prices are USD per million tokens. `minTokensPerCachePoint` / `maxCachePoints` /
// `cachableFields` describe Bedrock's prompt-caching constraints per model.
export const bedrockModels = {
	// --- Amazon Nova family ---
	"amazon.nova-pro-v1:0": {
		maxTokens: 5000,
		contextWindow: 300_000,
		supportsImages: true,
		supportsComputerUse: false,
		supportsPromptCache: true,
		inputPrice: 0.8,
		outputPrice: 3.2,
		cacheWritesPrice: 0.8, // per million tokens
		cacheReadsPrice: 0.2, // per million tokens
		minTokensPerCachePoint: 1,
		maxCachePoints: 1,
		cachableFields: ["system"],
	},
	"amazon.nova-pro-latency-optimized-v1:0": {
		maxTokens: 5000,
		contextWindow: 300_000,
		supportsImages: true,
		supportsComputerUse: false,
		supportsPromptCache: false,
		inputPrice: 1.0,
		outputPrice: 4.0,
		cacheWritesPrice: 1.0, // per million tokens
		cacheReadsPrice: 0.25, // per million tokens
		description: "Amazon Nova Pro with latency optimized inference",
	},
	"amazon.nova-lite-v1:0": {
		maxTokens: 5000,
		contextWindow: 300_000,
		supportsImages: true,
		supportsComputerUse: false,
		supportsPromptCache: true,
		inputPrice: 0.06,
		outputPrice: 0.24,
		cacheWritesPrice: 0.06, // per million tokens
		cacheReadsPrice: 0.015, // per million tokens
		minTokensPerCachePoint: 1,
		maxCachePoints: 1,
		cachableFields: ["system"],
	},
	"amazon.nova-micro-v1:0": {
		maxTokens: 5000,
		contextWindow: 128_000,
		supportsImages: false,
		supportsComputerUse: false,
		supportsPromptCache: true,
		inputPrice: 0.035,
		outputPrice: 0.14,
		cacheWritesPrice: 0.035, // per million tokens
		cacheReadsPrice: 0.00875, // per million tokens
		minTokensPerCachePoint: 1,
		maxCachePoints: 1,
		cachableFields: ["system"],
	},
	// --- Anthropic Claude on Bedrock ---
	"anthropic.claude-sonnet-4-20250514-v1:0": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0,
		outputPrice: 15.0,
		cacheWritesPrice: 3.75,
		cacheReadsPrice: 0.3,
		minTokensPerCachePoint: 1024,
		maxCachePoints: 4,
		cachableFields: ["system", "messages", "tools"],
	},
	"anthropic.claude-opus-4-20250514-v1:0": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 15.0,
		outputPrice: 75.0,
		cacheWritesPrice: 18.75,
		cacheReadsPrice: 1.5,
		minTokensPerCachePoint: 1024,
		maxCachePoints: 4,
		cachableFields: ["system", "messages", "tools"],
	},
	"anthropic.claude-3-7-sonnet-20250219-v1:0": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0,
		outputPrice: 15.0,
		cacheWritesPrice: 3.75,
		cacheReadsPrice: 0.3,
		minTokensPerCachePoint: 1024,
		maxCachePoints: 4,
		cachableFields: ["system", "messages", "tools"],
	},
	"anthropic.claude-3-5-sonnet-20241022-v2:0": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0,
		outputPrice: 15.0,
		cacheWritesPrice: 3.75,
		cacheReadsPrice: 0.3,
		minTokensPerCachePoint: 1024,
		maxCachePoints: 4,
		cachableFields: ["system", "messages", "tools"],
	},
	"anthropic.claude-3-5-haiku-20241022-v1:0": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0.8,
		outputPrice: 4.0,
		cacheWritesPrice: 1.0,
		cacheReadsPrice: 0.08,
		minTokensPerCachePoint: 2048,
		maxCachePoints: 4,
		cachableFields: ["system", "messages", "tools"],
	},
	"anthropic.claude-3-5-sonnet-20240620-v1:0": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 3.0,
		outputPrice: 15.0,
	},
	"anthropic.claude-3-opus-20240229-v1:0": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 15.0,
		outputPrice: 75.0,
	},
	"anthropic.claude-3-sonnet-20240229-v1:0": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 3.0,
		outputPrice: 15.0,
	},
	"anthropic.claude-3-haiku-20240307-v1:0": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.25,
		outputPrice: 1.25,
	},
	// Legacy Claude models (kept for the default prompt routers).
	"anthropic.claude-2-1-v1:0": {
		maxTokens: 4096,
		contextWindow: 100_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 8.0,
		outputPrice: 24.0,
		description: "Claude 2.1",
	},
	"anthropic.claude-2-0-v1:0": {
		maxTokens: 4096,
		contextWindow: 100_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 8.0,
		outputPrice: 24.0,
		description: "Claude 2.0",
	},
	"anthropic.claude-instant-v1:0": {
		maxTokens: 4096,
		contextWindow: 100_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.8,
		outputPrice: 2.4,
		description: "Claude Instant",
	},
	// --- DeepSeek ---
	"deepseek.r1-v1:0": {
		maxTokens: 32_768,
		contextWindow: 128_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.35,
		outputPrice: 5.4,
	},
	// --- Meta Llama family ---
	"meta.llama3-3-70b-instruct-v1:0": {
		maxTokens: 8192,
		contextWindow: 128_000,
		supportsImages: false,
		supportsComputerUse: false,
		supportsPromptCache: false,
		inputPrice: 0.72,
		outputPrice: 0.72,
		description: "Llama 3.3 Instruct (70B)",
	},
	"meta.llama3-2-90b-instruct-v1:0": {
		maxTokens: 8192,
		contextWindow: 128_000,
		supportsImages: true,
		supportsComputerUse: false,
		supportsPromptCache: false,
		inputPrice: 0.72,
		outputPrice: 0.72,
		description: "Llama 3.2 Instruct (90B)",
	},
	"meta.llama3-2-11b-instruct-v1:0": {
		maxTokens: 8192,
		contextWindow: 128_000,
		supportsImages: true,
		supportsComputerUse: false,
		supportsPromptCache: false,
		inputPrice: 0.16,
		outputPrice: 0.16,
		description: "Llama 3.2 Instruct (11B)",
	},
	"meta.llama3-2-3b-instruct-v1:0": {
		maxTokens: 8192,
		contextWindow: 128_000,
		supportsImages: false,
		supportsComputerUse: false,
		supportsPromptCache: false,
		inputPrice: 0.15,
		outputPrice: 0.15,
		description: "Llama 3.2 Instruct (3B)",
	},
	"meta.llama3-2-1b-instruct-v1:0": {
		maxTokens: 8192,
		contextWindow: 128_000,
		supportsImages: false,
		supportsComputerUse: false,
		supportsPromptCache: false,
		inputPrice: 0.1,
		outputPrice: 0.1,
		description: "Llama 3.2 Instruct (1B)",
	},
	"meta.llama3-1-405b-instruct-v1:0": {
		maxTokens: 8192,
		contextWindow: 128_000,
		supportsImages: false,
		supportsComputerUse: false,
		supportsPromptCache: false,
		inputPrice: 2.4,
		outputPrice: 2.4,
		description: "Llama 3.1 Instruct (405B)",
	},
	"meta.llama3-1-70b-instruct-v1:0": {
		maxTokens: 8192,
		contextWindow: 128_000,
		supportsImages: false,
		supportsComputerUse: false,
		supportsPromptCache: false,
		inputPrice: 0.72,
		outputPrice: 0.72,
		description: "Llama 3.1 Instruct (70B)",
	},
	"meta.llama3-1-70b-instruct-latency-optimized-v1:0": {
		maxTokens: 8192,
		contextWindow: 128_000,
		supportsImages: false,
		supportsComputerUse: false,
		supportsPromptCache: false,
		inputPrice: 0.9,
		outputPrice: 0.9,
		description: "Llama 3.1 Instruct (70B) (w/ latency optimized inference)",
	},
	"meta.llama3-1-8b-instruct-v1:0": {
		maxTokens: 8192,
		contextWindow: 8_000,
		supportsImages: false,
		supportsComputerUse: false,
		supportsPromptCache: false,
		inputPrice: 0.22,
		outputPrice: 0.22,
		description: "Llama 3.1 Instruct (8B)",
	},
	"meta.llama3-70b-instruct-v1:0": {
		maxTokens: 2048,
		contextWindow: 8_000,
		supportsImages: false,
		supportsComputerUse: false,
		supportsPromptCache: false,
		inputPrice: 2.65,
		outputPrice: 3.5,
	},
	"meta.llama3-8b-instruct-v1:0": {
		maxTokens: 2048,
		contextWindow: 4_000,
		supportsImages: false,
		supportsComputerUse: false,
		supportsPromptCache: false,
		inputPrice: 0.3,
		outputPrice: 0.6,
	},
	// --- Amazon Titan family ---
	"amazon.titan-text-lite-v1:0": {
		maxTokens: 4096,
		contextWindow: 8_000,
		supportsImages: false,
		supportsComputerUse: false,
		supportsPromptCache: false,
		inputPrice: 0.15,
		outputPrice: 0.2,
		description: "Amazon Titan Text Lite",
	},
	"amazon.titan-text-express-v1:0": {
		maxTokens: 4096,
		contextWindow: 8_000,
		supportsImages: false,
		supportsComputerUse: false,
		supportsPromptCache: false,
		inputPrice: 0.2,
		outputPrice: 0.6,
		description: "Amazon Titan Text Express",
	},
	// Embedding models have no output tokens, hence no `outputPrice`.
	"amazon.titan-text-embeddings-v1:0": {
		maxTokens: 8192,
		contextWindow: 8_000,
		supportsImages: false,
		supportsComputerUse: false,
		supportsPromptCache: false,
		inputPrice: 0.1,
		description: "Amazon Titan Text Embeddings",
	},
	"amazon.titan-text-embeddings-v2:0": {
		maxTokens: 8192,
		contextWindow: 8_000,
		supportsImages: false,
		supportsComputerUse: false,
		supportsPromptCache: false,
		inputPrice: 0.02,
		description: "Amazon Titan Text Embeddings V2",
	},
} as const satisfies Record<string, ModelInfo>
  465. // Glama
  466. // https://glama.ai/models
  467. export const glamaDefaultModelId = "anthropic/claude-3-7-sonnet"
  468. export const glamaDefaultModelInfo: ModelInfo = {
  469. maxTokens: 8192,
  470. contextWindow: 200_000,
  471. supportsImages: true,
  472. supportsComputerUse: true,
  473. supportsPromptCache: true,
  474. inputPrice: 3.0,
  475. outputPrice: 15.0,
  476. cacheWritesPrice: 3.75,
  477. cacheReadsPrice: 0.3,
  478. description:
  479. "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
  480. }
  481. // Requesty
  482. // https://requesty.ai/router-2
  483. export const requestyDefaultModelId = "coding/claude-4-sonnet"
  484. export const requestyDefaultModelInfo: ModelInfo = {
  485. maxTokens: 8192,
  486. contextWindow: 200_000,
  487. supportsImages: true,
  488. supportsComputerUse: true,
  489. supportsPromptCache: true,
  490. inputPrice: 3.0,
  491. outputPrice: 15.0,
  492. cacheWritesPrice: 3.75,
  493. cacheReadsPrice: 0.3,
  494. description:
  495. "The best coding model, optimized by Requesty, and automatically routed to the fastest provider. Claude 4 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities.",
  496. }
  497. // OpenRouter
  498. // https://openrouter.ai/models?order=newest&supported_parameters=tools
  499. export const openRouterDefaultModelId = "anthropic/claude-sonnet-4"
  500. export const openRouterDefaultModelInfo: ModelInfo = {
  501. maxTokens: 8192,
  502. contextWindow: 200_000,
  503. supportsImages: true,
  504. supportsComputerUse: true,
  505. supportsPromptCache: true,
  506. inputPrice: 3.0,
  507. outputPrice: 15.0,
  508. cacheWritesPrice: 3.75,
  509. cacheReadsPrice: 0.3,
  510. description:
  511. "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
  512. }
// Vertex AI
// https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude

// Union of the model IDs declared in `vertexModels` below.
export type VertexModelId = keyof typeof vertexModels
export const vertexDefaultModelId: VertexModelId = "claude-sonnet-4@20250514"

// Static capability/pricing table for models served through Vertex AI
// (Google Gemini plus partner Claude models). Prices are USD per million tokens.
// ":thinking" keys are virtual variants with extended reasoning forced on.
export const vertexModels = {
	"gemini-2.5-flash-preview-05-20:thinking": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.15,
		outputPrice: 3.5,
		maxThinkingTokens: 24_576,
		supportsReasoningBudget: true,
		requiredReasoningBudget: true,
	},
	"gemini-2.5-flash-preview-05-20": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.15,
		outputPrice: 0.6,
	},
	"gemini-2.5-flash-preview-04-17:thinking": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.15,
		outputPrice: 3.5,
		maxThinkingTokens: 24_576,
		supportsReasoningBudget: true,
		requiredReasoningBudget: true,
	},
	"gemini-2.5-flash-preview-04-17": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.15,
		outputPrice: 0.6,
	},
	"gemini-2.5-pro-preview-03-25": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 2.5,
		outputPrice: 15,
	},
	"gemini-2.5-pro-preview-05-06": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 2.5,
		outputPrice: 15,
	},
	// Experimental ("exp") models are free of charge, hence zero prices.
	"gemini-2.5-pro-exp-03-25": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-pro-exp-02-05": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-001": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.15,
		outputPrice: 0.6,
	},
	"gemini-2.0-flash-lite-001": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.075,
		outputPrice: 0.3,
	},
	"gemini-2.0-flash-thinking-exp-01-21": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-002": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.075,
		outputPrice: 0.3,
	},
	"gemini-1.5-pro-002": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 1.25,
		outputPrice: 5,
	},
	// --- Partner Claude models (note the Vertex "@" id format) ---
	"claude-sonnet-4@20250514": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0,
		outputPrice: 15.0,
		cacheWritesPrice: 3.75,
		cacheReadsPrice: 0.3,
		supportsReasoningBudget: true,
	},
	"claude-opus-4@20250514": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 15.0,
		outputPrice: 75.0,
		cacheWritesPrice: 18.75,
		cacheReadsPrice: 1.5,
	},
	"claude-3-7-sonnet@20250219:thinking": {
		maxTokens: 64_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0,
		outputPrice: 15.0,
		cacheWritesPrice: 3.75,
		cacheReadsPrice: 0.3,
		supportsReasoningBudget: true,
		requiredReasoningBudget: true,
	},
	"claude-3-7-sonnet@20250219": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0,
		outputPrice: 15.0,
		cacheWritesPrice: 3.75,
		cacheReadsPrice: 0.3,
	},
	"claude-3-5-sonnet-v2@20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0,
		outputPrice: 15.0,
		cacheWritesPrice: 3.75,
		cacheReadsPrice: 0.3,
	},
	"claude-3-5-sonnet@20240620": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 3.0,
		outputPrice: 15.0,
		cacheWritesPrice: 3.75,
		cacheReadsPrice: 0.3,
	},
	"claude-3-5-haiku@20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.0,
		outputPrice: 5.0,
		cacheWritesPrice: 1.25,
		cacheReadsPrice: 0.1,
	},
	"claude-3-opus@20240229": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 15.0,
		outputPrice: 75.0,
		cacheWritesPrice: 18.75,
		cacheReadsPrice: 1.5,
	},
	"claude-3-haiku@20240307": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.25,
		outputPrice: 1.25,
		cacheWritesPrice: 0.3,
		cacheReadsPrice: 0.03,
	},
} as const satisfies Record<string, ModelInfo>
// Sane fallback ModelInfo for OpenAI-compatible endpoints.
// NOTE(review): presumably used when the concrete model's real limits and
// pricing are unknown — confirm at call sites.
export const openAiModelInfoSaneDefaults: ModelInfo = {
	maxTokens: -1, // -1 = no explicit output-token cap
	contextWindow: 128_000,
	supportsImages: true,
	supportsPromptCache: false,
	inputPrice: 0, // pricing unknown for arbitrary endpoints; zeroed rather than guessed
	outputPrice: 0,
}
// Gemini
// https://ai.google.dev/gemini-api/docs/models/gemini
// Prices are USD per 1M tokens (the unit spelled out in the DeepSeek table
// below) — NOTE(review): confirm for Gemini. Entries with a `tiers` array use
// prompt-size-based pricing: each tier applies up to its `contextWindow`
// threshold, and the top-level prices mirror the largest (Infinity) tier.
export type GeminiModelId = keyof typeof geminiModels
export const geminiDefaultModelId: GeminiModelId = "gemini-2.0-flash-001"
export const geminiModels = {
	// ":thinking" ids force a reasoning budget (requiredReasoningBudget: true)
	// capped at 24_576 thinking tokens; the plain ids are the same models
	// without the mandatory budget.
	"gemini-2.5-flash-preview-04-17:thinking": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.15,
		outputPrice: 3.5,
		maxThinkingTokens: 24_576,
		supportsReasoningBudget: true,
		requiredReasoningBudget: true,
	},
	"gemini-2.5-flash-preview-04-17": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.15,
		outputPrice: 0.6,
	},
	"gemini-2.5-flash-preview-05-20:thinking": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.15,
		outputPrice: 3.5,
		cacheReadsPrice: 0.0375,
		cacheWritesPrice: 1.0,
		maxThinkingTokens: 24_576,
		supportsReasoningBudget: true,
		requiredReasoningBudget: true,
	},
	"gemini-2.5-flash-preview-05-20": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.15,
		outputPrice: 0.6,
		cacheReadsPrice: 0.0375,
		cacheWritesPrice: 1.0,
	},
	"gemini-2.5-pro-exp-03-25": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.5-pro-preview-03-25": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
		outputPrice: 15,
		cacheReadsPrice: 0.625,
		cacheWritesPrice: 4.5,
		tiers: [
			{
				contextWindow: 200_000,
				inputPrice: 1.25,
				outputPrice: 10,
				cacheReadsPrice: 0.31,
			},
			{
				contextWindow: Infinity,
				inputPrice: 2.5,
				outputPrice: 15,
				cacheReadsPrice: 0.625,
			},
		],
	},
	"gemini-2.5-pro-preview-05-06": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
		outputPrice: 15,
		cacheReadsPrice: 0.625,
		cacheWritesPrice: 4.5,
		tiers: [
			{
				contextWindow: 200_000,
				inputPrice: 1.25,
				outputPrice: 10,
				cacheReadsPrice: 0.31,
			},
			{
				contextWindow: Infinity,
				inputPrice: 2.5,
				outputPrice: 15,
				cacheReadsPrice: 0.625,
			},
		],
	},
	"gemini-2.0-flash-001": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.1,
		outputPrice: 0.4,
		cacheReadsPrice: 0.025,
		cacheWritesPrice: 1.0,
	},
	// Experimental/preview ids below are priced at 0 (free tier while in preview).
	"gemini-2.0-flash-lite-preview-02-05": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-pro-exp-02-05": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-thinking-exp-01-21": {
		maxTokens: 65_536,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-thinking-exp-1219": {
		maxTokens: 8192,
		contextWindow: 32_767,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-exp": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-002": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.15, // This is the pricing for prompts above 128k tokens.
		outputPrice: 0.6,
		cacheReadsPrice: 0.0375,
		cacheWritesPrice: 1.0,
		tiers: [
			{
				contextWindow: 128_000,
				inputPrice: 0.075,
				outputPrice: 0.3,
				cacheReadsPrice: 0.01875,
			},
			{
				contextWindow: Infinity,
				inputPrice: 0.15,
				outputPrice: 0.6,
				cacheReadsPrice: 0.0375,
			},
		],
	},
	"gemini-1.5-flash-exp-0827": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-8b-exp-0827": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-pro-002": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-pro-exp-0827": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-exp-1206": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
} as const satisfies Record<string, ModelInfo>
// OpenAI Native
// https://openai.com/api/pricing/
// Prices are USD per 1M tokens (the unit spelled out in the DeepSeek table
// below) — NOTE(review): confirm. The plain reasoning ids (o3, o4-mini,
// o3-mini) set supportsReasoningEffort with a "medium" default; the "-high"/
// "-low" ids pin reasoningEffort WITHOUT that flag — presumably intentional
// (fixed-effort aliases, effort not user-adjustable) — confirm.
export type OpenAiNativeModelId = keyof typeof openAiNativeModels
export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4.1"
export const openAiNativeModels = {
	"gpt-4.1": {
		maxTokens: 32_768,
		contextWindow: 1_047_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 2,
		outputPrice: 8,
		cacheReadsPrice: 0.5,
	},
	"gpt-4.1-mini": {
		maxTokens: 32_768,
		contextWindow: 1_047_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.4,
		outputPrice: 1.6,
		cacheReadsPrice: 0.1,
	},
	"gpt-4.1-nano": {
		maxTokens: 32_768,
		contextWindow: 1_047_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.1,
		outputPrice: 0.4,
		cacheReadsPrice: 0.025,
	},
	o3: {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 10.0,
		outputPrice: 40.0,
		cacheReadsPrice: 2.5,
		supportsReasoningEffort: true,
		reasoningEffort: "medium",
	},
	"o3-high": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 10.0,
		outputPrice: 40.0,
		cacheReadsPrice: 2.5,
		reasoningEffort: "high",
	},
	"o3-low": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 10.0,
		outputPrice: 40.0,
		cacheReadsPrice: 2.5,
		reasoningEffort: "low",
	},
	"o4-mini": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		cacheReadsPrice: 0.275,
		supportsReasoningEffort: true,
		reasoningEffort: "medium",
	},
	"o4-mini-high": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		cacheReadsPrice: 0.275,
		reasoningEffort: "high",
	},
	"o4-mini-low": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		cacheReadsPrice: 0.275,
		reasoningEffort: "low",
	},
	"o3-mini": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		cacheReadsPrice: 0.55,
		supportsReasoningEffort: true,
		reasoningEffort: "medium",
	},
	"o3-mini-high": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		cacheReadsPrice: 0.55,
		reasoningEffort: "high",
	},
	"o3-mini-low": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		cacheReadsPrice: 0.55,
		reasoningEffort: "low",
	},
	o1: {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 15,
		outputPrice: 60,
		cacheReadsPrice: 7.5,
	},
	"o1-preview": {
		maxTokens: 32_768,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 15,
		outputPrice: 60,
		cacheReadsPrice: 7.5,
	},
	"o1-mini": {
		maxTokens: 65_536,
		contextWindow: 128_000,
		supportsImages: true, // NOTE(review): verify — o1-mini image support differs from o1 in OpenAI's docs
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		cacheReadsPrice: 0.55,
	},
	"gpt-4.5-preview": {
		maxTokens: 16_384,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 75,
		outputPrice: 150,
		cacheReadsPrice: 37.5,
	},
	"gpt-4o": {
		maxTokens: 16_384,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 2.5,
		outputPrice: 10,
		cacheReadsPrice: 1.25,
	},
	"gpt-4o-mini": {
		maxTokens: 16_384,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.15,
		outputPrice: 0.6,
		cacheReadsPrice: 0.075,
	},
} as const satisfies Record<string, ModelInfo>
// DeepSeek
// https://platform.deepseek.com/docs/api
// Prices are USD per 1M tokens. cacheWritesPrice equals the cache-miss input
// rate and cacheReadsPrice the cache-hit rate (see inline comments).
export type DeepSeekModelId = keyof typeof deepSeekModels
export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"
export const deepSeekModels = {
	"deepseek-chat": {
		maxTokens: 8192,
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0.27, // $0.27 per million tokens (cache miss)
		outputPrice: 1.1, // $1.10 per million tokens
		cacheWritesPrice: 0.27, // $0.27 per million tokens (cache miss)
		cacheReadsPrice: 0.07, // $0.07 per million tokens (cache hit).
		description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`,
	},
	"deepseek-reasoner": {
		maxTokens: 8192, // NOTE(review): presumably the final-answer cap; the description's "32K" refers to chain-of-thought tokens — confirm
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0.55, // $0.55 per million tokens (cache miss)
		outputPrice: 2.19, // $2.19 per million tokens
		cacheWritesPrice: 0.55, // $0.55 per million tokens (cache miss)
		cacheReadsPrice: 0.14, // $0.14 per million tokens (cache hit)
		description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 32K tokens.`,
	},
} as const satisfies Record<string, ModelInfo>
// Azure OpenAI
// https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs
// Default `api-version` query parameter for Azure OpenAI requests; check the
// deprecation schedule above before bumping.
export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"
// Mistral
// https://docs.mistral.ai/getting-started/models/models_overview/
// Prices are USD per 1M tokens (the unit spelled out in the DeepSeek table
// above) — NOTE(review): confirm. In every entry maxTokens mirrors
// contextWindow — unusually large output caps; verify against Mistral docs.
export type MistralModelId = keyof typeof mistralModels
export const mistralDefaultModelId: MistralModelId = "codestral-latest"
export const mistralModels = {
	"codestral-latest": {
		maxTokens: 256_000,
		contextWindow: 256_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.3,
		outputPrice: 0.9,
	},
	"mistral-large-latest": {
		maxTokens: 131_000,
		contextWindow: 131_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 6.0,
	},
	"ministral-8b-latest": {
		maxTokens: 131_000,
		contextWindow: 131_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.1,
		outputPrice: 0.1,
	},
	"ministral-3b-latest": {
		maxTokens: 131_000,
		contextWindow: 131_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.04,
		outputPrice: 0.04,
	},
	"mistral-small-latest": {
		maxTokens: 32_000,
		contextWindow: 32_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.2,
		outputPrice: 0.6,
	},
	"pixtral-large-latest": {
		maxTokens: 131_000,
		contextWindow: 131_000,
		supportsImages: true, // the only vision-capable entry in this table
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 6.0,
	},
} as const satisfies Record<string, ModelInfo>
// Unbound Security
// https://www.unboundsecurity.ai/ai-gateway
// Default model routed through the Unbound gateway. Limits and prices match
// the "claude-3-7-sonnet@20250219" entry above (this one omits
// supportsComputerUse).
export const unboundDefaultModelId = "anthropic/claude-3-7-sonnet-20250219"
export const unboundDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
}
// LiteLLM
// https://docs.litellm.ai/
// Default assumed model for a LiteLLM proxy. Limits and prices match the
// "claude-3-7-sonnet@20250219" entry above, including supportsComputerUse.
export const litellmDefaultModelId = "claude-3-7-sonnet-20250219"
export const litellmDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
}
// xAI
// https://docs.x.ai/docs/api-reference
// Prices are USD per 1M tokens (the unit spelled out in the DeepSeek table
// above) — NOTE(review): confirm. Only the grok-3-mini variants expose an
// adjustable reasoning effort; only the vision ids accept images.
export type XAIModelId = keyof typeof xaiModels
export const xaiDefaultModelId: XAIModelId = "grok-3-beta"
export const xaiModels = {
	"grok-3-beta": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 3.0,
		outputPrice: 15.0,
		description: "xAI's Grok-3 beta model with 131K context window",
	},
	"grok-3-fast-beta": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 5.0,
		outputPrice: 25.0,
		description: "xAI's Grok-3 fast beta model with 131K context window",
	},
	"grok-3-mini-beta": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.3,
		outputPrice: 0.5,
		description: "xAI's Grok-3 mini beta model with 131K context window",
		supportsReasoningEffort: true,
	},
	"grok-3-mini-fast-beta": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.6,
		outputPrice: 4.0,
		description: "xAI's Grok-3 mini fast beta model with 131K context window",
		supportsReasoningEffort: true,
	},
	"grok-2-latest": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 10.0,
		description: "xAI's Grok-2 model - latest version with 131K context window",
	},
	"grok-2": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 10.0,
		description: "xAI's Grok-2 model with 131K context window",
	},
	"grok-2-1212": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 10.0,
		description: "xAI's Grok-2 model (version 1212) with 131K context window",
	},
	"grok-2-vision-latest": {
		maxTokens: 8192,
		contextWindow: 32768,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 10.0,
		description: "xAI's Grok-2 Vision model - latest version with image support and 32K context window",
	},
	"grok-2-vision": {
		maxTokens: 8192,
		contextWindow: 32768,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 10.0,
		description: "xAI's Grok-2 Vision model with image support and 32K context window",
	},
	"grok-2-vision-1212": {
		maxTokens: 8192,
		contextWindow: 32768,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 10.0,
		description: "xAI's Grok-2 Vision model (version 1212) with image support and 32K context window",
	},
	"grok-vision-beta": {
		maxTokens: 8192,
		contextWindow: 8192,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 5.0,
		outputPrice: 15.0,
		description: "xAI's Grok Vision Beta model with image support and 8K context window",
	},
	"grok-beta": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 5.0,
		outputPrice: 15.0,
		description: "xAI's Grok Beta model (legacy) with 131K context window",
	},
} as const satisfies Record<string, ModelInfo>
// VS Code Language Model API (vscode.lm)
// Prices are zero for every entry — NOTE(review): presumably because usage is
// billed through the user's VS Code / Copilot subscription rather than per
// token; confirm. Each record extends ModelInfo with the raw family/version/
// name metadata and a maxInputTokens cap that mirrors contextWindow here.
export type VscodeLlmModelId = keyof typeof vscodeLlmModels
export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-3.5-sonnet"
export const vscodeLlmModels = {
	"gpt-3.5-turbo": {
		contextWindow: 12114,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "gpt-3.5-turbo",
		version: "gpt-3.5-turbo-0613",
		name: "GPT 3.5 Turbo",
		supportsToolCalling: true,
		maxInputTokens: 12114,
	},
	"gpt-4o-mini": {
		contextWindow: 12115,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "gpt-4o-mini",
		version: "gpt-4o-mini-2024-07-18",
		name: "GPT-4o mini",
		supportsToolCalling: true,
		maxInputTokens: 12115,
	},
	"gpt-4": {
		contextWindow: 28501,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "gpt-4",
		version: "gpt-4-0613",
		name: "GPT 4",
		supportsToolCalling: true,
		maxInputTokens: 28501,
	},
	"gpt-4-0125-preview": {
		contextWindow: 63826,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "gpt-4-turbo",
		version: "gpt-4-0125-preview",
		name: "GPT 4 Turbo",
		supportsToolCalling: true,
		maxInputTokens: 63826,
	},
	"gpt-4o": {
		contextWindow: 63827,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "gpt-4o",
		version: "gpt-4o-2024-11-20",
		name: "GPT-4o",
		supportsToolCalling: true,
		maxInputTokens: 63827,
	},
	o1: {
		contextWindow: 19827,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "o1-ga",
		version: "o1-2024-12-17",
		name: "o1 (Preview)",
		supportsToolCalling: true,
		maxInputTokens: 19827,
	},
	"o3-mini": {
		contextWindow: 63827,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "o3-mini",
		version: "o3-mini-2025-01-31",
		name: "o3-mini",
		supportsToolCalling: true,
		maxInputTokens: 63827,
	},
	"claude-3.5-sonnet": {
		contextWindow: 81638,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "claude-3.5-sonnet",
		version: "claude-3.5-sonnet",
		name: "Claude 3.5 Sonnet",
		supportsToolCalling: true,
		maxInputTokens: 81638,
	},
	"gemini-2.0-flash-001": {
		contextWindow: 127827,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "gemini-2.0-flash",
		version: "gemini-2.0-flash-001",
		name: "Gemini 2.0 Flash",
		supportsToolCalling: false, // only entry in this table without tool calling
		maxInputTokens: 127827,
	},
	"gemini-2.5-pro": {
		contextWindow: 63830,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "gemini-2.5-pro",
		version: "gemini-2.5-pro-preview-03-25",
		name: "Gemini 2.5 Pro (Preview)",
		supportsToolCalling: true,
		maxInputTokens: 63830,
	},
	"o4-mini": {
		contextWindow: 111446,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "o4-mini",
		version: "o4-mini-2025-04-16",
		name: "o4-mini (Preview)",
		supportsToolCalling: true,
		maxInputTokens: 111446,
	},
	"gpt-4.1": {
		contextWindow: 111446,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
		family: "gpt-4.1",
		version: "gpt-4.1-2025-04-14",
		name: "GPT-4.1 (Preview)",
		supportsToolCalling: true,
		maxInputTokens: 111446,
	},
} as const satisfies Record<
	string,
	ModelInfo & {
		family: string
		version: string
		name: string
		supportsToolCalling: boolean
		maxInputTokens: number
	}
>
  1519. // Groq
  1520. // https://console.groq.com/docs/models
  1521. export type GroqModelId =
  1522. | "llama-3.1-8b-instant"
  1523. | "llama-3.3-70b-versatile"
  1524. | "meta-llama/llama-4-scout-17b-16e-instruct"
  1525. | "meta-llama/llama-4-maverick-17b-128e-instruct"
  1526. | "mistral-saba-24b"
  1527. | "qwen-qwq-32b"
  1528. | "deepseek-r1-distill-llama-70b"
  1529. export const groqDefaultModelId: GroqModelId = "llama-3.3-70b-versatile" // Defaulting to Llama3 70B Versatile
  1530. export const groqModels = {
  1531. // Models based on API response: https://api.groq.com/openai/v1/models
  1532. "llama-3.1-8b-instant": {
  1533. maxTokens: 131072,
  1534. contextWindow: 131072,
  1535. supportsImages: false,
  1536. supportsPromptCache: false,
  1537. inputPrice: 0,
  1538. outputPrice: 0,
  1539. description: "Meta Llama 3.1 8B Instant model, 128K context.",
  1540. },
  1541. "llama-3.3-70b-versatile": {
  1542. maxTokens: 32768,
  1543. contextWindow: 131072,
  1544. supportsImages: false,
  1545. supportsPromptCache: false,
  1546. inputPrice: 0,
  1547. outputPrice: 0,
  1548. description: "Meta Llama 3.3 70B Versatile model, 128K context.",
  1549. },
  1550. "meta-llama/llama-4-scout-17b-16e-instruct": {
  1551. maxTokens: 8192,
  1552. contextWindow: 131072,
  1553. supportsImages: false,
  1554. supportsPromptCache: false,
  1555. inputPrice: 0,
  1556. outputPrice: 0,
  1557. description: "Meta Llama 4 Scout 17B Instruct model, 128K context.",
  1558. },
  1559. "meta-llama/llama-4-maverick-17b-128e-instruct": {
  1560. maxTokens: 8192,
  1561. contextWindow: 131072,
  1562. supportsImages: false,
  1563. supportsPromptCache: false,
  1564. inputPrice: 0,
  1565. outputPrice: 0,
  1566. description: "Meta Llama 4 Maverick 17B Instruct model, 128K context.",
  1567. },
  1568. "mistral-saba-24b": {
  1569. maxTokens: 32768,
  1570. contextWindow: 32768,
  1571. supportsImages: false,
  1572. supportsPromptCache: false,
  1573. inputPrice: 0,
  1574. outputPrice: 0,
  1575. description: "Mistral Saba 24B model, 32K context.",
  1576. },
  1577. "qwen-qwq-32b": {
  1578. maxTokens: 131072,
  1579. contextWindow: 131072,
  1580. supportsImages: false,
  1581. supportsPromptCache: false,
  1582. inputPrice: 0,
  1583. outputPrice: 0,
  1584. description: "Alibaba Qwen QwQ 32B model, 128K context.",
  1585. },
  1586. "deepseek-r1-distill-llama-70b": {
  1587. maxTokens: 131072,
  1588. contextWindow: 131072,
  1589. supportsImages: false,
  1590. supportsPromptCache: false,
  1591. inputPrice: 0,
  1592. outputPrice: 0,
  1593. description: "DeepSeek R1 Distill Llama 70B model, 128K context.",
  1594. },
  1595. } as const satisfies Record<string, ModelInfo>
  1596. // Chutes AI
  1597. // https://llm.chutes.ai/v1 (OpenAI compatible)
  1598. export type ChutesModelId =
  1599. | "deepseek-ai/DeepSeek-R1"
  1600. | "deepseek-ai/DeepSeek-V3"
  1601. | "unsloth/Llama-3.3-70B-Instruct"
  1602. | "chutesai/Llama-4-Scout-17B-16E-Instruct"
  1603. | "unsloth/Mistral-Nemo-Instruct-2407"
  1604. | "unsloth/gemma-3-12b-it"
  1605. | "NousResearch/DeepHermes-3-Llama-3-8B-Preview"
  1606. | "unsloth/gemma-3-4b-it"
  1607. | "nvidia/Llama-3_3-Nemotron-Super-49B-v1"
  1608. | "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1"
  1609. | "chutesai/Llama-4-Maverick-17B-128E-Instruct-FP8"
  1610. | "deepseek-ai/DeepSeek-V3-Base"
  1611. | "deepseek-ai/DeepSeek-R1-Zero"
  1612. | "deepseek-ai/DeepSeek-V3-0324"
  1613. | "Qwen/Qwen3-235B-A22B"
  1614. | "Qwen/Qwen3-32B"
  1615. | "Qwen/Qwen3-30B-A3B"
  1616. | "Qwen/Qwen3-14B"
  1617. | "Qwen/Qwen3-8B"
  1618. | "microsoft/MAI-DS-R1-FP8"
  1619. | "tngtech/DeepSeek-R1T-Chimera"
  1620. export const chutesDefaultModelId: ChutesModelId = "deepseek-ai/DeepSeek-R1"
  1621. export const chutesModels = {
  1622. "deepseek-ai/DeepSeek-R1": {
  1623. maxTokens: 32768,
  1624. contextWindow: 163840,
  1625. supportsImages: false,
  1626. supportsPromptCache: false,
  1627. inputPrice: 0,
  1628. outputPrice: 0,
  1629. description: "DeepSeek R1 model.",
  1630. },
  1631. "deepseek-ai/DeepSeek-V3": {
  1632. maxTokens: 32768,
  1633. contextWindow: 163840,
  1634. supportsImages: false,
  1635. supportsPromptCache: false,
  1636. inputPrice: 0,
  1637. outputPrice: 0,
  1638. description: "DeepSeek V3 model.",
  1639. },
  1640. "unsloth/Llama-3.3-70B-Instruct": {
  1641. maxTokens: 32768, // From Groq
  1642. contextWindow: 131072, // From Groq
  1643. supportsImages: false,
  1644. supportsPromptCache: false,
  1645. inputPrice: 0,
  1646. outputPrice: 0,
  1647. description: "Unsloth Llama 3.3 70B Instruct model.",
  1648. },
  1649. "chutesai/Llama-4-Scout-17B-16E-Instruct": {
  1650. maxTokens: 32768,
  1651. contextWindow: 512000,
  1652. supportsImages: false,
  1653. supportsPromptCache: false,
  1654. inputPrice: 0,
  1655. outputPrice: 0,
  1656. description: "ChutesAI Llama 4 Scout 17B Instruct model, 512K context.",
  1657. },
  1658. "unsloth/Mistral-Nemo-Instruct-2407": {
  1659. maxTokens: 32768,
  1660. contextWindow: 128000,
  1661. supportsImages: false,
  1662. supportsPromptCache: false,
  1663. inputPrice: 0,
  1664. outputPrice: 0,
  1665. description: "Unsloth Mistral Nemo Instruct model.",
  1666. },
  1667. "unsloth/gemma-3-12b-it": {
  1668. maxTokens: 32768,
  1669. contextWindow: 131072,
  1670. supportsImages: false,
  1671. supportsPromptCache: false,
  1672. inputPrice: 0,
  1673. outputPrice: 0,
  1674. description: "Unsloth Gemma 3 12B IT model.",
  1675. },
  1676. "NousResearch/DeepHermes-3-Llama-3-8B-Preview": {
  1677. maxTokens: 32768,
  1678. contextWindow: 131072,
  1679. supportsImages: false,
  1680. supportsPromptCache: false,
  1681. inputPrice: 0,
  1682. outputPrice: 0,
  1683. description: "Nous DeepHermes 3 Llama 3 8B Preview model.",
  1684. },
  1685. "unsloth/gemma-3-4b-it": {
  1686. maxTokens: 32768,
  1687. contextWindow: 131072,
  1688. supportsImages: false,
  1689. supportsPromptCache: false,
  1690. inputPrice: 0,
  1691. outputPrice: 0,
  1692. description: "Unsloth Gemma 3 4B IT model.",
  1693. },
  1694. "nvidia/Llama-3_3-Nemotron-Super-49B-v1": {
  1695. maxTokens: 32768,
  1696. contextWindow: 131072,
  1697. supportsImages: false,
  1698. supportsPromptCache: false,
  1699. inputPrice: 0,
  1700. outputPrice: 0,
  1701. description: "Nvidia Llama 3.3 Nemotron Super 49B model.",
  1702. },
  1703. "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1": {
  1704. maxTokens: 32768,
  1705. contextWindow: 131072,
  1706. supportsImages: false,
  1707. supportsPromptCache: false,
  1708. inputPrice: 0,
  1709. outputPrice: 0,
  1710. description: "Nvidia Llama 3.1 Nemotron Ultra 253B model.",
  1711. },
  1712. "chutesai/Llama-4-Maverick-17B-128E-Instruct-FP8": {
  1713. maxTokens: 32768,
  1714. contextWindow: 256000,
  1715. supportsImages: false,
  1716. supportsPromptCache: false,
  1717. inputPrice: 0,
  1718. outputPrice: 0,
  1719. description: "ChutesAI Llama 4 Maverick 17B Instruct FP8 model.",
  1720. },
  1721. "deepseek-ai/DeepSeek-V3-Base": {
  1722. maxTokens: 32768,
  1723. contextWindow: 163840,
  1724. supportsImages: false,
  1725. supportsPromptCache: false,
  1726. inputPrice: 0,
  1727. outputPrice: 0,
  1728. description: "DeepSeek V3 Base model.",
  1729. },
  1730. "deepseek-ai/DeepSeek-R1-Zero": {
  1731. maxTokens: 32768,
  1732. contextWindow: 163840,
  1733. supportsImages: false,
  1734. supportsPromptCache: false,
  1735. inputPrice: 0,
  1736. outputPrice: 0,
  1737. description: "DeepSeek R1 Zero model.",
  1738. },
  1739. "deepseek-ai/DeepSeek-V3-0324": {
  1740. maxTokens: 32768,
  1741. contextWindow: 163840,
  1742. supportsImages: false,
  1743. supportsPromptCache: false,
  1744. inputPrice: 0,
  1745. outputPrice: 0,
  1746. description: "DeepSeek V3 (0324) model.",
  1747. },
  1748. "Qwen/Qwen3-235B-A22B": {
  1749. maxTokens: 32768,
  1750. contextWindow: 40960,
  1751. supportsImages: false,
  1752. supportsPromptCache: false,
  1753. inputPrice: 0,
  1754. outputPrice: 0,
  1755. description: "Qwen3 235B A22B model.",
  1756. },
  1757. "Qwen/Qwen3-32B": {
  1758. maxTokens: 32768,
  1759. contextWindow: 40960,
  1760. supportsImages: false,
  1761. supportsPromptCache: false,
  1762. inputPrice: 0,
  1763. outputPrice: 0,
  1764. description: "Qwen3 32B model.",
  1765. },
  1766. "Qwen/Qwen3-30B-A3B": {
  1767. maxTokens: 32768,
  1768. contextWindow: 40960,
  1769. supportsImages: false,
  1770. supportsPromptCache: false,
  1771. inputPrice: 0,
  1772. outputPrice: 0,
  1773. description: "Qwen3 30B A3B model.",
  1774. },
  1775. "Qwen/Qwen3-14B": {
  1776. maxTokens: 32768,
  1777. contextWindow: 40960,
  1778. supportsImages: false,
  1779. supportsPromptCache: false,
  1780. inputPrice: 0,
  1781. outputPrice: 0,
  1782. description: "Qwen3 14B model.",
  1783. },
  1784. "Qwen/Qwen3-8B": {
  1785. maxTokens: 32768,
  1786. contextWindow: 40960,
  1787. supportsImages: false,
  1788. supportsPromptCache: false,
  1789. inputPrice: 0,
  1790. outputPrice: 0,
  1791. description: "Qwen3 8B model.",
  1792. },
  1793. "microsoft/MAI-DS-R1-FP8": {
  1794. maxTokens: 32768,
  1795. contextWindow: 163840,
  1796. supportsImages: false,
  1797. supportsPromptCache: false,
  1798. inputPrice: 0,
  1799. outputPrice: 0,
  1800. description: "Microsoft MAI-DS-R1 FP8 model.",
  1801. },
  1802. "tngtech/DeepSeek-R1T-Chimera": {
  1803. maxTokens: 32768,
  1804. contextWindow: 163840,
  1805. supportsImages: false,
  1806. supportsPromptCache: false,
  1807. inputPrice: 0,
  1808. outputPrice: 0,
  1809. description: "TNGTech DeepSeek R1T Chimera model.",
  1810. },
  1811. } as const satisfies Record<string, ModelInfo>
  1812. /**
  1813. * Constants
  1814. */
  1815. // These models support prompt caching.
  1816. export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([
  1817. "anthropic/claude-3-haiku",
  1818. "anthropic/claude-3-haiku:beta",
  1819. "anthropic/claude-3-opus",
  1820. "anthropic/claude-3-opus:beta",
  1821. "anthropic/claude-3-sonnet",
  1822. "anthropic/claude-3-sonnet:beta",
  1823. "anthropic/claude-3.5-haiku",
  1824. "anthropic/claude-3.5-haiku-20241022",
  1825. "anthropic/claude-3.5-haiku-20241022:beta",
  1826. "anthropic/claude-3.5-haiku:beta",
  1827. "anthropic/claude-3.5-sonnet",
  1828. "anthropic/claude-3.5-sonnet-20240620",
  1829. "anthropic/claude-3.5-sonnet-20240620:beta",
  1830. "anthropic/claude-3.5-sonnet:beta",
  1831. "anthropic/claude-3.7-sonnet",
  1832. "anthropic/claude-3.7-sonnet:beta",
  1833. "anthropic/claude-3.7-sonnet:thinking",
  1834. "anthropic/claude-sonnet-4",
  1835. "anthropic/claude-opus-4",
  1836. "google/gemini-2.5-pro-preview",
  1837. "google/gemini-2.5-flash-preview",
  1838. "google/gemini-2.5-flash-preview:thinking",
  1839. "google/gemini-2.5-flash-preview-05-20",
  1840. "google/gemini-2.5-flash-preview-05-20:thinking",
  1841. "google/gemini-2.0-flash-001",
  1842. "google/gemini-flash-1.5",
  1843. "google/gemini-flash-1.5-8b",
  1844. ])
  1845. // https://www.anthropic.com/news/3-5-models-and-computer-use
  1846. export const OPEN_ROUTER_COMPUTER_USE_MODELS = new Set([
  1847. "anthropic/claude-3.5-sonnet",
  1848. "anthropic/claude-3.5-sonnet:beta",
  1849. "anthropic/claude-3.7-sonnet",
  1850. "anthropic/claude-3.7-sonnet:beta",
  1851. "anthropic/claude-3.7-sonnet:thinking",
  1852. "anthropic/claude-sonnet-4",
  1853. "anthropic/claude-opus-4",
  1854. ])
  1855. export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([
  1856. "anthropic/claude-3.7-sonnet:beta",
  1857. "anthropic/claude-3.7-sonnet:thinking",
  1858. "anthropic/claude-opus-4",
  1859. "anthropic/claude-sonnet-4",
  1860. "google/gemini-2.5-flash-preview-05-20",
  1861. "google/gemini-2.5-flash-preview-05-20:thinking",
  1862. ])
  1863. export const OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS = new Set([
  1864. "anthropic/claude-3.7-sonnet:thinking",
  1865. "google/gemini-2.5-flash-preview-05-20:thinking",
  1866. ])
  1867. const routerNames = ["openrouter", "requesty", "glama", "unbound", "litellm"] as const
  1868. export type RouterName = (typeof routerNames)[number]
  1869. export const isRouterName = (value: string): value is RouterName => routerNames.includes(value as RouterName)
  1870. export function toRouterName(value?: string): RouterName {
  1871. if (value && isRouterName(value)) {
  1872. return value
  1873. }
  1874. throw new Error(`Invalid router name: ${value}`)
  1875. }
/** Map of model id → model metadata for a single provider/router. */
export type ModelRecord = Record<string, ModelInfo>
/** Model listings for every supported router, keyed by router name. */
export type RouterModels = Record<RouterName, ModelRecord>
  1878. export const shouldUseReasoningBudget = ({
  1879. model,
  1880. settings,
  1881. }: {
  1882. model: ModelInfo
  1883. settings?: ProviderSettings
  1884. }): boolean => !!model.requiredReasoningBudget || (!!model.supportsReasoningBudget && !!settings?.enableReasoningEffort)
  1885. export const shouldUseReasoningEffort = ({
  1886. model,
  1887. settings,
  1888. }: {
  1889. model: ModelInfo
  1890. settings?: ProviderSettings
  1891. }): boolean => (!!model.supportsReasoningEffort && !!settings?.reasoningEffort) || !!model.reasoningEffort
// Fallback output-token cap used when a hybrid reasoning model runs with a
// reasoning budget but the user has not set `modelMaxTokens` (see
// `getModelMaxOutputTokens` below).
export const DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS = 16_384
// Default thinking-token budget for hybrid reasoning models.
export const DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS = 8_192
  1894. export const getModelMaxOutputTokens = ({
  1895. modelId,
  1896. model,
  1897. settings,
  1898. }: {
  1899. modelId: string
  1900. model: ModelInfo
  1901. settings?: ProviderSettings
  1902. }): number | undefined => {
  1903. if (shouldUseReasoningBudget({ model, settings })) {
  1904. return settings?.modelMaxTokens || DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS
  1905. }
  1906. const isAnthropicModel = modelId.includes("claude")
  1907. // For "Hybrid" reasoning models, we should discard the model's actual
  1908. // `maxTokens` value if we're not using reasoning. We do this for Anthropic
  1909. // models only for now. Should we do this for Gemini too?
  1910. if (model.supportsReasoningBudget && isAnthropicModel) {
  1911. return ANTHROPIC_DEFAULT_MAX_TOKENS
  1912. }
  1913. return model.maxTokens ?? undefined
  1914. }
  1915. /**
  1916. * Options for fetching models from different providers.
  1917. * This is a discriminated union type where the provider property determines
  1918. * which other properties are required.
  1919. */
  1920. export type GetModelsOptions =
  1921. | { provider: "openrouter" }
  1922. | { provider: "glama" }
  1923. | { provider: "requesty"; apiKey?: string }
  1924. | { provider: "unbound"; apiKey?: string }
  1925. | { provider: "litellm"; apiKey: string; baseUrl: string }