api.ts 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. export type ApiProvider =
  2. | "anthropic"
  3. | "glama"
  4. | "openrouter"
  5. | "bedrock"
  6. | "vertex"
  7. | "openai"
  8. | "ollama"
  9. | "lmstudio"
  10. | "gemini"
  11. | "openai-native"
  12. | "deepseek"
/**
 * Provider-specific connection and model settings. Only the fields relevant
 * to the active {@link ApiProvider} are expected to be populated; everything
 * is optional so one object can hold any provider's configuration.
 */
export interface ApiHandlerOptions {
	apiModelId?: string
	apiKey?: string // anthropic
	anthropicBaseUrl?: string
	// Glama
	glamaModelId?: string
	glamaModelInfo?: ModelInfo
	glamaApiKey?: string
	// OpenRouter
	openRouterApiKey?: string
	openRouterModelId?: string
	openRouterModelInfo?: ModelInfo
	// AWS Bedrock
	awsAccessKey?: string
	awsSecretKey?: string
	awsSessionToken?: string
	awsRegion?: string
	awsUseCrossRegionInference?: boolean
	awsUsePromptCache?: boolean
	// NOTE(review): casing is inconsistent (awsPromptCacheId would match the
	// rest), but renaming would break callers and persisted configs — keep as-is.
	awspromptCacheId?: string
	// GCP Vertex AI
	vertexProjectId?: string
	vertexRegion?: string
	// OpenAI-compatible endpoints (including Azure when azureApiVersion is set)
	openAiBaseUrl?: string
	openAiApiKey?: string
	openAiModelId?: string
	// Ollama (local)
	ollamaModelId?: string
	ollamaBaseUrl?: string
	// LM Studio (local)
	lmStudioModelId?: string
	lmStudioBaseUrl?: string
	geminiApiKey?: string
	// Official OpenAI API ("openai-native" provider)
	openAiNativeApiKey?: string
	azureApiVersion?: string
	openRouterUseMiddleOutTransform?: boolean
	openAiStreamingEnabled?: boolean
	setAzureApiVersion?: boolean
	// DeepSeek
	deepSeekBaseUrl?: string
	deepSeekApiKey?: string
	deepSeekModelId?: string
	includeMaxTokens?: boolean
}
/**
 * A complete, saved API configuration: all handler options plus the
 * provider selection and a persistence identifier.
 */
export type ApiConfiguration = ApiHandlerOptions & {
	apiProvider?: ApiProvider
	id?: string // stable unique identifier
}
// Models

/**
 * Capability and pricing metadata for a single model.
 * Prices throughout this file are USD per million tokens (see the inline
 * comments on the anthropic entries below).
 */
export interface ModelInfo {
	maxTokens?: number // presumably the max output tokens per request — confirm against handlers
	contextWindow?: number // total token budget for the model's context
	supportsImages?: boolean
	supportsComputerUse?: boolean
	supportsPromptCache: boolean // this value is hardcoded for now
	inputPrice?: number
	outputPrice?: number
	cacheWritesPrice?: number
	cacheReadsPrice?: number
	description?: string
}
// Anthropic
// https://docs.anthropic.com/en/docs/about-claude/models
export type AnthropicModelId = keyof typeof anthropicModels

/** Model used when the user has not picked a specific Anthropic model. */
export const anthropicDefaultModelId: AnthropicModelId = "claude-3-5-sonnet-20241022"

/**
 * Capability/pricing table for the Anthropic first-party API.
 * Prices are USD per million tokens.
 */
export const anthropicModels = {
	"claude-3-5-sonnet-20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0, // $3 per million input tokens
		outputPrice: 15.0, // $15 per million output tokens
		cacheWritesPrice: 3.75, // $3.75 per million tokens
		cacheReadsPrice: 0.3, // $0.30 per million tokens
	},
	"claude-3-5-haiku-20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.0,
		outputPrice: 5.0,
		cacheWritesPrice: 1.25,
		cacheReadsPrice: 0.1,
	},
	"claude-3-opus-20240229": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 15.0,
		outputPrice: 75.0,
		cacheWritesPrice: 18.75,
		cacheReadsPrice: 1.5,
	},
	"claude-3-haiku-20240307": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.25,
		outputPrice: 1.25,
		cacheWritesPrice: 0.3,
		cacheReadsPrice: 0.03,
	},
} as const satisfies Record<string, ModelInfo> // as const assertion makes the object deeply readonly
  114. // AWS Bedrock
  115. // https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html
  116. export interface MessageContent {
  117. type: 'text' | 'image' | 'video' | 'tool_use' | 'tool_result';
  118. text?: string;
  119. source?: {
  120. type: 'base64';
  121. data: string | Uint8Array; // string for Anthropic, Uint8Array for Bedrock
  122. media_type: 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp';
  123. };
  124. // Video specific fields
  125. format?: string;
  126. s3Location?: {
  127. uri: string;
  128. bucketOwner?: string;
  129. };
  130. // Tool use and result fields
  131. toolUseId?: string;
  132. name?: string;
  133. input?: any;
  134. output?: any; // Used for tool_result type
  135. }
  136. export type BedrockModelId = keyof typeof bedrockModels
  137. export const bedrockDefaultModelId: BedrockModelId = "anthropic.claude-3-5-sonnet-20241022-v2:0"
  138. export const bedrockModels = {
  139. "amazon.nova-pro-v1:0": {
  140. maxTokens: 5000,
  141. contextWindow: 300_000,
  142. supportsImages: true,
  143. supportsComputerUse: false,
  144. supportsPromptCache: false,
  145. inputPrice: 0.8,
  146. outputPrice: 3.2,
  147. cacheWritesPrice: 0.8, // per million tokens
  148. cacheReadsPrice: 0.2, // per million tokens
  149. },
  150. "amazon.nova-lite-v1:0": {
  151. maxTokens: 5000,
  152. contextWindow: 300_000,
  153. supportsImages: true,
  154. supportsComputerUse: false,
  155. supportsPromptCache: false,
  156. inputPrice: 0.06,
  157. outputPrice: 0.024,
  158. cacheWritesPrice: 0.06, // per million tokens
  159. cacheReadsPrice: 0.015, // per million tokens
  160. },
  161. "amazon.nova-micro-v1:0": {
  162. maxTokens: 5000,
  163. contextWindow: 128_000,
  164. supportsImages: false,
  165. supportsComputerUse: false,
  166. supportsPromptCache: false,
  167. inputPrice: 0.035,
  168. outputPrice: 0.14,
  169. cacheWritesPrice: 0.035, // per million tokens
  170. cacheReadsPrice: 0.00875, // per million tokens
  171. },
  172. "anthropic.claude-3-5-sonnet-20241022-v2:0": {
  173. maxTokens: 8192,
  174. contextWindow: 200_000,
  175. supportsImages: true,
  176. supportsComputerUse: true,
  177. supportsPromptCache: false,
  178. inputPrice: 3.0,
  179. outputPrice: 15.0,
  180. cacheWritesPrice: 3.75, // per million tokens
  181. cacheReadsPrice: 0.3, // per million tokens
  182. },
  183. "anthropic.claude-3-5-haiku-20241022-v1:0": {
  184. maxTokens: 8192,
  185. contextWindow: 200_000,
  186. supportsImages: false,
  187. supportsPromptCache: false,
  188. inputPrice: 1.0,
  189. outputPrice: 5.0,
  190. cacheWritesPrice: 1.0,
  191. cacheReadsPrice: 0.08,
  192. },
  193. "anthropic.claude-3-5-sonnet-20240620-v1:0": {
  194. maxTokens: 8192,
  195. contextWindow: 200_000,
  196. supportsImages: true,
  197. supportsPromptCache: false,
  198. inputPrice: 3.0,
  199. outputPrice: 15.0,
  200. },
  201. "anthropic.claude-3-opus-20240229-v1:0": {
  202. maxTokens: 4096,
  203. contextWindow: 200_000,
  204. supportsImages: true,
  205. supportsPromptCache: false,
  206. inputPrice: 15.0,
  207. outputPrice: 75.0,
  208. },
  209. "anthropic.claude-3-sonnet-20240229-v1:0": {
  210. maxTokens: 4096,
  211. contextWindow: 200_000,
  212. supportsImages: true,
  213. supportsPromptCache: false,
  214. inputPrice: 3.0,
  215. outputPrice: 15.0,
  216. },
  217. "anthropic.claude-3-haiku-20240307-v1:0": {
  218. maxTokens: 4096,
  219. contextWindow: 200_000,
  220. supportsImages: true,
  221. supportsPromptCache: false,
  222. inputPrice: 0.25,
  223. outputPrice: 1.25,
  224. },
  225. "meta.llama3-2-90b-instruct-v1:0" : {
  226. maxTokens: 8192,
  227. contextWindow: 128_000,
  228. supportsImages: true,
  229. supportsComputerUse: false,
  230. supportsPromptCache: false,
  231. inputPrice: 0.72,
  232. outputPrice: 0.72,
  233. },
  234. "meta.llama3-2-11b-instruct-v1:0" : {
  235. maxTokens: 8192,
  236. contextWindow: 128_000,
  237. supportsImages: true,
  238. supportsComputerUse: false,
  239. supportsPromptCache: false,
  240. inputPrice: 0.16,
  241. outputPrice: 0.16,
  242. },
  243. "meta.llama3-2-3b-instruct-v1:0" : {
  244. maxTokens: 8192,
  245. contextWindow: 128_000,
  246. supportsImages: false,
  247. supportsComputerUse: false,
  248. supportsPromptCache: false,
  249. inputPrice: 0.15,
  250. outputPrice: 0.15,
  251. },
  252. "meta.llama3-2-1b-instruct-v1:0" : {
  253. maxTokens: 8192,
  254. contextWindow: 128_000,
  255. supportsImages: false,
  256. supportsComputerUse: false,
  257. supportsPromptCache: false,
  258. inputPrice: 0.1,
  259. outputPrice: 0.1,
  260. },
  261. "meta.llama3-1-405b-instruct-v1:0" : {
  262. maxTokens: 8192,
  263. contextWindow: 128_000,
  264. supportsImages: false,
  265. supportsComputerUse: false,
  266. supportsPromptCache: false,
  267. inputPrice: 2.4,
  268. outputPrice: 2.4,
  269. },
  270. "meta.llama3-1-70b-instruct-v1:0" : {
  271. maxTokens: 8192,
  272. contextWindow: 128_000,
  273. supportsImages: false,
  274. supportsComputerUse: false,
  275. supportsPromptCache: false,
  276. inputPrice: 0.72,
  277. outputPrice: 0.72,
  278. },
  279. "meta.llama3-1-8b-instruct-v1:0" : {
  280. maxTokens: 8192,
  281. contextWindow: 8_000,
  282. supportsImages: false,
  283. supportsComputerUse: false,
  284. supportsPromptCache: false,
  285. inputPrice: 0.22,
  286. outputPrice: 0.22,
  287. },
  288. "meta.llama3-70b-instruct-v1:0" : {
  289. maxTokens: 2048 ,
  290. contextWindow: 8_000,
  291. supportsImages: false,
  292. supportsComputerUse: false,
  293. supportsPromptCache: false,
  294. inputPrice: 2.65,
  295. outputPrice: 3.5,
  296. },
  297. "meta.llama3-8b-instruct-v1:0" : {
  298. maxTokens: 2048 ,
  299. contextWindow: 4_000,
  300. supportsImages: false,
  301. supportsComputerUse: false,
  302. supportsPromptCache: false,
  303. inputPrice: 0.3,
  304. outputPrice: 0.6,
  305. },
  306. } as const satisfies Record<string, ModelInfo>
// Glama
// https://glama.ai/models

/** Default Glama model slug. */
export const glamaDefaultModelId = "anthropic/claude-3-5-sonnet"

/**
 * Fallback metadata for the default Glama model (prices USD per million tokens).
 * NOTE(review): the description below mentions OpenRouter and its moderation —
 * it appears copy-pasted from the OpenRouter default entry. Left untouched here
 * because it is a runtime string; verify whether a Glama-specific text is wanted.
 */
export const glamaDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
	description:
		"The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._",
}
// OpenRouter
// https://openrouter.ai/models?order=newest&supported_parameters=tools
export const openRouterDefaultModelId = "anthropic/claude-3.5-sonnet:beta" // will always exist in openRouterModels

/**
 * Fallback metadata for the default OpenRouter model
 * (prices USD per million tokens).
 */
export const openRouterDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
	description:
		"The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._",
}
// Vertex AI
// https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude
export type VertexModelId = keyof typeof vertexModels

/** Model used when the user has not picked a specific Vertex model. */
export const vertexDefaultModelId: VertexModelId = "claude-3-5-sonnet-v2@20241022"

/**
 * Capability/pricing table for Claude models served via GCP Vertex AI.
 * Prices are USD per million tokens.
 */
export const vertexModels = {
	"claude-3-5-sonnet-v2@20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: false,
		inputPrice: 3.0,
		outputPrice: 15.0,
	},
	"claude-3-5-sonnet@20240620": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 3.0,
		outputPrice: 15.0,
	},
	"claude-3-5-haiku@20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.0,
		outputPrice: 5.0,
	},
	"claude-3-opus@20240229": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 15.0,
		outputPrice: 75.0,
	},
	"claude-3-haiku@20240307": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.25,
		outputPrice: 1.25,
	},
} as const satisfies Record<string, ModelInfo>
/**
 * Fallback metadata for arbitrary OpenAI-compatible models whose real
 * limits and pricing are unknown.
 */
export const openAiModelInfoSaneDefaults: ModelInfo = {
	// NOTE(review): -1 looks like a sentinel for "no explicit max_tokens" —
	// confirm against the handler that consumes this value.
	maxTokens: -1,
	contextWindow: 128_000,
	supportsImages: true,
	supportsPromptCache: false,
	inputPrice: 0,
	outputPrice: 0,
}
// Gemini
// https://ai.google.dev/gemini-api/docs/models/gemini
export type GeminiModelId = keyof typeof geminiModels

/** Model used when the user has not picked a specific Gemini model. */
export const geminiDefaultModelId: GeminiModelId = "gemini-2.0-flash-thinking-exp-1219"

/**
 * Capability table for Google Gemini models. All prices are 0 here —
 * presumably these experimental endpoints were free at time of writing;
 * verify before relying on cost calculations.
 */
export const geminiModels = {
	"gemini-2.0-flash-thinking-exp-1219": {
		maxTokens: 8192,
		contextWindow: 32_767,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-exp": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-002": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-exp-0827": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-8b-exp-0827": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-pro-002": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-pro-exp-0827": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-exp-1206": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
} as const satisfies Record<string, ModelInfo>
// OpenAI Native
// https://openai.com/api/pricing/
export type OpenAiNativeModelId = keyof typeof openAiNativeModels

/** Model used when the user has not picked a specific OpenAI model. */
export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4o"

/**
 * Capability/pricing table for the official OpenAI API.
 * Prices are USD per million tokens.
 */
export const openAiNativeModels = {
	// don't support tool use yet
	"o1": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 15,
		outputPrice: 60,
	},
	"o1-preview": {
		maxTokens: 32_768,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 15,
		outputPrice: 60,
	},
	"o1-mini": {
		maxTokens: 65_536,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 3,
		outputPrice: 12,
	},
	"gpt-4o": {
		maxTokens: 4_096,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 5,
		outputPrice: 15,
	},
	"gpt-4o-mini": {
		maxTokens: 16_384,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.15,
		outputPrice: 0.6,
	},
} as const satisfies Record<string, ModelInfo>
// DeepSeek
// https://platform.deepseek.com/docs/api
export type DeepSeekModelId = keyof typeof deepSeekModels

/** Model used when the user has not picked a specific DeepSeek model. */
export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"

/**
 * Capability/pricing table for the DeepSeek API.
 * Prices are USD per million tokens.
 */
export const deepSeekModels = {
	"deepseek-chat": {
		maxTokens: 8192,
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.014, // $0.014 per million tokens
		outputPrice: 0.28, // $0.28 per million tokens
		description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`,
	},
} as const satisfies Record<string, ModelInfo>
// Azure OpenAI
// https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs
/** api-version query parameter sent to Azure OpenAI when none is configured. */
export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"