- import * as vscode from "vscode"
// Identifier for each supported LLM backend. Each literal corresponds to a
// concrete API handler implementation selected at runtime.
export type ApiProvider =
	| "anthropic"
	| "glama"
	| "openrouter"
	| "bedrock"
	| "vertex"
	| "openai"
	| "ollama"
	| "lmstudio"
	| "gemini"
	| "openai-native"
	| "deepseek"
	| "vscode-lm"
	| "mistral"
/**
 * Flat bag of per-provider settings. All fields are optional; only the
 * fields relevant to the selected ApiProvider are read by its handler.
 */
export interface ApiHandlerOptions {
	apiModelId?: string
	apiKey?: string // anthropic
	anthropicBaseUrl?: string
	// VS Code Language Model API ("vscode-lm" provider)
	vsCodeLmModelSelector?: vscode.LanguageModelChatSelector
	// Glama
	glamaModelId?: string
	glamaModelInfo?: ModelInfo
	glamaApiKey?: string
	// OpenRouter
	openRouterApiKey?: string
	openRouterModelId?: string
	openRouterModelInfo?: ModelInfo
	openRouterBaseUrl?: string
	// AWS Bedrock
	awsAccessKey?: string
	awsSecretKey?: string
	awsSessionToken?: string
	awsRegion?: string
	awsUseCrossRegionInference?: boolean
	awsUsePromptCache?: boolean
	// NOTE(review): lowercase "prompt" is inconsistent with awsUsePromptCache;
	// renaming would break persisted settings and callers, so it stays as-is.
	awspromptCacheId?: string
	awsProfile?: string
	awsUseProfile?: boolean
	// GCP Vertex AI
	vertexProjectId?: string
	vertexRegion?: string
	// OpenAI-compatible endpoints (including Azure when openAiUseAzure is set)
	openAiBaseUrl?: string
	openAiApiKey?: string
	openAiModelId?: string
	openAiCustomModelInfo?: ModelInfo
	openAiUseAzure?: boolean
	// Ollama
	ollamaModelId?: string
	ollamaBaseUrl?: string
	// LM Studio
	lmStudioModelId?: string
	lmStudioBaseUrl?: string
	geminiApiKey?: string
	openAiNativeApiKey?: string
	mistralApiKey?: string
	azureApiVersion?: string
	openRouterUseMiddleOutTransform?: boolean
	openAiStreamingEnabled?: boolean
	setAzureApiVersion?: boolean
	// DeepSeek
	deepSeekBaseUrl?: string
	deepSeekApiKey?: string
	includeMaxTokens?: boolean
}
// Full persisted API configuration: provider selection plus its options.
export type ApiConfiguration = ApiHandlerOptions & {
	apiProvider?: ApiProvider
	id?: string // stable unique identifier
}
// Models
/**
 * Static capability and pricing metadata for a single model.
 * All prices are expressed in USD per million tokens.
 */
export interface ModelInfo {
	maxTokens?: number // maximum output tokens per response
	contextWindow: number // total tokens (input + output) the model accepts
	supportsImages?: boolean
	supportsComputerUse?: boolean
	supportsPromptCache: boolean // this value is hardcoded for now
	inputPrice?: number
	outputPrice?: number
	cacheWritesPrice?: number
	cacheReadsPrice?: number
	description?: string
}
// Anthropic
// https://docs.anthropic.com/en/docs/about-claude/models
export type AnthropicModelId = keyof typeof anthropicModels
export const anthropicDefaultModelId: AnthropicModelId = "claude-3-5-sonnet-20241022"
// Prices are USD per million tokens.
export const anthropicModels = {
	"claude-3-5-sonnet-20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0, // $3 per million input tokens
		outputPrice: 15.0, // $15 per million output tokens
		cacheWritesPrice: 3.75, // $3.75 per million tokens
		cacheReadsPrice: 0.3, // $0.30 per million tokens
	},
	"claude-3-5-haiku-20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.0,
		outputPrice: 5.0,
		cacheWritesPrice: 1.25,
		cacheReadsPrice: 0.1,
	},
	"claude-3-opus-20240229": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 15.0,
		outputPrice: 75.0,
		cacheWritesPrice: 18.75,
		cacheReadsPrice: 1.5,
	},
	"claude-3-haiku-20240307": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.25,
		outputPrice: 1.25,
		cacheWritesPrice: 0.3,
		cacheReadsPrice: 0.03,
	},
} as const satisfies Record<string, ModelInfo> // as const assertion makes the object deeply readonly
// AWS Bedrock
// https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html
/**
 * Content-part shape used when converting messages for Bedrock's Converse API.
 * The `type` tag determines which of the optional fields are populated.
 * NOTE(review): this is an optional-field bag rather than a discriminated
 * union; tightening it would change the public shape, so it is only documented.
 */
export interface MessageContent {
	type: "text" | "image" | "video" | "tool_use" | "tool_result"
	text?: string // for type === "text"
	// Inline image payload (for type === "image")
	source?: {
		type: "base64"
		data: string | Uint8Array // string for Anthropic, Uint8Array for Bedrock
		media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp"
	}
	// Video specific fields
	format?: string
	s3Location?: {
		uri: string
		bucketOwner?: string
	}
	// Tool use and result fields
	toolUseId?: string
	name?: string
	input?: any
	output?: any // Used for tool_result type
}
- export type BedrockModelId = keyof typeof bedrockModels
- export const bedrockDefaultModelId: BedrockModelId = "anthropic.claude-3-5-sonnet-20241022-v2:0"
- export const bedrockModels = {
- "amazon.nova-pro-v1:0": {
- maxTokens: 5000,
- contextWindow: 300_000,
- supportsImages: true,
- supportsComputerUse: false,
- supportsPromptCache: false,
- inputPrice: 0.8,
- outputPrice: 3.2,
- cacheWritesPrice: 0.8, // per million tokens
- cacheReadsPrice: 0.2, // per million tokens
- },
- "amazon.nova-lite-v1:0": {
- maxTokens: 5000,
- contextWindow: 300_000,
- supportsImages: true,
- supportsComputerUse: false,
- supportsPromptCache: false,
- inputPrice: 0.06,
- outputPrice: 0.024,
- cacheWritesPrice: 0.06, // per million tokens
- cacheReadsPrice: 0.015, // per million tokens
- },
- "amazon.nova-micro-v1:0": {
- maxTokens: 5000,
- contextWindow: 128_000,
- supportsImages: false,
- supportsComputerUse: false,
- supportsPromptCache: false,
- inputPrice: 0.035,
- outputPrice: 0.14,
- cacheWritesPrice: 0.035, // per million tokens
- cacheReadsPrice: 0.00875, // per million tokens
- },
- "anthropic.claude-3-5-sonnet-20241022-v2:0": {
- maxTokens: 8192,
- contextWindow: 200_000,
- supportsImages: true,
- supportsComputerUse: true,
- supportsPromptCache: false,
- inputPrice: 3.0,
- outputPrice: 15.0,
- cacheWritesPrice: 3.75, // per million tokens
- cacheReadsPrice: 0.3, // per million tokens
- },
- "anthropic.claude-3-5-haiku-20241022-v1:0": {
- maxTokens: 8192,
- contextWindow: 200_000,
- supportsImages: false,
- supportsPromptCache: false,
- inputPrice: 1.0,
- outputPrice: 5.0,
- cacheWritesPrice: 1.0,
- cacheReadsPrice: 0.08,
- },
- "anthropic.claude-3-5-sonnet-20240620-v1:0": {
- maxTokens: 8192,
- contextWindow: 200_000,
- supportsImages: true,
- supportsPromptCache: false,
- inputPrice: 3.0,
- outputPrice: 15.0,
- },
- "anthropic.claude-3-opus-20240229-v1:0": {
- maxTokens: 4096,
- contextWindow: 200_000,
- supportsImages: true,
- supportsPromptCache: false,
- inputPrice: 15.0,
- outputPrice: 75.0,
- },
- "anthropic.claude-3-sonnet-20240229-v1:0": {
- maxTokens: 4096,
- contextWindow: 200_000,
- supportsImages: true,
- supportsPromptCache: false,
- inputPrice: 3.0,
- outputPrice: 15.0,
- },
- "anthropic.claude-3-haiku-20240307-v1:0": {
- maxTokens: 4096,
- contextWindow: 200_000,
- supportsImages: true,
- supportsPromptCache: false,
- inputPrice: 0.25,
- outputPrice: 1.25,
- },
- "meta.llama3-3-70b-instruct-v1:0": {
- maxTokens: 8192,
- contextWindow: 128_000,
- supportsImages: false,
- supportsComputerUse: false,
- supportsPromptCache: false,
- inputPrice: 0.72,
- outputPrice: 0.72,
- },
- "meta.llama3-2-90b-instruct-v1:0": {
- maxTokens: 8192,
- contextWindow: 128_000,
- supportsImages: true,
- supportsComputerUse: false,
- supportsPromptCache: false,
- inputPrice: 0.72,
- outputPrice: 0.72,
- },
- "meta.llama3-2-11b-instruct-v1:0": {
- maxTokens: 8192,
- contextWindow: 128_000,
- supportsImages: true,
- supportsComputerUse: false,
- supportsPromptCache: false,
- inputPrice: 0.16,
- outputPrice: 0.16,
- },
- "meta.llama3-2-3b-instruct-v1:0": {
- maxTokens: 8192,
- contextWindow: 128_000,
- supportsImages: false,
- supportsComputerUse: false,
- supportsPromptCache: false,
- inputPrice: 0.15,
- outputPrice: 0.15,
- },
- "meta.llama3-2-1b-instruct-v1:0": {
- maxTokens: 8192,
- contextWindow: 128_000,
- supportsImages: false,
- supportsComputerUse: false,
- supportsPromptCache: false,
- inputPrice: 0.1,
- outputPrice: 0.1,
- },
- "meta.llama3-1-405b-instruct-v1:0": {
- maxTokens: 8192,
- contextWindow: 128_000,
- supportsImages: false,
- supportsComputerUse: false,
- supportsPromptCache: false,
- inputPrice: 2.4,
- outputPrice: 2.4,
- },
- "meta.llama3-1-70b-instruct-v1:0": {
- maxTokens: 8192,
- contextWindow: 128_000,
- supportsImages: false,
- supportsComputerUse: false,
- supportsPromptCache: false,
- inputPrice: 0.72,
- outputPrice: 0.72,
- },
- "meta.llama3-1-8b-instruct-v1:0": {
- maxTokens: 8192,
- contextWindow: 8_000,
- supportsImages: false,
- supportsComputerUse: false,
- supportsPromptCache: false,
- inputPrice: 0.22,
- outputPrice: 0.22,
- },
- "meta.llama3-70b-instruct-v1:0": {
- maxTokens: 2048,
- contextWindow: 8_000,
- supportsImages: false,
- supportsComputerUse: false,
- supportsPromptCache: false,
- inputPrice: 2.65,
- outputPrice: 3.5,
- },
- "meta.llama3-8b-instruct-v1:0": {
- maxTokens: 2048,
- contextWindow: 4_000,
- supportsImages: false,
- supportsComputerUse: false,
- supportsPromptCache: false,
- inputPrice: 0.3,
- outputPrice: 0.6,
- },
- } as const satisfies Record<string, ModelInfo>
// Glama
// https://glama.ai/models
export const glamaDefaultModelId = "anthropic/claude-3-5-sonnet"
// Fallback metadata used until live model info is fetched from Glama.
// NOTE(review): the description below references OpenRouter's moderation and
// appears copied from the OpenRouter listing — confirm it is intended here.
export const glamaDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
	description:
		"The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._",
}
// OpenRouter
// https://openrouter.ai/models?order=newest&supported_parameters=tools
export const openRouterDefaultModelId = "anthropic/claude-3.5-sonnet:beta" // will always exist in openRouterModels
// Fallback metadata used until live model info is fetched from OpenRouter.
export const openRouterDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
	description:
		"The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._",
}
// Vertex AI
// https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude
export type VertexModelId = keyof typeof vertexModels
export const vertexDefaultModelId: VertexModelId = "claude-3-5-sonnet-v2@20241022"
// Claude models served through Vertex AI. Prices are USD per million tokens.
// Prompt caching is flagged off for all entries here.
export const vertexModels = {
	"claude-3-5-sonnet-v2@20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: false,
		inputPrice: 3.0,
		outputPrice: 15.0,
	},
	"claude-3-5-sonnet@20240620": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 3.0,
		outputPrice: 15.0,
	},
	"claude-3-5-haiku@20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.0,
		outputPrice: 5.0,
	},
	"claude-3-opus@20240229": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 15.0,
		outputPrice: 75.0,
	},
	"claude-3-haiku@20240307": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.25,
		outputPrice: 1.25,
	},
} as const satisfies Record<string, ModelInfo>
// Fallback metadata for user-configured OpenAI-compatible endpoints when the
// actual model's capabilities are unknown.
export const openAiModelInfoSaneDefaults: ModelInfo = {
	// -1 looks like a sentinel for "no explicit max_tokens" — presumably the
	// handler omits the parameter in that case; confirm at the call site.
	maxTokens: -1,
	contextWindow: 128_000,
	supportsImages: true,
	supportsPromptCache: false,
	inputPrice: 0,
	outputPrice: 0,
}
// Gemini
// https://ai.google.dev/gemini-api/docs/models/gemini
export type GeminiModelId = keyof typeof geminiModels
export const geminiDefaultModelId: GeminiModelId = "gemini-2.0-flash-thinking-exp-01-21"
// NOTE(review): every entry is priced at 0; paid tiers exist for some of the
// stable 1.5 models — confirm the zero pricing is intentional (free tier).
export const geminiModels = {
	"gemini-2.0-flash-thinking-exp-01-21": {
		maxTokens: 65_536,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-thinking-exp-1219": {
		maxTokens: 8192,
		contextWindow: 32_767,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-exp": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-002": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-exp-0827": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-8b-exp-0827": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-pro-002": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-pro-exp-0827": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-exp-1206": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
} as const satisfies Record<string, ModelInfo>
// OpenAI Native
// https://openai.com/api/pricing/
export type OpenAiNativeModelId = keyof typeof openAiNativeModels
export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4o"
// Prices are USD per million tokens.
export const openAiNativeModels = {
	// don't support tool use yet
	o1: {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 15,
		outputPrice: 60,
	},
	"o1-preview": {
		maxTokens: 32_768,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 15,
		outputPrice: 60,
	},
	"o1-mini": {
		maxTokens: 65_536,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 3,
		outputPrice: 12,
	},
	"gpt-4o": {
		maxTokens: 4_096,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 5,
		outputPrice: 15,
	},
	"gpt-4o-mini": {
		maxTokens: 16_384,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.15,
		outputPrice: 0.6,
	},
} as const satisfies Record<string, ModelInfo>
// DeepSeek
// https://platform.deepseek.com/docs/api
export type DeepSeekModelId = keyof typeof deepSeekModels
export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"
export const deepSeekModels = {
	"deepseek-chat": {
		maxTokens: 8192,
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: false,
		// NOTE(review): $0.014/M matches DeepSeek's cache-hit input rate; the
		// standard (cache-miss) input rate is higher — confirm which is intended.
		inputPrice: 0.014, // $0.014 per million tokens
		outputPrice: 0.28, // $0.28 per million tokens
		description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`,
	},
	"deepseek-reasoner": {
		maxTokens: 8192,
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.55, // $0.55 per million tokens
		outputPrice: 2.19, // $2.19 per million tokens
		description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.`,
	},
} as const satisfies Record<string, ModelInfo>
// Azure OpenAI
// https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs
// Default `api-version` used when the user has not configured azureApiVersion.
export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"
// Mistral
// https://docs.mistral.ai/getting-started/models/models_overview/
export type MistralModelId = keyof typeof mistralModels
export const mistralDefaultModelId: MistralModelId = "codestral-latest"
// Prices are USD per million tokens.
export const mistralModels = {
	"codestral-latest": {
		maxTokens: 32_768,
		contextWindow: 256_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.3,
		outputPrice: 0.9,
	},
} as const satisfies Record<string, ModelInfo>