import { Anthropic } from "@anthropic-ai/sdk"
import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta/messages/index.mjs"

import { ApiConfiguration, ModelInfo, ApiHandlerOptions } from "../shared/api"
import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "./providers/constants"
import { GlamaHandler } from "./providers/glama"
import { AnthropicHandler } from "./providers/anthropic"
import { AwsBedrockHandler } from "./providers/bedrock"
import { OpenRouterHandler } from "./providers/openrouter"
import { VertexHandler } from "./providers/vertex"
import { AnthropicVertexHandler } from "./providers/anthropic-vertex"
import { OpenAiHandler } from "./providers/openai"
import { OllamaHandler } from "./providers/ollama"
import { LmStudioHandler } from "./providers/lmstudio"
import { GeminiHandler } from "./providers/gemini"
import { OpenAiNativeHandler } from "./providers/openai-native"
import { DeepSeekHandler } from "./providers/deepseek"
import { MistralHandler } from "./providers/mistral"
import { VsCodeLmHandler } from "./providers/vscode-lm"
import { ApiStream } from "./transform/stream"
import { UnboundHandler } from "./providers/unbound"
import { RequestyHandler } from "./providers/requesty"
import { HumanRelayHandler } from "./providers/human-relay"
import { FakeAIHandler } from "./providers/fake-ai"
import { XAIHandler } from "./providers/xai"

export interface SingleCompletionHandler {
	completePrompt(prompt: string): Promise<string>
}

export interface ApiHandler {
	createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[], cacheKey?: string): ApiStream

	getModel(): { id: string; info: ModelInfo }

	/**
	 * Counts tokens for content blocks.
	 *
	 * All providers extend BaseProvider, which provides a default tiktoken
	 * implementation, but they can override this to use their native
	 * token-counting endpoints.
	 *
	 * @param content The content to count tokens for
	 * @returns A promise resolving to the token count
	 */
	countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number>
}
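
// Illustrative sketch (an assumption, not part of this module): how a
// provider might override the default tiktoken-based `countTokens` it
// inherits from BaseProvider with a native token-counting endpoint.
// `ExampleHandler` and `this.client` are hypothetical names.
//
//   class ExampleHandler extends BaseProvider implements ApiHandler {
//   	override async countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number> {
//   		const { input_tokens } = await this.client.messages.countTokens({ ... })
//   		return input_tokens
//   	}
//   }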

export function buildApiHandler(configuration: ApiConfiguration): ApiHandler {
	const { apiProvider, ...options } = configuration

	switch (apiProvider) {
		case "anthropic":
			return new AnthropicHandler(options)
		case "glama":
			return new GlamaHandler(options)
		case "openrouter":
			return new OpenRouterHandler(options)
		case "bedrock":
			return new AwsBedrockHandler(options)
		case "vertex":
			// Claude models on Vertex use the Anthropic-specific handler;
			// everything else goes through the generic Vertex handler.
			if (options.apiModelId?.startsWith("claude")) {
				return new AnthropicVertexHandler(options)
			} else {
				return new VertexHandler(options)
			}
		case "openai":
			return new OpenAiHandler(options)
		case "ollama":
			return new OllamaHandler(options)
		case "lmstudio":
			return new LmStudioHandler(options)
		case "gemini":
			return new GeminiHandler(options)
		case "openai-native":
			return new OpenAiNativeHandler(options)
		case "deepseek":
			return new DeepSeekHandler(options)
		case "vscode-lm":
			return new VsCodeLmHandler(options)
		case "mistral":
			return new MistralHandler(options)
		case "unbound":
			return new UnboundHandler(options)
		case "requesty":
			return new RequestyHandler(options)
		case "human-relay":
			return new HumanRelayHandler()
		case "fake-ai":
			return new FakeAIHandler(options)
		case "xai":
			return new XAIHandler(options)
		default:
			// Fall back to the Anthropic handler for unrecognized providers.
			return new AnthropicHandler(options)
	}
}
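
// Example usage (illustrative; the configuration values and surrounding
// variables are assumptions):
//
//   const handler = buildApiHandler({ apiProvider: "openrouter", apiModelId: "some-model-id" })
//   const { id, info } = handler.getModel()
//   for await (const chunk of handler.createMessage(systemPrompt, messages)) {
//   	// Handle streamed chunks here.
//   }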

export function getModelParams({
	options,
	model,
	defaultMaxTokens,
	defaultTemperature = 0,
	defaultReasoningEffort,
}: {
	options: ApiHandlerOptions
	model: ModelInfo
	defaultMaxTokens?: number
	defaultTemperature?: number
	defaultReasoningEffort?: "low" | "medium" | "high"
}) {
	const {
		modelMaxTokens: customMaxTokens,
		modelMaxThinkingTokens: customMaxThinkingTokens,
		modelTemperature: customTemperature,
		reasoningEffort: customReasoningEffort,
	} = options

	let maxTokens = model.maxTokens ?? defaultMaxTokens
	let thinking: BetaThinkingConfigParam | undefined = undefined
	let temperature = customTemperature ?? defaultTemperature
	const reasoningEffort = customReasoningEffort ?? defaultReasoningEffort

	if (model.thinking) {
		// Only honor `customMaxTokens` for thinking models.
		maxTokens = customMaxTokens ?? maxTokens

		// Clamp the thinking budget to be at most 80% of max tokens and at
		// least 1024 tokens.
		const maxBudgetTokens = Math.floor((maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS) * 0.8)
		const budgetTokens = Math.max(Math.min(customMaxThinkingTokens ?? maxBudgetTokens, maxBudgetTokens), 1024)
		thinking = { type: "enabled", budget_tokens: budgetTokens }

		// Anthropic "thinking" models require a temperature of 1.0.
		temperature = 1.0
	}

	return { maxTokens, thinking, temperature, reasoningEffort }
}
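
// Worked example (illustrative; the values are assumptions): for a
// thinking-enabled model with `modelMaxTokens: 16384` in `options` and no
// `modelMaxThinkingTokens`, the budget clamps to floor(16384 * 0.8) = 13107
// and temperature is forced to 1.0:
//
//   getModelParams({ options, model })
//   // => { maxTokens: 16384, thinking: { type: "enabled", budget_tokens: 13107 }, temperature: 1, reasoningEffort: undefined }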