1 year ago · aec03d71c3
--- a/src/api/providers/anthropic.ts
+++ b/src/api/providers/anthropic.ts
@@ -29,7 +29,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 
															 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
														
 
															 		let stream: AnthropicStream<Anthropic.Messages.RawMessageStreamEvent>
														
 
															 		const cacheControl: CacheControlEphemeral = { type: "ephemeral" }
														
 
															-		let { id: modelId, maxTokens, thinking, temperature } = this.getModel()
														
 
															+		let { id: modelId, maxTokens, thinking, temperature, virtualId } = this.getModel()
														
 
															 		switch (modelId) {
														
 
															 			case "claude-3-7-sonnet-20250219":
														
@@ -82,6 +82,15 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 
															 						// prompt caching: https://x.com/alexalbert__/status/1823751995901272068
														
 
															 						// https://github.com/anthropics/anthropic-sdk-typescript?tab=readme-ov-file#default-headers
														
 
															 						// https://github.com/anthropics/anthropic-sdk-typescript/commit/c920b77fc67bd839bfeb6716ceab9d7c9bbe7393
														
 
															+
														
 
															+						// Check for the thinking-128k variant first
														
 
															+						if (virtualId === "claude-3-7-sonnet-20250219:thinking-128k") {
														
 
															+							return {
														
 
															+								headers: { "anthropic-beta": "output-128k-2025-02-19" },
														
 
															+							}
														
 
															+						}
														
 
															+
														
 
															+						// Then check for models that support prompt caching
														
 
															 						switch (modelId) {
														
 
															 							case "claude-3-5-sonnet-20241022":
														
 
															 							case "claude-3-5-haiku-20241022":
														
@@ -184,31 +193,58 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 
															 		let id = modelId && modelId in anthropicModels ? (modelId as AnthropicModelId) : anthropicDefaultModelId
														
 
															 		const info: ModelInfo = anthropicModels[id]
														
 
															+		// Keep the resolved ID as-is (before any `:thinking` / `:thinking-128k` suffix is stripped below) for variant-specific handling
														
 
															+		const virtualId = id
														
 
															+
														
 
															 		// The `:thinking` variant is a virtual identifier for the
														
 
															 		// `claude-3-7-sonnet-20250219` model with a thinking budget.
														
 
															 		// We can handle this more elegantly in the future.
														
 
															-		if (id === "claude-3-7-sonnet-20250219:thinking") {
														
 
															+		if (id === "claude-3-7-sonnet-20250219:thinking" || id === "claude-3-7-sonnet-20250219:thinking-128k") {
														
 
															 			id = "claude-3-7-sonnet-20250219"
														
 
															 		}
														
 
															 		return {
														
 
															 			id,
														
 
															 			info,
														
 
															+			virtualId, // Include the original ID to use for header selection
														
 
															 			...getModelParams({ options: this.options, model: info, defaultMaxTokens: ANTHROPIC_DEFAULT_MAX_TOKENS }),
														
 
															 		}
														
 
															 	}
														
 
															 	async completePrompt(prompt: string) {
														
 
															-		let { id: modelId, maxTokens, thinking, temperature } = this.getModel()
														
 
															-
														
 
															-		const message = await this.client.messages.create({
														
 
															-			model: modelId,
														
 
															-			max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
														
 
															-			thinking,
														
 
															-			temperature,
														
 
															-			messages: [{ role: "user", content: prompt }],
														
 
															-			stream: false,
														
 
															-		})
														
 
															+		let { id: modelId, maxTokens, thinking, temperature, virtualId } = this.getModel()
														
 
															+
														
 
															+		const message = await this.client.messages.create(
														
 
															+			{
														
 
															+				model: modelId,
														
 
															+				max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
														
 
															+				thinking,
														
 
															+				temperature,
														
 
															+				messages: [{ role: "user", content: prompt }],
														
 
															+				stream: false,
														
 
															+			},
														
 
															+			(() => {
														
 
															+				// Check for the thinking-128k variant first
														
 
															+				if (virtualId === "claude-3-7-sonnet-20250219:thinking-128k") {
														
 
															+					return {
														
 
															+						headers: { "anthropic-beta": "output-128k-2025-02-19" },
														
 
															+					}
														
 
															+				}
														
 
															+
														
 
															+				// Then check for models that support prompt caching
														
 
															+				switch (modelId) {
														
 
															+					case "claude-3-5-sonnet-20241022":
														
 
															+					case "claude-3-5-haiku-20241022":
														
 
															+					case "claude-3-opus-20240229":
														
 
															+					case "claude-3-haiku-20240307":
														
 
															+						return {
														
 
															+							headers: { "anthropic-beta": "prompt-caching-2024-07-31" },
														
 
															+						}
														
 
															+					default:
														
 
															+						return undefined
														
 
															+				}
														
 
															+			})(),
														
 
															+		)
														
 
															 		const content = message.content.find(({ type }) => type === "text")
														
 
															 		return content?.type === "text" ? content.text : ""
														
@@ -223,17 +259,40 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 
															 	override async countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number> {
														
 
															 		try {
														
 
															 			// Use the current model
														
 
															-			const actualModelId = this.getModel().id
														
 
															+			const { id: actualModelId, virtualId } = this.getModel()
														
 
															-			const response = await this.client.messages.countTokens({
														
 
															-				model: actualModelId,
														
 
															-				messages: [
														
 
															-					{
														
 
															-						role: "user",
														
 
															-						content: content,
														
 
															-					},
														
 
															-				],
														
 
															-			})
														
 
															+			const response = await this.client.messages.countTokens(
														
 
															+				{
														
 
															+					model: actualModelId,
														
 
															+					messages: [
														
 
															+						{
														
 
															+							role: "user",
														
 
															+							content: content,
														
 
															+						},
														
 
															+					],
														
 
															+				},
														
 
															+				(() => {
														
 
															+					// Check for the thinking-128k variant first
														
 
															+					if (virtualId === "claude-3-7-sonnet-20250219:thinking-128k") {
														
 
															+						return {
														
 
															+							headers: { "anthropic-beta": "output-128k-2025-02-19" },
														
 
															+						}
														
 
															+					}
														
 
															+
														
 
															+					// Then check for models that support prompt caching
														
 
															+					switch (actualModelId) {
														
 
															+						case "claude-3-5-sonnet-20241022":
														
 
															+						case "claude-3-5-haiku-20241022":
														
 
															+						case "claude-3-opus-20240229":
														
 
															+						case "claude-3-haiku-20240307":
														
 
															+							return {
														
 
															+								headers: { "anthropic-beta": "prompt-caching-2024-07-31" },
														
 
															+							}
														
 
															+						default:
														
 
															+							return undefined
														
 
															+					}
														
 
															+				})(),
														
 
															+			)
														
 
															 			return response.input_tokens
														
 
															 		} catch (error) {
														
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -98,6 +98,18 @@ export interface ModelInfo {
 
															 export type AnthropicModelId = keyof typeof anthropicModels
														
 
															 export const anthropicDefaultModelId: AnthropicModelId = "claude-3-7-sonnet-20250219"
														
 
															 export const anthropicModels = {
														
 
															+	"claude-3-7-sonnet-20250219:thinking-128k": {
														
 
															+		maxTokens: 128_000,
														
 
															+		contextWindow: 200_000,
														
 
															+		supportsImages: true,
														
 
															+		supportsComputerUse: true,
														
 
															+		supportsPromptCache: true,
														
 
															+		inputPrice: 3.0, // $3 per million input tokens
														
 
															+		outputPrice: 15.0, // $15 per million output tokens
														
 
															+		cacheWritesPrice: 3.75, // $3.75 per million tokens
														
 
															+		cacheReadsPrice: 0.3, // $0.30 per million tokens
														
 
															+		thinking: true,
														
 
															+	},
														
 
															 	"claude-3-7-sonnet-20250219:thinking": {
														
 
															 		maxTokens: 64_000,
														
 
															 		contextWindow: 200_000,