
Added support for Claude 3.7 Sonnet thinking via Vertex AI

Catalin Lupuleti 10 months ago
parent
commit
3514f6506b
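
In practice, this commit lets the extension request extended thinking from Claude 3.7 Sonnet on Vertex AI. A minimal sketch of the underlying SDK call, assuming hypothetical project, region, prompt, and budget values (none of these literals come from the commit itself):

import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"

// All literals below are placeholders for illustration, not values from this commit.
const client = new AnthropicVertex({ projectId: "my-gcp-project", region: "us-east5" })

async function demo() {
	const stream = await client.messages.create({
		model: "claude-3-7-sonnet@20250219",
		max_tokens: 16384,
		temperature: 1, // extended thinking requires temperature 1
		thinking: { type: "enabled", budget_tokens: 8192 },
		messages: [{ role: "user", content: "Why is the sky blue?" }],
		stream: true,
	})

	for await (const event of stream) {
		// Thinking streams back as content_block_delta events carrying a thinking_delta.
		if (event.type === "content_block_delta" && event.delta.type === "thinking_delta") {
			process.stdout.write(event.delta.thinking)
		}
	}
}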

+ 5 - 5
package-lock.json

@@ -10,7 +10,7 @@
 			"dependencies": {
 				"@anthropic-ai/bedrock-sdk": "^0.10.2",
 				"@anthropic-ai/sdk": "^0.37.0",
-				"@anthropic-ai/vertex-sdk": "^0.4.1",
+				"@anthropic-ai/vertex-sdk": "^0.7.0",
 				"@aws-sdk/client-bedrock-runtime": "^3.706.0",
 				"@google/generative-ai": "^0.18.0",
 				"@mistralai/mistralai": "^1.3.6",
@@ -150,11 +150,11 @@
 			"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="
 		},
 		"node_modules/@anthropic-ai/vertex-sdk": {
-			"version": "0.4.3",
-			"resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.4.3.tgz",
-			"integrity": "sha512-2Uef0C5P2Hx+T88RnUSRA3u4aZqmqnrRSOb2N64ozgKPiSUPTM5JlggAq2b32yWMj5d3MLYa6spJXKMmHXOcoA==",
+			"version": "0.7.0",
+			"resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.7.0.tgz",
+			"integrity": "sha512-zNm3hUXgYmYDTyveIxOyxbcnh5VXFkrLo4bSnG6LAfGzW7k3k2iCNDSVKtR9qZrK2BCid7JtVu7jsEKaZ/9dSw==",
 			"dependencies": {
-				"@anthropic-ai/sdk": ">=0.14 <1",
+				"@anthropic-ai/sdk": ">=0.35 <1",
 				"google-auth-library": "^9.4.2"
 			}
 		},

+ 1 - 1
package.json

@@ -305,7 +305,7 @@
 	"dependencies": {
 		"@anthropic-ai/bedrock-sdk": "^0.10.2",
 		"@anthropic-ai/sdk": "^0.37.0",
-		"@anthropic-ai/vertex-sdk": "^0.4.1",
+		"@anthropic-ai/vertex-sdk": "^0.7.0",
 		"@aws-sdk/client-bedrock-runtime": "^3.706.0",
 		"@google/generative-ai": "^0.18.0",
 		"@mistralai/mistralai": "^1.3.6",

+ 91 - 19
src/api/providers/vertex.ts

@@ -2,6 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"
 import { Stream as AnthropicStream } from "@anthropic-ai/sdk/streaming"
 import { ApiHandler, SingleCompletionHandler } from "../"
+import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta"
 import { ApiHandlerOptions, ModelInfo, vertexDefaultModelId, VertexModelId, vertexModels } from "../../shared/api"
 import { ApiStream } from "../transform/stream"
 
@@ -70,15 +71,25 @@ interface VertexMessageStreamEvent {
 	usage?: {
 		output_tokens: number
 	}
-	content_block?: {
-		type: "text"
-		text: string
-	}
+	content_block?:
+		| {
+				type: "text"
+				text: string
+		  }
+		| {
+				type: "thinking"
+				thinking: string
+		  }
 	index?: number
-	delta?: {
-		type: "text_delta"
-		text: string
-	}
+	delta?:
+		| {
+				type: "text_delta"
+				text: string
+		  }
+		| {
+				type: "thinking_delta"
+				thinking: string
+		  }
 }
 
 // https://docs.anthropic.com/en/api/claude-on-vertex-ai
@@ -145,6 +156,7 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {
 
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const model = this.getModel()
+		let { id, info, temperature, maxTokens, thinking } = model
 		const useCache = model.info.supportsPromptCache
 
 		// Find indices of user messages that we want to cache
@@ -158,9 +170,10 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {
 
 		// Create the stream with appropriate caching configuration
 		const params = {
-			model: model.id,
-			max_tokens: model.info.maxTokens || 8192,
-			temperature: this.options.modelTemperature ?? 0,
+			model: id,
+			max_tokens: maxTokens,
+			temperature,
+			thinking,
 			// Cache the system prompt if caching is enabled
 			system: useCache
 				? [
@@ -220,6 +233,19 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {
 							}
 							break
 						}
+						case "thinking": {
+							if (chunk.index! > 0) {
+								yield {
+									type: "reasoning",
+									text: "\n",
+								}
+							}
+							yield {
+								type: "reasoning",
+								text: (chunk.content_block as any).thinking,
+							}
+							break
+						}
 					}
 					break
 				}
@@ -232,6 +258,13 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {
 							}
 							break
 						}
+						case "thinking_delta": {
+							yield {
+								type: "reasoning",
+								text: (chunk.delta as any).thinking,
+							}
+							break
+						}
 					}
 					break
 				}
@@ -239,24 +272,63 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}
 
-	getModel(): { id: VertexModelId; info: ModelInfo } {
+	getModel(): {
+		id: VertexModelId
+		info: ModelInfo
+		temperature: number
+		maxTokens: number
+		thinking?: BetaThinkingConfigParam
+	} {
 		const modelId = this.options.apiModelId
+		let temperature = this.options.modelTemperature ?? 0
+		let thinking: BetaThinkingConfigParam | undefined = undefined
+
 		if (modelId && modelId in vertexModels) {
 			const id = modelId as VertexModelId
-			return { id, info: vertexModels[id] }
+			const info: ModelInfo = vertexModels[id]
+
+			// The `:thinking` variant is a virtual identifier for thinking-enabled models
+			// Similar to how it's handled in the Anthropic provider
+			let actualId = id
+			if (id.endsWith(":thinking")) {
+				actualId = id.replace(":thinking", "") as VertexModelId
+			}
+
+			const maxTokens = this.options.modelMaxTokens || info.maxTokens || 8192
+
+			if (info.thinking) {
+				temperature = 1.0 // Thinking requires temperature 1.0
+				const maxBudgetTokens = Math.floor(maxTokens * 0.8)
+				const budgetTokens = Math.max(
+					Math.min(
+						this.options.vertexThinking ?? this.options.anthropicThinking ?? maxBudgetTokens,
+						maxBudgetTokens,
+					),
+					1024,
+				)
+				thinking = { type: "enabled", budget_tokens: budgetTokens }
+			}
+
+			return { id: actualId, info, temperature, maxTokens, thinking }
 		}
-		return { id: vertexDefaultModelId, info: vertexModels[vertexDefaultModelId] }
+
+		const id = vertexDefaultModelId
+		const info = vertexModels[id]
+		const maxTokens = this.options.modelMaxTokens || info.maxTokens || 8192
+
+		return { id, info, temperature, maxTokens, thinking }
 	}
 
 	async completePrompt(prompt: string): Promise<string> {
 		try {
-			const model = this.getModel()
-			const useCache = model.info.supportsPromptCache
+			let { id, info, temperature, maxTokens, thinking } = this.getModel()
+			const useCache = info.supportsPromptCache
 
 			const params = {
-				model: model.id,
-				max_tokens: model.info.maxTokens || 8192,
-				temperature: this.options.modelTemperature ?? 0,
+				model: id,
+				max_tokens: maxTokens,
+				temperature,
+				thinking,
 				system: "", // No system prompt needed for single completions
 				messages: [
 					{
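
The budget arithmetic in getModel() is the core of this change: the thinking budget is clamped to at least 1,024 tokens and at most 80% of max_tokens, and temperature is forced to 1. A standalone sketch of that logic (the helper name is mine, not from the commit):

// Clamp a requested thinking budget into the range the API accepts.
// Hypothetical helper mirroring the logic in VertexHandler.getModel().
function clampThinkingBudget(requested: number | undefined, maxTokens: number): number {
	const maxBudget = Math.floor(maxTokens * 0.8) // keep 20% of max_tokens for the visible reply
	return Math.max(Math.min(requested ?? maxBudget, maxBudget), 1024)
}

// With the 64k-token ":thinking" model entry: clampThinkingBudget(undefined, 64000) === 51200,
// while a too-small request such as clampThinkingBudget(100, 64000) is raised to 1024.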

+ 5 - 0
src/core/webview/ClineProvider.ts

@@ -1652,6 +1652,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
 			lmStudioBaseUrl,
 			anthropicBaseUrl,
 			anthropicThinking,
+			vertexThinking,
 			geminiApiKey,
 			openAiNativeApiKey,
 			deepSeekApiKey,
@@ -1701,6 +1702,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
 			this.updateGlobalState("lmStudioBaseUrl", lmStudioBaseUrl),
 			this.updateGlobalState("anthropicBaseUrl", anthropicBaseUrl),
 			this.updateGlobalState("anthropicThinking", anthropicThinking),
+			this.updateGlobalState("vertexThinking", vertexThinking),
 			this.storeSecret("geminiApiKey", geminiApiKey),
 			this.storeSecret("openAiNativeApiKey", openAiNativeApiKey),
 			this.storeSecret("deepSeekApiKey", deepSeekApiKey),
@@ -2158,6 +2160,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
 			lmStudioBaseUrl,
 			anthropicBaseUrl,
 			anthropicThinking,
+			vertexThinking,
 			geminiApiKey,
 			openAiNativeApiKey,
 			deepSeekApiKey,
@@ -2242,6 +2245,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
 			this.getGlobalState("lmStudioBaseUrl") as Promise<string | undefined>,
 			this.getGlobalState("anthropicBaseUrl") as Promise<string | undefined>,
 			this.getGlobalState("anthropicThinking") as Promise<number | undefined>,
+			this.getGlobalState("vertexThinking") as Promise<number | undefined>,
 			this.getSecret("geminiApiKey") as Promise<string | undefined>,
 			this.getSecret("openAiNativeApiKey") as Promise<string | undefined>,
 			this.getSecret("deepSeekApiKey") as Promise<string | undefined>,
@@ -2343,6 +2347,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
 				lmStudioBaseUrl,
 				anthropicBaseUrl,
 				anthropicThinking,
+				vertexThinking,
 				geminiApiKey,
 				openAiNativeApiKey,
 				deepSeekApiKey,

+ 14 - 0
src/shared/api.ts

@@ -41,6 +41,7 @@ export interface ApiHandlerOptions {
 	awsUseProfile?: boolean
 	vertexProjectId?: string
 	vertexRegion?: string
+	vertexThinking?: number
 	openAiBaseUrl?: string
 	openAiApiKey?: string
 	openAiModelId?: string
@@ -436,6 +437,18 @@ export const openRouterDefaultModelInfo: ModelInfo = {
 export type VertexModelId = keyof typeof vertexModels
 export const vertexDefaultModelId: VertexModelId = "claude-3-7-sonnet@20250219"
 export const vertexModels = {
+	"claude-3-7-sonnet@20250219:thinking": {
+		maxTokens: 64000,
+		contextWindow: 200_000,
+		supportsImages: true,
+		supportsComputerUse: true,
+		supportsPromptCache: true,
+		inputPrice: 3.0,
+		outputPrice: 15.0,
+		cacheWritesPrice: 3.75,
+		cacheReadsPrice: 0.3,
+		thinking: true,
+	},
 	"claude-3-7-sonnet@20250219": {
 		maxTokens: 8192,
 		contextWindow: 200_000,
@@ -446,6 +459,7 @@ export const vertexModels = {
 		outputPrice: 15.0,
 		cacheWritesPrice: 3.75,
 		cacheReadsPrice: 0.3,
+		thinking: false,
 	},
 	"claude-3-5-sonnet-v2@20241022": {
 		maxTokens: 8192,
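
The new "claude-3-7-sonnet@20250219:thinking" key is a virtual entry: it exists only in this model table (carrying the 64,000-token maxTokens and thinking: true) and is stripped back to the real Vertex model name before the API call. A sketch of that resolution, matching the vertex.ts hunk above:

// Resolve a possibly-virtual model id to the id Vertex actually accepts.
function resolveVertexModelId(id: string): string {
	// ":thinking" marks a thinking-enabled variant; Vertex only knows the base id.
	return id.endsWith(":thinking") ? id.slice(0, -":thinking".length) : id
}

resolveVertexModelId("claude-3-7-sonnet@20250219:thinking") // => "claude-3-7-sonnet@20250219"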

+ 2 - 0
src/shared/globalState.ts

@@ -24,6 +24,7 @@ export type GlobalStateKey =
 	| "awsUseProfile"
 	| "vertexProjectId"
 	| "vertexRegion"
+	| "vertexThinking"
 	| "lastShownAnnouncementId"
 	| "customInstructions"
 	| "alwaysAllowReadOnly"
@@ -43,6 +44,7 @@ export type GlobalStateKey =
 	| "lmStudioBaseUrl"
 	| "anthropicBaseUrl"
 	| "anthropicThinking"
+	| "vertexThinking"
 	| "azureApiVersion"
 	| "openAiStreamingEnabled"
 	| "openRouterModelId"

+ 3 - 0
webview-ui/src/components/settings/ApiOptions.tsx

@@ -7,6 +7,7 @@ import * as vscodemodels from "vscode"
 import {
 	ApiConfiguration,
 	ModelInfo,
+	ApiProvider,
 	anthropicDefaultModelId,
 	anthropicModels,
 	azureOpenAiDefaultApiVersion,
@@ -1380,9 +1381,11 @@ const ApiOptions = ({
 						/>
 					</div>
 					<ThinkingBudget
+						key={`${selectedProvider}-${selectedModelId}`}
 						apiConfiguration={apiConfiguration}
 						setApiConfigurationField={setApiConfigurationField}
 						modelInfo={selectedModelInfo}
+						provider={selectedProvider as ApiProvider}
 					/>
 					<ModelInfoView
 						selectedModelId={selectedModelId}
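
The key prop on ThinkingBudget is worth noting: because it changes whenever the provider or model changes, React unmounts and remounts the component rather than reusing it, so slider state from the previous provider's budget field cannot leak across the switch.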

+ 22 - 8
webview-ui/src/components/settings/ThinkingBudget.tsx

@@ -1,5 +1,5 @@
-import { useEffect } from "react"
-
+import { useEffect, useMemo } from "react"
+import { ApiProvider } from "../../../../src/shared/api"
 import { Slider } from "@/components/ui"
 
 import { ApiConfiguration, ModelInfo } from "../../../../src/shared/api"
@@ -8,24 +8,38 @@ interface ThinkingBudgetProps {
 	apiConfiguration: ApiConfiguration
 	setApiConfigurationField: <K extends keyof ApiConfiguration>(field: K, value: ApiConfiguration[K]) => void
 	modelInfo?: ModelInfo
+	provider?: ApiProvider
 }
 
-export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, modelInfo }: ThinkingBudgetProps) => {
+export const ThinkingBudget = ({
+	apiConfiguration,
+	setApiConfigurationField,
+	modelInfo,
+	provider,
+}: ThinkingBudgetProps) => {
+	const isVertexProvider = provider === "vertex"
+	const budgetField = isVertexProvider ? "vertexThinking" : "anthropicThinking"
+
 	const tokens = apiConfiguration?.modelMaxTokens || modelInfo?.maxTokens || 64_000
 	const tokensMin = 8192
 	const tokensMax = modelInfo?.maxTokens || 64_000
 
-	const thinkingTokens = apiConfiguration?.anthropicThinking || 8192
+	// Get the appropriate thinking tokens based on provider
+	const thinkingTokens = useMemo(() => {
+		const value = isVertexProvider ? apiConfiguration?.vertexThinking : apiConfiguration?.anthropicThinking
+		return value || Math.min(Math.floor(0.8 * tokens), 8192)
+	}, [apiConfiguration, isVertexProvider, tokens])
+
 	const thinkingTokensMin = 1024
 	const thinkingTokensMax = Math.floor(0.8 * tokens)
 
 	useEffect(() => {
 		if (thinkingTokens > thinkingTokensMax) {
-			setApiConfigurationField("anthropicThinking", thinkingTokensMax)
+			setApiConfigurationField(budgetField, thinkingTokensMax)
 		}
-	}, [thinkingTokens, thinkingTokensMax, setApiConfigurationField])
+	}, [thinkingTokens, thinkingTokensMax, setApiConfigurationField, budgetField])
 
-	if (!modelInfo || !modelInfo.thinking) {
+	if (!modelInfo?.thinking) {
 		return null
 	}
 
@@ -52,7 +66,7 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
 						max={thinkingTokensMax}
 						step={1024}
 						value={[thinkingTokens]}
-						onValueChange={([value]) => setApiConfigurationField("anthropicThinking", value)}
+						onValueChange={([value]) => setApiConfigurationField(budgetField, value)}
 					/>
 					<div className="w-12 text-sm text-center">{thinkingTokens}</div>
 				</div>