Просмотр исходного кода

fix(vertex): add opus 4.6 1m model support on Vertex (#9230)

* fix(vertex): add opus 4.6 1m and global endpoint support

* fix(vertex): enable thinking for opus 4.6 1m in webview
Saoud Rizwan 2 месяца назад
Родитель
Commit
5dcaa8c

+ 5 - 0
.changeset/soft-cars-move.md

@@ -0,0 +1,5 @@
+---
+"cline": patch
+---
+
+Add Vertex Claude Opus 4.6 1M model option and global endpoint support, and pass the 1M beta header for Vertex Claude requests.

+ 1 - 1
docs/provider-config/gcp-vertex-ai.mdx

@@ -53,7 +53,7 @@ Vertex AI supports multiple regions. Select a region that meets your latency, co
 -   **asia-southeast1 (Singapore)**
 -   **asia-southeast1 (Singapore)**
 -   **global (Global)**
 -   **global (Global)**
 
 
-The Global endpoint may offer higher availability and reduce resource exhausted errors. Only Gemini models are supported.
+The Global endpoint may offer higher availability and reduce resource exhausted errors. Gemini models and supported Claude models can use it, depending on model availability in your project.
 
 
 #### 2.2 Enable the Claude 3.5 Sonnet v2 Model
 #### 2.2 Enable the Claude 3.5 Sonnet v2 Model
 
 

+ 14 - 6
src/core/api/providers/vertex.ts

@@ -1,7 +1,7 @@
 import { Tool as AnthropicTool } from "@anthropic-ai/sdk/resources/index"
 import { Tool as AnthropicTool } from "@anthropic-ai/sdk/resources/index"
 import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"
 import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"
 import { FunctionDeclaration as GoogleTool } from "@google/genai"
 import { FunctionDeclaration as GoogleTool } from "@google/genai"
-import { ModelInfo, VertexModelId, vertexDefaultModelId, vertexModels } from "@shared/api"
+import { CLAUDE_SONNET_1M_SUFFIX, ModelInfo, VertexModelId, vertexDefaultModelId, vertexModels } from "@shared/api"
 import { buildExternalBasicHeaders } from "@/services/EnvUtils"
 import { buildExternalBasicHeaders } from "@/services/EnvUtils"
 import { ClineStorageMessage } from "@/shared/messages/content"
 import { ClineStorageMessage } from "@/shared/messages/content"
 import { ClineTool } from "@/shared/tools"
 import { ClineTool } from "@/shared/tools"
@@ -73,10 +73,14 @@ export class VertexHandler implements ApiHandler {
 	@withRetry()
 	@withRetry()
 	async *createMessage(systemPrompt: string, messages: ClineStorageMessage[], tools?: ClineTool[]): ApiStream {
 	async *createMessage(systemPrompt: string, messages: ClineStorageMessage[], tools?: ClineTool[]): ApiStream {
 		const model = this.getModel()
 		const model = this.getModel()
-		const modelId = model.id
+		const rawModelId = model.id
+		const modelId = rawModelId.endsWith(CLAUDE_SONNET_1M_SUFFIX)
+			? rawModelId.slice(0, -CLAUDE_SONNET_1M_SUFFIX.length)
+			: rawModelId
+		const enable1mContextWindow = rawModelId.endsWith(CLAUDE_SONNET_1M_SUFFIX)
 
 
 		// For Gemini models, use the GeminiHandler
 		// For Gemini models, use the GeminiHandler
-		if (!modelId.includes("claude")) {
+		if (!rawModelId.includes("claude")) {
 			const geminiHandler = this.ensureGeminiHandler()
 			const geminiHandler = this.ensureGeminiHandler()
 			yield* geminiHandler.createMessage(systemPrompt, messages, tools as GoogleTool[])
 			yield* geminiHandler.createMessage(systemPrompt, messages, tools as GoogleTool[])
 			return
 			return
@@ -117,9 +121,13 @@ export class VertexHandler implements ApiHandler {
 				// NOTE: Forcing tool use when tools are provided will result in error when thinking is also enabled.
 				// NOTE: Forcing tool use when tools are provided will result in error when thinking is also enabled.
 				tool_choice: nativeToolsOn && !reasoningOn ? { type: "any" } : undefined,
 				tool_choice: nativeToolsOn && !reasoningOn ? { type: "any" } : undefined,
 			},
 			},
-			{
-				headers: {},
-			},
+			enable1mContextWindow
+				? {
+						headers: {
+							"anthropic-beta": "context-1m-2025-08-07",
+						},
+					}
+				: undefined,
 		)
 		)
 
 
 		const lastStartedToolCall = { id: "", name: "", arguments: "" }
 		const lastStartedToolCall = { id: "", name: "", arguments: "" }

+ 14 - 0
src/shared/api.ts

@@ -880,11 +880,25 @@ export const vertexModels = {
 		contextWindow: 200_000,
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		supportsPromptCache: true,
+		supportsGlobalEndpoint: true,
+		inputPrice: 5.0,
+		outputPrice: 25.0,
+		cacheWritesPrice: 6.25,
+		cacheReadsPrice: 0.5,
+		supportsReasoning: true,
+	},
+	"claude-opus-4-6:1m": {
+		maxTokens: 8192,
+		contextWindow: 1_000_000,
+		supportsImages: true,
+		supportsPromptCache: true,
+		supportsGlobalEndpoint: true,
 		inputPrice: 5.0,
 		inputPrice: 5.0,
 		outputPrice: 25.0,
 		outputPrice: 25.0,
 		cacheWritesPrice: 6.25,
 		cacheWritesPrice: 6.25,
 		cacheReadsPrice: 0.5,
 		cacheReadsPrice: 0.5,
 		supportsReasoning: true,
 		supportsReasoning: true,
+		tiers: CLAUDE_OPUS_1M_TIERS,
 	},
 	},
 	"claude-opus-4-5@20251101": {
 	"claude-opus-4-5@20251101": {
 		maxTokens: 8192,
 		maxTokens: 8192,

+ 1 - 0
webview-ui/src/components/settings/providers/VertexProvider.tsx

@@ -25,6 +25,7 @@ interface VertexProviderProps {
 // Vertex models that support thinking
 // Vertex models that support thinking
 const SUPPORTED_THINKING_MODELS = [
 const SUPPORTED_THINKING_MODELS = [
 	"claude-opus-4-6",
 	"claude-opus-4-6",
+	"claude-opus-4-6:1m",
 	"claude-haiku-4-5@20251001",
 	"claude-haiku-4-5@20251001",
 	"claude-sonnet-4-5@20250929",
 	"claude-sonnet-4-5@20250929",
 	"claude-3-7-sonnet@20250219",
 	"claude-3-7-sonnet@20250219",