Просмотр исходного кода

Non-thinking sonnet has 8192 max tokens (#1860)

Matt Rubens 9 месяцев назад
Родитель
Commit
95ba760daf

+ 1 - 1
src/api/providers/__tests__/anthropic.test.ts

@@ -218,7 +218,7 @@ describe("AnthropicHandler", () => {
 			})
 
 			const result = handler.getModel()
-			expect(result.maxTokens).toBe(16_384)
+			expect(result.maxTokens).toBe(8192)
 			expect(result.thinking).toBeUndefined()
 			expect(result.temperature).toBe(0)
 		})

+ 2 - 2
src/api/providers/__tests__/vertex.test.ts

@@ -309,7 +309,7 @@ describe("VertexHandler", () => {
 					},
 				],
 				generationConfig: {
-					maxOutputTokens: 16384,
+					maxOutputTokens: 8192,
 					temperature: 0,
 				},
 			})
@@ -914,7 +914,7 @@ describe("VertexHandler", () => {
 			})
 
 			const result = handler.getModel()
-			expect(result.maxTokens).toBe(16_384)
+			expect(result.maxTokens).toBe(8192)
 			expect(result.thinking).toBeUndefined()
 			expect(result.temperature).toBe(0)
 		})

+ 0 - 3
src/api/providers/glama.ts

@@ -217,9 +217,6 @@ export async function getGlamaModels() {
 			}
 
 			switch (rawModel.id) {
-				case rawModel.id.startsWith("anthropic/claude-3-7-sonnet"):
-					modelInfo.maxTokens = 16384
-					break
 				case rawModel.id.startsWith("anthropic/"):
 					modelInfo.maxTokens = 8192
 					break

+ 1 - 1
src/api/providers/openrouter.ts

@@ -261,7 +261,7 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions) {
 					modelInfo.supportsPromptCache = true
 					modelInfo.cacheWritesPrice = 3.75
 					modelInfo.cacheReadsPrice = 0.3
-					modelInfo.maxTokens = rawModel.id === "anthropic/claude-3.7-sonnet:thinking" ? 128_000 : 16_384
+					modelInfo.maxTokens = rawModel.id === "anthropic/claude-3.7-sonnet:thinking" ? 128_000 : 8192
 					break
 				case rawModel.id.startsWith("anthropic/claude-3.5-sonnet-20240620"):
 					modelInfo.supportsPromptCache = true

+ 0 - 3
src/api/providers/unbound.ts

@@ -211,9 +211,6 @@ export async function getUnboundModels() {
 				}
 
 				switch (true) {
-					case modelId.startsWith("anthropic/claude-3-7-sonnet"):
-						modelInfo.maxTokens = 16384
-						break
 					case modelId.startsWith("anthropic/"):
 						modelInfo.maxTokens = 8192
 						break

+ 2 - 2
src/shared/api.ts

@@ -174,7 +174,7 @@ export const anthropicModels = {
 		thinking: true,
 	},
 	"claude-3-7-sonnet-20250219": {
-		maxTokens: 16_384,
+		maxTokens: 8192,
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsComputerUse: true,
@@ -667,7 +667,7 @@ export const vertexModels = {
 		thinking: true,
 	},
 	"claude-3-7-sonnet@20250219": {
-		maxTokens: 16_384,
+		maxTokens: 8192,
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsComputerUse: true,

+ 1 - 1
webview-ui/src/components/ui/hooks/useOpenRouterModelProviders.ts

@@ -75,7 +75,7 @@ async function getOpenRouterProvidersForModel(modelId: string) {
 					modelInfo.supportsPromptCache = true
 					modelInfo.cacheWritesPrice = 3.75
 					modelInfo.cacheReadsPrice = 0.3
-					modelInfo.maxTokens = id === "anthropic/claude-3.7-sonnet:thinking" ? 64_000 : 16_384
+					modelInfo.maxTokens = id === "anthropic/claude-3.7-sonnet:thinking" ? 64_000 : 8192
 					break
 				case modelId.startsWith("anthropic/claude-3.5-sonnet-20240620"):
 					modelInfo.supportsPromptCache = true

+ 1 - 1
webview-ui/src/utils/__tests__/model-utils.test.ts

@@ -15,7 +15,7 @@ describe("Model utility functions", () => {
 		/**
 		 * Testing the specific fix in commit cc79178f:
 		 * For thinking models, use apiConfig.modelMaxTokens if available,
-		 * otherwise fall back to 16_384 (not modelInfo.maxTokens)
+		 * otherwise fall back to 8192 (not modelInfo.maxTokens)
 		 */
 
 		it("should return apiConfig.modelMaxTokens for thinking models when provided", () => {