Clean up maxTokens logic

Matt Rubens 10 months ago
parent
commit
91ef9fbdb3

+ 2 - 2
src/api/providers/glama.ts

@@ -69,7 +69,7 @@ export class GlamaHandler implements ApiHandler, SingleCompletionHandler {
 		let maxTokens: number | undefined
 
 		if (this.getModel().id.startsWith("anthropic/")) {
-			maxTokens = 8_192
+			maxTokens = this.getModel().info.maxTokens
 		}
 
 		const requestOptions: OpenAI.Chat.ChatCompletionCreateParams = {
@@ -177,7 +177,7 @@ export class GlamaHandler implements ApiHandler, SingleCompletionHandler {
 			}
 
 			if (this.getModel().id.startsWith("anthropic/")) {
-				requestOptions.max_tokens = 8192
+				requestOptions.max_tokens = this.getModel().info.maxTokens
 			}
 
 			const response = await this.client.chat.completions.create(requestOptions)
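
This handler and the two below make the same substitution: the hardcoded 8_192 cap is dropped in favor of the model's own metadata. A minimal sketch of that lookup, with illustrative types that only approximate the repository's ModelInfo:

	// Illustrative shapes, not the repo's exact types.
	interface ModelInfo {
		maxTokens?: number
		contextWindow: number
	}

	interface HandlerModel {
		id: string
		info: ModelInfo
	}

	// Only Anthropic-backed models get an explicit cap; for other models,
	// leaving max_tokens undefined lets the provider apply its own default.
	function resolveMaxTokens(model: HandlerModel): number | undefined {
		return model.id.startsWith("anthropic/") ? model.info.maxTokens : undefined
	}

resolveMaxTokens is a hypothetical helper used only for illustration; the handlers inline this check.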

+ 3 - 32
src/api/providers/openrouter.ts

@@ -54,20 +54,8 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 
 		// prompt caching: https://openrouter.ai/docs/prompt-caching
 		// this is specifically for claude models (some models may 'support prompt caching' automatically without this)
-		switch (this.getModel().id) {
-			case "anthropic/claude-3.7-sonnet":
-			case "anthropic/claude-3.5-sonnet":
-			case "anthropic/claude-3.5-sonnet:beta":
-			case "anthropic/claude-3.5-sonnet-20240620":
-			case "anthropic/claude-3.5-sonnet-20240620:beta":
-			case "anthropic/claude-3-5-haiku":
-			case "anthropic/claude-3-5-haiku:beta":
-			case "anthropic/claude-3-5-haiku-20241022":
-			case "anthropic/claude-3-5-haiku-20241022:beta":
-			case "anthropic/claude-3-haiku":
-			case "anthropic/claude-3-haiku:beta":
-			case "anthropic/claude-3-opus":
-			case "anthropic/claude-3-opus:beta":
+		switch (true) {
+			case this.getModel().id.startsWith("anthropic/"):
 				openAiMessages[0] = {
 					role: "system",
 					content: [
@@ -103,23 +91,6 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 				break
 		}
 
-		// Not sure how openrouter defaults max tokens when no value is provided, but the anthropic api requires this value and since they offer both 4096 and 8192 variants, we should ensure 8192.
-		// (models usually default to max tokens allowed)
-		let maxTokens: number | undefined
-		switch (this.getModel().id) {
-			case "anthropic/claude-3.7-sonnet":
-			case "anthropic/claude-3.5-sonnet":
-			case "anthropic/claude-3.5-sonnet:beta":
-			case "anthropic/claude-3.5-sonnet-20240620":
-			case "anthropic/claude-3.5-sonnet-20240620:beta":
-			case "anthropic/claude-3-5-haiku":
-			case "anthropic/claude-3-5-haiku:beta":
-			case "anthropic/claude-3-5-haiku-20241022":
-			case "anthropic/claude-3-5-haiku-20241022:beta":
-				maxTokens = 8_192
-				break
-		}
-
 		let defaultTemperature = OPENROUTER_DEFAULT_TEMPERATURE
 		let topP: number | undefined = undefined
 
@@ -140,7 +111,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 		let fullResponseText = ""
 		const stream = await this.client.chat.completions.create({
 			model: this.getModel().id,
-			max_tokens: maxTokens,
+			max_tokens: this.getModel().info.maxTokens,
 			temperature: this.options.modelTemperature ?? defaultTemperature,
 			top_p: topP,
 			messages: openAiMessages,
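
The per-ID case lists are replaced by a prefix check, so new Anthropic model IDs on OpenRouter are covered without further edits. A standalone sketch of the check (hypothetical helper name, not part of the handler):

	// Any model ID under the "anthropic/" namespace gets the prompt-caching
	// system-message treatment and the metadata-driven max_tokens value.
	const isAnthropicModel = (id: string): boolean => id.startsWith("anthropic/")

	isAnthropicModel("anthropic/claude-3.5-sonnet:beta") // true
	isAnthropicModel("openai/gpt-4o") // false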

+ 2 - 2
src/api/providers/unbound.ts

@@ -71,7 +71,7 @@ export class UnboundHandler implements ApiHandler, SingleCompletionHandler {
 		let maxTokens: number | undefined
 
 		if (this.getModel().id.startsWith("anthropic/")) {
-			maxTokens = 8_192
+			maxTokens = this.getModel().info.maxTokens
 		}
 
 		const { data: completion, response } = await this.client.chat.completions
@@ -150,7 +150,7 @@ export class UnboundHandler implements ApiHandler, SingleCompletionHandler {
 			}
 
 			if (this.getModel().id.startsWith("anthropic/")) {
-				requestOptions.max_tokens = 8192
+				requestOptions.max_tokens = this.getModel().info.maxTokens
 			}
 
 			const response = await this.client.chat.completions.create(requestOptions)

+ 56 - 21
src/core/webview/ClineProvider.ts

@@ -1926,6 +1926,17 @@ export class ClineProvider implements vscode.WebviewViewProvider {
 						cacheReadsPrice: parsePrice(rawModel.cached_price),
 					}
 
+					switch (true) {
+						case rawModel.id.startsWith("anthropic/claude-3-7-sonnet"):
+							modelInfo.maxTokens = 16384
+							break
+						case rawModel.id.startsWith("anthropic/"):
+							modelInfo.maxTokens = 8192
+							break
+						default:
+							break
+					}
+
 					models[rawModel.id] = modelInfo
 				}
 			} else {
@@ -2076,6 +2087,17 @@ export class ClineProvider implements vscode.WebviewViewProvider {
 						cacheReadsPrice: parsePrice(rawModel.pricePerToken?.cacheRead),
 					}
 
+					switch (true) {
+						case rawModel.id.startsWith("anthropic/claude-3-7-sonnet"):
+							modelInfo.maxTokens = 16384
+							break
+						case rawModel.id.startsWith("anthropic/"):
+							modelInfo.maxTokens = 8192
+							break
+						default:
+							break
+					}
+
 					models[rawModel.id] = modelInfo
 				}
 			} else {
@@ -2127,46 +2149,46 @@ export class ClineProvider implements vscode.WebviewViewProvider {
 						description: rawModel.description,
 					}
 
-					switch (rawModel.id) {
-						case "anthropic/claude-3.7-sonnet":
-						case "anthropic/claude-3.7-sonnet:beta":
-						case "anthropic/claude-3.5-sonnet":
-						case "anthropic/claude-3.5-sonnet:beta":
-							// NOTE: this needs to be synced with api.ts/openrouter default model info.
+					// NOTE: this needs to be synced with api.ts/openrouter default model info.
+					switch (true) {
+						case rawModel.id.startsWith("anthropic/claude-3.7-sonnet"):
 							modelInfo.supportsComputerUse = true
 							modelInfo.supportsPromptCache = true
 							modelInfo.cacheWritesPrice = 3.75
 							modelInfo.cacheReadsPrice = 0.3
+							modelInfo.maxTokens = 16384
 							break
-						case "anthropic/claude-3.5-sonnet-20240620":
-						case "anthropic/claude-3.5-sonnet-20240620:beta":
+						case rawModel.id.startsWith("anthropic/claude-3.5-sonnet-20240620"):
 							modelInfo.supportsPromptCache = true
 							modelInfo.cacheWritesPrice = 3.75
 							modelInfo.cacheReadsPrice = 0.3
+							modelInfo.maxTokens = 8192
 							break
-						case "anthropic/claude-3-5-haiku":
-						case "anthropic/claude-3-5-haiku:beta":
-						case "anthropic/claude-3-5-haiku-20241022":
-						case "anthropic/claude-3-5-haiku-20241022:beta":
-						case "anthropic/claude-3.5-haiku":
-						case "anthropic/claude-3.5-haiku:beta":
-						case "anthropic/claude-3.5-haiku-20241022":
-						case "anthropic/claude-3.5-haiku-20241022:beta":
+						case rawModel.id.startsWith("anthropic/claude-3.5-sonnet"):
+							modelInfo.supportsComputerUse = true
+							modelInfo.supportsPromptCache = true
+							modelInfo.cacheWritesPrice = 3.75
+							modelInfo.cacheReadsPrice = 0.3
+							modelInfo.maxTokens = 8192
+							break
+						case rawModel.id.startsWith("anthropic/claude-3-5-haiku"):
 							modelInfo.supportsPromptCache = true
 							modelInfo.cacheWritesPrice = 1.25
 							modelInfo.cacheReadsPrice = 0.1
+							modelInfo.maxTokens = 8192
 							break
-						case "anthropic/claude-3-opus":
-						case "anthropic/claude-3-opus:beta":
+						case rawModel.id.startsWith("anthropic/claude-3-opus"):
 							modelInfo.supportsPromptCache = true
 							modelInfo.cacheWritesPrice = 18.75
 							modelInfo.cacheReadsPrice = 1.5
+							modelInfo.maxTokens = 8192
 							break
-						case "anthropic/claude-3-haiku":
-						case "anthropic/claude-3-haiku:beta":
+						case rawModel.id.startsWith("anthropic/claude-3-haiku"):
+						default:
 							modelInfo.supportsPromptCache = true
 							modelInfo.cacheWritesPrice = 0.3
 							modelInfo.cacheReadsPrice = 0.03
+							modelInfo.maxTokens = 8192
 							break
 					}
 
@@ -2200,7 +2222,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
 			if (response.data) {
 				const rawModels: Record<string, any> = response.data
 				for (const [modelId, model] of Object.entries(rawModels)) {
-					models[modelId] = {
+					const modelInfo: ModelInfo = {
 						maxTokens: model?.maxTokens ? parseInt(model.maxTokens) : undefined,
 						contextWindow: model?.contextWindow ? parseInt(model.contextWindow) : 0,
 						supportsImages: model?.supportsImages ?? false,
@@ -2211,6 +2233,19 @@ export class ClineProvider implements vscode.WebviewViewProvider {
 						cacheWritesPrice: model?.cacheWritePrice ? parseFloat(model.cacheWritePrice) : undefined,
 						cacheReadsPrice: model?.cacheReadPrice ? parseFloat(model.cacheReadPrice) : undefined,
 					}
+
+					switch (true) {
+						case modelId.startsWith("anthropic/claude-3-7-sonnet"):
+							modelInfo.maxTokens = 16384
+							break
+						case modelId.startsWith("anthropic/"):
+							modelInfo.maxTokens = 8192
+							break
+						default:
+							break
+					}
+
+					models[modelId] = modelInfo
 				}
 			}
 			await fs.writeFile(unboundModelsFilePath, JSON.stringify(models))
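
The maxTokens override above now runs in three of the model-fetching branches, each wrapped in switch (true) so the cases are boolean prefix checks. A condensed sketch of the override, using an illustrative ModelInfo shape and a hypothetical helper name (in ClineProvider.ts the switch is inlined per branch):

	interface ModelInfo {
		maxTokens?: number
	}

	function overrideAnthropicMaxTokens(modelId: string, modelInfo: ModelInfo): void {
		switch (true) {
			case modelId.startsWith("anthropic/claude-3-7-sonnet"):
				modelInfo.maxTokens = 16384
				break
			case modelId.startsWith("anthropic/"):
				modelInfo.maxTokens = 8192
				break
			default:
				// Non-Anthropic models keep whatever the provider reported.
				break
		}
	}

Note that the OpenRouter branch matches dotted IDs ("anthropic/claude-3.7-sonnet") while the Glama and Unbound branches match dashed ones ("anthropic/claude-3-7-sonnet"); the sketch uses the dashed form.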

+ 1 - 1
src/shared/api.ts

@@ -97,7 +97,7 @@ export type AnthropicModelId = keyof typeof anthropicModels
 export const anthropicDefaultModelId: AnthropicModelId = "claude-3-7-sonnet-20250219"
 export const anthropicModels = {
 	"claude-3-7-sonnet-20250219": {
-		maxTokens: 64_000,
+		maxTokens: 16384,
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsComputerUse: true,
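
Handlers consume this cap from the shared table rather than hardcoding it. A brief usage sketch, assuming only the exported names visible in this hunk:

	const id: AnthropicModelId = anthropicDefaultModelId // "claude-3-7-sonnet-20250219"
	const cap = anthropicModels[id].maxTokens // 16384 after this change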