Просмотр исходного кода

fix: use actual max_completion_tokens from OpenRouter API (#5240)

- Update parseOpenRouterModel to always use actual max_completion_tokens from OpenRouter API
- Remove artificial restriction that only reasoning budget and Anthropic models get their actual max tokens
- Fall back to 20% of context window when max_completion_tokens is null
- Update getModelMaxOutputTokens to use same fallback logic for consistency
- Update tests to reflect new behavior
- Fixes issue where reserved tokens showed ~209k instead of actual model limits (e.g. GPT-4o: 16,384)
Shariq Riaz 5 месяцев назад
Родитель
Commit
9faf28acac
3 измененных файлов с 7 добавлено и 6 удалено
  1. 1 3
      src/api/providers/fetchers/openrouter.ts
  2. 3 2
      src/shared/__tests__/api.spec.ts
  3. 3 1
      src/shared/api.ts

+ 1 - 3
src/api/providers/fetchers/openrouter.ts

@@ -190,10 +190,8 @@ export const parseOpenRouterModel = ({
 
 	const supportsPromptCache = typeof cacheWritesPrice !== "undefined" && typeof cacheReadsPrice !== "undefined"
 
-	const useMaxTokens = OPEN_ROUTER_REASONING_BUDGET_MODELS.has(id) || id.startsWith("anthropic/")
-
 	const modelInfo: ModelInfo = {
-		maxTokens: useMaxTokens ? maxTokens || 0 : 0,
+		maxTokens: maxTokens || Math.ceil(model.context_length * 0.2),
 		contextWindow: model.context_length,
 		supportsImages: modality?.includes("image") ?? false,
 		supportsPromptCache,

+ 3 - 2
src/shared/__tests__/api.spec.ts

@@ -66,7 +66,7 @@ describe("getMaxTokensForModel", () => {
 		expect(getModelMaxOutputTokens({ modelId, model, settings })).toBe(8000)
 	})
 
-	it("should return undefined for non-thinking models with undefined maxTokens", () => {
+	it("should return 20% of context window for non-thinking models with undefined maxTokens", () => {
 		const model: ModelInfo = {
 			contextWindow: 200_000,
 			supportsPromptCache: true,
@@ -76,7 +76,8 @@ describe("getMaxTokensForModel", () => {
 			modelMaxTokens: 4000,
 		}
 
-		expect(getModelMaxOutputTokens({ modelId, model, settings })).toBeUndefined()
+		// Should return 20% of context window when maxTokens is undefined
+		expect(getModelMaxOutputTokens({ modelId, model, settings })).toBe(40000)
 	})
 
 	test("should return maxTokens from modelInfo when thinking is false", () => {

+ 3 - 1
src/shared/api.ts

@@ -71,7 +71,9 @@ export const getModelMaxOutputTokens = ({
 		return ANTHROPIC_DEFAULT_MAX_TOKENS
 	}
 
-	return model.maxTokens ?? undefined
+	// If maxTokens is 0 or undefined, fall back to 20% of context window
+	// This matches the sliding window logic
+	return model.maxTokens || Math.ceil(model.contextWindow * 0.2)
 }
 
 // GetModelsOptions