Browse source code

Revert "Clamp GPT-5 max output tokens to 20% of context window" (#8582)

Matt Rubens 2 months ago
Parent
Commit
507a600ee9
2 changed files with 31 additions and 8 deletions
  1. src/shared/__tests__/api.spec.ts (+21 −8)
  2. src/shared/api.ts (+10 −0)

+ 21 - 8
src/shared/__tests__/api.spec.ts

@@ -194,17 +194,18 @@ describe("getModelMaxOutputTokens", () => {
 		expect(result).toBe(20_000) // Should use model.maxTokens since it's exactly at 20%
 	})
 
-	test("should apply 20% cap for GPT-5 models like other models", () => {
+	test("should bypass 20% cap for GPT-5 models and use exact configured max tokens", () => {
 		const model: ModelInfo = {
 			contextWindow: 200_000,
 			supportsPromptCache: false,
-			maxTokens: 128_000, // 64% of context window, should be capped
+			maxTokens: 128_000, // 64% of context window, normally would be capped
 		}
 
 		const settings: ProviderSettings = {
 			apiProvider: "openai",
 		}
 
+		// Test various GPT-5 model IDs
 		const gpt5ModelIds = ["gpt-5", "gpt-5-turbo", "GPT-5", "openai/gpt-5-preview", "gpt-5-32k", "GPT-5-TURBO"]
 
 		gpt5ModelIds.forEach((modelId) => {
@@ -214,8 +215,8 @@ describe("getModelMaxOutputTokens", () => {
 				settings,
 				format: "openai",
 			})
-			// Should be capped to 20% of context window: 200_000 * 0.2 = 40_000
-			expect(result).toBe(40_000)
+			// Should use full 128k tokens, not capped to 20% (40k)
+			expect(result).toBe(128_000)
 		})
 	})
 
@@ -245,11 +246,23 @@ describe("getModelMaxOutputTokens", () => {
 		})
 	})
 
-	test("should cap GPT-5 models to min(model.maxTokens, 20% of contextWindow)", () => {
+	test("should handle GPT-5 models with various max token configurations", () => {
 		const testCases = [
-			{ maxTokens: 128_000, contextWindow: 200_000, expected: 40_000 },
-			{ maxTokens: 64_000, contextWindow: 200_000, expected: 40_000 },
-			{ maxTokens: 256_000, contextWindow: 400_000, expected: 80_000 },
+			{
+				maxTokens: 128_000,
+				contextWindow: 200_000,
+				expected: 128_000, // Uses full 128k
+			},
+			{
+				maxTokens: 64_000,
+				contextWindow: 200_000,
+				expected: 64_000, // Uses configured 64k
+			},
+			{
+				maxTokens: 256_000,
+				contextWindow: 400_000,
+				expected: 256_000, // Uses full 256k even though it's 64% of context
+			},
 		]
 
 		testCases.forEach(({ maxTokens, contextWindow, expected }) => {

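For reference, the restored behavior that the updated specs encode, called the same way the tests above call it (a minimal sketch; the type import path is an assumption, since the spec file's import lines are not shown in this hunk):

	import { getModelMaxOutputTokens } from "../api"
	import type { ModelInfo, ProviderSettings } from "../api" // assumed export location

	const model: ModelInfo = {
		contextWindow: 200_000,
		supportsPromptCache: false,
		maxTokens: 128_000,
	}
	const settings: ProviderSettings = { apiProvider: "openai" }

	// GPT-5 family IDs bypass the cap: returns the configured 128_000
	getModelMaxOutputTokens({ modelId: "gpt-5-turbo", model, settings, format: "openai" })

	// Any other model ID is clamped to 20% of the context window: returns 40_000
	getModelMaxOutputTokens({ modelId: "gpt-4o", model, settings, format: "openai" })
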
+ 10 - 0
src/shared/api.ts

@@ -116,7 +116,17 @@ export const getModelMaxOutputTokens = ({
 	}
 
 	// If model has explicit maxTokens, clamp it to 20% of the context window
+	// Exception: GPT-5 models should use their exact configured max output tokens
 	if (model.maxTokens) {
+		// Check if this is a GPT-5 model (case-insensitive)
+		const isGpt5Model = modelId.toLowerCase().includes("gpt-5")
+
+		// GPT-5 models bypass the 20% cap and use their full configured max tokens
+		if (isGpt5Model) {
+			return model.maxTokens
+		}
+
+		// All other models are clamped to 20% of context window
 		return Math.min(model.maxTokens, Math.ceil(model.contextWindow * 0.2))
 	}
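
The clamp arithmetic, reproduced standalone (a self-contained sketch of the same logic for illustration; clampMaxTokens is a hypothetical name, not part of the module):

	// Standalone reproduction of the restored logic in getModelMaxOutputTokens
	function clampMaxTokens(modelId: string, maxTokens: number, contextWindow: number): number {
		// GPT-5 models are matched case-insensitively by substring, as in api.ts
		if (modelId.toLowerCase().includes("gpt-5")) {
			return maxTokens // bypass: exact configured max output tokens
		}
		// All other models: at most 20% of the context window
		return Math.min(maxTokens, Math.ceil(contextWindow * 0.2))
	}

	clampMaxTokens("GPT-5-TURBO", 128_000, 200_000) // 128_000 (bypass)
	clampMaxTokens("gpt-4o", 128_000, 200_000)      // 40_000 = Math.ceil(200_000 * 0.2)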