Browse Source

Update the max-token calculation in model-params to use the shared logic (#5720)

Matt Rubens 5 months ago
parent
commit
8a3dcfb593

+ 7 - 7
packages/types/src/providers/groq.ts

@@ -17,7 +17,7 @@ export const groqDefaultModelId: GroqModelId = "llama-3.3-70b-versatile" // Defa
 export const groqModels = {
 	// Models based on API response: https://api.groq.com/openai/v1/models
 	"llama-3.1-8b-instant": {
-		maxTokens: 131072,
+		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -26,7 +26,7 @@ export const groqModels = {
 		description: "Meta Llama 3.1 8B Instant model, 128K context.",
 	},
 	"llama-3.3-70b-versatile": {
-		maxTokens: 32768,
+		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -53,7 +53,7 @@ export const groqModels = {
 		description: "Meta Llama 4 Maverick 17B Instruct model, 128K context.",
 	},
 	"mistral-saba-24b": {
-		maxTokens: 32768,
+		maxTokens: 8192,
 		contextWindow: 32768,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -62,7 +62,7 @@ export const groqModels = {
 		description: "Mistral Saba 24B model, 32K context.",
 	},
 	"qwen-qwq-32b": {
-		maxTokens: 131072,
+		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -71,7 +71,7 @@ export const groqModels = {
 		description: "Alibaba Qwen QwQ 32B model, 128K context.",
 	},
 	"qwen/qwen3-32b": {
-		maxTokens: 40960,
+		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -80,7 +80,7 @@ export const groqModels = {
 		description: "Alibaba Qwen 3 32B model, 128K context.",
 	},
 	"deepseek-r1-distill-llama-70b": {
-		maxTokens: 131072,
+		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -89,7 +89,7 @@ export const groqModels = {
 		description: "DeepSeek R1 Distill Llama 70B model, 128K context.",
 	},
 	"moonshotai/kimi-k2-instruct": {
-		maxTokens: 131072,
+		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
 		supportsPromptCache: false,

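A quick illustration (not part of the commit) of what callers now see from the updated Groq model table. Before this change, several entries advertised a maxTokens equal to the full context window; they are now capped at 8192. The import path and export shape are assumptions based on the package layout.

// Hypothetical check against the updated Groq model table.
// Import path assumed; groqModels may be re-exported differently.
import { groqModels } from "@roo-code/types"

const info = groqModels["llama-3.1-8b-instant"]
console.log(info.maxTokens) // 8192 (was 131072, the full context window)
console.log(info.contextWindow) // 131072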
+ 11 - 23
src/api/transform/model-params.ts

@@ -5,6 +5,7 @@ import {
 	DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS,
 	shouldUseReasoningBudget,
 	shouldUseReasoningEffort,
+	getModelMaxOutputTokens,
 } from "../../shared/api"
 
 import {
@@ -76,20 +77,25 @@ export function getModelParams({
 		reasoningEffort: customReasoningEffort,
 	} = settings
 
-	let maxTokens = model.maxTokens ?? undefined
+	// Use the centralized logic for computing maxTokens
+	const maxTokens = getModelMaxOutputTokens({
+		modelId,
+		model,
+		settings,
+		format,
+	})
+
 	let temperature = customTemperature ?? defaultTemperature
 	let reasoningBudget: ModelParams["reasoningBudget"] = undefined
 	let reasoningEffort: ModelParams["reasoningEffort"] = undefined
 
 	if (shouldUseReasoningBudget({ model, settings })) {
-		// If `customMaxTokens` is not specified use the default.
-		maxTokens = customMaxTokens ?? DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS
-
 		// If `customMaxThinkingTokens` is not specified use the default.
 		reasoningBudget = customMaxThinkingTokens ?? DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS
 
 		// Reasoning cannot exceed 80% of the `maxTokens` value.
-		if (reasoningBudget > Math.floor(maxTokens * 0.8)) {
+		// maxTokens should always be defined for reasoning budget models, but add a guard just in case
+		if (maxTokens && reasoningBudget > Math.floor(maxTokens * 0.8)) {
 			reasoningBudget = Math.floor(maxTokens * 0.8)
 		}
 
@@ -106,24 +112,6 @@ export function getModelParams({
 		reasoningEffort = customReasoningEffort ?? model.reasoningEffort
 	}
 
-	// TODO: We should consolidate this logic to compute `maxTokens` with
-	// `getModelMaxOutputTokens` in order to maintain a single source of truth.
-
-	const isAnthropic = format === "anthropic" || (format === "openrouter" && modelId.startsWith("anthropic/"))
-
-	// For "Hybrid" reasoning models, we should discard the model's actual
-	// `maxTokens` value if we're not using reasoning. We do this for Anthropic
-	// models only for now. Should we do this for Gemini too?
-	if (model.supportsReasoningBudget && !reasoningBudget && isAnthropic) {
-		maxTokens = ANTHROPIC_DEFAULT_MAX_TOKENS
-	}
-
-	// For Anthropic models we should always make sure a `maxTokens` value is
-	// set.
-	if (!maxTokens && isAnthropic) {
-		maxTokens = ANTHROPIC_DEFAULT_MAX_TOKENS
-	}
-
 	const params: BaseModelParams = { maxTokens, temperature, reasoningEffort, reasoningBudget }
 
 	if (format === "anthropic") {

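For context (not part of the commit), the new guard around the reasoning-budget clamp can be sketched in isolation. The numbers below are made up; only the 80% rule and the nullable maxTokens come from the hunk above.

// Illustrative sketch of the reasoning-budget clamp, assuming example values.
const maxTokens: number | undefined = 16384 // now may be undefined, hence the guard
let reasoningBudget = 20000 // e.g. a user-supplied customMaxThinkingTokens

// Reasoning cannot exceed 80% of `maxTokens`.
if (maxTokens && reasoningBudget > Math.floor(maxTokens * 0.8)) {
	reasoningBudget = Math.floor(maxTokens * 0.8) // 13107 in this example
}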
+ 2 - 2
src/shared/__tests__/api.spec.ts

@@ -76,7 +76,7 @@ describe("getModelMaxOutputTokens", () => {
 		expect(result).toBe(32000)
 	})
 
-	test("should return 20% of context window when maxTokens is undefined", () => {
+	test("should return default of 8192 when maxTokens is undefined", () => {
 		const modelWithoutMaxTokens: ModelInfo = {
 			contextWindow: 100000,
 			supportsPromptCache: true,
@@ -88,7 +88,7 @@ describe("getModelMaxOutputTokens", () => {
 			settings: {},
 		})
 
-		expect(result).toBe(20000) // 20% of 100000
+		expect(result).toBe(8192)
 	})
 
 	test("should return ANTHROPIC_DEFAULT_MAX_TOKENS for Anthropic models that support reasoning budget but aren't using it", () => {

+ 22 - 9
src/shared/api.ts

@@ -58,14 +58,15 @@ export const getModelMaxOutputTokens = ({
 	modelId,
 	model,
 	settings,
+	format,
 }: {
 	modelId: string
 	model: ModelInfo
 	settings?: ProviderSettings
+	format?: "anthropic" | "openai" | "gemini" | "openrouter"
 }): number | undefined => {
 	// Check for Claude Code specific max output tokens setting
 	if (settings?.apiProvider === "claude-code") {
-		// Return the configured value or default to CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS
 		return settings.claudeCodeMaxOutputTokens || CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS
 	}
 
@@ -73,21 +74,33 @@ export const getModelMaxOutputTokens = ({
 		return settings?.modelMaxTokens || DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS
 	}
 
-	const isAnthropicModel = modelId.includes("claude")
+	const isAnthropicContext =
+		modelId.includes("claude") ||
+		format === "anthropic" ||
+		(format === "openrouter" && modelId.startsWith("anthropic/"))
 
-	// For "Hybrid" reasoning models, we should discard the model's actual
-	// `maxTokens` value if we're not using reasoning. We do this for Anthropic
-	// models only for now. Should we do this for Gemini too?
-	if (model.supportsReasoningBudget && isAnthropicModel) {
+	// For "Hybrid" reasoning models, discard the model's actual maxTokens for Anthropic contexts
+	if (model.supportsReasoningBudget && isAnthropicContext) {
 		return ANTHROPIC_DEFAULT_MAX_TOKENS
 	}
 
-	// If maxTokens is 0 or undefined or the full context window, fall back to 20% of context window
+	// For Anthropic contexts, always ensure a maxTokens value is set
+	if (isAnthropicContext && (!model.maxTokens || model.maxTokens === 0)) {
+		return ANTHROPIC_DEFAULT_MAX_TOKENS
+	}
+
+	// If model has explicit maxTokens and it's not the full context window, use it
 	if (model.maxTokens && model.maxTokens !== model.contextWindow) {
 		return model.maxTokens
-	} else {
-		return Math.ceil(model.contextWindow * 0.2)
 	}
+
+	// For non-Anthropic formats without explicit maxTokens, return undefined
+	if (format) {
+		return undefined
+	}
+
+	// Default fallback
+	return ANTHROPIC_DEFAULT_MAX_TOKENS
 }
 
 // GetModelsOptions

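As a rough orientation (not part of the commit), the new fallback order in getModelMaxOutputTokens can be exercised as below. The import path and the minimal model object shapes are assumptions; the expected values follow the hunks and the updated tests above.

// Sketch of the new resolution order; object shapes and import path assumed.
import { getModelMaxOutputTokens } from "../api"

// Explicit maxTokens that is not the full context window -> used directly.
getModelMaxOutputTokens({
	modelId: "llama-3.3-70b-versatile",
	model: { maxTokens: 8192, contextWindow: 131072, supportsPromptCache: false },
}) // 8192

// No maxTokens and no format -> ANTHROPIC_DEFAULT_MAX_TOKENS (8192).
getModelMaxOutputTokens({
	modelId: "some-model",
	model: { contextWindow: 100000, supportsPromptCache: true },
	settings: {},
}) // 8192

// No maxTokens but an explicit non-Anthropic format -> undefined,
// leaving the decision to the caller.
getModelMaxOutputTokens({
	modelId: "some-model",
	model: { contextWindow: 100000, supportsPromptCache: true },
	format: "openai",
}) // undefined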
+ 38 - 38
webview-ui/src/__tests__/ContextWindowProgressLogic.spec.ts

@@ -7,41 +7,41 @@ export {} // This makes the file a proper TypeScript module
 describe("ContextWindowProgress Logic", () => {
 describe("ContextWindowProgress Logic", () => {
 	// Using the shared utility function from model-utils.ts instead of reimplementing it
 	// Using the shared utility function from model-utils.ts instead of reimplementing it
 
 
-	test("calculates correct token distribution with default 20% reservation", () => {
-		const contextWindow = 4000
+	test("calculates correct token distribution with default 8192 reservation", () => {
+		const contextWindow = 10000
 		const contextTokens = 1000
 		const contextTokens = 1000
 
 
 		const result = calculateTokenDistribution(contextWindow, contextTokens)
 		const result = calculateTokenDistribution(contextWindow, contextTokens)
 
 
 		// Expected calculations:
 		// Expected calculations:
-		// reservedForOutput = 0.2 * 4000 = 800
-		// availableSize = 4000 - 1000 - 800 = 2200
-		// total = 1000 + 800 + 2200 = 4000
-		expect(result.reservedForOutput).toBe(800)
-		expect(result.availableSize).toBe(2200)
+		// reservedForOutput = 8192 (ANTHROPIC_DEFAULT_MAX_TOKENS)
+		// availableSize = 10000 - 1000 - 8192 = 808
+		// total = 1000 + 8192 + 808 = 10000
+		expect(result.reservedForOutput).toBe(8192)
+		expect(result.availableSize).toBe(808)
 
 
 		// Check percentages
 		// Check percentages
-		expect(result.currentPercent).toBeCloseTo(25) // 1000/4000 * 100 = 25%
-		expect(result.reservedPercent).toBeCloseTo(20) // 800/4000 * 100 = 20%
-		expect(result.availablePercent).toBeCloseTo(55) // 2200/4000 * 100 = 55%
+		expect(result.currentPercent).toBeCloseTo(10) // 1000/10000 * 100 = 10%
+		expect(result.reservedPercent).toBeCloseTo(81.92) // 8192/10000 * 100 = 81.92%
+		expect(result.availablePercent).toBeCloseTo(8.08) // 808/10000 * 100 = 8.08%
 
 
 		// Verify percentages sum to 100%
 		// Verify percentages sum to 100%
 		expect(result.currentPercent + result.reservedPercent + result.availablePercent).toBeCloseTo(100)
 		expect(result.currentPercent + result.reservedPercent + result.availablePercent).toBeCloseTo(100)
 	})
 	})
 
 
 	test("uses provided maxTokens when available instead of default calculation", () => {
 	test("uses provided maxTokens when available instead of default calculation", () => {
-		const contextWindow = 4000
+		const contextWindow = 10000
 		const contextTokens = 1000
 		const contextTokens = 1000
 
 
-		// First calculate with default 20% reservation (no maxTokens provided)
+		// First calculate with default 8192 reservation (no maxTokens provided)
 		const defaultResult = calculateTokenDistribution(contextWindow, contextTokens)
 		const defaultResult = calculateTokenDistribution(contextWindow, contextTokens)
 
 
 		// Then calculate with custom maxTokens value
 		// Then calculate with custom maxTokens value
-		const customMaxTokens = 1500 // Custom maxTokens instead of default 20%
+		const customMaxTokens = 1500 // Custom maxTokens instead of default 8192
 		const customResult = calculateTokenDistribution(contextWindow, contextTokens, customMaxTokens)
 		const customResult = calculateTokenDistribution(contextWindow, contextTokens, customMaxTokens)
 
 
-		// VERIFY MAXTOKEN PROP EFFECT: Custom maxTokens should be used directly instead of 20% calculation
-		const defaultReserved = Math.ceil(contextWindow * 0.2) // 800 tokens (20% of 4000)
+		// VERIFY MAXTOKEN PROP EFFECT: Custom maxTokens should be used directly instead of 8192 calculation
+		const defaultReserved = 8192 // ANTHROPIC_DEFAULT_MAX_TOKENS
 		expect(defaultResult.reservedForOutput).toBe(defaultReserved)
 		expect(defaultResult.reservedForOutput).toBe(defaultReserved)
 		expect(customResult.reservedForOutput).toBe(customMaxTokens) // Should use exact provided value
 		expect(customResult.reservedForOutput).toBe(customMaxTokens) // Should use exact provided value
 
 
@@ -51,13 +51,13 @@ describe("ContextWindowProgress Logic", () => {
 		expect(defaultTooltip).not.toBe(customTooltip)
 		expect(defaultTooltip).not.toBe(customTooltip)
 
 
 		// Verify the effect on available space
 		// Verify the effect on available space
-		expect(customResult.availableSize).toBe(4000 - 1000 - 1500) // 1500 tokens available
-		expect(defaultResult.availableSize).toBe(4000 - 1000 - 800) // 2200 tokens available
+		expect(customResult.availableSize).toBe(10000 - 1000 - 1500) // 7500 tokens available
+		expect(defaultResult.availableSize).toBe(10000 - 1000 - 8192) // 808 tokens available
 
 
 		// Verify the effect on percentages
 		// Verify the effect on percentages
-		// With custom maxTokens (1500), the reserved percentage should be higher
-		expect(defaultResult.reservedPercent).toBeCloseTo(20) // 800/4000 * 100 = 20%
-		expect(customResult.reservedPercent).toBeCloseTo(37.5) // 1500/4000 * 100 = 37.5%
+		// With custom maxTokens (1500), the reserved percentage should be lower than default
+		expect(defaultResult.reservedPercent).toBeCloseTo(81.92) // 8192/10000 * 100 = 81.92%
+		expect(customResult.reservedPercent).toBeCloseTo(15) // 1500/10000 * 100 = 15%
 
 
 		// Verify percentages still sum to 100%
 		// Verify percentages still sum to 100%
 		expect(customResult.currentPercent + customResult.reservedPercent + customResult.availablePercent).toBeCloseTo(
 		expect(customResult.currentPercent + customResult.reservedPercent + customResult.availablePercent).toBeCloseTo(
@@ -66,19 +66,19 @@ describe("ContextWindowProgress Logic", () => {
 	})
 	})
 
 
 	test("handles negative input values", () => {
 	test("handles negative input values", () => {
-		const contextWindow = 4000
+		const contextWindow = 10000
 		const contextTokens = -500 // Negative tokens should be handled gracefully
 		const contextTokens = -500 // Negative tokens should be handled gracefully
 
 
 		const result = calculateTokenDistribution(contextWindow, contextTokens)
 		const result = calculateTokenDistribution(contextWindow, contextTokens)
 
 
 		// Expected calculations:
 		// Expected calculations:
 		// safeContextTokens = Math.max(0, -500) = 0
 		// safeContextTokens = Math.max(0, -500) = 0
-		// reservedForOutput = 0.2 * 4000 = 800
-		// availableSize = 4000 - 0 - 800 = 3200
-		// total = 0 + 800 + 3200 = 4000
-		expect(result.currentPercent).toBeCloseTo(0) // 0/4000 * 100 = 0%
-		expect(result.reservedPercent).toBeCloseTo(20) // 800/4000 * 100 = 20%
-		expect(result.availablePercent).toBeCloseTo(80) // 3200/4000 * 100 = 80%
+		// reservedForOutput = 8192 (ANTHROPIC_DEFAULT_MAX_TOKENS)
+		// availableSize = 10000 - 0 - 8192 = 1808
+		// total = 0 + 8192 + 1808 = 10000
+		expect(result.currentPercent).toBeCloseTo(0) // 0/10000 * 100 = 0%
+		expect(result.reservedPercent).toBeCloseTo(81.92) // 8192/10000 * 100 = 81.92%
+		expect(result.availablePercent).toBeCloseTo(18.08) // 1808/10000 * 100 = 18.08%
 	})
 	})
 
 
 	test("handles zero context window gracefully", () => {
 	test("handles zero context window gracefully", () => {
@@ -87,9 +87,9 @@ describe("ContextWindowProgress Logic", () => {
 
 
 		const result = calculateTokenDistribution(contextWindow, contextTokens)
 		const result = calculateTokenDistribution(contextWindow, contextTokens)
 
 
-		// With zero context window, everything should be zero
-		expect(result.reservedForOutput).toBe(0)
-		expect(result.availableSize).toBe(0)
+		// With zero context window, the function uses ANTHROPIC_DEFAULT_MAX_TOKENS but available size becomes 0
+		expect(result.reservedForOutput).toBe(8192) // ANTHROPIC_DEFAULT_MAX_TOKENS
+		expect(result.availableSize).toBe(0) // max(0, 0 - 1000 - 8192) = 0
 
 
 		// The percentages maintain total of 100% even with zero context window
 		// The percentages maintain total of 100% even with zero context window
 		// due to how the division handles this edge case
 		// due to how the division handles this edge case
@@ -98,20 +98,20 @@ describe("ContextWindowProgress Logic", () => {
 	})
 	})
 
 
 	test("handles case where tokens exceed context window", () => {
 	test("handles case where tokens exceed context window", () => {
-		const contextWindow = 4000
-		const contextTokens = 5000 // More tokens than the window size
+		const contextWindow = 10000
+		const contextTokens = 12000 // More tokens than the window size
 
 
 		const result = calculateTokenDistribution(contextWindow, contextTokens)
 		const result = calculateTokenDistribution(contextWindow, contextTokens)
 
 
 		// Expected calculations:
 		// Expected calculations:
-		// reservedForOutput = 0.2 * 4000 = 800
-		// availableSize = Math.max(0, 4000 - 5000 - 800) = 0
-		expect(result.reservedForOutput).toBe(800)
+		// reservedForOutput = 8192 (ANTHROPIC_DEFAULT_MAX_TOKENS)
+		// availableSize = Math.max(0, 10000 - 12000 - 8192) = 0
+		expect(result.reservedForOutput).toBe(8192)
 		expect(result.availableSize).toBe(0)
 		expect(result.availableSize).toBe(0)
 
 
-		// Percentages should be calculated based on total (5000 + 800 + 0 = 5800)
-		expect(result.currentPercent).toBeCloseTo((5000 / 5800) * 100)
-		expect(result.reservedPercent).toBeCloseTo((800 / 5800) * 100)
+		// Percentages should be calculated based on total (12000 + 8192 + 0 = 20192)
+		expect(result.currentPercent).toBeCloseTo((12000 / 20192) * 100)
+		expect(result.reservedPercent).toBeCloseTo((8192 / 20192) * 100)
 		expect(result.availablePercent).toBeCloseTo(0)
 		expect(result.availablePercent).toBeCloseTo(0)
 
 
 		// Verify percentages sum to 100%
 		// Verify percentages sum to 100%

+ 11 - 11
webview-ui/src/utils/__tests__/model-utils.spec.ts

@@ -17,33 +17,33 @@ describe("calculateTokenDistribution", () => {
 		expect(Math.round(result.currentPercent + result.reservedPercent + result.availablePercent)).toBe(100)
 	})
 
-	it("should default to 20% of context window when maxTokens not provided", () => {
-		const contextWindow = 10000
+	it("should default to 8192 when maxTokens not provided", () => {
+		const contextWindow = 20000
 		const contextTokens = 5000
 
 		const result = calculateTokenDistribution(contextWindow, contextTokens)
 
-		expect(result.reservedForOutput).toBe(2000) // 20% of 10000
-		expect(result.availableSize).toBe(3000) // 10000 - 5000 - 2000
+		expect(result.reservedForOutput).toBe(8192)
+		expect(result.availableSize).toBe(6808) // 20000 - 5000 - 8192
 	})
 
 	it("should handle negative or zero inputs by using positive fallbacks", () => {
 		const result = calculateTokenDistribution(-1000, -500)
 
 		expect(result.currentPercent).toBe(0)
-		expect(result.reservedPercent).toBe(0)
+		expect(result.reservedPercent).toBe(100) // 8192 / 8192 = 100%
 		expect(result.availablePercent).toBe(0)
-		expect(result.reservedForOutput).toBe(0) // With negative inputs, both context window and tokens become 0, so 20% of 0 is 0
-		expect(result.availableSize).toBe(0)
+		expect(result.reservedForOutput).toBe(8192) // Uses ANTHROPIC_DEFAULT_MAX_TOKENS
+		expect(result.availableSize).toBe(0) // max(0, 0 - 0 - 8192) = 0
 	})
 
-	it("should handle zero total tokens without division by zero errors", () => {
-		const result = calculateTokenDistribution(0, 0, 0)
+	it("should handle zero context window without division by zero errors", () => {
+		const result = calculateTokenDistribution(0, 0)
 
 		expect(result.currentPercent).toBe(0)
-		expect(result.reservedPercent).toBe(0)
+		expect(result.reservedPercent).toBe(100) // When contextWindow is 0, reserved gets 100%
 		expect(result.availablePercent).toBe(0)
-		expect(result.reservedForOutput).toBe(0)
+		expect(result.reservedForOutput).toBe(8192) // Uses ANTHROPIC_DEFAULT_MAX_TOKENS when no maxTokens provided
 		expect(result.availableSize).toBe(0)
 	})
 })

+ 5 - 3
webview-ui/src/utils/model-utils.ts

@@ -1,3 +1,5 @@
+import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "@roo-code/types"
+
 /**
  * Result of token distribution calculation
  */
@@ -34,7 +36,7 @@ export interface TokenDistributionResult {
  *
  * @param contextWindow The total size of the context window
  * @param contextTokens The number of tokens currently used
- * @param maxTokens Optional override for tokens reserved for model output (otherwise uses 20% of window)
+ * @param maxTokens Optional override for tokens reserved for model output (otherwise uses 8192)
  * @returns Distribution of tokens with percentages and raw numbers
  */
 export const calculateTokenDistribution = (
@@ -47,9 +49,9 @@ export const calculateTokenDistribution = (
 	const safeContextTokens = Math.max(0, contextTokens)
 
 	// Get the actual max tokens value from the model
-	// If maxTokens is valid, use it, otherwise reserve 20% of the context window as a default
+	// If maxTokens is valid (positive and not equal to context window), use it, otherwise reserve 8192 tokens as a default
 	const reservedForOutput =
-		maxTokens && maxTokens > 0 && maxTokens !== safeContextWindow ? maxTokens : Math.ceil(safeContextWindow * 0.2)
+		maxTokens && maxTokens > 0 && maxTokens !== safeContextWindow ? maxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS
 
 	// Calculate sizes directly without buffer display
 	const availableSize = Math.max(0, safeContextWindow - safeContextTokens - reservedForOutput)
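
For orientation (not part of the commit), this is how the webview utility behaves with the new default. The values mirror the updated specs above; the import path is an assumption about how a component inside webview-ui/src would pull it in.

// Illustrative usage of calculateTokenDistribution after this change.
import { calculateTokenDistribution } from "@/utils/model-utils" // path assumed

// No maxTokens provided: 8192 (ANTHROPIC_DEFAULT_MAX_TOKENS) is reserved.
const withDefault = calculateTokenDistribution(20000, 5000)
console.log(withDefault.reservedForOutput) // 8192
console.log(withDefault.availableSize) // 20000 - 5000 - 8192 = 6808

// Explicit maxTokens (e.g. from getModelMaxOutputTokens) is used as-is.
const withExplicit = calculateTokenDistribution(20000, 5000, 1500)
console.log(withExplicit.reservedForOutput) // 1500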