
Allow control over maxTokens for thinking models

Chris Estreich 10 months ago
commit 5e53d00ebc
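
In short, thinking-capable models now take their response cap from a new user-configurable `modelMaxTokens` option, and the thinking budget is clamped to at most 80% of that cap and at least 1024 tokens. A minimal standalone sketch of the clamp (the helper name and fallback value are illustrative, not part of the commit):

// Hypothetical helper mirroring the clamp added to anthropic.ts and openrouter.ts.
function clampThinkingBudget(modelMaxTokens: number | undefined, anthropicThinking: number | undefined): number {
	const maxTokens = modelMaxTokens || 64_000 // assumed fallback; the handlers fall back to modelInfo.maxTokens || 8192
	const maxBudgetTokens = Math.floor(maxTokens * 0.8) // budget may use at most 80% of max tokens
	return Math.max(Math.min(anthropicThinking ?? maxBudgetTokens, maxBudgetTokens), 1024) // and at least 1024
}

// e.g. clampThinkingBudget(64_000, 100_000) === 51_200, clampThinkingBudget(8_192, undefined) === 6_553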

+ 10 - 2
src/api/providers/anthropic.ts

@@ -31,7 +31,7 @@ export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
 		let stream: AnthropicStream<Anthropic.Messages.RawMessageStreamEvent>
 		const cacheControl: CacheControlEphemeral = { type: "ephemeral" }
 		let { id: modelId, info: modelInfo } = this.getModel()
-		const maxTokens = modelInfo.maxTokens || 8192
+		const maxTokens = this.options.modelMaxTokens || modelInfo.maxTokens || 8192
 		let temperature = this.options.modelTemperature ?? ANTHROPIC_DEFAULT_TEMPERATURE
 		let thinking: BetaThinkingConfigParam | undefined = undefined
 
@@ -41,7 +41,15 @@ export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
 			// `claude-3-7-sonnet-20250219` model with a thinking budget.
 			// We can handle this more elegantly in the future.
 			modelId = "claude-3-7-sonnet-20250219"
-			const budgetTokens = this.options.anthropicThinking ?? Math.max(maxTokens * 0.8, 1024)
+
+			// Clamp the thinking budget to be at most 80% of max tokens and at
+			// least 1024 tokens.
+			const maxBudgetTokens = Math.floor(maxTokens * 0.8)
+			const budgetTokens = Math.max(
+				Math.min(this.options.anthropicThinking ?? maxBudgetTokens, maxBudgetTokens),
+				1024,
+			)
+
 			thinking = { type: "enabled", budget_tokens: budgetTokens }
 			temperature = 1.0
 		}

+ 10 - 3
src/api/providers/openrouter.ts

@@ -108,12 +108,19 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 			topP = 0.95
 		}
 
+		const maxTokens = this.options.modelMaxTokens || modelInfo.maxTokens
 		let temperature = this.options.modelTemperature ?? defaultTemperature
 		let thinking: BetaThinkingConfigParam | undefined = undefined
 
 		if (modelInfo.thinking) {
-			const maxTokens = modelInfo.maxTokens || 8192
-			const budgetTokens = this.options.anthropicThinking ?? Math.max(maxTokens * 0.8, 1024)
+			// Clamp the thinking budget to be at most 80% of max tokens and at
+			// least 1024 tokens.
+			const maxBudgetTokens = Math.floor((maxTokens || 8192) * 0.8)
+			const budgetTokens = Math.max(
+				Math.min(this.options.anthropicThinking ?? maxBudgetTokens, maxBudgetTokens),
+				1024,
+			)
+
 			thinking = { type: "enabled", budget_tokens: budgetTokens }
 			temperature = 1.0
 		}
@@ -271,7 +278,7 @@ export async function getOpenRouterModels() {
 					modelInfo.supportsPromptCache = true
 					modelInfo.cacheWritesPrice = 3.75
 					modelInfo.cacheReadsPrice = 0.3
-					modelInfo.maxTokens = 16384
+					modelInfo.maxTokens = 64_000
 					break
 				case rawModel.id.startsWith("anthropic/claude-3.5-sonnet-20240620"):
 					modelInfo.supportsPromptCache = true

+ 14 - 4
src/core/Cline.ts

@@ -87,6 +87,7 @@ export type ClineOptions = {
 
 export class Cline {
 	readonly taskId: string
+	readonly apiConfiguration: ApiConfiguration
 	api: ApiHandler
 	private terminalManager: TerminalManager
 	private urlContentFetcher: UrlContentFetcher
@@ -148,6 +149,7 @@ export class Cline {
 		}
 
 		this.taskId = crypto.randomUUID()
+		this.apiConfiguration = apiConfiguration
 		this.api = buildApiHandler(apiConfiguration)
 		this.terminalManager = new TerminalManager()
 		this.urlContentFetcher = new UrlContentFetcher(provider.context)
@@ -961,13 +963,21 @@ export class Cline {
 				cacheWrites = 0,
 				cacheReads = 0,
 			}: ClineApiReqInfo = JSON.parse(previousRequest)
+
 			const totalTokens = tokensIn + tokensOut + cacheWrites + cacheReads
 
-			const trimmedMessages = truncateConversationIfNeeded(
-				this.apiConversationHistory,
+			const modelInfo = this.api.getModel().info
+			const maxTokens = modelInfo.thinking
+				? this.apiConfiguration.modelMaxTokens || modelInfo.maxTokens
+				: modelInfo.maxTokens
+			const contextWindow = modelInfo.contextWindow
+
+			const trimmedMessages = truncateConversationIfNeeded({
+				messages: this.apiConversationHistory,
 				totalTokens,
-				this.api.getModel().info,
-			)
+				maxTokens,
+				contextWindow,
+			})
 
 			if (trimmedMessages !== this.apiConversationHistory) {
 				await this.overwriteApiConversationHistory(trimmedMessages)

+ 88 - 14
src/core/sliding-window/__tests__/sliding-window.test.ts

@@ -119,11 +119,21 @@ describe("getMaxTokens", () => {
 		// Max tokens = 100000 - 50000 = 50000
 
 		// Below max tokens - no truncation
-		const result1 = truncateConversationIfNeeded(messages, 49999, modelInfo)
+		const result1 = truncateConversationIfNeeded({
+			messages,
+			totalTokens: 49999,
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+		})
 		expect(result1).toEqual(messages)
 
 		// Above max tokens - truncate
-		const result2 = truncateConversationIfNeeded(messages, 50001, modelInfo)
+		const result2 = truncateConversationIfNeeded({
+			messages,
+			totalTokens: 50001,
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+		})
 		expect(result2).not.toEqual(messages)
 		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
 	})
@@ -133,11 +143,21 @@ describe("getMaxTokens", () => {
 		// Max tokens = 100000 - (100000 * 0.2) = 80000
 
 		// Below max tokens - no truncation
-		const result1 = truncateConversationIfNeeded(messages, 79999, modelInfo)
+		const result1 = truncateConversationIfNeeded({
+			messages,
+			totalTokens: 79999,
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+		})
 		expect(result1).toEqual(messages)
 
 		// Above max tokens - truncate
-		const result2 = truncateConversationIfNeeded(messages, 80001, modelInfo)
+		const result2 = truncateConversationIfNeeded({
+			messages,
+			totalTokens: 80001,
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+		})
 		expect(result2).not.toEqual(messages)
 		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
 	})
@@ -147,11 +167,21 @@ describe("getMaxTokens", () => {
 		// Max tokens = 50000 - 10000 = 40000
 
 		// Below max tokens - no truncation
-		const result1 = truncateConversationIfNeeded(messages, 39999, modelInfo)
+		const result1 = truncateConversationIfNeeded({
+			messages,
+			totalTokens: 39999,
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+		})
 		expect(result1).toEqual(messages)
 
 		// Above max tokens - truncate
-		const result2 = truncateConversationIfNeeded(messages, 40001, modelInfo)
+		const result2 = truncateConversationIfNeeded({
+			messages,
+			totalTokens: 40001,
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+		})
 		expect(result2).not.toEqual(messages)
 		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
 	})
@@ -161,11 +191,21 @@ describe("getMaxTokens", () => {
 		// Max tokens = 200000 - 30000 = 170000
 
 		// Below max tokens - no truncation
-		const result1 = truncateConversationIfNeeded(messages, 169999, modelInfo)
+		const result1 = truncateConversationIfNeeded({
+			messages,
+			totalTokens: 169999,
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+		})
 		expect(result1).toEqual(messages)
 
 		// Above max tokens - truncate
-		const result2 = truncateConversationIfNeeded(messages, 170001, modelInfo)
+		const result2 = truncateConversationIfNeeded({
+			messages,
+			totalTokens: 170001,
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+		})
 		expect(result2).not.toEqual(messages)
 		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
 	})
@@ -194,7 +234,12 @@ describe("truncateConversationIfNeeded", () => {
 		const maxTokens = 100000 - 30000 // 70000
 		const totalTokens = 69999 // Below threshold
 
-		const result = truncateConversationIfNeeded(messages, totalTokens, modelInfo)
+		const result = truncateConversationIfNeeded({
+			messages,
+			totalTokens,
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+		})
 		expect(result).toEqual(messages) // No truncation occurs
 	})
 
@@ -207,7 +252,12 @@ describe("truncateConversationIfNeeded", () => {
 		// With 4 messages after the first, 0.5 fraction means remove 2 messages
 		const expectedResult = [messages[0], messages[3], messages[4]]
 
-		const result = truncateConversationIfNeeded(messages, totalTokens, modelInfo)
+		const result = truncateConversationIfNeeded({
+			messages,
+			totalTokens,
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+		})
 		expect(result).toEqual(expectedResult)
 	})
 
@@ -218,14 +268,38 @@ describe("truncateConversationIfNeeded", () => {
 
 		// Test below threshold
 		const belowThreshold = 69999
-		expect(truncateConversationIfNeeded(messages, belowThreshold, modelInfo1)).toEqual(
-			truncateConversationIfNeeded(messages, belowThreshold, modelInfo2),
+		expect(
+			truncateConversationIfNeeded({
+				messages,
+				totalTokens: belowThreshold,
+				contextWindow: modelInfo1.contextWindow,
+				maxTokens: modelInfo1.maxTokens,
+			}),
+		).toEqual(
+			truncateConversationIfNeeded({
+				messages,
+				totalTokens: belowThreshold,
+				contextWindow: modelInfo2.contextWindow,
+				maxTokens: modelInfo2.maxTokens,
+			}),
 		)
 
 		// Test above threshold
 		const aboveThreshold = 70001
-		expect(truncateConversationIfNeeded(messages, aboveThreshold, modelInfo1)).toEqual(
-			truncateConversationIfNeeded(messages, aboveThreshold, modelInfo2),
+		expect(
+			truncateConversationIfNeeded({
+				messages,
+				totalTokens: aboveThreshold,
+				contextWindow: modelInfo1.contextWindow,
+				maxTokens: modelInfo1.maxTokens,
+			}),
+		).toEqual(
+			truncateConversationIfNeeded({
+				messages,
+				totalTokens: aboveThreshold,
+				contextWindow: modelInfo2.contextWindow,
+				maxTokens: modelInfo2.maxTokens,
+			}),
 		)
 	})
 })

+ 18 - 19
src/core/sliding-window/index.ts

@@ -1,7 +1,5 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 
-import { ModelInfo } from "../../shared/api"
-
 /**
  * Truncates a conversation by removing a fraction of the messages.
  *
@@ -26,28 +24,29 @@ export function truncateConversation(
 }
 
 /**
- * Conditionally truncates the conversation messages if the total token count exceeds the model's limit.
+ * Conditionally truncates the conversation messages if the total token count
+ * exceeds the model's limit.
  *
  * @param {Anthropic.Messages.MessageParam[]} messages - The conversation messages.
  * @param {number} totalTokens - The total number of tokens in the conversation.
- * @param {ModelInfo} modelInfo - Model metadata including context window size.
+ * @param {number} contextWindow - The context window size.
+ * @param {number} maxTokens - The maximum number of tokens allowed.
  * @returns {Anthropic.Messages.MessageParam[]} The original or truncated conversation messages.
  */
-export function truncateConversationIfNeeded(
-	messages: Anthropic.Messages.MessageParam[],
-	totalTokens: number,
-	modelInfo: ModelInfo,
-): Anthropic.Messages.MessageParam[] {
-	return totalTokens < getMaxTokens(modelInfo) ? messages : truncateConversation(messages, 0.5)
+
+type TruncateOptions = {
+	messages: Anthropic.Messages.MessageParam[]
+	totalTokens: number
+	contextWindow: number
+	maxTokens?: number
 }
 
-/**
- * Calculates the maximum allowed tokens
- *
- * @param {ModelInfo} modelInfo - The model information containing the context window size.
- * @returns {number} The maximum number of tokens allowed
- */
-function getMaxTokens(modelInfo: ModelInfo): number {
-	// The buffer needs to be at least as large as `modelInfo.maxTokens`, or 20% of the context window if for some reason it's not set.
-	return modelInfo.contextWindow - (modelInfo.maxTokens || modelInfo.contextWindow * 0.2)
+export function truncateConversationIfNeeded({
+	messages,
+	totalTokens,
+	contextWindow,
+	maxTokens,
+}: TruncateOptions): Anthropic.Messages.MessageParam[] {
+	const allowedTokens = contextWindow - (maxTokens || contextWindow * 0.2)
+	return totalTokens < allowedTokens ? messages : truncateConversation(messages, 0.5)
 }
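
For reference, a usage sketch of the new options-object signature of `truncateConversationIfNeeded` (the import path, message literal, and token counts are illustrative only):

import { Anthropic } from "@anthropic-ai/sdk"
import { truncateConversationIfNeeded } from "./core/sliding-window" // path assumed

const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "hello" }]

// With contextWindow 200_000 and maxTokens 64_000, allowedTokens = 200_000 - 64_000 = 136_000,
// so a conversation measured at 140_000 tokens is truncated (half the messages after the first are dropped).
const trimmed = truncateConversationIfNeeded({
	messages,
	totalTokens: 140_000,
	contextWindow: 200_000,
	maxTokens: 64_000,
})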

+ 5 - 0
src/core/webview/ClineProvider.ts

@@ -1671,6 +1671,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
 			requestyModelId,
 			requestyModelInfo,
 			modelTemperature,
+			modelMaxTokens,
 		} = apiConfiguration
 		await Promise.all([
 			this.updateGlobalState("apiProvider", apiProvider),
@@ -1719,6 +1720,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
 			this.updateGlobalState("requestyModelId", requestyModelId),
 			this.updateGlobalState("requestyModelInfo", requestyModelInfo),
 			this.updateGlobalState("modelTemperature", modelTemperature),
+			this.updateGlobalState("modelMaxTokens", modelMaxTokens),
 		])
 		if (this.cline) {
 			this.cline.api = buildApiHandler(apiConfiguration)
@@ -2210,6 +2212,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
 			requestyModelId,
 			requestyModelInfo,
 			modelTemperature,
+			modelMaxTokens,
 			maxOpenTabsContext,
 		] = await Promise.all([
 			this.getGlobalState("apiProvider") as Promise<ApiProvider | undefined>,
@@ -2293,6 +2296,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
 			this.getGlobalState("requestyModelId") as Promise<string | undefined>,
 			this.getGlobalState("requestyModelInfo") as Promise<ModelInfo | undefined>,
 			this.getGlobalState("modelTemperature") as Promise<number | undefined>,
+			this.getGlobalState("modelMaxTokens") as Promise<number | undefined>,
 			this.getGlobalState("maxOpenTabsContext") as Promise<number | undefined>,
 		])
 
@@ -2358,6 +2362,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
 				requestyModelId,
 				requestyModelInfo,
 				modelTemperature,
+				modelMaxTokens,
 			},
 			lastShownAnnouncementId,
 			customInstructions,

+ 3 - 8
src/shared/api.ts

@@ -68,6 +68,7 @@ export interface ApiHandlerOptions {
 	requestyModelId?: string
 	requestyModelInfo?: ModelInfo
 	modelTemperature?: number
+	modelMaxTokens?: number
 }
 
 export type ApiConfiguration = ApiHandlerOptions & {
@@ -92,19 +93,13 @@ export interface ModelInfo {
 	thinking?: boolean
 }
 
-export const THINKING_BUDGET = {
-	step: 1024,
-	min: 1024,
-	default: 8 * 1024,
-}
-
 // Anthropic
 // https://docs.anthropic.com/en/docs/about-claude/models
 export type AnthropicModelId = keyof typeof anthropicModels
 export const anthropicDefaultModelId: AnthropicModelId = "claude-3-7-sonnet-20250219"
 export const anthropicModels = {
 	"claude-3-7-sonnet-20250219:thinking": {
-		maxTokens: 16384,
+		maxTokens: 64_000,
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsComputerUse: true,
@@ -116,7 +111,7 @@ export const anthropicModels = {
 		thinking: true,
 	},
 	"claude-3-7-sonnet-20250219": {
-		maxTokens: 16384,
+		maxTokens: 64_000,
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsComputerUse: true,

+ 1 - 0
src/shared/globalState.ts

@@ -81,5 +81,6 @@ export type GlobalStateKey =
 	| "requestyModelInfo"
 	| "unboundModelInfo"
 	| "modelTemperature"
+	| "modelMaxTokens"
 	| "mistralCodestralUrl"
 	| "maxOpenTabsContext"

+ 49 - 16
webview-ui/src/components/settings/ThinkingBudget.tsx

@@ -1,6 +1,8 @@
+import { useEffect } from "react"
+
 import { Slider } from "@/components/ui"
 
-import { ApiConfiguration, ModelInfo, THINKING_BUDGET } from "../../../../src/shared/api"
+import { ApiConfiguration, ModelInfo } from "../../../../src/shared/api"
 
 interface ThinkingBudgetProps {
 	apiConfiguration: ApiConfiguration
@@ -9,21 +11,52 @@ interface ThinkingBudgetProps {
 }
 
 export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, modelInfo }: ThinkingBudgetProps) => {
-	const budget = apiConfiguration?.anthropicThinking ?? THINKING_BUDGET.default
-
-	return modelInfo && modelInfo.thinking ? (
-		<div className="flex flex-col gap-1 mt-2">
-			<div className="font-medium">Thinking Budget</div>
-			<div className="flex items-center gap-1">
-				<Slider
-					min={THINKING_BUDGET.min}
-					max={(modelInfo.maxTokens ?? THINKING_BUDGET.default) - 1}
-					step={THINKING_BUDGET.step}
-					value={[budget]}
-					onValueChange={(value) => setApiConfigurationField("anthropicThinking", value[0])}
-				/>
-				<div className="w-12 text-sm text-center">{budget}</div>
+	const tokens = apiConfiguration?.modelMaxTokens || modelInfo?.maxTokens || 64_000
+	const tokensMin = 8192
+	const tokensMax = modelInfo?.maxTokens || 64_000
+
+	const thinkingTokens = apiConfiguration?.anthropicThinking || 8192
+	const thinkingTokensMin = 1024
+	const thinkingTokensMax = Math.floor(0.8 * tokens)
+
+	useEffect(() => {
+		if (thinkingTokens > thinkingTokensMax) {
+			setApiConfigurationField("anthropicThinking", thinkingTokensMax)
+		}
+	}, [thinkingTokens, thinkingTokensMax, setApiConfigurationField])
+
+	if (!modelInfo || !modelInfo.thinking) {
+		return null
+	}
+
+	return (
+		<div className="flex flex-col gap-2">
+			<div className="flex flex-col gap-1 mt-2">
+				<div className="font-medium">Max Tokens</div>
+				<div className="flex items-center gap-1">
+					<Slider
+						min={tokensMin}
+						max={tokensMax}
+						step={1024}
+						value={[tokens]}
+						onValueChange={([value]) => setApiConfigurationField("modelMaxTokens", value)}
+					/>
+					<div className="w-12 text-sm text-center">{tokens}</div>
+				</div>
+			</div>
+			<div className="flex flex-col gap-1 mt-2">
+				<div className="font-medium">Max Thinking Tokens</div>
+				<div className="flex items-center gap-1">
+					<Slider
+						min={thinkingTokensMin}
+						max={thinkingTokensMax}
+						step={1024}
+						value={[thinkingTokens]}
+						onValueChange={([value]) => setApiConfigurationField("anthropicThinking", value)}
+					/>
+					<div className="w-12 text-sm text-center">{thinkingTokens}</div>
+				</div>
 			</div>
 		</div>
-	) : null
+	)
 }
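
To illustrate the slider bounds wired up above, with assumed saved values (not taken from the codebase):

// Suppose the user previously saved modelMaxTokens = 16_384 and anthropicThinking = 16_000.
const tokens = 16_384
const thinkingTokensMax = Math.floor(0.8 * tokens) // 13_107
// 16_000 > 13_107, so the useEffect resets the saved budget:
// setApiConfigurationField("anthropicThinking", 13_107)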