Revert "feat: Add reasoning budget support to Bedrock models for extended thinking (#4201)" (#4626)

Matt Rubens · 6 months ago
commit 47e84b4572
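
For context, when extended thinking was enabled the reverted code assembled a ConverseStream payload roughly like the sketch below. This is a minimal reconstruction from the removed lines in src/api/providers/bedrock.ts; the model ID and numeric values are illustrative, and the message/system content is placeholder.

const payload = {
	modelId: "anthropic.claude-sonnet-4-20250514-v1:0",
	messages: [{ role: "user", content: [{ text: "Hello" }] }],
	system: [{ text: "System prompt" }],
	inferenceConfig: {
		maxTokens: 8192,
		// topP was omitted whenever thinking was enabled
	},
	anthropic_version: "bedrock-2023-05-31",
	additionalModelRequestFields: {
		thinking: {
			type: "enabled",
			// metadata.thinking.maxThinkingTokens, the model's reasoning budget, or 4096
			budget_tokens: 4096,
		},
	},
}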

+ 0 - 3
packages/types/src/providers/bedrock.ts

@@ -73,7 +73,6 @@ export const bedrockModels = {
 		supportsImages: true,
 		supportsComputerUse: true,
 		supportsPromptCache: true,
-		supportsReasoningBudget: true,
 		inputPrice: 3.0,
 		outputPrice: 15.0,
 		cacheWritesPrice: 3.75,
@@ -88,7 +87,6 @@ export const bedrockModels = {
 		supportsImages: true,
 		supportsComputerUse: true,
 		supportsPromptCache: true,
-		supportsReasoningBudget: true,
 		inputPrice: 15.0,
 		outputPrice: 75.0,
 		cacheWritesPrice: 18.75,
@@ -103,7 +101,6 @@ export const bedrockModels = {
 		supportsImages: true,
 		supportsComputerUse: true,
 		supportsPromptCache: true,
-		supportsReasoningBudget: true,
 		inputPrice: 3.0,
 		outputPrice: 15.0,
 		cacheWritesPrice: 3.75,

+ 0 - 280
src/api/providers/__tests__/bedrock-reasoning.test.ts

@@ -1,280 +0,0 @@
-import { AwsBedrockHandler } from "../bedrock"
-import { BedrockRuntimeClient, ConverseStreamCommand } from "@aws-sdk/client-bedrock-runtime"
-import { logger } from "../../../utils/logging"
-
-// Mock the AWS SDK
-jest.mock("@aws-sdk/client-bedrock-runtime")
-jest.mock("../../../utils/logging")
-
-// Store the command payload for verification
-let capturedPayload: any = null
-
-describe("AwsBedrockHandler - Extended Thinking", () => {
-	let handler: AwsBedrockHandler
-	let mockSend: jest.Mock
-
-	beforeEach(() => {
-		capturedPayload = null
-		mockSend = jest.fn()
-
-		// Mock ConverseStreamCommand to capture the payload
-		;(ConverseStreamCommand as unknown as jest.Mock).mockImplementation((payload) => {
-			capturedPayload = payload
-			return {
-				input: payload,
-			}
-		})
-		;(BedrockRuntimeClient as jest.Mock).mockImplementation(() => ({
-			send: mockSend,
-			config: { region: "us-east-1" },
-		}))
-		;(logger.info as jest.Mock).mockImplementation(() => {})
-		;(logger.error as jest.Mock).mockImplementation(() => {})
-	})
-
-	afterEach(() => {
-		jest.clearAllMocks()
-	})
-
-	describe("Extended Thinking Support", () => {
-		it("should include thinking parameter for Claude Sonnet 4 when reasoning is enabled", async () => {
-			handler = new AwsBedrockHandler({
-				apiProvider: "bedrock",
-				apiModelId: "anthropic.claude-sonnet-4-20250514-v1:0",
-				awsRegion: "us-east-1",
-				enableReasoningEffort: true,
-				modelMaxTokens: 8192,
-				modelMaxThinkingTokens: 4096,
-			})
-
-			// Mock the stream response
-			mockSend.mockResolvedValue({
-				stream: (async function* () {
-					yield {
-						messageStart: { role: "assistant" },
-					}
-					yield {
-						contentBlockStart: {
-							content_block: { type: "thinking", thinking: "Let me think..." },
-							contentBlockIndex: 0,
-						},
-					}
-					yield {
-						contentBlockDelta: {
-							delta: { type: "thinking_delta", thinking: " about this problem." },
-						},
-					}
-					yield {
-						contentBlockStart: {
-							start: { text: "Here's the answer:" },
-							contentBlockIndex: 1,
-						},
-					}
-					yield {
-						metadata: {
-							usage: { inputTokens: 100, outputTokens: 50 },
-						},
-					}
-				})(),
-			})
-
-			const messages = [{ role: "user" as const, content: "Test message" }]
-			const stream = handler.createMessage("System prompt", messages)
-
-			const chunks = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Verify the command was called with the correct payload
-			expect(mockSend).toHaveBeenCalledTimes(1)
-			expect(capturedPayload).toBeDefined()
-			expect(capturedPayload.additionalModelRequestFields).toBeDefined()
-			expect(capturedPayload.additionalModelRequestFields.thinking).toEqual({
-				type: "enabled",
-				budget_tokens: 4096, // Uses the full modelMaxThinkingTokens value
-			})
-
-			// Verify reasoning chunks were yielded
-			const reasoningChunks = chunks.filter((c) => c.type === "reasoning")
-			expect(reasoningChunks).toHaveLength(2)
-			expect(reasoningChunks[0].text).toBe("Let me think...")
-			expect(reasoningChunks[1].text).toBe(" about this problem.")
-
-			// Verify that topP is NOT present when thinking is enabled
-			expect(capturedPayload.inferenceConfig).not.toHaveProperty("topP")
-		})
-
-		it("should pass thinking parameters from metadata", async () => {
-			handler = new AwsBedrockHandler({
-				apiProvider: "bedrock",
-				apiModelId: "anthropic.claude-3-7-sonnet-20250219-v1:0",
-				awsRegion: "us-east-1",
-			})
-
-			mockSend.mockResolvedValue({
-				stream: (async function* () {
-					yield { messageStart: { role: "assistant" } }
-					yield { metadata: { usage: { inputTokens: 100, outputTokens: 50 } } }
-				})(),
-			})
-
-			const messages = [{ role: "user" as const, content: "Test message" }]
-			const metadata = {
-				taskId: "test-task",
-				thinking: {
-					enabled: true,
-					maxTokens: 16384,
-					maxThinkingTokens: 8192,
-				},
-			}
-
-			const stream = handler.createMessage("System prompt", messages, metadata)
-			const chunks = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Verify the thinking parameter was passed correctly
-			expect(mockSend).toHaveBeenCalledTimes(1)
-			expect(capturedPayload).toBeDefined()
-			expect(capturedPayload.additionalModelRequestFields).toBeDefined()
-			expect(capturedPayload.additionalModelRequestFields.thinking).toEqual({
-				type: "enabled",
-				budget_tokens: 8192,
-			})
-
-			// Verify that topP is NOT present when thinking is enabled via metadata
-			expect(capturedPayload.inferenceConfig).not.toHaveProperty("topP")
-		})
-
-		it("should log when extended thinking is enabled", async () => {
-			handler = new AwsBedrockHandler({
-				apiProvider: "bedrock",
-				apiModelId: "anthropic.claude-opus-4-20250514-v1:0",
-				awsRegion: "us-east-1",
-				enableReasoningEffort: true,
-				modelMaxThinkingTokens: 5000,
-			})
-
-			mockSend.mockResolvedValue({
-				stream: (async function* () {
-					yield { messageStart: { role: "assistant" } }
-				})(),
-			})
-
-			const messages = [{ role: "user" as const, content: "Test" }]
-			const stream = handler.createMessage("System prompt", messages)
-
-			for await (const chunk of stream) {
-				// consume stream
-			}
-
-			// Verify logging
-			expect(logger.info).toHaveBeenCalledWith(
-				expect.stringContaining("Extended thinking enabled"),
-				expect.objectContaining({
-					ctx: "bedrock",
-					modelId: "anthropic.claude-opus-4-20250514-v1:0",
-				}),
-			)
-		})
-
-		it("should include topP when thinking is disabled", async () => {
-			handler = new AwsBedrockHandler({
-				apiProvider: "bedrock",
-				apiModelId: "anthropic.claude-3-7-sonnet-20250219-v1:0",
-				awsRegion: "us-east-1",
-				// Note: no enableReasoningEffort = true, so thinking is disabled
-			})
-
-			mockSend.mockResolvedValue({
-				stream: (async function* () {
-					yield { messageStart: { role: "assistant" } }
-					yield {
-						contentBlockStart: {
-							start: { text: "Hello" },
-							contentBlockIndex: 0,
-						},
-					}
-					yield {
-						contentBlockDelta: {
-							delta: { text: " world" },
-						},
-					}
-					yield { metadata: { usage: { inputTokens: 100, outputTokens: 50 } } }
-				})(),
-			})
-
-			const messages = [{ role: "user" as const, content: "Test message" }]
-			const stream = handler.createMessage("System prompt", messages)
-
-			const chunks = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Verify that topP IS present when thinking is disabled
-			expect(mockSend).toHaveBeenCalledTimes(1)
-			expect(capturedPayload).toBeDefined()
-			expect(capturedPayload.inferenceConfig).toHaveProperty("topP", 0.1)
-
-			// Verify that additionalModelRequestFields is not present or empty
-			expect(capturedPayload.additionalModelRequestFields).toBeUndefined()
-		})
-
-		it("should enable reasoning when enableReasoningEffort is true in settings", async () => {
-			handler = new AwsBedrockHandler({
-				apiProvider: "bedrock",
-				apiModelId: "anthropic.claude-sonnet-4-20250514-v1:0",
-				awsRegion: "us-east-1",
-				enableReasoningEffort: true, // This should trigger reasoning
-				modelMaxThinkingTokens: 4096,
-			})
-
-			mockSend.mockResolvedValue({
-				stream: (async function* () {
-					yield { messageStart: { role: "assistant" } }
-					yield {
-						contentBlockStart: {
-							content_block: { type: "thinking", thinking: "Let me think..." },
-							contentBlockIndex: 0,
-						},
-					}
-					yield {
-						contentBlockDelta: {
-							delta: { type: "thinking_delta", thinking: " about this problem." },
-						},
-					}
-					yield { metadata: { usage: { inputTokens: 100, outputTokens: 50 } } }
-				})(),
-			})
-
-			const messages = [{ role: "user" as const, content: "Test message" }]
-			const stream = handler.createMessage("System prompt", messages)
-
-			const chunks = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Verify thinking was enabled via settings
-			expect(mockSend).toHaveBeenCalledTimes(1)
-			expect(capturedPayload).toBeDefined()
-			expect(capturedPayload.additionalModelRequestFields).toBeDefined()
-			expect(capturedPayload.additionalModelRequestFields.thinking).toEqual({
-				type: "enabled",
-				budget_tokens: 4096,
-			})
-
-			// Verify that topP is NOT present when thinking is enabled via settings
-			expect(capturedPayload.inferenceConfig).not.toHaveProperty("topP")
-
-			// Verify reasoning chunks were yielded
-			const reasoningChunks = chunks.filter((c) => c.type === "reasoning")
-			expect(reasoningChunks).toHaveLength(2)
-			expect(reasoningChunks[0].text).toBe("Let me think...")
-			expect(reasoningChunks[1].text).toBe(" about this problem.")
-		})
-	})
-})

+ 39 - 237
src/api/providers/bedrock.ts

@@ -29,8 +29,6 @@ import { logger } from "../../utils/logging"
 import { MultiPointStrategy } from "../transform/cache-strategy/multi-point-strategy"
 import { ModelInfo as CacheModelInfo } from "../transform/cache-strategy/types"
 import { convertToBedrockConverseMessages as sharedConverter } from "../transform/bedrock-converse-format"
-import { getModelParams } from "../transform/model-params"
-import { shouldUseReasoningBudget } from "../../shared/api"
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 
 /************************************************************************************
@@ -42,63 +40,8 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ".
 // Define interface for Bedrock inference config
 interface BedrockInferenceConfig {
 	maxTokens: number
-	temperature?: number
-	topP?: number
-}
-
-// Define interface for Bedrock thinking configuration
-interface BedrockThinkingConfig {
-	thinking: {
-		type: "enabled"
-		budget_tokens: number
-	}
-	[key: string]: any // Add index signature to be compatible with DocumentType
-}
-
-// Define interface for Bedrock payload
-interface BedrockPayload {
-	modelId: BedrockModelId | string
-	messages: Message[]
-	system?: SystemContentBlock[]
-	inferenceConfig: BedrockInferenceConfig
-	anthropic_version?: string
-	additionalModelRequestFields?: BedrockThinkingConfig
-}
-
-// Define specific types for content block events to avoid 'as any' usage
-// These handle the multiple possible structures returned by AWS SDK
-interface ContentBlockStartEvent {
-	start?: {
-		text?: string
-		thinking?: string
-	}
-	contentBlockIndex?: number
-	// Alternative structure used by some AWS SDK versions
-	content_block?: {
-		type?: string
-		thinking?: string
-	}
-	// Official AWS SDK structure for reasoning (as documented)
-	contentBlock?: {
-		type?: string
-		thinking?: string
-		reasoningContent?: {
-			text?: string
-		}
-	}
-}
-
-interface ContentBlockDeltaEvent {
-	delta?: {
-		text?: string
-		thinking?: string
-		type?: string
-		// AWS SDK structure for reasoning content deltas
-		reasoningContent?: {
-			text?: string
-		}
-	}
-	contentBlockIndex?: number
+	temperature: number
+	topP: number
 }
 
 // Define types for stream events based on AWS SDK
@@ -110,8 +53,18 @@ export interface StreamEvent {
 		stopReason?: "end_turn" | "tool_use" | "max_tokens" | "stop_sequence"
 		additionalModelResponseFields?: Record<string, unknown>
 	}
-	contentBlockStart?: ContentBlockStartEvent
-	contentBlockDelta?: ContentBlockDeltaEvent
+	contentBlockStart?: {
+		start?: {
+			text?: string
+		}
+		contentBlockIndex?: number
+	}
+	contentBlockDelta?: {
+		delta?: {
+			text?: string
+		}
+		contentBlockIndex?: number
+	}
 	metadata?: {
 		usage?: {
 			inputTokens: number
@@ -302,17 +255,13 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 	override async *createMessage(
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
-		metadata?: ApiHandlerCreateMessageMetadata & {
-			thinking?: {
-				enabled: boolean
-				maxTokens?: number
-				maxThinkingTokens?: number
-			}
-		},
+		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
-		const modelConfig = this.getModel()
+		let modelConfig = this.getModel()
+		// Handle cross-region inference
 		const usePromptCache = Boolean(this.options.awsUsePromptCache && this.supportsAwsPromptCache(modelConfig))
 
+		// Generate a conversation ID based on the first few messages to maintain cache consistency
 		const conversationId =
 			messages.length > 0
 				? `conv_${messages[0].role}_${
@@ -322,6 +271,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 					}`
 				: "default_conversation"
 
+		// Convert messages to Bedrock format, passing the model info and conversation ID
 		const formatted = this.convertToBedrockConverseMessages(
 			messages,
 			systemPrompt,
@@ -330,50 +280,18 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 			conversationId,
 		)
 
-		let additionalModelRequestFields: BedrockThinkingConfig | undefined
-		let thinkingEnabled = false
-
-		// Determine if thinking should be enabled
-		// metadata?.thinking?.enabled: Explicitly enabled through API metadata (direct request)
-		// shouldUseReasoningBudget(): Enabled through user settings (enableReasoningEffort = true)
-		const isThinkingExplicitlyEnabled = metadata?.thinking?.enabled
-		const isThinkingEnabledBySettings =
-			shouldUseReasoningBudget({ model: modelConfig.info, settings: this.options }) &&
-			modelConfig.reasoning &&
-			modelConfig.reasoningBudget
-
-		if ((isThinkingExplicitlyEnabled || isThinkingEnabledBySettings) && modelConfig.info.supportsReasoningBudget) {
-			thinkingEnabled = true
-			additionalModelRequestFields = {
-				thinking: {
-					type: "enabled",
-					budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096,
-				},
-			}
-			logger.info("Extended thinking enabled for Bedrock request", {
-				ctx: "bedrock",
-				modelId: modelConfig.id,
-				thinking: additionalModelRequestFields.thinking,
-			})
-		}
-
+		// Construct the payload
 		const inferenceConfig: BedrockInferenceConfig = {
-			maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
-			temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
+			maxTokens: modelConfig.info.maxTokens as number,
+			temperature: this.options.modelTemperature as number,
+			topP: 0.1,
 		}
 
-		if (!thinkingEnabled) {
-			inferenceConfig.topP = 0.1
-		}
-
-		const payload: BedrockPayload = {
+		const payload = {
 			modelId: modelConfig.id,
 			messages: formatted.messages,
 			system: formatted.system,
 			inferenceConfig,
-			...(additionalModelRequestFields && { additionalModelRequestFields }),
-			// Add anthropic_version when using thinking features
-			...(thinkingEnabled && { anthropic_version: "bedrock-2023-05-31" }),
 		}
 
 		// Create AbortController with 10 minute timeout
@@ -479,74 +397,19 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 				}
 
 				// Handle content blocks
-				if (streamEvent.contentBlockStart) {
-					const cbStart = streamEvent.contentBlockStart
-
-					// Check if this is a reasoning block (official AWS SDK structure)
-					if (cbStart.contentBlock?.reasoningContent) {
-						if (cbStart.contentBlockIndex && cbStart.contentBlockIndex > 0) {
-							yield { type: "reasoning", text: "\n" }
-						}
-						yield {
-							type: "reasoning",
-							text: cbStart.contentBlock.reasoningContent.text || "",
-						}
-					}
-					// Check for thinking block - handle both possible AWS SDK structures
-					// cbStart.contentBlock: newer/official structure
-					// cbStart.content_block: alternative structure seen in some AWS SDK versions
-					else if (cbStart.contentBlock?.type === "thinking" || cbStart.content_block?.type === "thinking") {
-						const contentBlock = cbStart.contentBlock || cbStart.content_block
-						if (cbStart.contentBlockIndex && cbStart.contentBlockIndex > 0) {
-							yield { type: "reasoning", text: "\n" }
-						}
-						if (contentBlock?.thinking) {
-							yield {
-								type: "reasoning",
-								text: contentBlock.thinking,
-							}
-						}
-					} else if (cbStart.start?.text) {
-						yield {
-							type: "text",
-							text: cbStart.start.text,
-						}
+				if (streamEvent.contentBlockStart?.start?.text) {
+					yield {
+						type: "text",
+						text: streamEvent.contentBlockStart.start.text,
 					}
 					continue
 				}
 
 				// Handle content deltas
-				if (streamEvent.contentBlockDelta) {
-					const cbDelta = streamEvent.contentBlockDelta
-					const delta = cbDelta.delta
-
-					// Process reasoning and text content deltas
-					// Multiple structures are supported for AWS SDK compatibility:
-					// - delta.reasoningContent.text: official AWS docs structure for reasoning
-					// - delta.thinking: alternative structure for thinking content
-					// - delta.text: standard text content
-					if (delta) {
-						// Check for reasoningContent property (official AWS SDK structure)
-						if (delta.reasoningContent?.text) {
-							yield {
-								type: "reasoning",
-								text: delta.reasoningContent.text,
-							}
-							continue
-						}
-
-						// Handle alternative thinking structure (fallback for older SDK versions)
-						if (delta.type === "thinking_delta" && delta.thinking) {
-							yield {
-								type: "reasoning",
-								text: delta.thinking,
-							}
-						} else if (delta.text) {
-							yield {
-								type: "text",
-								text: delta.text,
-							}
-						}
+				if (streamEvent.contentBlockDelta?.delta?.text) {
+					yield {
+						type: "text",
+						text: streamEvent.contentBlockDelta.delta.text,
 					}
 					continue
 				}
@@ -581,17 +444,10 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 		try {
 			const modelConfig = this.getModel()
 
-			// For completePrompt, thinking is typically not used, but we should still check
-			// if thinking was somehow enabled in the model config
-			const thinkingEnabled =
-				shouldUseReasoningBudget({ model: modelConfig.info, settings: this.options }) &&
-				modelConfig.reasoning &&
-				modelConfig.reasoningBudget
-
 			const inferenceConfig: BedrockInferenceConfig = {
-				maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
-				temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
-				...(thinkingEnabled ? {} : { topP: 0.1 }), // Only set topP when thinking is NOT enabled
+				maxTokens: modelConfig.info.maxTokens as number,
+				temperature: this.options.modelTemperature as number,
+				topP: 0.1,
 			}
 
 			// For completePrompt, use a unique conversation ID based on the prompt
@@ -866,24 +722,9 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 		return model
 	}
 
-	override getModel(): {
-		id: BedrockModelId | string
-		info: ModelInfo
-		maxTokens?: number
-		temperature?: number
-		reasoning?: any
-		reasoningBudget?: number
-	} {
+	override getModel(): { id: BedrockModelId | string; info: ModelInfo } {
 		if (this.costModelConfig?.id?.trim().length > 0) {
-			// Get model params for cost model config
-			const params = getModelParams({
-				format: "anthropic",
-				modelId: this.costModelConfig.id,
-				model: this.costModelConfig.info,
-				settings: this.options,
-				defaultTemperature: BEDROCK_DEFAULT_TEMPERATURE,
-			})
-			return { ...this.costModelConfig, ...params }
+			return this.costModelConfig
 		}
 
 		let modelConfig = undefined
@@ -911,24 +752,8 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 			}
 		}
 
-		// Get model params including reasoning configuration
-		const params = getModelParams({
-			format: "anthropic",
-			modelId: modelConfig.id,
-			model: modelConfig.info,
-			settings: this.options,
-			defaultTemperature: BEDROCK_DEFAULT_TEMPERATURE,
-		})
-
 		// Don't override maxTokens/contextWindow here; handled in getModelById (and includes user overrides)
-		return { ...modelConfig, ...params } as {
-			id: BedrockModelId | string
-			info: ModelInfo
-			maxTokens?: number
-			temperature?: number
-			reasoning?: any
-			reasoningBudget?: number
-		}
+		return modelConfig as { id: BedrockModelId | string; info: ModelInfo }
 	}
 
 	/************************************************************************************
@@ -1080,33 +905,10 @@ Suggestions:
 			messageTemplate: `Invalid ARN format. ARN should follow the pattern: arn:aws:bedrock:region:account-id:resource-type/resource-name`,
 			logLevel: "error",
 		},
-		VALIDATION_ERROR: {
-			patterns: [
-				"input tag",
-				"does not match any of the expected tags",
-				"field required",
-				"validation",
-				"invalid parameter",
-			],
-			messageTemplate: `Parameter validation error: {errorMessage}
-
-This error indicates that the request parameters don't match AWS Bedrock's expected format.
-
-Common causes:
-1. Extended thinking parameter format is incorrect
-2. Model-specific parameters are not supported by this model
-3. API parameter structure has changed
-
-Please check:
-- Model supports the requested features (extended thinking, etc.)
-- Parameter format matches AWS Bedrock specification
-- Model ID is correct for the requested features`,
-			logLevel: "error",
-		},
 		// Default/generic error
 		GENERIC: {
 			patterns: [], // Empty patterns array means this is the default
-			messageTemplate: `Unknown Error: {errorMessage}`,
+			messageTemplate: `Unknown Error`,
 			logLevel: "error",
 		},
 	}

+ 6 - 8
webview-ui/src/components/settings/ApiOptions.tsx

@@ -465,14 +465,12 @@ const ApiOptions = ({
 				</>
 			)}
 
-			{selectedProviderModels.length > 0 && (
-				<ThinkingBudget
-					key={`${selectedProvider}-${selectedModelId}`}
-					apiConfiguration={apiConfiguration}
-					setApiConfigurationField={setApiConfigurationField}
-					modelInfo={selectedModelInfo}
-				/>
-			)}
+			<ThinkingBudget
+				key={`${selectedProvider}-${selectedModelId}`}
+				apiConfiguration={apiConfiguration}
+				setApiConfigurationField={setApiConfigurationField}
+				modelInfo={selectedModelInfo}
+			/>
 
 			{!fromWelcomeView && (
 				<>

+ 1 - 5
webview-ui/src/components/settings/ThinkingBudget.tsx

@@ -65,11 +65,7 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
 						<div className="flex items-center gap-1">
 							<Slider
 								min={8192}
-								max={Math.max(
-									modelInfo.maxTokens || 8192,
-									customMaxOutputTokens,
-									DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS,
-								)}
+								max={modelInfo.maxTokens}
 								step={1024}
 								value={[customMaxOutputTokens]}
 								onValueChange={([value]) => setApiConfigurationField("modelMaxTokens", value)}

+ 16 - 16
webview-ui/src/components/settings/providers/Bedrock.tsx

@@ -108,24 +108,24 @@ export const Bedrock = ({ apiConfiguration, setApiConfigurationField, selectedMo
 				{t("settings:providers.awsCrossRegion")}
 			</Checkbox>
 			{selectedModelInfo?.supportsPromptCache && (
-				<>
-					<Checkbox
-						checked={apiConfiguration?.awsUsePromptCache || false}
-						onChange={handleInputChange("awsUsePromptCache", noTransform)}>
-						<div className="flex items-center gap-1">
-							<span>{t("settings:providers.enablePromptCaching")}</span>
-							<i
-								className="codicon codicon-info text-vscode-descriptionForeground"
-								title={t("settings:providers.enablePromptCachingTitle")}
-								style={{ fontSize: "12px" }}
-							/>
-						</div>
-					</Checkbox>
-					<div className="text-sm text-vscode-descriptionForeground ml-6 mt-1">
-						{t("settings:providers.cacheUsageNote")}
+				<Checkbox
+					checked={apiConfiguration?.awsUsePromptCache || false}
+					onChange={handleInputChange("awsUsePromptCache", noTransform)}>
+					<div className="flex items-center gap-1">
+						<span>{t("settings:providers.enablePromptCaching")}</span>
+						<i
+							className="codicon codicon-info text-vscode-descriptionForeground"
+							title={t("settings:providers.enablePromptCachingTitle")}
+							style={{ fontSize: "12px" }}
+						/>
 					</div>
-				</>
+				</Checkbox>
 			)}
+			<div>
+				<div className="text-sm text-vscode-descriptionForeground ml-6 mt-1">
+					{t("settings:providers.cacheUsageNote")}
+				</div>
+			</div>
 			<Checkbox
 				checked={awsEndpointSelected}
 				onChange={(isChecked) => {