Revert "feat: Add reasoning budget support to Bedrock models for extended thinking (#4201)" (#4626)

Matt Rubens · 6 months ago
commit 47e84b4572
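
For context, when extended thinking was enabled the reverted code assembled a ConverseStream payload roughly like the sketch below. This is a minimal reconstruction from the removed lines in src/api/providers/bedrock.ts; the model ID and numeric values are illustrative, and the message/system content is placeholder.

const payload = {
	modelId: "anthropic.claude-sonnet-4-20250514-v1:0",
	messages: [{ role: "user", content: [{ text: "Hello" }] }],
	system: [{ text: "System prompt" }],
	inferenceConfig: {
		maxTokens: 8192,
		// topP was omitted whenever thinking was enabled
	},
	anthropic_version: "bedrock-2023-05-31",
	additionalModelRequestFields: {
		thinking: {
			type: "enabled",
			// metadata.thinking.maxThinkingTokens, the model's reasoning budget, or 4096
			budget_tokens: 4096,
		},
	},
}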

+ 0 - 3
packages/types/src/providers/bedrock.ts

@@ -73,7 +73,6 @@ export const bedrockModels = {
 		supportsImages: true,
 		supportsComputerUse: true,
 		supportsPromptCache: true,
-		supportsReasoningBudget: true,
 		inputPrice: 3.0,
 		outputPrice: 15.0,
 		cacheWritesPrice: 3.75,
@@ -88,7 +87,6 @@ export const bedrockModels = {
 		supportsImages: true,
 		supportsComputerUse: true,
 		supportsPromptCache: true,
-		supportsReasoningBudget: true,
 		inputPrice: 15.0,
 		outputPrice: 75.0,
 		cacheWritesPrice: 18.75,
@@ -103,7 +101,6 @@ export const bedrockModels = {
 		supportsImages: true,
 		supportsComputerUse: true,
 		supportsPromptCache: true,
-		supportsReasoningBudget: true,
 		inputPrice: 3.0,
 		outputPrice: 15.0,
 		cacheWritesPrice: 3.75,

+ 0 - 280
src/api/providers/__tests__/bedrock-reasoning.test.ts

@@ -1,280 +0,0 @@
-import { AwsBedrockHandler } from "../bedrock"
-import { BedrockRuntimeClient, ConverseStreamCommand } from "@aws-sdk/client-bedrock-runtime"
-import { logger } from "../../../utils/logging"
-
-// Mock the AWS SDK
-jest.mock("@aws-sdk/client-bedrock-runtime")
-jest.mock("../../../utils/logging")
-
-// Store the command payload for verification
-let capturedPayload: any = null
-
-describe("AwsBedrockHandler - Extended Thinking", () => {
-	let handler: AwsBedrockHandler
-	let mockSend: jest.Mock
-
-	beforeEach(() => {
-		capturedPayload = null
-		mockSend = jest.fn()
-
-		// Mock ConverseStreamCommand to capture the payload
-		;(ConverseStreamCommand as unknown as jest.Mock).mockImplementation((payload) => {
-			capturedPayload = payload
-			return {
-				input: payload,
-			}
-		})
-		;(BedrockRuntimeClient as jest.Mock).mockImplementation(() => ({
-			send: mockSend,
-			config: { region: "us-east-1" },
-		}))
-		;(logger.info as jest.Mock).mockImplementation(() => {})
-		;(logger.error as jest.Mock).mockImplementation(() => {})
-	})
-
-	afterEach(() => {
-		jest.clearAllMocks()
-	})
-
-	describe("Extended Thinking Support", () => {
-		it("should include thinking parameter for Claude Sonnet 4 when reasoning is enabled", async () => {
-			handler = new AwsBedrockHandler({
-				apiProvider: "bedrock",
-				apiModelId: "anthropic.claude-sonnet-4-20250514-v1:0",
-				awsRegion: "us-east-1",
-				enableReasoningEffort: true,
-				modelMaxTokens: 8192,
-				modelMaxThinkingTokens: 4096,
-			})
-
-			// Mock the stream response
-			mockSend.mockResolvedValue({
-				stream: (async function* () {
-					yield {
-						messageStart: { role: "assistant" },
-					}
-					yield {
-						contentBlockStart: {
-							content_block: { type: "thinking", thinking: "Let me think..." },
-							contentBlockIndex: 0,
-						},
-					}
-					yield {
-						contentBlockDelta: {
-							delta: { type: "thinking_delta", thinking: " about this problem." },
-						},
-					}
-					yield {
-						contentBlockStart: {
-							start: { text: "Here's the answer:" },
-							contentBlockIndex: 1,
-						},
-					}
-					yield {
-						metadata: {
-							usage: { inputTokens: 100, outputTokens: 50 },
-						},
-					}
-				})(),
-			})
-
-			const messages = [{ role: "user" as const, content: "Test message" }]
-			const stream = handler.createMessage("System prompt", messages)
-
-			const chunks = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Verify the command was called with the correct payload
-			expect(mockSend).toHaveBeenCalledTimes(1)
-			expect(capturedPayload).toBeDefined()
-			expect(capturedPayload.additionalModelRequestFields).toBeDefined()
-			expect(capturedPayload.additionalModelRequestFields.thinking).toEqual({
-				type: "enabled",
-				budget_tokens: 4096, // Uses the full modelMaxThinkingTokens value
-			})
-
-			// Verify reasoning chunks were yielded
-			const reasoningChunks = chunks.filter((c) => c.type === "reasoning")
-			expect(reasoningChunks).toHaveLength(2)
-			expect(reasoningChunks[0].text).toBe("Let me think...")
-			expect(reasoningChunks[1].text).toBe(" about this problem.")
-
-			// Verify that topP is NOT present when thinking is enabled
-			expect(capturedPayload.inferenceConfig).not.toHaveProperty("topP")
-		})
-
-		it("should pass thinking parameters from metadata", async () => {
-			handler = new AwsBedrockHandler({
-				apiProvider: "bedrock",
-				apiModelId: "anthropic.claude-3-7-sonnet-20250219-v1:0",
-				awsRegion: "us-east-1",
-			})
-
-			mockSend.mockResolvedValue({
-				stream: (async function* () {
-					yield { messageStart: { role: "assistant" } }
-					yield { metadata: { usage: { inputTokens: 100, outputTokens: 50 } } }
-				})(),
-			})
-
-			const messages = [{ role: "user" as const, content: "Test message" }]
-			const metadata = {
-				taskId: "test-task",
-				thinking: {
-					enabled: true,
-					maxTokens: 16384,
-					maxThinkingTokens: 8192,
-				},
-			}
-
-			const stream = handler.createMessage("System prompt", messages, metadata)
-			const chunks = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Verify the thinking parameter was passed correctly
-			expect(mockSend).toHaveBeenCalledTimes(1)
-			expect(capturedPayload).toBeDefined()
-			expect(capturedPayload.additionalModelRequestFields).toBeDefined()
-			expect(capturedPayload.additionalModelRequestFields.thinking).toEqual({
-				type: "enabled",
-				budget_tokens: 8192,
-			})
-
-			// Verify that topP is NOT present when thinking is enabled via metadata
-			expect(capturedPayload.inferenceConfig).not.toHaveProperty("topP")
-		})
-
-		it("should log when extended thinking is enabled", async () => {
-			handler = new AwsBedrockHandler({
-				apiProvider: "bedrock",
-				apiModelId: "anthropic.claude-opus-4-20250514-v1:0",
-				awsRegion: "us-east-1",
-				enableReasoningEffort: true,
-				modelMaxThinkingTokens: 5000,
-			})
-
-			mockSend.mockResolvedValue({
-				stream: (async function* () {
-					yield { messageStart: { role: "assistant" } }
-				})(),
-			})
-
-			const messages = [{ role: "user" as const, content: "Test" }]
-			const stream = handler.createMessage("System prompt", messages)
-
-			for await (const chunk of stream) {
-				// consume stream
-			}
-
-			// Verify logging
-			expect(logger.info).toHaveBeenCalledWith(
-				expect.stringContaining("Extended thinking enabled"),
-				expect.objectContaining({
-					ctx: "bedrock",
-					modelId: "anthropic.claude-opus-4-20250514-v1:0",
-				}),
-			)
-		})
-
-		it("should include topP when thinking is disabled", async () => {
-			handler = new AwsBedrockHandler({
-				apiProvider: "bedrock",
-				apiModelId: "anthropic.claude-3-7-sonnet-20250219-v1:0",
-				awsRegion: "us-east-1",
-				// Note: no enableReasoningEffort = true, so thinking is disabled
-			})
-
-			mockSend.mockResolvedValue({
-				stream: (async function* () {
-					yield { messageStart: { role: "assistant" } }
-					yield {
-						contentBlockStart: {
-							start: { text: "Hello" },
-							contentBlockIndex: 0,
-						},
-					}
-					yield {
-						contentBlockDelta: {
-							delta: { text: " world" },
-						},
-					}
-					yield { metadata: { usage: { inputTokens: 100, outputTokens: 50 } } }
-				})(),
-			})
-
-			const messages = [{ role: "user" as const, content: "Test message" }]
-			const stream = handler.createMessage("System prompt", messages)
-
-			const chunks = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Verify that topP IS present when thinking is disabled
-			expect(mockSend).toHaveBeenCalledTimes(1)
-			expect(capturedPayload).toBeDefined()
-			expect(capturedPayload.inferenceConfig).toHaveProperty("topP", 0.1)
-
-			// Verify that additionalModelRequestFields is not present or empty
-			expect(capturedPayload.additionalModelRequestFields).toBeUndefined()
-		})
-
-		it("should enable reasoning when enableReasoningEffort is true in settings", async () => {
-			handler = new AwsBedrockHandler({
-				apiProvider: "bedrock",
-				apiModelId: "anthropic.claude-sonnet-4-20250514-v1:0",
-				awsRegion: "us-east-1",
-				enableReasoningEffort: true, // This should trigger reasoning
-				modelMaxThinkingTokens: 4096,
-			})
-
-			mockSend.mockResolvedValue({
-				stream: (async function* () {
-					yield { messageStart: { role: "assistant" } }
-					yield {
-						contentBlockStart: {
-							content_block: { type: "thinking", thinking: "Let me think..." },
-							contentBlockIndex: 0,
-						},
-					}
-					yield {
-						contentBlockDelta: {
-							delta: { type: "thinking_delta", thinking: " about this problem." },
-						},
-					}
-					yield { metadata: { usage: { inputTokens: 100, outputTokens: 50 } } }
-				})(),
-			})
-
-			const messages = [{ role: "user" as const, content: "Test message" }]
-			const stream = handler.createMessage("System prompt", messages)
-
-			const chunks = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Verify thinking was enabled via settings
-			expect(mockSend).toHaveBeenCalledTimes(1)
-			expect(capturedPayload).toBeDefined()
-			expect(capturedPayload.additionalModelRequestFields).toBeDefined()
-			expect(capturedPayload.additionalModelRequestFields.thinking).toEqual({
-				type: "enabled",
-				budget_tokens: 4096,
-			})
-
-			// Verify that topP is NOT present when thinking is enabled via settings
-			expect(capturedPayload.inferenceConfig).not.toHaveProperty("topP")
-
-			// Verify reasoning chunks were yielded
-			const reasoningChunks = chunks.filter((c) => c.type === "reasoning")
-			expect(reasoningChunks).toHaveLength(2)
-			expect(reasoningChunks[0].text).toBe("Let me think...")
-			expect(reasoningChunks[1].text).toBe(" about this problem.")
-		})
-	})
-})

+ 39 - 237
src/api/providers/bedrock.ts

@@ -29,8 +29,6 @@ import { logger } from "../../utils/logging"
 import { MultiPointStrategy } from "../transform/cache-strategy/multi-point-strategy"
 import { ModelInfo as CacheModelInfo } from "../transform/cache-strategy/types"
 import { convertToBedrockConverseMessages as sharedConverter } from "../transform/bedrock-converse-format"
-import { getModelParams } from "../transform/model-params"
-import { shouldUseReasoningBudget } from "../../shared/api"
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 
 /************************************************************************************
@@ -42,63 +40,8 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ".
 // Define interface for Bedrock inference config
 interface BedrockInferenceConfig {
 	maxTokens: number
-	temperature?: number
-	topP?: number
-}
-
-// Define interface for Bedrock thinking configuration
-interface BedrockThinkingConfig {
-	thinking: {
-		type: "enabled"
-		budget_tokens: number
-	}
-	[key: string]: any // Add index signature to be compatible with DocumentType
-}
-
-// Define interface for Bedrock payload
-interface BedrockPayload {
-	modelId: BedrockModelId | string
-	messages: Message[]
-	system?: SystemContentBlock[]
-	inferenceConfig: BedrockInferenceConfig
-	anthropic_version?: string
-	additionalModelRequestFields?: BedrockThinkingConfig
-}
-
-// Define specific types for content block events to avoid 'as any' usage
-// These handle the multiple possible structures returned by AWS SDK
-interface ContentBlockStartEvent {
-	start?: {
-		text?: string
-		thinking?: string
-	}
-	contentBlockIndex?: number
-	// Alternative structure used by some AWS SDK versions
-	content_block?: {
-		type?: string
-		thinking?: string
-	}
-	// Official AWS SDK structure for reasoning (as documented)
-	contentBlock?: {
-		type?: string
-		thinking?: string
-		reasoningContent?: {
-			text?: string
-		}
-	}
-}
-
-interface ContentBlockDeltaEvent {
-	delta?: {
-		text?: string
-		thinking?: string
-		type?: string
-		// AWS SDK structure for reasoning content deltas
-		reasoningContent?: {
-			text?: string
-		}
-	}
-	contentBlockIndex?: number
+	temperature: number
+	topP: number
 }
 
 // Define types for stream events based on AWS SDK
@@ -110,8 +53,18 @@ export interface StreamEvent {
 		stopReason?: "end_turn" | "tool_use" | "max_tokens" | "stop_sequence"
 		additionalModelResponseFields?: Record<string, unknown>
 	}
-	contentBlockStart?: ContentBlockStartEvent
-	contentBlockDelta?: ContentBlockDeltaEvent
+	contentBlockStart?: {
+		start?: {
+			text?: string
+		}
+		contentBlockIndex?: number
+	}
+	contentBlockDelta?: {
+		delta?: {
+			text?: string
+		}
+		contentBlockIndex?: number
+	}
 	metadata?: {
 		usage?: {
 			inputTokens: number
@@ -302,17 +255,13 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 	override async *createMessage(
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
-		metadata?: ApiHandlerCreateMessageMetadata & {
-			thinking?: {
-				enabled: boolean
-				maxTokens?: number
-				maxThinkingTokens?: number
-			}
-		},
+		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
-		const modelConfig = this.getModel()
+		let modelConfig = this.getModel()
+		// Handle cross-region inference
 		const usePromptCache = Boolean(this.options.awsUsePromptCache && this.supportsAwsPromptCache(modelConfig))
 
+		// Generate a conversation ID based on the first few messages to maintain cache consistency
 		const conversationId =
 			messages.length > 0
 				? `conv_${messages[0].role}_${
@@ -322,6 +271,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 					}`
 				: "default_conversation"
 
+		// Convert messages to Bedrock format, passing the model info and conversation ID
 		const formatted = this.convertToBedrockConverseMessages(
 			messages,
 			systemPrompt,
@@ -330,50 +280,18 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 			conversationId,
 		)
 
-		let additionalModelRequestFields: BedrockThinkingConfig | undefined
-		let thinkingEnabled = false
-
-		// Determine if thinking should be enabled
-		// metadata?.thinking?.enabled: Explicitly enabled through API metadata (direct request)
-		// shouldUseReasoningBudget(): Enabled through user settings (enableReasoningEffort = true)
-		const isThinkingExplicitlyEnabled = metadata?.thinking?.enabled
-		const isThinkingEnabledBySettings =
-			shouldUseReasoningBudget({ model: modelConfig.info, settings: this.options }) &&
-			modelConfig.reasoning &&
-			modelConfig.reasoningBudget
-
-		if ((isThinkingExplicitlyEnabled || isThinkingEnabledBySettings) && modelConfig.info.supportsReasoningBudget) {
-			thinkingEnabled = true
-			additionalModelRequestFields = {
-				thinking: {
-					type: "enabled",
-					budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096,
-				},
-			}
-			logger.info("Extended thinking enabled for Bedrock request", {
-				ctx: "bedrock",
-				modelId: modelConfig.id,
-				thinking: additionalModelRequestFields.thinking,
-			})
-		}
-
+		// Construct the payload
 		const inferenceConfig: BedrockInferenceConfig = {
-			maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
-			temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
+			maxTokens: modelConfig.info.maxTokens as number,
+			temperature: this.options.modelTemperature as number,
+			topP: 0.1,
 		}
 
-		if (!thinkingEnabled) {
-			inferenceConfig.topP = 0.1
-		}
-
-		const payload: BedrockPayload = {
+		const payload = {
 			modelId: modelConfig.id,
 			messages: formatted.messages,
 			system: formatted.system,
 			inferenceConfig,
-			...(additionalModelRequestFields && { additionalModelRequestFields }),
-			// Add anthropic_version when using thinking features
-			...(thinkingEnabled && { anthropic_version: "bedrock-2023-05-31" }),
 		}
 
 		// Create AbortController with 10 minute timeout
@@ -479,74 +397,19 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 				}
 
 				// Handle content blocks
-				if (streamEvent.contentBlockStart) {
-					const cbStart = streamEvent.contentBlockStart
-
-					// Check if this is a reasoning block (official AWS SDK structure)
-					if (cbStart.contentBlock?.reasoningContent) {
-						if (cbStart.contentBlockIndex && cbStart.contentBlockIndex > 0) {
-							yield { type: "reasoning", text: "\n" }
-						}
-						yield {
-							type: "reasoning",
-							text: cbStart.contentBlock.reasoningContent.text || "",
-						}
-					}
-					// Check for thinking block - handle both possible AWS SDK structures
-					// cbStart.contentBlock: newer/official structure
-					// cbStart.content_block: alternative structure seen in some AWS SDK versions
-					else if (cbStart.contentBlock?.type === "thinking" || cbStart.content_block?.type === "thinking") {
-						const contentBlock = cbStart.contentBlock || cbStart.content_block
-						if (cbStart.contentBlockIndex && cbStart.contentBlockIndex > 0) {
-							yield { type: "reasoning", text: "\n" }
-						}
-						if (contentBlock?.thinking) {
-							yield {
-								type: "reasoning",
-								text: contentBlock.thinking,
-							}
-						}
-					} else if (cbStart.start?.text) {
-						yield {
-							type: "text",
-							text: cbStart.start.text,
-						}
+				if (streamEvent.contentBlockStart?.start?.text) {
+					yield {
+						type: "text",
+						text: streamEvent.contentBlockStart.start.text,
 					}
 					continue
 				}
 
 				// Handle content deltas
-				if (streamEvent.contentBlockDelta) {
-					const cbDelta = streamEvent.contentBlockDelta
-					const delta = cbDelta.delta
-
-					// Process reasoning and text content deltas
-					// Multiple structures are supported for AWS SDK compatibility:
-					// - delta.reasoningContent.text: official AWS docs structure for reasoning
-					// - delta.thinking: alternative structure for thinking content
-					// - delta.text: standard text content
-					if (delta) {
-						// Check for reasoningContent property (official AWS SDK structure)
-						if (delta.reasoningContent?.text) {
-							yield {
-								type: "reasoning",
-								text: delta.reasoningContent.text,
-							}
-							continue
-						}
-
-						// Handle alternative thinking structure (fallback for older SDK versions)
-						if (delta.type === "thinking_delta" && delta.thinking) {
-							yield {
-								type: "reasoning",
-								text: delta.thinking,
-							}
-						} else if (delta.text) {
-							yield {
-								type: "text",
-								text: delta.text,
-							}
-						}
+				if (streamEvent.contentBlockDelta?.delta?.text) {
+					yield {
+						type: "text",
+						text: streamEvent.contentBlockDelta.delta.text,
 					}
 					continue
 				}
@@ -581,17 +444,10 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 		try {
 			const modelConfig = this.getModel()
 
-			// For completePrompt, thinking is typically not used, but we should still check
-			// if thinking was somehow enabled in the model config
-			const thinkingEnabled =
-				shouldUseReasoningBudget({ model: modelConfig.info, settings: this.options }) &&
-				modelConfig.reasoning &&
-				modelConfig.reasoningBudget
-
 			const inferenceConfig: BedrockInferenceConfig = {
-				maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
-				temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
-				...(thinkingEnabled ? {} : { topP: 0.1 }), // Only set topP when thinking is NOT enabled
+				maxTokens: modelConfig.info.maxTokens as number,
+				temperature: this.options.modelTemperature as number,
+				topP: 0.1,
 			}
 
 			// For completePrompt, use a unique conversation ID based on the prompt
@@ -866,24 +722,9 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 		return model
 	}
 
-	override getModel(): {
-		id: BedrockModelId | string
-		info: ModelInfo
-		maxTokens?: number
-		temperature?: number
-		reasoning?: any
-		reasoningBudget?: number
-	} {
+	override getModel(): { id: BedrockModelId | string; info: ModelInfo } {
 		if (this.costModelConfig?.id?.trim().length > 0) {
-			// Get model params for cost model config
-			const params = getModelParams({
-				format: "anthropic",
-				modelId: this.costModelConfig.id,
-				model: this.costModelConfig.info,
-				settings: this.options,
-				defaultTemperature: BEDROCK_DEFAULT_TEMPERATURE,
-			})
-			return { ...this.costModelConfig, ...params }
+			return this.costModelConfig
 		}
 
 		let modelConfig = undefined
@@ -911,24 +752,8 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 			}
 		}
 
-		// Get model params including reasoning configuration
-		const params = getModelParams({
-			format: "anthropic",
-			modelId: modelConfig.id,
-			model: modelConfig.info,
-			settings: this.options,
-			defaultTemperature: BEDROCK_DEFAULT_TEMPERATURE,
-		})
-
 		// Don't override maxTokens/contextWindow here; handled in getModelById (and includes user overrides)
-		return { ...modelConfig, ...params } as {
-			id: BedrockModelId | string
-			info: ModelInfo
-			maxTokens?: number
-			temperature?: number
-			reasoning?: any
-			reasoningBudget?: number
-		}
+		return modelConfig as { id: BedrockModelId | string; info: ModelInfo }
 	}
 
 	/************************************************************************************
@@ -1080,33 +905,10 @@ Suggestions:
 			messageTemplate: `Invalid ARN format. ARN should follow the pattern: arn:aws:bedrock:region:account-id:resource-type/resource-name`,
 			logLevel: "error",
 		},
-		VALIDATION_ERROR: {
-			patterns: [
-				"input tag",
-				"does not match any of the expected tags",
-				"field required",
-				"validation",
-				"invalid parameter",
-			],
-			messageTemplate: `Parameter validation error: {errorMessage}
-
-This error indicates that the request parameters don't match AWS Bedrock's expected format.
-
-Common causes:
-1. Extended thinking parameter format is incorrect
-2. Model-specific parameters are not supported by this model
-3. API parameter structure has changed
-
-Please check:
-- Model supports the requested features (extended thinking, etc.)
-- Parameter format matches AWS Bedrock specification
-- Model ID is correct for the requested features`,
-			logLevel: "error",
-		},
 		// Default/generic error
 		GENERIC: {
 			patterns: [], // Empty patterns array means this is the default
-			messageTemplate: `Unknown Error: {errorMessage}`,
+			messageTemplate: `Unknown Error`,
 			logLevel: "error",
 		},
 	}

+ 6 - 8
webview-ui/src/components/settings/ApiOptions.tsx

@@ -465,14 +465,12 @@ const ApiOptions = ({
 				</>
 			)}
 
-			{selectedProviderModels.length > 0 && (
-				<ThinkingBudget
-					key={`${selectedProvider}-${selectedModelId}`}
-					apiConfiguration={apiConfiguration}
-					setApiConfigurationField={setApiConfigurationField}
-					modelInfo={selectedModelInfo}
-				/>
-			)}
+			<ThinkingBudget
+				key={`${selectedProvider}-${selectedModelId}`}
+				apiConfiguration={apiConfiguration}
+				setApiConfigurationField={setApiConfigurationField}
+				modelInfo={selectedModelInfo}
+			/>
 
 			{!fromWelcomeView && (
 				<>

+ 1 - 5
webview-ui/src/components/settings/ThinkingBudget.tsx

@@ -65,11 +65,7 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
 						<div className="flex items-center gap-1">
 							<Slider
 								min={8192}
-								max={Math.max(
-									modelInfo.maxTokens || 8192,
-									customMaxOutputTokens,
-									DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS,
-								)}
+								max={modelInfo.maxTokens}
 								step={1024}
 								value={[customMaxOutputTokens]}
 								onValueChange={([value]) => setApiConfigurationField("modelMaxTokens", value)}

+ 16 - 16
webview-ui/src/components/settings/providers/Bedrock.tsx

@@ -108,24 +108,24 @@ export const Bedrock = ({ apiConfiguration, setApiConfigurationField, selectedMo
 				{t("settings:providers.awsCrossRegion")}
 			</Checkbox>
 			{selectedModelInfo?.supportsPromptCache && (
-				<>
-					<Checkbox
-						checked={apiConfiguration?.awsUsePromptCache || false}
-						onChange={handleInputChange("awsUsePromptCache", noTransform)}>
-						<div className="flex items-center gap-1">
-							<span>{t("settings:providers.enablePromptCaching")}</span>
-							<i
-								className="codicon codicon-info text-vscode-descriptionForeground"
-								title={t("settings:providers.enablePromptCachingTitle")}
-								style={{ fontSize: "12px" }}
-							/>
-						</div>
-					</Checkbox>
-					<div className="text-sm text-vscode-descriptionForeground ml-6 mt-1">
-						{t("settings:providers.cacheUsageNote")}
+				<Checkbox
+					checked={apiConfiguration?.awsUsePromptCache || false}
+					onChange={handleInputChange("awsUsePromptCache", noTransform)}>
+					<div className="flex items-center gap-1">
+						<span>{t("settings:providers.enablePromptCaching")}</span>
+						<i
+							className="codicon codicon-info text-vscode-descriptionForeground"
+							title={t("settings:providers.enablePromptCachingTitle")}
+							style={{ fontSize: "12px" }}
+						/>
 					</div>
-				</>
+				</Checkbox>
 			)}
+			<div>
+				<div className="text-sm text-vscode-descriptionForeground ml-6 mt-1">
+					{t("settings:providers.cacheUsageNote")}
+				</div>
+			</div>
 			<Checkbox
 				checked={awsEndpointSelected}
 				onChange={(isChecked) => {