
refactor: migrate Anthropic provider to @ai-sdk/anthropic (#11287)

* refactor: migrate Anthropic provider to @ai-sdk/anthropic

Replace the raw @anthropic-ai/sdk implementation with @ai-sdk/anthropic
(Vercel AI SDK) for consistency with other providers (Bedrock, DeepSeek,
Mistral, etc.).

Changes:
- Replace Anthropic() client with createAnthropic() from @ai-sdk/anthropic
- Replace manual stream parsing with streamText() + processAiSdkStreamPart()
- Replace client.messages.create() with generateText() for completePrompt()
- Use convertToAiSdkMessages() and convertToolsForAiSdk() for format conversion
- Handle prompt caching via AI SDK providerOptions (cacheControl on messages)
- Handle extended thinking via providerOptions.anthropic.thinking
- Add getThoughtSignature() and getRedactedThinkingBlocks() for thinking
  signature round-tripping (matching the Bedrock pattern; improves on the
  original, which had a TODO for this)
- Add isAiSdkProvider() returning true
- Update tests to mock @ai-sdk/anthropic and ai instead of raw SDK

* fix: address PR review - remove apiKey fallback and use system+systemProviderOptions pattern
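
For orientation, a minimal sketch of the call shape this migration produces, assuming the `createAnthropic`/`streamText` APIs exactly as exercised in the diffs below; the model ID, beta flag, and thinking budget are illustrative values, not defaults from this PR:

```typescript
import { createAnthropic } from "@ai-sdk/anthropic"
import { streamText } from "ai"

// Provider factory replaces `new Anthropic()`; authToken is used in place of
// apiKey when a custom base URL and anthropicUseAuthToken are both set.
const provider = createAnthropic({
	apiKey: process.env.ANTHROPIC_API_KEY,
	headers: { "anthropic-beta": "context-1m-2025-08-07" }, // beta flags joined into one header
})

const result = streamText({
	model: provider("claude-sonnet-4-5"),
	system: "You are a helpful assistant.",
	messages: [{ role: "user", content: [{ type: "text", text: "Hello" }] }],
	providerOptions: {
		// Extended thinking rides on providerOptions.anthropic.thinking.
		anthropic: { thinking: { type: "enabled", budgetTokens: 4096 } },
	},
})

for await (const part of result.fullStream) {
	// Each part is translated into Roo stream chunks via processAiSdkStreamPart().
}
```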
Daniel, 4 days ago
Parent
Commit 1e0fc89fa1
4 changed files with 688 additions and 832 deletions
  1. pnpm-lock.yaml (+3 -0)
  2. src/api/providers/__tests__/anthropic.spec.ts (+420 -542)
  3. src/api/providers/anthropic.ts (+264 -290)
  4. src/package.json (+1 -0)

+ 3 - 0
pnpm-lock.yaml

@@ -749,6 +749,9 @@ importers:
       '@ai-sdk/amazon-bedrock':
         specifier: ^4.0.51
         version: 4.0.51([email protected])
+      '@ai-sdk/anthropic':
+        specifier: ^3.0.38
+        version: 3.0.38([email protected])
       '@ai-sdk/baseten':
         specifier: ^1.0.31
         version: 1.0.31([email protected])

+ 420 - 542
src/api/providers/__tests__/anthropic.spec.ts

@@ -12,79 +12,80 @@ vitest.mock("@roo-code/telemetry", () => ({
 	},
 }))
 
-const mockCreate = vitest.fn()
-
-vitest.mock("@anthropic-ai/sdk", () => {
-	const mockAnthropicConstructor = vitest.fn().mockImplementation(() => ({
-		messages: {
-			create: mockCreate.mockImplementation(async (options) => {
-				if (!options.stream) {
-					return {
-						id: "test-completion",
-						content: [{ type: "text", text: "Test response" }],
-						role: "assistant",
-						model: options.model,
-						usage: {
-							input_tokens: 10,
-							output_tokens: 5,
-						},
-					}
-				}
-				return {
-					async *[Symbol.asyncIterator]() {
-						yield {
-							type: "message_start",
-							message: {
-								usage: {
-									input_tokens: 100,
-									output_tokens: 50,
-									cache_creation_input_tokens: 20,
-									cache_read_input_tokens: 10,
-								},
-							},
-						}
-						yield {
-							type: "content_block_start",
-							index: 0,
-							content_block: {
-								type: "text",
-								text: "Hello",
-							},
-						}
-						yield {
-							type: "content_block_delta",
-							delta: {
-								type: "text_delta",
-								text: " world",
-							},
-						}
-					},
-				}
-			}),
-		},
-	}))
+// Mock the AI SDK
+const mockStreamText = vitest.fn()
+const mockGenerateText = vitest.fn()
+
+vitest.mock("ai", () => ({
+	streamText: (...args: any[]) => mockStreamText(...args),
+	generateText: (...args: any[]) => mockGenerateText(...args),
+	tool: vitest.fn(),
+	jsonSchema: vitest.fn(),
+	ToolSet: {},
+}))
 
-	return {
-		Anthropic: mockAnthropicConstructor,
-	}
-})
+// Mock the @ai-sdk/anthropic provider
+const mockCreateAnthropic = vitest.fn()
+
+vitest.mock("@ai-sdk/anthropic", () => ({
+	createAnthropic: (...args: any[]) => mockCreateAnthropic(...args),
+}))
+
+// Mock ai-sdk transform utilities
+vitest.mock("../../transform/ai-sdk", () => ({
+	convertToAiSdkMessages: vitest.fn().mockReturnValue([{ role: "user", content: [{ type: "text", text: "Hello" }] }]),
+	convertToolsForAiSdk: vitest.fn().mockReturnValue(undefined),
+	processAiSdkStreamPart: vitest.fn().mockImplementation(function* (part: any) {
+		if (part.type === "text-delta") {
+			yield { type: "text", text: part.text }
+		} else if (part.type === "reasoning-delta") {
+			yield { type: "reasoning", text: part.text }
+		} else if (part.type === "tool-input-start") {
+			yield { type: "tool_call_start", id: part.id, name: part.toolName }
+		} else if (part.type === "tool-input-delta") {
+			yield { type: "tool_call_delta", id: part.id, delta: part.delta }
+		} else if (part.type === "tool-input-end") {
+			yield { type: "tool_call_end", id: part.id }
+		}
+	}),
+	mapToolChoice: vitest.fn().mockReturnValue(undefined),
+	handleAiSdkError: vitest.fn().mockImplementation((error: any) => error),
+}))
 
-// Import after mock
+// Import mocked modules
+import { convertToAiSdkMessages, convertToolsForAiSdk, mapToolChoice } from "../../transform/ai-sdk"
 import { Anthropic } from "@anthropic-ai/sdk"
 
-const mockAnthropicConstructor = vitest.mocked(Anthropic)
+// Helper: create a mock provider function
+function createMockProviderFn() {
+	const providerFn = vitest.fn().mockReturnValue("mock-model")
+	return providerFn
+}
 
 describe("AnthropicHandler", () => {
 	let handler: AnthropicHandler
 	let mockOptions: ApiHandlerOptions
+	let mockProviderFn: ReturnType<typeof createMockProviderFn>
 
 	beforeEach(() => {
 		mockOptions = {
 			apiKey: "test-api-key",
 			apiModelId: "claude-3-5-sonnet-20241022",
 		}
+
+		mockProviderFn = createMockProviderFn()
+		mockCreateAnthropic.mockReturnValue(mockProviderFn)
+
 		handler = new AnthropicHandler(mockOptions)
 		vitest.clearAllMocks()
+
+		// Re-set mock defaults after clearAllMocks
+		mockCreateAnthropic.mockReturnValue(mockProviderFn)
+		vitest
+			.mocked(convertToAiSdkMessages)
+			.mockReturnValue([{ role: "user", content: [{ type: "text", text: "Hello" }] }])
+		vitest.mocked(convertToolsForAiSdk).mockReturnValue(undefined)
+		vitest.mocked(mapToolChoice).mockReturnValue(undefined)
 	})
 
 	describe("constructor", () => {
@@ -93,13 +94,15 @@ describe("AnthropicHandler", () => {
 			expect(handler.getModel().id).toBe(mockOptions.apiModelId)
 		})
 
-		it("should initialize with undefined API key", () => {
-			// The SDK will handle API key validation, so we just verify it initializes
+		it("should initialize with undefined API key and pass it through for env-var fallback", () => {
+			mockCreateAnthropic.mockClear()
 			const handlerWithoutKey = new AnthropicHandler({
 				...mockOptions,
 				apiKey: undefined,
 			})
 			expect(handlerWithoutKey).toBeInstanceOf(AnthropicHandler)
+			const callArgs = mockCreateAnthropic.mock.calls[0]![0]!
+			expect(callArgs.apiKey).toBeUndefined()
 		})
 
 		it("should use custom base URL if provided", () => {
@@ -112,44 +115,132 @@ describe("AnthropicHandler", () => {
 		})
 
 		it("use apiKey for passing token if anthropicUseAuthToken is not set", () => {
-			const handlerWithCustomUrl = new AnthropicHandler({
+			mockCreateAnthropic.mockClear()
+			const _ = new AnthropicHandler({
 				...mockOptions,
 			})
-			expect(handlerWithCustomUrl).toBeInstanceOf(AnthropicHandler)
-			expect(mockAnthropicConstructor).toHaveBeenCalledTimes(1)
-			expect(mockAnthropicConstructor.mock.calls[0]![0]!.apiKey).toEqual("test-api-key")
-			expect(mockAnthropicConstructor.mock.calls[0]![0]!.authToken).toBeUndefined()
+			expect(mockCreateAnthropic).toHaveBeenCalledTimes(1)
+			const callArgs = mockCreateAnthropic.mock.calls[0]![0]!
+			expect(callArgs.apiKey).toEqual("test-api-key")
+			expect(callArgs.authToken).toBeUndefined()
 		})
 
 		it("use apiKey for passing token if anthropicUseAuthToken is set but custom base URL is not given", () => {
-			const handlerWithCustomUrl = new AnthropicHandler({
+			mockCreateAnthropic.mockClear()
+			const _ = new AnthropicHandler({
 				...mockOptions,
 				anthropicUseAuthToken: true,
 			})
-			expect(handlerWithCustomUrl).toBeInstanceOf(AnthropicHandler)
-			expect(mockAnthropicConstructor).toHaveBeenCalledTimes(1)
-			expect(mockAnthropicConstructor.mock.calls[0]![0]!.apiKey).toEqual("test-api-key")
-			expect(mockAnthropicConstructor.mock.calls[0]![0]!.authToken).toBeUndefined()
+			expect(mockCreateAnthropic).toHaveBeenCalledTimes(1)
+			const callArgs = mockCreateAnthropic.mock.calls[0]![0]!
+			expect(callArgs.apiKey).toEqual("test-api-key")
+			expect(callArgs.authToken).toBeUndefined()
 		})
 
 		it("use authToken for passing token if both of anthropicBaseUrl and anthropicUseAuthToken are set", () => {
+			mockCreateAnthropic.mockClear()
 			const customBaseUrl = "https://custom.anthropic.com"
-			const handlerWithCustomUrl = new AnthropicHandler({
+			const _ = new AnthropicHandler({
 				...mockOptions,
 				anthropicBaseUrl: customBaseUrl,
 				anthropicUseAuthToken: true,
 			})
-			expect(handlerWithCustomUrl).toBeInstanceOf(AnthropicHandler)
-			expect(mockAnthropicConstructor).toHaveBeenCalledTimes(1)
-			expect(mockAnthropicConstructor.mock.calls[0]![0]!.authToken).toEqual("test-api-key")
-			expect(mockAnthropicConstructor.mock.calls[0]![0]!.apiKey).toBeUndefined()
+			expect(mockCreateAnthropic).toHaveBeenCalledTimes(1)
+			const callArgs = mockCreateAnthropic.mock.calls[0]![0]!
+			expect(callArgs.authToken).toEqual("test-api-key")
+			expect(callArgs.apiKey).toBeUndefined()
+		})
+
+		it("should include 1M context beta header when enabled", () => {
+			mockCreateAnthropic.mockClear()
+			const _ = new AnthropicHandler({
+				...mockOptions,
+				apiModelId: "claude-sonnet-4-5",
+				anthropicBeta1MContext: true,
+			})
+			expect(mockCreateAnthropic).toHaveBeenCalledTimes(1)
+			const callArgs = mockCreateAnthropic.mock.calls[0]![0]!
+			expect(callArgs.headers["anthropic-beta"]).toContain("context-1m-2025-08-07")
+		})
+
+		it("should include output-128k beta for thinking model", () => {
+			mockCreateAnthropic.mockClear()
+			const _ = new AnthropicHandler({
+				...mockOptions,
+				apiModelId: "claude-3-7-sonnet-20250219:thinking",
+			})
+			expect(mockCreateAnthropic).toHaveBeenCalledTimes(1)
+			const callArgs = mockCreateAnthropic.mock.calls[0]![0]!
+			expect(callArgs.headers["anthropic-beta"]).toContain("output-128k-2025-02-19")
 		})
 	})
 
 	describe("createMessage", () => {
 		const systemPrompt = "You are a helpful assistant."
 
+		function setupStreamTextMock(parts: any[], usage?: any, providerMetadata?: any) {
+			const asyncIterable = {
+				async *[Symbol.asyncIterator]() {
+					for (const part of parts) {
+						yield part
+					}
+				},
+			}
+			mockStreamText.mockReturnValue({
+				fullStream: asyncIterable,
+				usage: Promise.resolve(usage || { inputTokens: 100, outputTokens: 50 }),
+				providerMetadata: Promise.resolve(
+					providerMetadata || {
+						anthropic: {
+							cacheCreationInputTokens: 20,
+							cacheReadInputTokens: 10,
+						},
+					},
+				),
+			})
+		}
+
+		it("should stream text content using AI SDK", async () => {
+			setupStreamTextMock([
+				{ type: "text-delta", text: "Hello" },
+				{ type: "text-delta", text: " world" },
+			])
+
+			const stream = handler.createMessage(systemPrompt, [
+				{
+					role: "user",
+					content: [{ type: "text" as const, text: "First message" }],
+				},
+			])
+
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Verify text content
+			const textChunks = chunks.filter((chunk) => chunk.type === "text")
+			expect(textChunks).toHaveLength(2)
+			expect(textChunks[0].text).toBe("Hello")
+			expect(textChunks[1].text).toBe(" world")
+
+			// Verify usage information
+			const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
+			expect(usageChunks.length).toBeGreaterThan(0)
+		})
+
 		it("should handle prompt caching for supported models", async () => {
+			setupStreamTextMock(
+				[{ type: "text-delta", text: "Hello" }],
+				{ inputTokens: 100, outputTokens: 50 },
+				{
+					anthropic: {
+						cacheCreationInputTokens: 20,
+						cacheReadInputTokens: 10,
+					},
+				},
+			)
+
 			const stream = handler.createMessage(systemPrompt, [
 				{
 					role: "user",
@@ -170,56 +261,271 @@ describe("AnthropicHandler", () => {
 				chunks.push(chunk)
 			}
 
-			// Verify usage information
-			const usageChunk = chunks.find((chunk) => chunk.type === "usage")
+			// Verify usage information includes cache metrics
+			const usageChunk = chunks.find(
+				(chunk) => chunk.type === "usage" && (chunk.cacheWriteTokens || chunk.cacheReadTokens),
+			)
 			expect(usageChunk).toBeDefined()
-			expect(usageChunk?.inputTokens).toBe(100)
-			expect(usageChunk?.outputTokens).toBe(50)
 			expect(usageChunk?.cacheWriteTokens).toBe(20)
 			expect(usageChunk?.cacheReadTokens).toBe(10)
 
-			// Verify text content
-			const textChunks = chunks.filter((chunk) => chunk.type === "text")
-			expect(textChunks).toHaveLength(2)
-			expect(textChunks[0].text).toBe("Hello")
-			expect(textChunks[1].text).toBe(" world")
+			// Verify streamText was called
+			expect(mockStreamText).toHaveBeenCalled()
+		})
+
+		it("should pass tools via AI SDK when tools are provided", async () => {
+			const mockTools = [
+				{
+					type: "function" as const,
+					function: {
+						name: "get_weather",
+						description: "Get the current weather",
+						parameters: {
+							type: "object",
+							properties: {
+								location: { type: "string" },
+							},
+							required: ["location"],
+						},
+					},
+				},
+			]
+
+			setupStreamTextMock([{ type: "text-delta", text: "Weather check" }])
+
+			const stream = handler.createMessage(
+				systemPrompt,
+				[{ role: "user", content: [{ type: "text" as const, text: "What's the weather?" }] }],
+				{ taskId: "test-task", tools: mockTools },
+			)
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			// Verify tools were converted
+			expect(convertToolsForAiSdk).toHaveBeenCalled()
+			expect(mockStreamText).toHaveBeenCalled()
+		})
+
+		it("should handle tool_choice mapping", async () => {
+			setupStreamTextMock([{ type: "text-delta", text: "test" }])
+
+			const stream = handler.createMessage(
+				systemPrompt,
+				[{ role: "user", content: [{ type: "text" as const, text: "test" }] }],
+				{ taskId: "test-task", tool_choice: "auto" },
+			)
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			expect(mapToolChoice).toHaveBeenCalledWith("auto")
+		})
+
+		it("should disable parallel tool use when parallelToolCalls is false", async () => {
+			setupStreamTextMock([{ type: "text-delta", text: "test" }])
+
+			const stream = handler.createMessage(
+				systemPrompt,
+				[{ role: "user", content: [{ type: "text" as const, text: "test" }] }],
+				{ taskId: "test-task", parallelToolCalls: false },
+			)
 
-			// Verify API
-			expect(mockCreate).toHaveBeenCalled()
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			expect(mockStreamText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					providerOptions: expect.objectContaining({
+						anthropic: expect.objectContaining({
+							disableParallelToolUse: true,
+						}),
+					}),
+				}),
+			)
+		})
+
+		it("should not set disableParallelToolUse when parallelToolCalls is true or undefined", async () => {
+			setupStreamTextMock([{ type: "text-delta", text: "test" }])
+
+			const stream = handler.createMessage(
+				systemPrompt,
+				[{ role: "user", content: [{ type: "text" as const, text: "test" }] }],
+				{ taskId: "test-task", parallelToolCalls: true },
+			)
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			// providerOptions should not include disableParallelToolUse
+			const callArgs = mockStreamText.mock.calls[0]![0]
+			const anthropicOptions = callArgs?.providerOptions?.anthropic
+			expect(anthropicOptions?.disableParallelToolUse).toBeUndefined()
+		})
+
+		it("should handle tool call streaming via AI SDK", async () => {
+			setupStreamTextMock([
+				{ type: "tool-input-start", id: "toolu_123", toolName: "get_weather" },
+				{ type: "tool-input-delta", id: "toolu_123", delta: '{"location":' },
+				{ type: "tool-input-delta", id: "toolu_123", delta: '"London"}' },
+				{ type: "tool-input-end", id: "toolu_123" },
+			])
+
+			const stream = handler.createMessage(
+				systemPrompt,
+				[{ role: "user", content: [{ type: "text" as const, text: "What's the weather?" }] }],
+				{ taskId: "test-task" },
+			)
+
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const startChunk = chunks.find((c) => c.type === "tool_call_start")
+			expect(startChunk).toBeDefined()
+			expect(startChunk?.id).toBe("toolu_123")
+			expect(startChunk?.name).toBe("get_weather")
+
+			const deltaChunks = chunks.filter((c) => c.type === "tool_call_delta")
+			expect(deltaChunks).toHaveLength(2)
+
+			const endChunk = chunks.find((c) => c.type === "tool_call_end")
+			expect(endChunk).toBeDefined()
+		})
+
+		it("should capture thinking signature from stream events", async () => {
+			const testSignature = "test-thinking-signature"
+			setupStreamTextMock([
+				{
+					type: "reasoning-delta",
+					text: "thinking...",
+					providerMetadata: { anthropic: { signature: testSignature } },
+				},
+				{ type: "text-delta", text: "Answer" },
+			])
+
+			const stream = handler.createMessage(systemPrompt, [
+				{ role: "user", content: [{ type: "text" as const, text: "test" }] },
+			])
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			expect(handler.getThoughtSignature()).toBe(testSignature)
+		})
+
+		it("should capture redacted thinking blocks from stream events", async () => {
+			setupStreamTextMock([
+				{
+					type: "reasoning-delta",
+					text: "",
+					providerMetadata: { anthropic: { redactedData: "redacted-data-base64" } },
+				},
+				{ type: "text-delta", text: "Answer" },
+			])
+
+			const stream = handler.createMessage(systemPrompt, [
+				{ role: "user", content: [{ type: "text" as const, text: "test" }] },
+			])
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			const redactedBlocks = handler.getRedactedThinkingBlocks()
+			expect(redactedBlocks).toBeDefined()
+			expect(redactedBlocks).toHaveLength(1)
+			expect(redactedBlocks![0]).toEqual({
+				type: "redacted_thinking",
+				data: "redacted-data-base64",
+			})
+		})
+
+		it("should reset thinking state between requests", async () => {
+			// First request with signature
+			setupStreamTextMock([
+				{
+					type: "reasoning-delta",
+					text: "thinking...",
+					providerMetadata: { anthropic: { signature: "sig-1" } },
+				},
+			])
+
+			const stream1 = handler.createMessage(systemPrompt, [
+				{ role: "user", content: [{ type: "text" as const, text: "test 1" }] },
+			])
+			for await (const _chunk of stream1) {
+				// Consume
+			}
+			expect(handler.getThoughtSignature()).toBe("sig-1")
+
+			// Second request without signature
+			setupStreamTextMock([{ type: "text-delta", text: "plain answer" }])
+
+			const stream2 = handler.createMessage(systemPrompt, [
+				{ role: "user", content: [{ type: "text" as const, text: "test 2" }] },
+			])
+			for await (const _chunk of stream2) {
+				// Consume
+			}
+			expect(handler.getThoughtSignature()).toBeUndefined()
+		})
+
+		it("should pass system prompt via system param with systemProviderOptions for cache control", async () => {
+			setupStreamTextMock([{ type: "text-delta", text: "test" }])
+
+			const stream = handler.createMessage(systemPrompt, [
+				{ role: "user", content: [{ type: "text" as const, text: "test" }] },
+			])
+
+			for await (const _chunk of stream) {
+				// Consume
+			}
+
+			// Verify streamText was called with system + systemProviderOptions (not as a message)
+			const callArgs = mockStreamText.mock.calls[0]![0]
+			expect(callArgs.system).toBe(systemPrompt)
+			expect(callArgs.systemProviderOptions).toEqual({
+				anthropic: { cacheControl: { type: "ephemeral" } },
+			})
+			// System prompt should NOT be in the messages array
+			const systemMessages = callArgs.messages.filter((m: any) => m.role === "system")
+			expect(systemMessages).toHaveLength(0)
 		})
 	})
 
 	describe("completePrompt", () => {
 		it("should complete prompt successfully", async () => {
+			mockGenerateText.mockResolvedValueOnce({
+				text: "Test response",
+			})
+
 			const result = await handler.completePrompt("Test prompt")
 			expect(result).toBe("Test response")
-			expect(mockCreate).toHaveBeenCalledWith({
-				model: mockOptions.apiModelId,
-				messages: [{ role: "user", content: "Test prompt" }],
-				max_tokens: 8192,
-				temperature: 0,
-				thinking: undefined,
-				stream: false,
-			})
+			expect(mockGenerateText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					prompt: "Test prompt",
+					temperature: 0,
+				}),
+			)
 		})
 
 		it("should handle API errors", async () => {
-			mockCreate.mockRejectedValueOnce(new Error("Anthropic completion error: API Error"))
-			await expect(handler.completePrompt("Test prompt")).rejects.toThrow("Anthropic completion error: API Error")
-		})
-
-		it("should handle non-text content", async () => {
-			mockCreate.mockImplementationOnce(async () => ({
-				content: [{ type: "image" }],
-			}))
-			const result = await handler.completePrompt("Test prompt")
-			expect(result).toBe("")
+			const error = new Error("Anthropic completion error: API Error")
+			mockGenerateText.mockRejectedValueOnce(error)
+			await expect(handler.completePrompt("Test prompt")).rejects.toThrow()
 		})
 
 		it("should handle empty response", async () => {
-			mockCreate.mockImplementationOnce(async () => ({
-				content: [{ type: "text", text: "" }],
-			}))
+			mockGenerateText.mockResolvedValueOnce({
+				text: "",
+			})
 			const result = await handler.completePrompt("Test prompt")
 			expect(result).toBe("")
 		})
@@ -299,447 +605,19 @@ describe("AnthropicHandler", () => {
 		})
 	})
 
-	describe("reasoning block filtering", () => {
-		const systemPrompt = "You are a helpful assistant."
-
-		it("should filter out internal reasoning blocks before sending to API", async () => {
-			handler = new AnthropicHandler({
-				apiKey: "test-api-key",
-				apiModelId: "claude-3-5-sonnet-20241022",
-			})
-
-			// Messages with internal reasoning blocks (from stored conversation history)
-			const messagesWithReasoning: Anthropic.Messages.MessageParam[] = [
-				{
-					role: "user",
-					content: "Hello",
-				},
-				{
-					role: "assistant",
-					content: [
-						{
-							type: "reasoning" as any,
-							text: "This is internal reasoning that should be filtered",
-						},
-						{
-							type: "text",
-							text: "This is the response",
-						},
-					],
-				},
-				{
-					role: "user",
-					content: "Continue",
-				},
-			]
-
-			const stream = handler.createMessage(systemPrompt, messagesWithReasoning)
-			const chunks: any[] = []
-
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Verify the API was called with filtered messages (no reasoning blocks)
-			const calledMessages = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0].messages
-			expect(calledMessages).toHaveLength(3)
-
-			// Check assistant message - should have reasoning block filtered out
-			const assistantMessage = calledMessages.find((m: any) => m.role === "assistant")
-			expect(assistantMessage).toBeDefined()
-			expect(assistantMessage.content).toEqual([{ type: "text", text: "This is the response" }])
-
-			// Verify reasoning blocks were NOT sent to the API
-			expect(assistantMessage.content).not.toContainEqual(expect.objectContaining({ type: "reasoning" }))
-		})
-
-		it("should filter empty messages after removing all reasoning blocks", async () => {
-			handler = new AnthropicHandler({
-				apiKey: "test-api-key",
-				apiModelId: "claude-3-5-sonnet-20241022",
-			})
-
-			// Message with only reasoning content (should be completely filtered)
-			const messagesWithOnlyReasoning: Anthropic.Messages.MessageParam[] = [
-				{
-					role: "user",
-					content: "Hello",
-				},
-				{
-					role: "assistant",
-					content: [
-						{
-							type: "reasoning" as any,
-							text: "Only reasoning, no actual text",
-						},
-					],
-				},
-				{
-					role: "user",
-					content: "Continue",
-				},
-			]
-
-			const stream = handler.createMessage(systemPrompt, messagesWithOnlyReasoning)
-			const chunks: any[] = []
-
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Verify empty message was filtered out
-			const calledMessages = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0].messages
-			expect(calledMessages.length).toBe(2) // Only the two user messages
-			expect(calledMessages.every((m: any) => m.role === "user")).toBe(true)
+	describe("isAiSdkProvider", () => {
+		it("should return true", () => {
+			expect(handler.isAiSdkProvider()).toBe(true)
 		})
 	})
 
-	describe("native tool calling", () => {
-		const systemPrompt = "You are a helpful assistant."
-		const messages: Anthropic.Messages.MessageParam[] = [
-			{
-				role: "user",
-				content: [{ type: "text" as const, text: "What's the weather in London?" }],
-			},
-		]
-
-		const mockTools = [
-			{
-				type: "function" as const,
-				function: {
-					name: "get_weather",
-					description: "Get the current weather",
-					parameters: {
-						type: "object",
-						properties: {
-							location: { type: "string" },
-						},
-						required: ["location"],
-					},
-				},
-			},
-		]
-
-		it("should include tools in request when tools are provided", async () => {
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-			})
-
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-				// Just consume
-			}
-
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({
-					tools: expect.arrayContaining([
-						expect.objectContaining({
-							name: "get_weather",
-							description: "Get the current weather",
-							input_schema: expect.objectContaining({
-								type: "object",
-								properties: expect.objectContaining({
-									location: { type: "string" },
-								}),
-							}),
-						}),
-					]),
-				}),
-				expect.anything(),
-			)
-		})
-
-		it("should include tools when tools are provided", async () => {
-			const xmlHandler = new AnthropicHandler({
-				...mockOptions,
-			})
-
-			const stream = xmlHandler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-			})
-
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-				// Just consume
-			}
-
-			// Tool calling is request-driven: if tools are provided, we should include them.
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({
-					tools: expect.arrayContaining([
-						expect.objectContaining({
-							name: "get_weather",
-						}),
-					]),
-				}),
-				expect.anything(),
-			)
+	describe("thinking signature", () => {
+		it("should return undefined when no signature captured", () => {
+			expect(handler.getThoughtSignature()).toBeUndefined()
 		})
 
-		it("should always include tools in request (tools are always present after PR #10841)", async () => {
-			// Handler uses native protocol by default
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-			})
-
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-				// Just consume
-			}
-
-			// Tools are now always present (minimum 6 from ALWAYS_AVAILABLE_TOOLS)
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({
-					tools: expect.any(Array),
-					tool_choice: expect.any(Object),
-				}),
-				expect.anything(),
-			)
-		})
-
-		it("should convert tool_choice 'auto' to Anthropic format", async () => {
-			// Handler uses native protocol by default
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-				tool_choice: "auto",
-			})
-
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-				// Just consume
-			}
-
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({
-					tool_choice: { type: "auto", disable_parallel_tool_use: false },
-				}),
-				expect.anything(),
-			)
-		})
-
-		it("should convert tool_choice 'required' to Anthropic 'any' format", async () => {
-			// Handler uses native protocol by default
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-				tool_choice: "required",
-			})
-
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-				// Just consume
-			}
-
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({
-					tool_choice: { type: "any", disable_parallel_tool_use: false },
-				}),
-				expect.anything(),
-			)
-		})
-
-		it("should set tool_choice to undefined when tool_choice is 'none' (tools are still passed)", async () => {
-			// Handler uses native protocol by default
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-				tool_choice: "none",
-			})
-
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-				// Just consume
-			}
-
-			// Tools are now always present (minimum 6 from ALWAYS_AVAILABLE_TOOLS)
-			// When tool_choice is 'none', the converter returns undefined for tool_choice
-			// but tools are still passed since they're always present
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({
-					tools: expect.any(Array),
-					tool_choice: undefined,
-				}),
-				expect.anything(),
-			)
-		})
-
-		it("should convert specific tool_choice to Anthropic 'tool' format", async () => {
-			// Handler uses native protocol by default
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-				tool_choice: { type: "function" as const, function: { name: "get_weather" } },
-			})
-
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-				// Just consume
-			}
-
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({
-					tool_choice: { type: "tool", name: "get_weather", disable_parallel_tool_use: false },
-				}),
-				expect.anything(),
-			)
-		})
-
-		it("should enable parallel tool calls when parallelToolCalls is true", async () => {
-			// Handler uses native protocol by default
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-				tool_choice: "auto",
-				parallelToolCalls: true,
-			})
-
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-				// Just consume
-			}
-
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({
-					tool_choice: { type: "auto", disable_parallel_tool_use: false },
-				}),
-				expect.anything(),
-			)
-		})
-
-		it("should handle tool_use blocks in stream and emit tool_call_partial", async () => {
-			mockCreate.mockImplementationOnce(async () => ({
-				async *[Symbol.asyncIterator]() {
-					yield {
-						type: "message_start",
-						message: {
-							usage: {
-								input_tokens: 100,
-								output_tokens: 50,
-							},
-						},
-					}
-					yield {
-						type: "content_block_start",
-						index: 0,
-						content_block: {
-							type: "tool_use",
-							id: "toolu_123",
-							name: "get_weather",
-						},
-					}
-				},
-			}))
-
-			// Handler uses native protocol by default
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-			})
-
-			const chunks: any[] = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Find the tool_call_partial chunk
-			const toolCallChunk = chunks.find((chunk) => chunk.type === "tool_call_partial")
-			expect(toolCallChunk).toBeDefined()
-			expect(toolCallChunk).toEqual({
-				type: "tool_call_partial",
-				index: 0,
-				id: "toolu_123",
-				name: "get_weather",
-				arguments: undefined,
-			})
-		})
-
-		it("should handle input_json_delta in stream and emit tool_call_partial arguments", async () => {
-			mockCreate.mockImplementationOnce(async () => ({
-				async *[Symbol.asyncIterator]() {
-					yield {
-						type: "message_start",
-						message: {
-							usage: {
-								input_tokens: 100,
-								output_tokens: 50,
-							},
-						},
-					}
-					yield {
-						type: "content_block_start",
-						index: 0,
-						content_block: {
-							type: "tool_use",
-							id: "toolu_123",
-							name: "get_weather",
-						},
-					}
-					yield {
-						type: "content_block_delta",
-						index: 0,
-						delta: {
-							type: "input_json_delta",
-							partial_json: '{"location":',
-						},
-					}
-					yield {
-						type: "content_block_delta",
-						index: 0,
-						delta: {
-							type: "input_json_delta",
-							partial_json: '"London"}',
-						},
-					}
-					yield {
-						type: "content_block_stop",
-						index: 0,
-					}
-				},
-			}))
-
-			// Handler uses native protocol by default
-			const stream = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				tools: mockTools,
-			})
-
-			const chunks: any[] = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Find the tool_call_partial chunks
-			const toolCallChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial")
-			expect(toolCallChunks).toHaveLength(3)
-
-			// First chunk has id and name
-			expect(toolCallChunks[0]).toEqual({
-				type: "tool_call_partial",
-				index: 0,
-				id: "toolu_123",
-				name: "get_weather",
-				arguments: undefined,
-			})
-
-			// Subsequent chunks have arguments
-			expect(toolCallChunks[1]).toEqual({
-				type: "tool_call_partial",
-				index: 0,
-				id: undefined,
-				name: undefined,
-				arguments: '{"location":',
-			})
-
-			expect(toolCallChunks[2]).toEqual({
-				type: "tool_call_partial",
-				index: 0,
-				id: undefined,
-				name: undefined,
-				arguments: '"London"}',
-			})
+		it("should return undefined for redacted blocks when none captured", () => {
+			expect(handler.getRedactedThinkingBlocks()).toBeUndefined()
 		})
 	})
 })
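
As a reference when porting other provider specs to this pattern: the handler reads only three fields from `streamText()`'s result, so a standalone stub (hypothetical values, mirroring `setupStreamTextMock()` above) can be as small as:

```typescript
mockStreamText.mockReturnValue({
	fullStream: (async function* () {
		yield { type: "text-delta", text: "Hi" }
	})(),
	usage: Promise.resolve({ inputTokens: 1, outputTokens: 1 }),
	providerMetadata: Promise.resolve({ anthropic: { cacheReadInputTokens: 0 } }),
})
```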

+ 264 - 290
src/api/providers/anthropic.ts

@@ -1,7 +1,6 @@
-import { Anthropic } from "@anthropic-ai/sdk"
-import { Stream as AnthropicStream } from "@anthropic-ai/sdk/streaming"
-import { CacheControlEphemeral } from "@anthropic-ai/sdk/resources"
-import OpenAI from "openai"
+import type { Anthropic } from "@anthropic-ai/sdk"
+import { createAnthropic } from "@ai-sdk/anthropic"
+import { streamText, generateText, ToolSet } from "ai"
 
 import {
 	type ModelInfo,
@@ -14,317 +13,277 @@ import {
 import { TelemetryService } from "@roo-code/telemetry"
 
 import type { ApiHandlerOptions } from "../../shared/api"
+import { shouldUseReasoningBudget } from "../../shared/api"
 
-import { ApiStream } from "../transform/stream"
+import type { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { getModelParams } from "../transform/model-params"
-import { filterNonAnthropicBlocks } from "../transform/anthropic-filter"
-import { handleProviderError } from "./utils/error-handler"
+import {
+	convertToAiSdkMessages,
+	convertToolsForAiSdk,
+	processAiSdkStreamPart,
+	mapToolChoice,
+	handleAiSdkError,
+} from "../transform/ai-sdk"
+import { calculateApiCostAnthropic } from "../../shared/cost"
 
+import { DEFAULT_HEADERS } from "./constants"
 import { BaseProvider } from "./base-provider"
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
-import { calculateApiCostAnthropic } from "../../shared/cost"
-import {
-	convertOpenAIToolsToAnthropic,
-	convertOpenAIToolChoiceToAnthropic,
-} from "../../core/prompts/tools/native-tools/converters"
 
 export class AnthropicHandler extends BaseProvider implements SingleCompletionHandler {
 	private options: ApiHandlerOptions
-	private client: Anthropic
+	private provider: ReturnType<typeof createAnthropic>
 	private readonly providerName = "Anthropic"
+	private lastThoughtSignature: string | undefined
+	private lastRedactedThinkingBlocks: Array<{ type: "redacted_thinking"; data: string }> = []
 
 	constructor(options: ApiHandlerOptions) {
 		super()
 		this.options = options
 
-		const apiKeyFieldName =
-			this.options.anthropicBaseUrl && this.options.anthropicUseAuthToken ? "authToken" : "apiKey"
+		const useAuthToken = Boolean(options.anthropicBaseUrl && options.anthropicUseAuthToken)
 
-		this.client = new Anthropic({
-			baseURL: this.options.anthropicBaseUrl || undefined,
-			[apiKeyFieldName]: this.options.apiKey,
-		})
-	}
+		// Build beta headers for model-specific features
+		const betas: string[] = []
+		const modelId = options.apiModelId
+
+		if (modelId === "claude-3-7-sonnet-20250219:thinking") {
+			betas.push("output-128k-2025-02-19")
+		}
 
-	async *createMessage(
-		systemPrompt: string,
-		messages: Anthropic.Messages.MessageParam[],
-		metadata?: ApiHandlerCreateMessageMetadata,
-	): ApiStream {
-		let stream: AnthropicStream<Anthropic.Messages.RawMessageStreamEvent>
-		const cacheControl: CacheControlEphemeral = { type: "ephemeral" }
-		let {
-			id: modelId,
-			betas = ["fine-grained-tool-streaming-2025-05-14"],
-			maxTokens,
-			temperature,
-			reasoning: thinking,
-		} = this.getModel()
-
-		// Filter out non-Anthropic blocks (reasoning, thoughtSignature, etc.) before sending to the API
-		const sanitizedMessages = filterNonAnthropicBlocks(messages)
-
-		// Add 1M context beta flag if enabled for supported models (Claude Sonnet 4/4.5, Opus 4.6)
 		if (
 			(modelId === "claude-sonnet-4-20250514" ||
 				modelId === "claude-sonnet-4-5" ||
 				modelId === "claude-opus-4-6") &&
-			this.options.anthropicBeta1MContext
+			options.anthropicBeta1MContext
 		) {
 			betas.push("context-1m-2025-08-07")
 		}
 
-		const nativeToolParams = {
-			tools: convertOpenAIToolsToAnthropic(metadata?.tools ?? []),
-			tool_choice: convertOpenAIToolChoiceToAnthropic(metadata?.tool_choice, metadata?.parallelToolCalls),
+		this.provider = createAnthropic({
+			baseURL: options.anthropicBaseUrl || undefined,
+			...(useAuthToken ? { authToken: options.apiKey } : { apiKey: options.apiKey }),
+			headers: {
+				...DEFAULT_HEADERS,
+				...(betas.length > 0 ? { "anthropic-beta": betas.join(",") } : {}),
+			},
+		})
+	}
+
+	override async *createMessage(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+	): ApiStream {
+		const modelConfig = this.getModel()
+
+		// Reset thinking state for this request
+		this.lastThoughtSignature = undefined
+		this.lastRedactedThinkingBlocks = []
+
+		// Convert messages to AI SDK format
+		const aiSdkMessages = convertToAiSdkMessages(messages)
+
+		// Convert tools to AI SDK format
+		const openAiTools = this.convertToolsForOpenAI(metadata?.tools)
+		const aiSdkTools = convertToolsForAiSdk(openAiTools) as ToolSet | undefined
+
+		// Build Anthropic provider options
+		const anthropicProviderOptions: Record<string, unknown> = {}
+
+		// Configure thinking/reasoning if the model supports it
+		const isThinkingEnabled =
+			shouldUseReasoningBudget({ model: modelConfig.info, settings: this.options }) &&
+			modelConfig.reasoning &&
+			modelConfig.reasoningBudget
+
+		if (isThinkingEnabled) {
+			anthropicProviderOptions.thinking = {
+				type: "enabled",
+				budgetTokens: modelConfig.reasoningBudget,
+			}
+		}
+
+		// Forward parallelToolCalls setting
+		// When parallelToolCalls is explicitly false, disable parallel tool use
+		if (metadata?.parallelToolCalls === false) {
+			anthropicProviderOptions.disableParallelToolUse = true
 		}
 
-		switch (modelId) {
-			case "claude-sonnet-4-5":
-			case "claude-sonnet-4-20250514":
-			case "claude-opus-4-6":
-			case "claude-opus-4-5-20251101":
-			case "claude-opus-4-1-20250805":
-			case "claude-opus-4-20250514":
-			case "claude-3-7-sonnet-20250219":
-			case "claude-3-5-sonnet-20241022":
-			case "claude-3-5-haiku-20241022":
-			case "claude-3-opus-20240229":
-			case "claude-haiku-4-5-20251001":
-			case "claude-3-haiku-20240307": {
-				/**
-				 * The latest message will be the new user message, one before
-				 * will be the assistant message from a previous request, and
-				 * the user message before that will be a previously cached user
-				 * message. So we need to mark the latest user message as
-				 * ephemeral to cache it for the next request, and mark the
-				 * second to last user message as ephemeral to let the server
-				 * know the last message to retrieve from the cache for the
-				 * current request.
-				 */
-				const userMsgIndices = sanitizedMessages.reduce(
-					(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
-					[] as number[],
-				)
+		// Apply cache control to user messages
+		// Strategy: cache the last 2 user messages (write-to-cache + read-from-cache)
+		const cacheProviderOption = { anthropic: { cacheControl: { type: "ephemeral" as const } } }
+
+		const userMsgIndices = messages.reduce(
+			(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
+			[] as number[],
+		)
+
+		const targetIndices = new Set<number>()
+		const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
+		const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1
 
-				const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
-				const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1
-
-				try {
-					stream = await this.client.messages.create(
-						{
-							model: modelId,
-							max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
-							temperature,
-							thinking,
-							// Setting cache breakpoint for system prompt so new tasks can reuse it.
-							system: [{ text: systemPrompt, type: "text", cache_control: cacheControl }],
-							messages: sanitizedMessages.map((message, index) => {
-								if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) {
-									return {
-										...message,
-										content:
-											typeof message.content === "string"
-												? [{ type: "text", text: message.content, cache_control: cacheControl }]
-												: message.content.map((content, contentIndex) =>
-														contentIndex === message.content.length - 1
-															? { ...content, cache_control: cacheControl }
-															: content,
-													),
-									}
-								}
-								return message
-							}),
-							stream: true,
-							...nativeToolParams,
-						},
-						(() => {
-							// prompt caching: https://x.com/alexalbert__/status/1823751995901272068
-							// https://github.com/anthropics/anthropic-sdk-typescript?tab=readme-ov-file#default-headers
-							// https://github.com/anthropics/anthropic-sdk-typescript/commit/c920b77fc67bd839bfeb6716ceab9d7c9bbe7393
-
-							// Then check for models that support prompt caching
-							switch (modelId) {
-								case "claude-sonnet-4-5":
-								case "claude-sonnet-4-20250514":
-								case "claude-opus-4-6":
-								case "claude-opus-4-5-20251101":
-								case "claude-opus-4-1-20250805":
-								case "claude-opus-4-20250514":
-								case "claude-3-7-sonnet-20250219":
-								case "claude-3-5-sonnet-20241022":
-								case "claude-3-5-haiku-20241022":
-								case "claude-3-opus-20240229":
-								case "claude-haiku-4-5-20251001":
-								case "claude-3-haiku-20240307":
-									betas.push("prompt-caching-2024-07-31")
-									return { headers: { "anthropic-beta": betas.join(",") } }
-								default:
-									return undefined
-							}
-						})(),
-					)
-				} catch (error) {
-					TelemetryService.instance.captureException(
-						new ApiProviderError(
-							error instanceof Error ? error.message : String(error),
-							this.providerName,
-							modelId,
-							"createMessage",
-						),
-					)
-					throw error
+		if (lastUserMsgIndex >= 0) targetIndices.add(lastUserMsgIndex)
+		if (secondLastUserMsgIndex >= 0) targetIndices.add(secondLastUserMsgIndex)
+
+		if (targetIndices.size > 0) {
+			this.applyCacheControlToAiSdkMessages(messages, aiSdkMessages, targetIndices, cacheProviderOption)
+		}
+
+		// Build streamText request
+		// Cast providerOptions to any to bypass strict JSONObject typing — the AI SDK accepts the correct runtime values
+		const requestOptions: Parameters<typeof streamText>[0] = {
+			model: this.provider(modelConfig.id),
+			system: systemPrompt,
+			...({
+				systemProviderOptions: { anthropic: { cacheControl: { type: "ephemeral" } } },
+			} as Record<string, unknown>),
+			messages: aiSdkMessages,
+			temperature: modelConfig.temperature,
+			maxOutputTokens: modelConfig.maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
+			tools: aiSdkTools,
+			toolChoice: mapToolChoice(metadata?.tool_choice),
+			...(Object.keys(anthropicProviderOptions).length > 0 && {
+				providerOptions: { anthropic: anthropicProviderOptions } as any,
+			}),
+		}
+
+		try {
+			const result = streamText(requestOptions)
+
+			for await (const part of result.fullStream) {
+				// Capture thinking signature from stream events
+				// The AI SDK's @ai-sdk/anthropic emits the signature as a reasoning-delta
+				// event with providerMetadata.anthropic.signature
+				const partAny = part as any
+				if (partAny.providerMetadata?.anthropic?.signature) {
+					this.lastThoughtSignature = partAny.providerMetadata.anthropic.signature
 				}
-				break
-			}
-			default: {
-				try {
-					stream = (await this.client.messages.create({
-						model: modelId,
-						max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
-						temperature,
-						system: [{ text: systemPrompt, type: "text" }],
-						messages: sanitizedMessages,
-						stream: true,
-						...nativeToolParams,
-					})) as any
-				} catch (error) {
-					TelemetryService.instance.captureException(
-						new ApiProviderError(
-							error instanceof Error ? error.message : String(error),
-							this.providerName,
-							modelId,
-							"createMessage",
-						),
-					)
-					throw error
+
+				// Capture redacted thinking blocks from stream events
+				if (partAny.providerMetadata?.anthropic?.redactedData) {
+					this.lastRedactedThinkingBlocks.push({
+						type: "redacted_thinking",
+						data: partAny.providerMetadata.anthropic.redactedData,
+					})
 				}
-				break
+
+				for (const chunk of processAiSdkStreamPart(part)) {
+					yield chunk
+				}
+			}
+
+			// Yield usage metrics at the end, including cache metrics from providerMetadata
+			const usage = await result.usage
+			const providerMetadata = await result.providerMetadata
+			if (usage) {
+				yield this.processUsageMetrics(usage, modelConfig.info, providerMetadata)
 			}
+		} catch (error) {
+			const errorMessage = error instanceof Error ? error.message : String(error)
+			TelemetryService.instance.captureException(
+				new ApiProviderError(errorMessage, this.providerName, modelConfig.id, "createMessage"),
+			)
+			throw handleAiSdkError(error, this.providerName)
 		}
+	}
 
-		let inputTokens = 0
-		let outputTokens = 0
-		let cacheWriteTokens = 0
-		let cacheReadTokens = 0
-
-		for await (const chunk of stream) {
-			switch (chunk.type) {
-				case "message_start": {
-					// Tells us cache reads/writes/input/output.
-					const {
-						input_tokens = 0,
-						output_tokens = 0,
-						cache_creation_input_tokens,
-						cache_read_input_tokens,
-					} = chunk.message.usage
-
-					yield {
-						type: "usage",
-						inputTokens: input_tokens,
-						outputTokens: output_tokens,
-						cacheWriteTokens: cache_creation_input_tokens || undefined,
-						cacheReadTokens: cache_read_input_tokens || undefined,
-					}
+	/**
+	 * Process usage metrics from the AI SDK response, including Anthropic's cache metrics.
+	 */
+	private processUsageMetrics(
+		usage: { inputTokens?: number; outputTokens?: number },
+		info: ModelInfo,
+		providerMetadata?: Record<string, Record<string, unknown>>,
+	): ApiStreamUsageChunk {
+		const inputTokens = usage.inputTokens ?? 0
+		const outputTokens = usage.outputTokens ?? 0
+
+		// Extract cache metrics from Anthropic's providerMetadata
+		const anthropicMeta = providerMetadata?.anthropic as
+			| { cacheCreationInputTokens?: number; cacheReadInputTokens?: number }
+			| undefined
+		const cacheWriteTokens = anthropicMeta?.cacheCreationInputTokens ?? 0
+		const cacheReadTokens = anthropicMeta?.cacheReadInputTokens ?? 0
+
+		const { totalCost } = calculateApiCostAnthropic(
+			info,
+			inputTokens,
+			outputTokens,
+			cacheWriteTokens,
+			cacheReadTokens,
+		)
 
-					inputTokens += input_tokens
-					outputTokens += output_tokens
-					cacheWriteTokens += cache_creation_input_tokens || 0
-					cacheReadTokens += cache_read_input_tokens || 0
+		return {
+			type: "usage",
+			inputTokens,
+			outputTokens,
+			cacheWriteTokens: cacheWriteTokens > 0 ? cacheWriteTokens : undefined,
+			cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined,
+			totalCost,
+		}
+	}
 
-					break
-				}
-				case "message_delta":
-					// Tells us stop_reason, stop_sequence, and output tokens
-					// along the way and at the end of the message.
-					yield {
-						type: "usage",
-						inputTokens: 0,
-						outputTokens: chunk.usage.output_tokens || 0,
+	/**
+	 * Apply cacheControl providerOptions to the correct AI SDK messages by walking
+	 * the original Anthropic messages and converted AI SDK messages in parallel.
+	 *
+	 * convertToAiSdkMessages() can split a single Anthropic user message (containing
+	 * tool_results + text) into 2 AI SDK messages (tool role + user role). This method
+	 * accounts for that split so cache control lands on the right message.
+	 */
+	private applyCacheControlToAiSdkMessages(
+		originalMessages: Anthropic.Messages.MessageParam[],
+		aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
+		targetOriginalIndices: Set<number>,
+		cacheProviderOption: Record<string, Record<string, unknown>>,
+	): void {
+		let aiSdkIdx = 0
+		for (let origIdx = 0; origIdx < originalMessages.length; origIdx++) {
+			const origMsg = originalMessages[origIdx]
+
+			if (typeof origMsg.content === "string") {
+				if (targetOriginalIndices.has(origIdx) && aiSdkIdx < aiSdkMessages.length) {
+					aiSdkMessages[aiSdkIdx].providerOptions = {
+						...aiSdkMessages[aiSdkIdx].providerOptions,
+						...cacheProviderOption,
 					}
+				}
+				aiSdkIdx++
+			} else if (origMsg.role === "user") {
+				const hasToolResults = origMsg.content.some((part) => (part as { type: string }).type === "tool_result")
+				const hasNonToolContent = origMsg.content.some(
+					(part) => (part as { type: string }).type === "text" || (part as { type: string }).type === "image",
+				)
 
-					break
-				case "message_stop":
-					// No usage data, just an indicator that the message is done.
-					break
-				case "content_block_start":
-					switch (chunk.content_block.type) {
-						case "thinking":
-							// We may receive multiple text blocks, in which
-							// case just insert a line break between them.
-							if (chunk.index > 0) {
-								yield { type: "reasoning", text: "\n" }
-							}
-
-							yield { type: "reasoning", text: chunk.content_block.thinking }
-							break
-						case "text":
-							// We may receive multiple text blocks, in which
-							// case just insert a line break between them.
-							if (chunk.index > 0) {
-								yield { type: "text", text: "\n" }
-							}
-
-							yield { type: "text", text: chunk.content_block.text }
-							break
-						case "tool_use": {
-							// Emit initial tool call partial with id and name
-							yield {
-								type: "tool_call_partial",
-								index: chunk.index,
-								id: chunk.content_block.id,
-								name: chunk.content_block.name,
-								arguments: undefined,
-							}
-							break
+				if (hasToolResults && hasNonToolContent) {
+					const userMsgIdx = aiSdkIdx + 1
+					if (targetOriginalIndices.has(origIdx) && userMsgIdx < aiSdkMessages.length) {
+						aiSdkMessages[userMsgIdx].providerOptions = {
+							...aiSdkMessages[userMsgIdx].providerOptions,
+							...cacheProviderOption,
 						}
 					}
-					break
-				case "content_block_delta":
-					switch (chunk.delta.type) {
-						case "thinking_delta":
-							yield { type: "reasoning", text: chunk.delta.thinking }
-							break
-						case "text_delta":
-							yield { type: "text", text: chunk.delta.text }
-							break
-						case "input_json_delta": {
-							// Emit tool call partial chunks as arguments stream in
-							yield {
-								type: "tool_call_partial",
-								index: chunk.index,
-								id: undefined,
-								name: undefined,
-								arguments: chunk.delta.partial_json,
-							}
-							break
+					aiSdkIdx += 2
+				} else if (hasToolResults) {
+					if (targetOriginalIndices.has(origIdx) && aiSdkIdx < aiSdkMessages.length) {
+						aiSdkMessages[aiSdkIdx].providerOptions = {
+							...aiSdkMessages[aiSdkIdx].providerOptions,
+							...cacheProviderOption,
 						}
 					}
-
-					break
-				case "content_block_stop":
-					// Block complete - no action needed for now.
-					// NativeToolCallParser handles tool call completion
-					// Note: Signature for multi-turn thinking would require using stream.finalMessage()
-					// after iteration completes, which requires restructuring the streaming approach.
-					break
-			}
-		}
-
-		if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) {
-			const { totalCost } = calculateApiCostAnthropic(
-				this.getModel().info,
-				inputTokens,
-				outputTokens,
-				cacheWriteTokens,
-				cacheReadTokens,
-			)
-
-			yield {
-				type: "usage",
-				inputTokens: 0,
-				outputTokens: 0,
-				totalCost,
+					aiSdkIdx++
+				} else {
+					if (targetOriginalIndices.has(origIdx) && aiSdkIdx < aiSdkMessages.length) {
+						aiSdkMessages[aiSdkIdx].providerOptions = {
+							...aiSdkMessages[aiSdkIdx].providerOptions,
+							...cacheProviderOption,
+						}
+					}
+					aiSdkIdx++
+				}
+			} else {
+				aiSdkIdx++
 			}
 		}
 	}
@@ -339,7 +298,6 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 			(id === "claude-sonnet-4-20250514" || id === "claude-sonnet-4-5" || id === "claude-opus-4-6") &&
 			this.options.anthropicBeta1MContext
 		) {
-			// Use the tier pricing for 1M context
 			const tier = info.tiers?.[0]
 			if (tier) {
 				info = {
@@ -368,37 +326,53 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 		return {
 			id: id === "claude-3-7-sonnet-20250219:thinking" ? "claude-3-7-sonnet-20250219" : id,
 			info,
-			betas: id === "claude-3-7-sonnet-20250219:thinking" ? ["output-128k-2025-02-19"] : undefined,
 			...params,
 		}
 	}
 
-	async completePrompt(prompt: string) {
-		let { id: model, temperature } = this.getModel()
+	async completePrompt(prompt: string): Promise<string> {
+		const { id, temperature } = this.getModel()
 
-		let message
 		try {
-			message = await this.client.messages.create({
-				model,
-				max_tokens: ANTHROPIC_DEFAULT_MAX_TOKENS,
-				thinking: undefined,
+			const { text } = await generateText({
+				model: this.provider(id),
+				prompt,
+				maxOutputTokens: ANTHROPIC_DEFAULT_MAX_TOKENS,
 				temperature,
-				messages: [{ role: "user", content: prompt }],
-				stream: false,
 			})
+
+			return text
 		} catch (error) {
 			TelemetryService.instance.captureException(
 				new ApiProviderError(
 					error instanceof Error ? error.message : String(error),
 					this.providerName,
-					model,
+					id,
 					"completePrompt",
 				),
 			)
-			throw error
+			throw handleAiSdkError(error, this.providerName)
 		}
+	}
+
+	/**
+	 * Returns the thinking signature captured from the last Anthropic response.
+	 * Claude models with extended thinking return a cryptographic signature
+	 * which must be round-tripped back for multi-turn conversations with tool use.
+	 */
+	getThoughtSignature(): string | undefined {
+		return this.lastThoughtSignature
+	}
+
+	/**
+	 * Returns any redacted thinking blocks captured from the last Anthropic response.
+	 * Anthropic returns these when safety filters trigger on reasoning content.
+	 */
+	getRedactedThinkingBlocks(): Array<{ type: "redacted_thinking"; data: string }> | undefined {
+		return this.lastRedactedThinkingBlocks.length > 0 ? this.lastRedactedThinkingBlocks : undefined
+	}
 
-		const content = message.content.find(({ type }) => type === "text")
-		return content?.type === "text" ? content.text : ""
+	override isAiSdkProvider(): boolean {
+		return true
 	}
 }
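
A sketch (hypothetical message data) of the split that `applyCacheControlToAiSdkMessages()` compensates for, per its doc comment: `convertToAiSdkMessages()` can turn one mixed Anthropic user message into a tool-role message plus a user-role message, so the walker advances `aiSdkIdx` by 2 and pins the ephemeral `cacheControl` on the user-role half:

```typescript
// One Anthropic user message mixing a tool_result with trailing text…
const original = {
	role: "user",
	content: [
		{ type: "tool_result", tool_use_id: "toolu_123", content: "42" },
		{ type: "text", text: "Now continue." },
	],
}
// …is assumed to convert into two AI SDK messages:
//   [{ role: "tool", content: [...] }, { role: "user", content: [...] }]
// so cache control must land on index aiSdkIdx + 1 (the user-role half),
// not on the tool-role message at aiSdkIdx.
```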

+ 1 - 0
src/package.json

@@ -451,6 +451,7 @@
 	},
 	"dependencies": {
 		"@ai-sdk/amazon-bedrock": "^4.0.51",
+		"@ai-sdk/anthropic": "^3.0.38",
 		"@ai-sdk/baseten": "^1.0.31",
 		"@ai-sdk/deepseek": "^2.0.18",
 		"@ai-sdk/fireworks": "^2.0.32",