import type { Anthropic } from "@anthropic-ai/sdk"
import { createVertexAnthropic } from "@ai-sdk/google-vertex/anthropic"
import { streamText, generateText, ToolSet } from "ai"

import {
	type ModelInfo,
	type VertexModelId,
	vertexDefaultModelId,
	vertexModels,
	ANTHROPIC_DEFAULT_MAX_TOKENS,
	VERTEX_1M_CONTEXT_MODEL_IDS,
	ApiProviderError,
} from "@roo-code/types"
import { TelemetryService } from "@roo-code/telemetry"

import type { ApiHandlerOptions } from "../../shared/api"
import { shouldUseReasoningBudget } from "../../shared/api"
import type { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
import { getModelParams } from "../transform/model-params"
import {
	convertToAiSdkMessages,
	convertToolsForAiSdk,
	processAiSdkStreamPart,
	mapToolChoice,
	handleAiSdkError,
} from "../transform/ai-sdk"
import { calculateApiCostAnthropic } from "../../shared/cost"

import { DEFAULT_HEADERS } from "./constants"
import { BaseProvider } from "./base-provider"
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"

// https://docs.anthropic.com/en/api/claude-on-vertex-ai
export class AnthropicVertexHandler extends BaseProvider implements SingleCompletionHandler {
	protected options: ApiHandlerOptions
	private provider: ReturnType<typeof createVertexAnthropic>
	private readonly providerName = "Vertex (Anthropic)"
	private lastThoughtSignature: string | undefined
	private lastRedactedThinkingBlocks: Array<{ type: "redacted_thinking"; data: string }> = []

	constructor(options: ApiHandlerOptions) {
		super()
		this.options = options

		// https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude#regions
		const projectId = this.options.vertexProjectId ?? "not-provided"
		const region = this.options.vertexRegion ?? "us-east5"

		// Build googleAuthOptions based on the provided credentials
		let googleAuthOptions: { credentials?: object; keyFile?: string } | undefined
		if (options.vertexJsonCredentials) {
			try {
				googleAuthOptions = { credentials: JSON.parse(options.vertexJsonCredentials) }
			} catch {
				// If JSON parsing fails, leave googleAuthOptions undefined; the
				// key file is not consulted in this branch, so the SDK falls back
				// to its default credential resolution.
			}
		} else if (options.vertexKeyFile) {
			googleAuthOptions = { keyFile: options.vertexKeyFile }
		}
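		// Illustrative shape of vertexJsonCredentials (a service-account key):
		//   { "type": "service_account", "project_id": "...", "private_key": "...", "client_email": "..." }
		// When neither credentials nor a key file is provided, the underlying
		// google-auth-library resolves Application Default Credentials instead.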

		// Build beta headers for 1M-context support
		const modelId = options.apiModelId
		const betas: string[] = []

		if (modelId) {
			const supports1MContext = VERTEX_1M_CONTEXT_MODEL_IDS.includes(
				modelId as (typeof VERTEX_1M_CONTEXT_MODEL_IDS)[number],
			)

			if (supports1MContext && options.vertex1MContext) {
				betas.push("context-1m-2025-08-07")
			}
		}
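		// When enabled, the resulting request header is:
		//   anthropic-beta: context-1m-2025-08-07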

		this.provider = createVertexAnthropic({
			project: projectId,
			location: region,
			googleAuthOptions,
			headers: {
				...DEFAULT_HEADERS,
				...(betas.length > 0 ? { "anthropic-beta": betas.join(",") } : {}),
			},
		})
	}

	override async *createMessage(
		systemPrompt: string,
		messages: Anthropic.Messages.MessageParam[],
		metadata?: ApiHandlerCreateMessageMetadata,
	): ApiStream {
		const modelConfig = this.getModel()

		// Reset thinking state for this request
		this.lastThoughtSignature = undefined
		this.lastRedactedThinkingBlocks = []

		// Convert messages to AI SDK format
		const aiSdkMessages = convertToAiSdkMessages(messages)

		// Convert tools to AI SDK format
		const openAiTools = this.convertToolsForOpenAI(metadata?.tools)
		const aiSdkTools = convertToolsForAiSdk(openAiTools) as ToolSet | undefined
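		// Note: the two-step conversion above is intentional. Tools from
		// metadata are first normalized to OpenAI-style function definitions,
		// then mapped into the AI SDK's ToolSet shape.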

		// Build Anthropic provider options
		const anthropicProviderOptions: Record<string, unknown> = {}

		// Configure thinking/reasoning if the model supports it
		const isThinkingEnabled =
			shouldUseReasoningBudget({ model: modelConfig.info, settings: this.options }) &&
			modelConfig.reasoning &&
			modelConfig.reasoningBudget

		if (isThinkingEnabled) {
			anthropicProviderOptions.thinking = {
				type: "enabled",
				budgetTokens: modelConfig.reasoningBudget,
			}
		}

		// Forward the parallelToolCalls setting: when it is explicitly false,
		// disable parallel tool use.
		if (metadata?.parallelToolCalls === false) {
			anthropicProviderOptions.disableParallelToolUse = true
		}

		/**
		 * The Vertex API has specific limitations for prompt caching:
		 * 1. A maximum of 4 blocks can have cache_control
		 * 2. Only text blocks can be cached (images and other content types cannot)
		 * 3. Cache control can only be applied to user messages, not assistant messages
		 *
		 * Our caching strategy:
		 * - Cache the system prompt (1 block)
		 * - Cache the last text block of the second-to-last user message (1 block)
		 * - Cache the last text block of the last user message (1 block)
		 *
		 * This ensures we stay under the 4-block limit while maintaining effective
		 * caching for the most relevant context.
		 */
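		// With this strategy, at most 3 of the 4 permitted cache_control blocks
		// are used: the system prompt plus the two most recent user messages.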
		const cacheProviderOption = { anthropic: { cacheControl: { type: "ephemeral" as const } } }

		const userMsgIndices = messages.reduce(
			(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
			[] as number[],
		)

		const targetIndices = new Set<number>()
		const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
		const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1
		if (lastUserMsgIndex >= 0) targetIndices.add(lastUserMsgIndex)
		if (secondLastUserMsgIndex >= 0) targetIndices.add(secondLastUserMsgIndex)

		if (targetIndices.size > 0) {
			this.applyCacheControlToAiSdkMessages(messages, aiSdkMessages, targetIndices, cacheProviderOption)
		}

		// Build the streamText request. providerOptions is cast to `any` to bypass
		// strict JSONObject typing; the AI SDK accepts the correct runtime values.
		const requestOptions: Parameters<typeof streamText>[0] = {
			model: this.provider(modelConfig.id),
			system: systemPrompt,
			...({
				systemProviderOptions: { anthropic: { cacheControl: { type: "ephemeral" } } },
			} as Record<string, unknown>),
			messages: aiSdkMessages,
			temperature: modelConfig.temperature,
			maxOutputTokens: modelConfig.maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
			tools: aiSdkTools,
			toolChoice: mapToolChoice(metadata?.tool_choice),
			...(Object.keys(anthropicProviderOptions).length > 0 && {
				providerOptions: { anthropic: anthropicProviderOptions } as any,
			}),
		}

		try {
			const result = streamText(requestOptions)
			let lastStreamError: string | undefined

			for await (const part of result.fullStream) {
				// Capture the thinking signature from stream events. The AI SDK's
				// @ai-sdk/anthropic emits the signature as a reasoning-delta event
				// with providerMetadata.anthropic.signature.
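				// Illustrative (abbreviated) shape of such a part, based on the
				// access below:
				//   { type: "reasoning-delta", providerMetadata: { anthropic: { signature: "<opaque string>" } } }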
				const partAny = part as any

				if (partAny.providerMetadata?.anthropic?.signature) {
					this.lastThoughtSignature = partAny.providerMetadata.anthropic.signature
				}

				// Capture redacted thinking blocks from stream events
				if (partAny.providerMetadata?.anthropic?.redactedData) {
					this.lastRedactedThinkingBlocks.push({
						type: "redacted_thinking",
						data: partAny.providerMetadata.anthropic.redactedData,
					})
				}

				for (const chunk of processAiSdkStreamPart(part)) {
					if (chunk.type === "error") {
						lastStreamError = chunk.message
					}

					yield chunk
				}
			}

			// Yield usage metrics at the end, including cache metrics from providerMetadata
			try {
				const usage = await result.usage
				const providerMetadata = await result.providerMetadata

				if (usage) {
					yield this.processUsageMetrics(usage, modelConfig.info, providerMetadata)
				}
			} catch (usageError) {
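				// Prefer the error already captured from the stream: when the
				// stream aborts mid-flight, result.usage rejects, but the stream
				// error describes the original failure.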
				if (lastStreamError) {
					throw new Error(lastStreamError)
				}

				throw usageError
			}
		} catch (error) {
			const errorMessage = error instanceof Error ? error.message : String(error)

			TelemetryService.instance.captureException(
				new ApiProviderError(errorMessage, this.providerName, modelConfig.id, "createMessage"),
			)

			throw handleAiSdkError(error, this.providerName)
		}
	}

	/**
	 * Process usage metrics from the AI SDK response, including Anthropic's cache metrics.
	 */
	private processUsageMetrics(
		usage: { inputTokens?: number; outputTokens?: number },
		info: ModelInfo,
		providerMetadata?: Record<string, Record<string, unknown>>,
	): ApiStreamUsageChunk {
		const inputTokens = usage.inputTokens ?? 0
		const outputTokens = usage.outputTokens ?? 0

		// Extract cache metrics from Anthropic's providerMetadata
		const anthropicMeta = providerMetadata?.anthropic as
			| { cacheCreationInputTokens?: number; cacheReadInputTokens?: number }
			| undefined

		const cacheWriteTokens = anthropicMeta?.cacheCreationInputTokens ?? 0
		const cacheReadTokens = anthropicMeta?.cacheReadInputTokens ?? 0

		const { totalCost } = calculateApiCostAnthropic(
			info,
			inputTokens,
			outputTokens,
			cacheWriteTokens,
			cacheReadTokens,
		)

		return {
			type: "usage",
			inputTokens,
			outputTokens,
			cacheWriteTokens: cacheWriteTokens > 0 ? cacheWriteTokens : undefined,
			cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined,
			totalCost,
		}
	}

	/**
	 * Apply cacheControl providerOptions to the correct AI SDK messages by walking
	 * the original Anthropic messages and the converted AI SDK messages in parallel.
	 *
	 * convertToAiSdkMessages() can split a single Anthropic user message (containing
	 * tool_results + text) into two AI SDK messages (tool role + user role). This method
	 * accounts for that split so cache control lands on the right message.
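	 *
	 * For example (hypothetical history): the Anthropic messages
	 *   [user(text), assistant(tool_use), user(tool_result + text)]
	 * convert to four AI SDK messages
	 *   [user, assistant, tool, user]
	 * so original index 2 maps to AI SDK index 3, not 2.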
	 */
	private applyCacheControlToAiSdkMessages(
		originalMessages: Anthropic.Messages.MessageParam[],
		aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
		targetOriginalIndices: Set<number>,
		cacheProviderOption: Record<string, Record<string, unknown>>,
	): void {
		let aiSdkIdx = 0

		for (let origIdx = 0; origIdx < originalMessages.length; origIdx++) {
			const origMsg = originalMessages[origIdx]

			if (typeof origMsg.content === "string") {
				// A plain string message maps 1:1 to one AI SDK message
				if (targetOriginalIndices.has(origIdx) && aiSdkIdx < aiSdkMessages.length) {
					aiSdkMessages[aiSdkIdx].providerOptions = {
						...aiSdkMessages[aiSdkIdx].providerOptions,
						...cacheProviderOption,
					}
				}
				aiSdkIdx++
			} else if (origMsg.role === "user") {
				const hasToolResults = origMsg.content.some((part) => (part as { type: string }).type === "tool_result")
				const hasNonToolContent = origMsg.content.some(
					(part) => (part as { type: string }).type === "text" || (part as { type: string }).type === "image",
				)

				if (hasToolResults && hasNonToolContent) {
					// This message was split into a tool message followed by a user
					// message; cache control belongs on the trailing user message.
					const userMsgIdx = aiSdkIdx + 1
					if (targetOriginalIndices.has(origIdx) && userMsgIdx < aiSdkMessages.length) {
						aiSdkMessages[userMsgIdx].providerOptions = {
							...aiSdkMessages[userMsgIdx].providerOptions,
							...cacheProviderOption,
						}
					}
					aiSdkIdx += 2
				} else {
					// Tool-only or regular user content maps to a single AI SDK message
					if (targetOriginalIndices.has(origIdx) && aiSdkIdx < aiSdkMessages.length) {
						aiSdkMessages[aiSdkIdx].providerOptions = {
							...aiSdkMessages[aiSdkIdx].providerOptions,
							...cacheProviderOption,
						}
					}
					aiSdkIdx++
				}
			} else {
				// Assistant messages are never cached (see the Vertex restriction above)
				aiSdkIdx++
			}
		}
	}
	getModel() {
		const modelId = this.options.apiModelId
		const id = modelId && modelId in vertexModels ? (modelId as VertexModelId) : vertexDefaultModelId
		let info: ModelInfo = vertexModels[id]

		// Check whether the 1M-context beta should be enabled for supported models
		const supports1MContext = VERTEX_1M_CONTEXT_MODEL_IDS.includes(
			id as (typeof VERTEX_1M_CONTEXT_MODEL_IDS)[number],
		)
		const enable1MContext = supports1MContext && this.options.vertex1MContext

		// If the 1M-context beta is enabled, update the model info with tier pricing
		if (enable1MContext) {
			const tier = info.tiers?.[0]
			if (tier) {
				info = {
					...info,
					contextWindow: tier.contextWindow,
					inputPrice: tier.inputPrice,
					outputPrice: tier.outputPrice,
					cacheWritesPrice: tier.cacheWritesPrice,
					cacheReadsPrice: tier.cacheReadsPrice,
				}
			}
		}

		const params = getModelParams({
			format: "anthropic",
			modelId: id,
			model: info,
			settings: this.options,
			defaultTemperature: 0,
		})

		// Build the betas array for request headers (kept for backward compatibility / testing)
		const betas: string[] = []
		if (enable1MContext) {
			betas.push("context-1m-2025-08-07")
		}

		// The `:thinking` suffix indicates a "hybrid" reasoning model for which
		// reasoning must be enabled. The actual model ID honored by Anthropic's
		// API does not carry the suffix.
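		// e.g. (illustrative) "claude-3-7-sonnet@20250219:thinking" -> "claude-3-7-sonnet@20250219"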
		return {
			id: id.endsWith(":thinking") ? id.replace(":thinking", "") : id,
			info,
			betas: betas.length > 0 ? betas : undefined,
			...params,
		}
	}
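
	/**
	 * Runs a one-shot, non-streaming completion for the given prompt.
	 * (Assumption: callers use this for lightweight auxiliary requests
	 * rather than multi-turn conversations.)
	 */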
	async completePrompt(prompt: string): Promise<string> {
		const { id, temperature } = this.getModel()

		try {
			const { text } = await generateText({
				model: this.provider(id),
				prompt,
				maxOutputTokens: ANTHROPIC_DEFAULT_MAX_TOKENS,
				temperature,
			})

			return text
		} catch (error) {
			TelemetryService.instance.captureException(
				new ApiProviderError(
					error instanceof Error ? error.message : String(error),
					this.providerName,
					id,
					"completePrompt",
				),
			)

			throw handleAiSdkError(error, this.providerName)
		}
	}

	/**
	 * Returns the thinking signature captured from the last Anthropic response.
	 * Claude models with extended thinking return a cryptographic signature
	 * which must be round-tripped back for multi-turn conversations with tool use.
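	 * (Per Anthropic's extended-thinking docs, the signature must be returned
	 * unchanged with the assistant's thinking block on the next turn.)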
	 */
	getThoughtSignature(): string | undefined {
		return this.lastThoughtSignature
	}

	/**
	 * Returns any redacted thinking blocks captured from the last Anthropic response.
	 * Anthropic returns these when safety filters trigger on reasoning content.
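	 * (These should be passed back verbatim on subsequent turns; the payload
	 * is encrypted and not client-readable.)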
	 */
	getRedactedThinkingBlocks(): Array<{ type: "redacted_thinking"; data: string }> | undefined {
		return this.lastRedactedThinkingBlocks.length > 0 ? this.lastRedactedThinkingBlocks : undefined
	}

	override isAiSdkProvider(): boolean {
		return true
	}
}