@@ -1,6 +1,6 @@
-import { Anthropic } from "@anthropic-ai/sdk"
-import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"
-import { GoogleAuth, JWTInput } from "google-auth-library"
+import type { Anthropic } from "@anthropic-ai/sdk"
+import { createVertexAnthropic } from "@ai-sdk/google-vertex/anthropic"
+import { streamText, generateText, ToolSet } from "ai"
 
 import {
 	type ModelInfo,
@@ -9,58 +9,78 @@ import {
 	vertexModels,
 	ANTHROPIC_DEFAULT_MAX_TOKENS,
 	VERTEX_1M_CONTEXT_MODEL_IDS,
+	ApiProviderError,
 } from "@roo-code/types"
-import { safeJsonParse } from "@roo-code/core"
+import { TelemetryService } from "@roo-code/telemetry"
 
-import { ApiHandlerOptions } from "../../shared/api"
+import type { ApiHandlerOptions } from "../../shared/api"
+import { shouldUseReasoningBudget } from "../../shared/api"
 
-import { ApiStream } from "../transform/stream"
-import { addCacheBreakpoints } from "../transform/caching/vertex"
+import type { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { getModelParams } from "../transform/model-params"
-import { filterNonAnthropicBlocks } from "../transform/anthropic-filter"
 import {
-	convertOpenAIToolsToAnthropic,
-	convertOpenAIToolChoiceToAnthropic,
-} from "../../core/prompts/tools/native-tools/converters"
-
+	convertToAiSdkMessages,
+	convertToolsForAiSdk,
+	processAiSdkStreamPart,
+	mapToolChoice,
+	handleAiSdkError,
+} from "../transform/ai-sdk"
+import { calculateApiCostAnthropic } from "../../shared/cost"
+
+import { DEFAULT_HEADERS } from "./constants"
 import { BaseProvider } from "./base-provider"
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 
 // https://docs.anthropic.com/en/api/claude-on-vertex-ai
 export class AnthropicVertexHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
-	private client: AnthropicVertex
+	private provider: ReturnType<typeof createVertexAnthropic>
+	private readonly providerName = "Vertex (Anthropic)"
+	private lastThoughtSignature: string | undefined
+	private lastRedactedThinkingBlocks: Array<{ type: "redacted_thinking"; data: string }> = []
 
 	constructor(options: ApiHandlerOptions) {
 		super()
-
 		this.options = options
 
 		// https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude#regions
 		const projectId = this.options.vertexProjectId ?? "not-provided"
 		const region = this.options.vertexRegion ?? "us-east5"
 
-		if (this.options.vertexJsonCredentials) {
-			this.client = new AnthropicVertex({
-				projectId,
-				region,
-				googleAuth: new GoogleAuth({
-					scopes: ["https://www.googleapis.com/auth/cloud-platform"],
-					credentials: safeJsonParse<JWTInput>(this.options.vertexJsonCredentials, undefined),
-				}),
-			})
-		} else if (this.options.vertexKeyFile) {
-			this.client = new AnthropicVertex({
-				projectId,
-				region,
-				googleAuth: new GoogleAuth({
-					scopes: ["https://www.googleapis.com/auth/cloud-platform"],
-					keyFile: this.options.vertexKeyFile,
-				}),
-			})
-		} else {
-			this.client = new AnthropicVertex({ projectId, region })
+		// Build googleAuthOptions based on provided credentials
+		let googleAuthOptions: { credentials?: object; keyFile?: string } | undefined
+		if (options.vertexJsonCredentials) {
+			try {
+				googleAuthOptions = { credentials: JSON.parse(options.vertexJsonCredentials) }
+			} catch {
+				// If JSON parsing fails, ignore and try other auth methods
+			}
+		} else if (options.vertexKeyFile) {
+			googleAuthOptions = { keyFile: options.vertexKeyFile }
+		}
+
+		// Build beta headers for 1M context support
+		const modelId = options.apiModelId
+		const betas: string[] = []
+
+		if (modelId) {
+			const supports1MContext = VERTEX_1M_CONTEXT_MODEL_IDS.includes(
+				modelId as (typeof VERTEX_1M_CONTEXT_MODEL_IDS)[number],
+			)
+			if (supports1MContext && options.vertex1MContext) {
+				betas.push("context-1m-2025-08-07")
+			}
 		}
+
+		this.provider = createVertexAnthropic({
+			project: projectId,
+			location: region,
+			googleAuthOptions,
+			headers: {
+				...DEFAULT_HEADERS,
+				...(betas.length > 0 ? { "anthropic-beta": betas.join(",") } : {}),
+			},
+		})
 	}
 
 	override async *createMessage(
@@ -68,16 +88,39 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
-		let { id, info, temperature, maxTokens, reasoning: thinking, betas } = this.getModel()
+		const modelConfig = this.getModel()
+
+		// Reset thinking state for this request
+		this.lastThoughtSignature = undefined
+		this.lastRedactedThinkingBlocks = []
+
+		// Convert messages to AI SDK format
+		const aiSdkMessages = convertToAiSdkMessages(messages)
+
+		// Convert tools to AI SDK format
+		const openAiTools = this.convertToolsForOpenAI(metadata?.tools)
+		const aiSdkTools = convertToolsForAiSdk(openAiTools) as ToolSet | undefined
+
+		// Build Anthropic provider options
+		const anthropicProviderOptions: Record<string, unknown> = {}
 
-		const { supportsPromptCache } = info
+		// Configure thinking/reasoning if the model supports it
+		const isThinkingEnabled =
+			shouldUseReasoningBudget({ model: modelConfig.info, settings: this.options }) &&
+			modelConfig.reasoning &&
+			modelConfig.reasoningBudget
 
-		// Filter out non-Anthropic blocks (reasoning, thoughtSignature, etc.) before sending to the API
-		const sanitizedMessages = filterNonAnthropicBlocks(messages)
+		if (isThinkingEnabled) {
+			anthropicProviderOptions.thinking = {
+				type: "enabled",
+				budgetTokens: modelConfig.reasoningBudget,
+			}
+		}
 
-		const nativeToolParams = {
-			tools: convertOpenAIToolsToAnthropic(metadata?.tools ?? []),
-			tool_choice: convertOpenAIToolChoiceToAnthropic(metadata?.tool_choice, metadata?.parallelToolCalls),
+		// Forward parallelToolCalls setting
+		// When parallelToolCalls is explicitly false, disable parallel tool use
+		if (metadata?.parallelToolCalls === false) {
+			anthropicProviderOptions.disableParallelToolUse = true
 		}
 
 		/**
@@ -93,114 +136,178 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
 		 * This ensures we stay under the 4-block limit while maintaining effective caching
 		 * for the most relevant context.
 		 */
-		const params: Anthropic.Messages.MessageCreateParamsStreaming = {
-			model: id,
-			max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
-			temperature,
-			thinking,
-			// Cache the system prompt if caching is enabled.
-			system: supportsPromptCache
-				? [{ text: systemPrompt, type: "text" as const, cache_control: { type: "ephemeral" } }]
-				: systemPrompt,
-			messages: supportsPromptCache ? addCacheBreakpoints(sanitizedMessages) : sanitizedMessages,
-			stream: true,
-			...nativeToolParams,
-		}
+		const cacheProviderOption = { anthropic: { cacheControl: { type: "ephemeral" as const } } }
+
+		const userMsgIndices = messages.reduce(
+			(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
+			[] as number[],
+		)
 
-		// and prompt caching
-		const requestOptions = betas?.length ? { headers: { "anthropic-beta": betas.join(",") } } : undefined
+		const targetIndices = new Set<number>()
+		const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
+		const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1
 
-		const stream = await this.client.messages.create(params, requestOptions)
+		if (lastUserMsgIndex >= 0) targetIndices.add(lastUserMsgIndex)
+		if (secondLastUserMsgIndex >= 0) targetIndices.add(secondLastUserMsgIndex)
 
-		for await (const chunk of stream) {
-			switch (chunk.type) {
-				case "message_start": {
-					const usage = chunk.message!.usage
+		if (targetIndices.size > 0) {
+			this.applyCacheControlToAiSdkMessages(messages, aiSdkMessages, targetIndices, cacheProviderOption)
+		}
 
-					yield {
-						type: "usage",
-						inputTokens: usage.input_tokens || 0,
-						outputTokens: usage.output_tokens || 0,
-						cacheWriteTokens: usage.cache_creation_input_tokens || undefined,
-						cacheReadTokens: usage.cache_read_input_tokens || undefined,
-					}
+		// Build streamText request
+		// Cast providerOptions to any to bypass strict JSONObject typing — the AI SDK accepts the correct runtime values
+		const requestOptions: Parameters<typeof streamText>[0] = {
+			model: this.provider(modelConfig.id),
+			system: systemPrompt,
+			...({
+				systemProviderOptions: { anthropic: { cacheControl: { type: "ephemeral" } } },
+			} as Record<string, unknown>),
+			messages: aiSdkMessages,
+			temperature: modelConfig.temperature,
+			maxOutputTokens: modelConfig.maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
+			tools: aiSdkTools,
+			toolChoice: mapToolChoice(metadata?.tool_choice),
+			...(Object.keys(anthropicProviderOptions).length > 0 && {
+				providerOptions: { anthropic: anthropicProviderOptions } as any,
+			}),
+		}
 
-					break
+		try {
+			const result = streamText(requestOptions)
+
+			for await (const part of result.fullStream) {
+				// Capture thinking signature from stream events
+				// The AI SDK's @ai-sdk/anthropic emits the signature as a reasoning-delta
+				// event with providerMetadata.anthropic.signature
+				const partAny = part as any
+				if (partAny.providerMetadata?.anthropic?.signature) {
+					this.lastThoughtSignature = partAny.providerMetadata.anthropic.signature
 				}
- case "message_delta": {
|
|
|
- yield {
|
|
|
- type: "usage",
|
|
|
- inputTokens: 0,
|
|
|
- outputTokens: chunk.usage!.output_tokens || 0,
|
|
|
- }
|
|
|
|
|
|
- break
|
|
|
+ // Capture redacted thinking blocks from stream events
|
|
|
+ if (partAny.providerMetadata?.anthropic?.redactedData) {
|
|
|
+ this.lastRedactedThinkingBlocks.push({
|
|
|
+ type: "redacted_thinking",
|
|
|
+ data: partAny.providerMetadata.anthropic.redactedData,
|
|
|
+ })
|
|
|
}
|
|
|
- case "content_block_start": {
|
|
|
- switch (chunk.content_block!.type) {
|
|
|
- case "text": {
|
|
|
- if (chunk.index! > 0) {
|
|
|
- yield { type: "text", text: "\n" }
|
|
|
- }
|
|
|
-
|
|
|
- yield { type: "text", text: chunk.content_block!.text }
|
|
|
- break
|
|
|
- }
|
|
|
- case "thinking": {
|
|
|
- if (chunk.index! > 0) {
|
|
|
- yield { type: "reasoning", text: "\n" }
|
|
|
- }
|
|
|
|
|
|
- yield { type: "reasoning", text: (chunk.content_block as any).thinking }
|
|
|
- break
|
|
|
- }
|
|
|
- case "tool_use": {
|
|
|
- // Emit initial tool call partial with id and name
|
|
|
- yield {
|
|
|
- type: "tool_call_partial",
|
|
|
- index: chunk.index,
|
|
|
- id: chunk.content_block!.id,
|
|
|
- name: chunk.content_block!.name,
|
|
|
- arguments: undefined,
|
|
|
- }
|
|
|
- break
|
|
|
- }
|
|
|
- }
|
|
|
+ for (const chunk of processAiSdkStreamPart(part)) {
|
|
|
+ yield chunk
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- break
|
|
|
+ // Yield usage metrics at the end, including cache metrics from providerMetadata
|
|
|
+ const usage = await result.usage
|
|
|
+ const providerMetadata = await result.providerMetadata
|
|
|
+ if (usage) {
|
|
|
+ yield this.processUsageMetrics(usage, modelConfig.info, providerMetadata)
|
|
|
+ }
|
|
|
+ } catch (error) {
|
|
|
+ const errorMessage = error instanceof Error ? error.message : String(error)
|
|
|
+ TelemetryService.instance.captureException(
|
|
|
+ new ApiProviderError(errorMessage, this.providerName, modelConfig.id, "createMessage"),
|
|
|
+ )
|
|
|
+ throw handleAiSdkError(error, this.providerName)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
+	/**
+	 * Process usage metrics from the AI SDK response, including Anthropic's cache metrics.
+	 */
+	private processUsageMetrics(
+		usage: { inputTokens?: number; outputTokens?: number },
+		info: ModelInfo,
+		providerMetadata?: Record<string, Record<string, unknown>>,
+	): ApiStreamUsageChunk {
+		const inputTokens = usage.inputTokens ?? 0
+		const outputTokens = usage.outputTokens ?? 0
+
+		// Extract cache metrics from Anthropic's providerMetadata
+		const anthropicMeta = providerMetadata?.anthropic as
+			| { cacheCreationInputTokens?: number; cacheReadInputTokens?: number }
+			| undefined
+		const cacheWriteTokens = anthropicMeta?.cacheCreationInputTokens ?? 0
+		const cacheReadTokens = anthropicMeta?.cacheReadInputTokens ?? 0
+
+		const { totalCost } = calculateApiCostAnthropic(
+			info,
+			inputTokens,
+			outputTokens,
+			cacheWriteTokens,
+			cacheReadTokens,
+		)
+
+		return {
+			type: "usage",
+			inputTokens,
+			outputTokens,
+			cacheWriteTokens: cacheWriteTokens > 0 ? cacheWriteTokens : undefined,
+			cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined,
+			totalCost,
+		}
+	}
+
+	/**
+	 * Apply cacheControl providerOptions to the correct AI SDK messages by walking
+	 * the original Anthropic messages and converted AI SDK messages in parallel.
+	 *
+	 * convertToAiSdkMessages() can split a single Anthropic user message (containing
+	 * tool_results + text) into 2 AI SDK messages (tool role + user role). This method
+	 * accounts for that split so cache control lands on the right message.
+	 */
+	private applyCacheControlToAiSdkMessages(
+		originalMessages: Anthropic.Messages.MessageParam[],
+		aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
+		targetOriginalIndices: Set<number>,
+		cacheProviderOption: Record<string, Record<string, unknown>>,
+	): void {
+		let aiSdkIdx = 0
+		for (let origIdx = 0; origIdx < originalMessages.length; origIdx++) {
+			const origMsg = originalMessages[origIdx]
+
+			if (typeof origMsg.content === "string") {
+				if (targetOriginalIndices.has(origIdx) && aiSdkIdx < aiSdkMessages.length) {
+					aiSdkMessages[aiSdkIdx].providerOptions = {
+						...aiSdkMessages[aiSdkIdx].providerOptions,
+						...cacheProviderOption,
+					}
 				}
- case "content_block_delta": {
|
|
|
- switch (chunk.delta!.type) {
|
|
|
- case "text_delta": {
|
|
|
- yield { type: "text", text: chunk.delta!.text }
|
|
|
- break
|
|
|
+ aiSdkIdx++
|
|
|
+ } else if (origMsg.role === "user") {
|
|
|
+ const hasToolResults = origMsg.content.some((part) => (part as { type: string }).type === "tool_result")
|
|
|
+ const hasNonToolContent = origMsg.content.some(
|
|
|
+ (part) => (part as { type: string }).type === "text" || (part as { type: string }).type === "image",
|
|
|
+ )
|
|
|
+
|
|
|
+ if (hasToolResults && hasNonToolContent) {
|
|
|
+ const userMsgIdx = aiSdkIdx + 1
|
|
|
+ if (targetOriginalIndices.has(origIdx) && userMsgIdx < aiSdkMessages.length) {
|
|
|
+ aiSdkMessages[userMsgIdx].providerOptions = {
|
|
|
+ ...aiSdkMessages[userMsgIdx].providerOptions,
|
|
|
+ ...cacheProviderOption,
|
|
|
}
|
|
|
- case "thinking_delta": {
|
|
|
- yield { type: "reasoning", text: (chunk.delta as any).thinking }
|
|
|
- break
|
|
|
+ }
|
|
|
+ aiSdkIdx += 2
|
|
|
+ } else if (hasToolResults) {
|
|
|
+ if (targetOriginalIndices.has(origIdx) && aiSdkIdx < aiSdkMessages.length) {
|
|
|
+ aiSdkMessages[aiSdkIdx].providerOptions = {
|
|
|
+ ...aiSdkMessages[aiSdkIdx].providerOptions,
|
|
|
+ ...cacheProviderOption,
|
|
|
}
|
|
|
- case "input_json_delta": {
|
|
|
- // Emit tool call partial chunks as arguments stream in
|
|
|
- yield {
|
|
|
- type: "tool_call_partial",
|
|
|
- index: chunk.index,
|
|
|
- id: undefined,
|
|
|
- name: undefined,
|
|
|
- arguments: (chunk.delta as any).partial_json,
|
|
|
- }
|
|
|
- break
|
|
|
+ }
|
|
|
+ aiSdkIdx++
|
|
|
+ } else {
|
|
|
+ if (targetOriginalIndices.has(origIdx) && aiSdkIdx < aiSdkMessages.length) {
|
|
|
+ aiSdkMessages[aiSdkIdx].providerOptions = {
|
|
|
+ ...aiSdkMessages[aiSdkIdx].providerOptions,
|
|
|
+ ...cacheProviderOption,
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
- break
|
|
|
- }
|
|
|
- case "content_block_stop": {
|
|
|
- // Block complete - no action needed for now.
|
|
|
- // NativeToolCallParser handles tool call completion
|
|
|
- // Note: Signature for multi-turn thinking would require using stream.finalMessage()
|
|
|
- // after iteration completes, which requires restructuring the streaming approach.
|
|
|
- break
|
|
|
+ aiSdkIdx++
|
|
|
}
|
|
|
+ } else {
|
|
|
+ aiSdkIdx++
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
@@ -239,10 +346,9 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
 			defaultTemperature: 0,
 		})
 
-		// Build betas array for request headers
+		// Build betas array for request headers (kept for backward compatibility / testing)
 		const betas: string[] = []
 
-		// Add 1M context beta flag if enabled for supported models
 		if (enable1MContext) {
 			betas.push("context-1m-2025-08-07")
 		}
@@ -259,46 +365,49 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
 		}
 	}
 
-	async completePrompt(prompt: string) {
-		try {
-			let {
-				id,
-				info: { supportsPromptCache },
-				temperature,
-				maxTokens = ANTHROPIC_DEFAULT_MAX_TOKENS,
-				reasoning: thinking,
-			} = this.getModel()
+	async completePrompt(prompt: string): Promise<string> {
+		const { id, temperature } = this.getModel()
 
-			const params: Anthropic.Messages.MessageCreateParamsNonStreaming = {
-				model: id,
-				max_tokens: maxTokens,
+		try {
+			const { text } = await generateText({
+				model: this.provider(id),
+				prompt,
+				maxOutputTokens: ANTHROPIC_DEFAULT_MAX_TOKENS,
 				temperature,
-				thinking,
-				messages: [
-					{
-						role: "user",
-						content: supportsPromptCache
-							? [{ type: "text" as const, text: prompt, cache_control: { type: "ephemeral" } }]
-							: prompt,
-					},
-				],
-				stream: false,
-			}
+			})
 
-			const response = await this.client.messages.create(params)
-			const content = response.content[0]
+			return text
+		} catch (error) {
+			TelemetryService.instance.captureException(
+				new ApiProviderError(
+					error instanceof Error ? error.message : String(error),
+					this.providerName,
+					id,
+					"completePrompt",
+				),
+			)
+			throw handleAiSdkError(error, this.providerName)
+		}
+	}
 
-			if (content.type === "text") {
-				return content.text
-			}
+	/**
+	 * Returns the thinking signature captured from the last Anthropic response.
+	 * Claude models with extended thinking return a cryptographic signature
+	 * which must be round-tripped back for multi-turn conversations with tool use.
+	 */
+	getThoughtSignature(): string | undefined {
+		return this.lastThoughtSignature
+	}
 
-			return ""
-		} catch (error) {
-			if (error instanceof Error) {
-				throw new Error(`Vertex completion error: ${error.message}`)
-			}
+	/**
+	 * Returns any redacted thinking blocks captured from the last Anthropic response.
+	 * Anthropic returns these when safety filters trigger on reasoning content.
+	 */
+	getRedactedThinkingBlocks(): Array<{ type: "redacted_thinking"; data: string }> | undefined {
+		return this.lastRedactedThinkingBlocks.length > 0 ? this.lastRedactedThinkingBlocks : undefined
+	}
 
-			throw error
-		}
+	override isAiSdkProvider(): boolean {
+		return true
 	}
 }