| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
2771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632 |
- import {
- BedrockRuntimeClient,
- ConverseStreamCommand,
- ConverseCommand,
- BedrockRuntimeClientConfig,
- ContentBlock,
- Message,
- SystemContentBlock,
- Tool,
- ToolConfiguration,
- ToolChoice,
- } from "@aws-sdk/client-bedrock-runtime"
- import OpenAI from "openai"
- import { fromIni } from "@aws-sdk/credential-providers"
- import { Anthropic } from "@anthropic-ai/sdk"
- import {
- type ModelInfo,
- type ProviderSettings,
- type BedrockModelId,
- type BedrockServiceTier,
- bedrockDefaultModelId,
- bedrockModels,
- bedrockDefaultPromptRouterModelId,
- BEDROCK_DEFAULT_TEMPERATURE,
- BEDROCK_MAX_TOKENS,
- BEDROCK_DEFAULT_CONTEXT,
- AWS_INFERENCE_PROFILE_MAPPING,
- BEDROCK_1M_CONTEXT_MODEL_IDS,
- BEDROCK_GLOBAL_INFERENCE_MODEL_IDS,
- BEDROCK_SERVICE_TIER_MODEL_IDS,
- BEDROCK_SERVICE_TIER_PRICING,
- ApiProviderError,
- } from "@roo-code/types"
- import { TelemetryService } from "@roo-code/telemetry"
- import { ApiStream } from "../transform/stream"
- import { BaseProvider } from "./base-provider"
- import { logger } from "../../utils/logging"
- import { Package } from "../../shared/package"
- import { MultiPointStrategy } from "../transform/cache-strategy/multi-point-strategy"
- import { ModelInfo as CacheModelInfo } from "../transform/cache-strategy/types"
- import { convertToBedrockConverseMessages as sharedConverter } from "../transform/bedrock-converse-format"
- import { getModelParams } from "../transform/model-params"
- import { shouldUseReasoningBudget } from "../../shared/api"
- import { normalizeToolSchema } from "../../utils/json-schema"
- import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
- /************************************************************************************
- *
- * TYPES
- *
- *************************************************************************************/
- // Define interface for Bedrock inference config
- interface BedrockInferenceConfig {
- maxTokens: number
- temperature?: number
- }
- // Define interface for Bedrock additional model request fields
- // This includes thinking configuration, 1M context beta, and other model-specific parameters
- interface BedrockAdditionalModelFields {
- thinking?: {
- type: "enabled"
- budget_tokens: number
- }
- anthropic_beta?: string[]
- [key: string]: any // Add index signature to be compatible with DocumentType
- }
- // Define interface for Bedrock payload
- interface BedrockPayload {
- modelId: BedrockModelId | string
- messages: Message[]
- system?: SystemContentBlock[]
- inferenceConfig: BedrockInferenceConfig
- anthropic_version?: string
- additionalModelRequestFields?: BedrockAdditionalModelFields
- toolConfig?: ToolConfiguration
- }
- // Extended payload type that includes service_tier as a top-level parameter
- // AWS Bedrock service tiers (STANDARD, FLEX, PRIORITY) are specified at the top level
- // https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
- type BedrockPayloadWithServiceTier = BedrockPayload & {
- service_tier?: BedrockServiceTier
- }
- // Define specific types for content block events to avoid 'as any' usage
- // These handle the multiple possible structures returned by AWS SDK
- interface ContentBlockStartEvent {
- start?: {
- text?: string
- thinking?: string
- toolUse?: {
- toolUseId?: string
- name?: string
- }
- }
- contentBlockIndex?: number
- // Alternative structure used by some AWS SDK versions
- content_block?: {
- type?: string
- thinking?: string
- }
- // Official AWS SDK structure for reasoning (as documented)
- contentBlock?: {
- type?: string
- thinking?: string
- reasoningContent?: {
- text?: string
- }
- // Tool use block start
- toolUse?: {
- toolUseId?: string
- name?: string
- }
- }
- }
- interface ContentBlockDeltaEvent {
- delta?: {
- text?: string
- thinking?: string
- type?: string
- // AWS SDK structure for reasoning content deltas
- // Includes text (reasoning), signature (verification token), and redactedContent (safety-filtered)
- reasoningContent?: {
- text?: string
- signature?: string
- redactedContent?: Uint8Array
- }
- // Tool use input delta
- toolUse?: {
- input?: string
- }
- }
- contentBlockIndex?: number
- }
- // Define types for stream events based on AWS SDK
- export interface StreamEvent {
- messageStart?: {
- role?: string
- }
- messageStop?: {
- stopReason?: "end_turn" | "tool_use" | "max_tokens" | "stop_sequence"
- additionalModelResponseFields?: Record<string, unknown>
- }
- contentBlockStart?: ContentBlockStartEvent
- contentBlockDelta?: ContentBlockDeltaEvent
- metadata?: {
- usage?: {
- inputTokens: number
- outputTokens: number
- totalTokens?: number // Made optional since we don't use it
- // New cache-related fields
- cacheReadInputTokens?: number
- cacheWriteInputTokens?: number
- cacheReadInputTokenCount?: number
- cacheWriteInputTokenCount?: number
- }
- metrics?: {
- latencyMs: number
- }
- }
- // New trace field for prompt router
- trace?: {
- promptRouter?: {
- invokedModelId?: string
- usage?: {
- inputTokens: number
- outputTokens: number
- totalTokens?: number // Made optional since we don't use it
- // New cache-related fields
- cacheReadTokens?: number
- cacheWriteTokens?: number
- cacheReadInputTokenCount?: number
- cacheWriteInputTokenCount?: number
- }
- }
- }
- }
- // Type for usage information in stream events
- export type UsageType = {
- inputTokens?: number
- outputTokens?: number
- cacheReadInputTokens?: number
- cacheWriteInputTokens?: number
- cacheReadInputTokenCount?: number
- cacheWriteInputTokenCount?: number
- }
- /************************************************************************************
- *
- * PROVIDER
- *
- *************************************************************************************/
- export class AwsBedrockHandler extends BaseProvider implements SingleCompletionHandler {
- protected options: ProviderSettings
- private client: BedrockRuntimeClient
- private arnInfo: any
- private readonly providerName = "Bedrock"
- private lastThoughtSignature: string | undefined
- private lastRedactedThinkingBlocks: Array<{ type: "redacted_thinking"; data: string }> = []
- constructor(options: ProviderSettings) {
- super()
- this.options = options
- let region = this.options.awsRegion
- // process the various user input options, be opinionated about the intent of the options
- // and determine the model to use during inference and for cost calculations
- // There are variations on ARN strings that can be entered making the conditional logic
- // more involved than the non-ARN branch of logic
- if (this.options.awsCustomArn) {
- this.arnInfo = this.parseArn(this.options.awsCustomArn, region)
- if (!this.arnInfo.isValid) {
- logger.error("Invalid ARN format", {
- ctx: "bedrock",
- errorMessage: this.arnInfo.errorMessage,
- })
- // Throw a consistent error with a prefix that can be detected by callers
- const errorMessage =
- this.arnInfo.errorMessage ||
- "Invalid ARN format. ARN should follow the pattern: arn:aws:bedrock:region:account-id:resource-type/resource-name"
- throw new Error("INVALID_ARN_FORMAT:" + errorMessage)
- }
- if (this.arnInfo.region && this.arnInfo.region !== this.options.awsRegion) {
- // Log if there's a region mismatch between the ARN and the region selected by the user
- // We will use the ARNs region, so execution can continue, but log an info statement.
- // Log a warning if there's a region mismatch between the ARN and the region selected by the user
- // We will use the ARNs region, so execution can continue, but log an info statement.
- logger.info(this.arnInfo.errorMessage, {
- ctx: "bedrock",
- selectedRegion: this.options.awsRegion,
- arnRegion: this.arnInfo.region,
- })
- this.options.awsRegion = this.arnInfo.region
- }
- this.options.apiModelId = this.arnInfo.modelId
- if (this.arnInfo.awsUseCrossRegionInference) this.options.awsUseCrossRegionInference = true
- }
- if (!this.options.modelTemperature) {
- this.options.modelTemperature = BEDROCK_DEFAULT_TEMPERATURE
- }
- this.costModelConfig = this.getModel()
- const clientConfig: BedrockRuntimeClientConfig = {
- userAgentAppId: `RooCode#${Package.version}`,
- region: this.options.awsRegion,
- // Add the endpoint configuration when specified and enabled
- ...(this.options.awsBedrockEndpoint &&
- this.options.awsBedrockEndpointEnabled && { endpoint: this.options.awsBedrockEndpoint }),
- }
- if (this.options.awsUseApiKey && this.options.awsApiKey) {
- // Use API key/token-based authentication if enabled and API key is set
- clientConfig.token = { token: this.options.awsApiKey }
- clientConfig.authSchemePreference = ["httpBearerAuth"] // Otherwise there's no end of credential problems.
- clientConfig.requestHandler = {
- // This should be the default anyway, but without setting something
- // this provider fails to work with LiteLLM passthrough.
- requestTimeout: 0,
- }
- } else if (this.options.awsUseProfile && this.options.awsProfile) {
- // Use profile-based credentials if enabled and profile is set
- clientConfig.credentials = fromIni({
- profile: this.options.awsProfile,
- ignoreCache: true,
- })
- } else if (this.options.awsAccessKey && this.options.awsSecretKey) {
- // Use direct credentials if provided
- clientConfig.credentials = {
- accessKeyId: this.options.awsAccessKey,
- secretAccessKey: this.options.awsSecretKey,
- ...(this.options.awsSessionToken ? { sessionToken: this.options.awsSessionToken } : {}),
- }
- }
- this.client = new BedrockRuntimeClient(clientConfig)
- }
- // Helper to guess model info from custom modelId string if not in bedrockModels
- private guessModelInfoFromId(modelId: string): Partial<ModelInfo> {
- // Define a mapping for model ID patterns and their configurations
- const modelConfigMap: Record<string, Partial<ModelInfo>> = {
- "claude-4": {
- maxTokens: 8192,
- contextWindow: 200_000,
- supportsImages: true,
- supportsPromptCache: true,
- },
- "claude-3-7": {
- maxTokens: 8192,
- contextWindow: 200_000,
- supportsImages: true,
- supportsPromptCache: true,
- },
- "claude-3-5": {
- maxTokens: 8192,
- contextWindow: 200_000,
- supportsImages: true,
- supportsPromptCache: true,
- },
- "claude-4-opus": {
- maxTokens: 4096,
- contextWindow: 200_000,
- supportsImages: true,
- supportsPromptCache: true,
- },
- "claude-3-opus": {
- maxTokens: 4096,
- contextWindow: 200_000,
- supportsImages: true,
- supportsPromptCache: true,
- },
- "claude-3-haiku": {
- maxTokens: 4096,
- contextWindow: 200_000,
- supportsImages: true,
- supportsPromptCache: true,
- },
- }
- // Match the model ID to a configuration
- const id = modelId.toLowerCase()
- for (const [pattern, config] of Object.entries(modelConfigMap)) {
- if (id.includes(pattern)) {
- return config
- }
- }
- // Default fallback
- return {
- maxTokens: BEDROCK_MAX_TOKENS,
- contextWindow: BEDROCK_DEFAULT_CONTEXT,
- supportsImages: false,
- supportsPromptCache: false,
- }
- }
- override async *createMessage(
- systemPrompt: string,
- messages: Anthropic.Messages.MessageParam[],
- metadata?: ApiHandlerCreateMessageMetadata & {
- thinking?: {
- enabled: boolean
- maxTokens?: number
- maxThinkingTokens?: number
- }
- },
- ): ApiStream {
- const modelConfig = this.getModel()
- const usePromptCache = Boolean(this.options.awsUsePromptCache && this.supportsAwsPromptCache(modelConfig))
- const conversationId =
- messages.length > 0
- ? `conv_${messages[0].role}_${
- typeof messages[0].content === "string"
- ? messages[0].content.substring(0, 20)
- : "complex_content"
- }`
- : "default_conversation"
- const formatted = this.convertToBedrockConverseMessages(
- messages,
- systemPrompt,
- usePromptCache,
- modelConfig.info,
- conversationId,
- )
- let additionalModelRequestFields: BedrockAdditionalModelFields | undefined
- let thinkingEnabled = false
- // Determine if thinking should be enabled
- // metadata?.thinking?.enabled: Explicitly enabled through API metadata (direct request)
- // shouldUseReasoningBudget(): Enabled through user settings (enableReasoningEffort = true)
- const isThinkingExplicitlyEnabled = metadata?.thinking?.enabled
- const isThinkingEnabledBySettings =
- shouldUseReasoningBudget({ model: modelConfig.info, settings: this.options }) &&
- modelConfig.reasoning &&
- modelConfig.reasoningBudget
- if ((isThinkingExplicitlyEnabled || isThinkingEnabledBySettings) && modelConfig.info.supportsReasoningBudget) {
- thinkingEnabled = true
- additionalModelRequestFields = {
- thinking: {
- type: "enabled",
- budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096,
- },
- }
- logger.info("Extended thinking enabled for Bedrock request", {
- ctx: "bedrock",
- modelId: modelConfig.id,
- thinking: additionalModelRequestFields.thinking,
- })
- }
- const inferenceConfig: BedrockInferenceConfig = {
- maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
- temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
- }
- // Check if 1M context is enabled for supported Claude 4 models
- // Use parseBaseModelId to handle cross-region inference prefixes
- const baseModelId = this.parseBaseModelId(modelConfig.id)
- const is1MContextEnabled =
- BEDROCK_1M_CONTEXT_MODEL_IDS.includes(baseModelId as any) && this.options.awsBedrock1MContext
- // Determine if service tier should be applied (checked later when building payload)
- const useServiceTier =
- this.options.awsBedrockServiceTier && BEDROCK_SERVICE_TIER_MODEL_IDS.includes(baseModelId as any)
- if (useServiceTier) {
- logger.info("Service tier specified for Bedrock request", {
- ctx: "bedrock",
- modelId: modelConfig.id,
- serviceTier: this.options.awsBedrockServiceTier,
- })
- }
- // Add anthropic_beta headers for various features
- // Start with an empty array and add betas as needed
- const anthropicBetas: string[] = []
- // Add 1M context beta if enabled
- if (is1MContextEnabled) {
- anthropicBetas.push("context-1m-2025-08-07")
- }
- // Add fine-grained tool streaming beta for Claude models
- // This enables proper tool use streaming for Anthropic models on Bedrock
- if (baseModelId.includes("claude")) {
- anthropicBetas.push("fine-grained-tool-streaming-2025-05-14")
- }
- // Apply anthropic_beta to additionalModelRequestFields if any betas are needed
- if (anthropicBetas.length > 0) {
- if (!additionalModelRequestFields) {
- additionalModelRequestFields = {} as BedrockAdditionalModelFields
- }
- additionalModelRequestFields.anthropic_beta = anthropicBetas
- }
- const toolConfig: ToolConfiguration = {
- tools: this.convertToolsForBedrock(metadata?.tools ?? []),
- toolChoice: this.convertToolChoiceForBedrock(metadata?.tool_choice),
- }
- // Build payload with optional service_tier at top level
- // Service tier is a top-level parameter per AWS documentation, NOT inside additionalModelRequestFields
- // https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
- const payload: BedrockPayloadWithServiceTier = {
- modelId: modelConfig.id,
- messages: formatted.messages,
- system: formatted.system,
- inferenceConfig,
- ...(additionalModelRequestFields && { additionalModelRequestFields }),
- // Add anthropic_version at top level when using thinking features
- ...(thinkingEnabled && { anthropic_version: "bedrock-2023-05-31" }),
- toolConfig,
- // Add service_tier as a top-level parameter (not inside additionalModelRequestFields)
- ...(useServiceTier && { service_tier: this.options.awsBedrockServiceTier }),
- }
- // Create AbortController with 10 minute timeout
- const controller = new AbortController()
- let timeoutId: NodeJS.Timeout | undefined
- try {
- timeoutId = setTimeout(
- () => {
- controller.abort()
- },
- 10 * 60 * 1000,
- )
- const command = new ConverseStreamCommand(payload)
- const response = await this.client.send(command, {
- abortSignal: controller.signal,
- })
- if (!response.stream) {
- clearTimeout(timeoutId)
- throw new Error("No stream available in the response")
- }
- // Reset thinking state for this request
- this.lastThoughtSignature = undefined
- this.lastRedactedThinkingBlocks = []
- for await (const chunk of response.stream) {
- // Parse the chunk as JSON if it's a string (for tests)
- let streamEvent: StreamEvent
- try {
- streamEvent = typeof chunk === "string" ? JSON.parse(chunk) : (chunk as unknown as StreamEvent)
- } catch (e) {
- logger.error("Failed to parse stream event", {
- ctx: "bedrock",
- error: e instanceof Error ? e : String(e),
- chunk: typeof chunk === "string" ? chunk : "binary data",
- })
- continue
- }
- // Handle metadata events first
- if (streamEvent.metadata?.usage) {
- const usage = (streamEvent.metadata?.usage || {}) as UsageType
- // Check both field naming conventions for cache tokens
- const cacheReadTokens = usage.cacheReadInputTokens || usage.cacheReadInputTokenCount || 0
- const cacheWriteTokens = usage.cacheWriteInputTokens || usage.cacheWriteInputTokenCount || 0
- // Always include all available token information
- yield {
- type: "usage",
- inputTokens: usage.inputTokens || 0,
- outputTokens: usage.outputTokens || 0,
- cacheReadTokens: cacheReadTokens,
- cacheWriteTokens: cacheWriteTokens,
- }
- continue
- }
- if (streamEvent?.trace?.promptRouter?.invokedModelId) {
- try {
- //update the in-use model info to be based on the invoked Model Id for the router
- //so that pricing, context window, caching etc have values that can be used
- //However, we want to keep the id of the model to be the ID for the router for
- //subsequent requests so they are sent back through the router
- let invokedArnInfo = this.parseArn(streamEvent.trace.promptRouter.invokedModelId)
- let invokedModel = this.getModelById(invokedArnInfo.modelId as string, invokedArnInfo.modelType)
- if (invokedModel) {
- invokedModel.id = modelConfig.id
- this.costModelConfig = invokedModel
- }
- // Handle metadata events for the promptRouter.
- if (streamEvent?.trace?.promptRouter?.usage) {
- const routerUsage = streamEvent.trace.promptRouter.usage
- // Check both field naming conventions for cache tokens
- const cacheReadTokens =
- routerUsage.cacheReadTokens || routerUsage.cacheReadInputTokenCount || 0
- const cacheWriteTokens =
- routerUsage.cacheWriteTokens || routerUsage.cacheWriteInputTokenCount || 0
- yield {
- type: "usage",
- inputTokens: routerUsage.inputTokens || 0,
- outputTokens: routerUsage.outputTokens || 0,
- cacheReadTokens: cacheReadTokens,
- cacheWriteTokens: cacheWriteTokens,
- }
- }
- } catch (error) {
- logger.error("Error handling Bedrock invokedModelId", {
- ctx: "bedrock",
- error: error instanceof Error ? error : String(error),
- })
- } finally {
- // eslint-disable-next-line no-unsafe-finally
- continue
- }
- }
- // Handle message start
- if (streamEvent.messageStart) {
- continue
- }
- // Handle content blocks
- if (streamEvent.contentBlockStart) {
- const cbStart = streamEvent.contentBlockStart
- // Check if this is a reasoning block (AWS SDK structure)
- if (cbStart.contentBlock?.reasoningContent) {
- if (cbStart.contentBlockIndex && cbStart.contentBlockIndex > 0) {
- yield { type: "reasoning", text: "\n" }
- }
- yield {
- type: "reasoning",
- text: cbStart.contentBlock.reasoningContent.text || "",
- }
- }
- // Check for thinking block - handle both possible AWS SDK structures
- // cbStart.contentBlock: newer structure
- // cbStart.content_block: alternative structure seen in some AWS SDK versions
- else if (cbStart.contentBlock?.type === "thinking" || cbStart.content_block?.type === "thinking") {
- const contentBlock = cbStart.contentBlock || cbStart.content_block
- if (cbStart.contentBlockIndex && cbStart.contentBlockIndex > 0) {
- yield { type: "reasoning", text: "\n" }
- }
- if (contentBlock?.thinking) {
- yield {
- type: "reasoning",
- text: contentBlock.thinking,
- }
- }
- }
- // Handle tool use block start
- else if (cbStart.start?.toolUse || cbStart.contentBlock?.toolUse) {
- const toolUse = cbStart.start?.toolUse || cbStart.contentBlock?.toolUse
- if (toolUse) {
- yield {
- type: "tool_call_partial",
- index: cbStart.contentBlockIndex ?? 0,
- id: toolUse.toolUseId,
- name: toolUse.name,
- arguments: undefined,
- }
- }
- } else if (cbStart.start?.text) {
- yield {
- type: "text",
- text: cbStart.start.text,
- }
- }
- continue
- }
- // Handle content deltas
- if (streamEvent.contentBlockDelta) {
- const cbDelta = streamEvent.contentBlockDelta
- const delta = cbDelta.delta
- // Process reasoning and text content deltas
- // Multiple structures are supported for AWS SDK compatibility:
- // - delta.reasoningContent.text: AWS docs structure for reasoning
- // - delta.thinking: alternative structure for thinking content
- // - delta.text: standard text content
- // - delta.toolUse.input: tool input arguments
- if (delta) {
- // Check for reasoningContent property (AWS SDK structure)
- if (delta.reasoningContent?.text) {
- yield {
- type: "reasoning",
- text: delta.reasoningContent.text,
- }
- continue
- }
- // Capture the thinking signature from reasoningContent.signature delta.
- // Bedrock Converse API sends the signature as a separate delta after all
- // reasoning text deltas. This signature must be round-tripped back for
- // multi-turn conversations with tool use (Anthropic API requirement).
- if (delta.reasoningContent?.signature) {
- this.lastThoughtSignature = delta.reasoningContent.signature
- continue
- }
- // Capture redacted thinking content (opaque binary data from safety-filtered reasoning).
- // Anthropic returns this when extended thinking content is filtered. It must be
- // passed back verbatim in multi-turn conversations for proper reasoning continuity.
- if (delta.reasoningContent?.redactedContent) {
- const redactedContent = delta.reasoningContent.redactedContent
- this.lastRedactedThinkingBlocks.push({
- type: "redacted_thinking",
- data: Buffer.from(redactedContent).toString("base64"),
- })
- continue
- }
- // Handle tool use input delta
- if (delta.toolUse?.input) {
- yield {
- type: "tool_call_partial",
- index: cbDelta.contentBlockIndex ?? 0,
- id: undefined,
- name: undefined,
- arguments: delta.toolUse.input,
- }
- continue
- }
- // Handle alternative thinking structure (fallback for older SDK versions)
- if (delta.type === "thinking_delta" && delta.thinking) {
- yield {
- type: "reasoning",
- text: delta.thinking,
- }
- } else if (delta.text) {
- yield {
- type: "text",
- text: delta.text,
- }
- }
- }
- continue
- }
- // Handle message stop
- if (streamEvent.messageStop) {
- continue
- }
- }
- // Clear timeout after stream completes
- clearTimeout(timeoutId)
- } catch (error: unknown) {
- // Clear timeout on error
- clearTimeout(timeoutId)
- // Capture error in telemetry before processing
- const errorMessage = error instanceof Error ? error.message : String(error)
- const apiError = new ApiProviderError(errorMessage, this.providerName, modelConfig.id, "createMessage")
- TelemetryService.instance.captureException(apiError)
- // Check if this is a throttling error that should trigger retry logic
- const errorType = this.getErrorType(error)
- // For throttling errors, throw immediately without yielding chunks
- // This allows the retry mechanism in attemptApiRequest() to catch and handle it
- // The retry logic in Task.ts (around line 1817) expects errors to be thrown
- // on the first chunk for proper exponential backoff behavior
- if (errorType === "THROTTLING") {
- if (error instanceof Error) {
- throw error
- } else {
- throw new Error("Throttling error occurred")
- }
- }
- // For non-throttling errors, use the standard error handling with chunks
- const errorChunks = this.handleBedrockError(error, true) // true for streaming context
- // Yield each chunk individually to ensure type compatibility
- for (const chunk of errorChunks) {
- yield chunk as any // Cast to any to bypass type checking since we know the structure is correct
- }
- // Re-throw with enhanced error message for retry system
- const enhancedErrorMessage = this.formatErrorMessage(error, this.getErrorType(error), true)
- if (error instanceof Error) {
- const enhancedError = new Error(enhancedErrorMessage)
- // Preserve important properties from the original error
- enhancedError.name = error.name
- // Validate and preserve status property
- if ("status" in error && typeof (error as any).status === "number") {
- ;(enhancedError as any).status = (error as any).status
- }
- // Validate and preserve $metadata property
- if (
- "$metadata" in error &&
- typeof (error as any).$metadata === "object" &&
- (error as any).$metadata !== null
- ) {
- ;(enhancedError as any).$metadata = (error as any).$metadata
- }
- throw enhancedError
- } else {
- throw new Error("An unknown error occurred")
- }
- }
- }
- async completePrompt(prompt: string): Promise<string> {
- try {
- const modelConfig = this.getModel()
- // For completePrompt, thinking is typically not used, but we should still check
- // if thinking was somehow enabled in the model config
- const thinkingEnabled =
- shouldUseReasoningBudget({ model: modelConfig.info, settings: this.options }) &&
- modelConfig.reasoning &&
- modelConfig.reasoningBudget
- const inferenceConfig: BedrockInferenceConfig = {
- maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
- temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
- }
- // For completePrompt, use a unique conversation ID based on the prompt
- const conversationId = `prompt_${prompt.substring(0, 20)}`
- const payload = {
- modelId: modelConfig.id,
- messages: this.convertToBedrockConverseMessages(
- [
- {
- role: "user",
- content: prompt,
- },
- ],
- undefined,
- false,
- modelConfig.info,
- conversationId,
- ).messages,
- inferenceConfig,
- }
- const command = new ConverseCommand(payload)
- const response = await this.client.send(command)
- if (
- response?.output?.message?.content &&
- response.output.message.content.length > 0 &&
- response.output.message.content[0].text &&
- response.output.message.content[0].text.trim().length > 0
- ) {
- try {
- return response.output.message.content[0].text
- } catch (parseError) {
- logger.error("Failed to parse Bedrock response", {
- ctx: "bedrock",
- error: parseError instanceof Error ? parseError : String(parseError),
- })
- }
- }
- return ""
- } catch (error) {
- // Capture error in telemetry
- const model = this.getModel()
- const telemetryErrorMessage = error instanceof Error ? error.message : String(error)
- const apiError = new ApiProviderError(telemetryErrorMessage, this.providerName, model.id, "completePrompt")
- TelemetryService.instance.captureException(apiError)
- // Use the extracted error handling method for all errors
- const errorResult = this.handleBedrockError(error, false) // false for non-streaming context
- // Since we're in a non-streaming context, we know the result is a string
- const errorMessage = errorResult as string
- // Create enhanced error for retry system
- const enhancedError = new Error(errorMessage)
- if (error instanceof Error) {
- // Preserve important properties from the original error
- enhancedError.name = error.name
- // Validate and preserve status property
- if ("status" in error && typeof (error as any).status === "number") {
- ;(enhancedError as any).status = (error as any).status
- }
- // Validate and preserve $metadata property
- if (
- "$metadata" in error &&
- typeof (error as any).$metadata === "object" &&
- (error as any).$metadata !== null
- ) {
- ;(enhancedError as any).$metadata = (error as any).$metadata
- }
- }
- throw enhancedError
- }
- }
/**
 * Convert Anthropic messages to Bedrock Converse format.
 *
 * @param anthropicMessages - Messages in Anthropic (or simple role/content) shape.
 * @param systemMessage - Optional system prompt text.
 * @param usePromptCache - When true, cache points are computed and inserted.
 * @param modelInfo - Model metadata used to size and limit cache points.
 * @param conversationId - Optional conversation ID used to keep cache-point
 *        placements consistent across consecutive calls for the same chat.
 * @returns The system content blocks and converted messages (with cache points
 *          appended to message content when prompt caching is enabled).
 */
private convertToBedrockConverseMessages(
	anthropicMessages: Anthropic.Messages.MessageParam[] | { role: string; content: string }[],
	systemMessage?: string,
	usePromptCache: boolean = false,
	modelInfo?: any,
	conversationId?: string, // Optional conversation ID to track cache points across messages
): { system: SystemContentBlock[]; messages: Message[] } {
	// First convert messages using shared converter for proper image handling
	const convertedMessages = sharedConverter(anthropicMessages as Anthropic.Messages.MessageParam[])
	// If prompt caching is disabled, return the converted messages directly
	if (!usePromptCache) {
		return {
			system: systemMessage ? [{ text: systemMessage } as SystemContentBlock] : [],
			messages: convertedMessages,
		}
	}
	// Convert model info to expected format for cache strategy
	const cacheModelInfo: CacheModelInfo = {
		maxTokens: modelInfo?.maxTokens || 8192,
		contextWindow: modelInfo?.contextWindow || 200_000,
		supportsPromptCache: modelInfo?.supportsPromptCache || false,
		maxCachePoints: modelInfo?.maxCachePoints || 0,
		minTokensPerCachePoint: modelInfo?.minTokensPerCachePoint || 50,
		cachableFields: modelInfo?.cachableFields || [],
	}
	// Get previous cache point placements for this conversation if available
	// (single lookup; an absent entry is already undefined).
	const previousPlacements = conversationId ? this.previousCachePointPlacements[conversationId] : undefined
	// Create config for cache strategy
	const config = {
		modelInfo: cacheModelInfo,
		systemPrompt: systemMessage,
		messages: anthropicMessages as Anthropic.Messages.MessageParam[],
		usePromptCache,
		previousCachePointPlacements: previousPlacements,
	}
	// Get cache point placements (const: strategy is never reassigned)
	const strategy = new MultiPointStrategy(config)
	const cacheResult = strategy.determineOptimalCachePoints()
	// Store cache point placements for future use if conversation ID is provided
	if (conversationId && cacheResult.messageCachePointPlacements) {
		this.previousCachePointPlacements[conversationId] = cacheResult.messageCachePointPlacements
	}
	// Apply cache points to the properly converted messages
	const messagesWithCache = convertedMessages.map((msg, index) => {
		const placement = cacheResult.messageCachePointPlacements?.find((p) => p.index === index)
		if (placement) {
			return {
				...msg,
				content: [...(msg.content || []), { cachePoint: { type: "default" } } as ContentBlock],
			}
		}
		return msg
	})
	return {
		system: cacheResult.system,
		messages: messagesWithCache,
	}
}
- /************************************************************************************
- *
- * MODEL IDENTIFICATION
- *
- *************************************************************************************/
// Model configuration used when the effective model differs from the selected
// one. getModel() returns this outright whenever `id` is non-empty.
// NOTE(review): presumably populated when a prompt-router response reveals the
// actually-invoked model (see getModelById's router handling) — confirm against
// the streaming code that sets it, which is outside this chunk.
private costModelConfig: { id: BedrockModelId | string; info: ModelInfo } = {
	id: "",
	info: { maxTokens: 0, contextWindow: 0, supportsPromptCache: false, supportsImages: false },
}
// Parses a Bedrock (or SageMaker) ARN into region / resource-type / model-id,
// flagging cross-region inference prefixes and region mismatches.
private parseArn(arn: string, region?: string) {
	/*
	 * VIA Roo analysis: platform-independent Regex. It's designed to parse Amazon Bedrock ARNs and doesn't rely on any platform-specific features
	 * like file path separators, line endings, or case sensitivity behaviors. The forward slashes in the regex are properly escaped and
	 * represent literal characters in the AWS ARN format, not filesystem paths. This regex will function consistently across Windows,
	 * macOS, Linux, and any other operating system where JavaScript runs.
	 *
	 * Supports any AWS partition (aws, aws-us-gov, aws-cn, or future partitions).
	 * The partition is not captured since we don't need to use it.
	 *
	 * This matches ARNs like:
	 * - Foundation Model: arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-v2
	 * - GovCloud Inference Profile: arn:aws-us-gov:bedrock:us-gov-west-1:123456789012:inference-profile/us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0
	 * - Prompt Router: arn:aws:bedrock:us-west-2:123456789012:prompt-router/anthropic-claude
	 * - Inference Profile: arn:aws:bedrock:us-west-2:123456789012:inference-profile/anthropic.claude-v2
	 * - Cross Region Inference Profile: arn:aws:bedrock:us-west-2:123456789012:inference-profile/us.anthropic.claude-3-5-sonnet-20241022-v2:0
	 * - Custom Model (Provisioned Throughput): arn:aws:bedrock:us-west-2:123456789012:provisioned-model/my-custom-model
	 * - Imported Model: arn:aws:bedrock:us-west-2:123456789012:imported-model/my-imported-model
	 *
	 * match[0] - The entire matched string
	 * match[1] - The region (e.g., "us-east-1", "us-gov-west-1")
	 * match[2] - The account ID (can be empty string for AWS-managed resources)
	 * match[3] - The resource type (e.g., "foundation-model")
	 * match[4] - The resource ID (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
	 */
	const arnRegex = /^arn:[^:]+:(?:bedrock|sagemaker):([^:]+):([^:]*):(?:([^\/]+)\/([\w\.\-:]+)|([^\/]+))$/
	let match = arn.match(arnRegex)
	// Only ARNs with a "resource-type/resource-id" pair are accepted. ARNs that
	// match the regex's second alternative (no slash; captured in match[5])
	// intentionally fall through to the invalid-format result below.
	if (match && match[1] && match[3] && match[4]) {
		// Create the result object
		const result: {
			isValid: boolean
			region?: string
			modelType?: string
			modelId?: string
			errorMessage?: string
			crossRegionInference: boolean
		} = {
			isValid: true,
			crossRegionInference: false, // Default to false
		}
		result.modelType = match[3]
		const originalModelId = match[4]
		// parseBaseModelId strips any inference-profile prefix (e.g. "us.").
		result.modelId = this.parseBaseModelId(originalModelId)
		// Extract the region from the first capture group
		const arnRegion = match[1]
		result.region = arnRegion
		// Check if the original model ID had a region prefix
		if (originalModelId && result.modelId !== originalModelId) {
			// If the model ID changed after parsing, it had a region prefix;
			// only known system inference-profile prefixes count as cross-region.
			let prefix = originalModelId.replace(result.modelId, "")
			result.crossRegionInference = AwsBedrockHandler.isSystemInferenceProfile(prefix)
		}
		// Check if region in ARN matches provided region (if specified).
		// The ARN's region wins; the mismatch is surfaced as a warning message.
		if (region && arnRegion !== region) {
			result.errorMessage = `Region mismatch: The region in your ARN (${arnRegion}) does not match your selected region (${region}). This may cause access issues. The provider will use the region from the ARN.`
			result.region = arnRegion
		}
		return result
	}
	// If we get here, the regex didn't match (or matched without a resource pair)
	return {
		isValid: false,
		region: undefined,
		modelType: undefined,
		modelId: undefined,
		errorMessage: "Invalid ARN format. ARN should follow the Amazon Bedrock ARN pattern.",
		crossRegionInference: false,
	}
}
- //This strips any region prefix that used on cross-region model inference ARNs
- private parseBaseModelId(modelId: string): string {
- if (!modelId) {
- return modelId
- }
- // Remove AWS cross-region inference profile prefixes
- // as defined in AWS_INFERENCE_PROFILE_MAPPING
- for (const [_, inferenceProfile] of AWS_INFERENCE_PROFILE_MAPPING) {
- if (modelId.startsWith(inferenceProfile)) {
- // Remove the inference profile prefix from the model ID
- return modelId.substring(inferenceProfile.length)
- }
- }
- // Also strip Global Inference profile prefix if present
- if (modelId.startsWith("global.")) {
- return modelId.substring("global.".length)
- }
- // Return the model ID as-is for all other cases
- return modelId
- }
/**
 * Looks up model metadata by ID. Prompt Router responses come back in a
 * different sequence and the model actually used is only known from the
 * response, so it must be fetched by name here.
 *
 * Falls back to the default router model for router-type ARNs, and to the
 * default model (with heuristically guessed info) for unknown IDs. User
 * overrides for maxTokens / contextWindow are always applied last.
 */
getModelById(modelId: string, modelType?: string): { id: BedrockModelId | string; info: ModelInfo } {
	// Deep-copy model info so later code can set id/maxTokens without mutating
	// the shared bedrockModels constant.
	const cloneInfo = (info: ModelInfo): ModelInfo => JSON.parse(JSON.stringify(info))
	const baseModelId = this.parseBaseModelId(modelId) as BedrockModelId
	let model: { id: BedrockModelId | string; info: ModelInfo }
	if (baseModelId in bedrockModels) {
		model = { id: baseModelId, info: cloneInfo(bedrockModels[baseModelId]) }
	} else if (modelType?.includes("router")) {
		// Router ARN whose invoked model is not yet known: start from the
		// default prompt-router model.
		model = {
			id: bedrockDefaultPromptRouterModelId,
			info: cloneInfo(bedrockModels[bedrockDefaultPromptRouterModelId]),
		}
	} else {
		// Unknown model ID: use heuristics for model info, then overlay them on
		// the default model's data.
		const guessed = this.guessModelInfoFromId(modelId)
		model = {
			id: bedrockDefaultModelId,
			info: { ...cloneInfo(bedrockModels[bedrockDefaultModelId]), ...guessed },
		}
	}
	// Always allow user to override detected/guessed maxTokens and contextWindow.
	const { modelMaxTokens, awsModelContextWindow } = this.options
	if (modelMaxTokens && modelMaxTokens > 0) {
		model.info.maxTokens = modelMaxTokens
	}
	if (awsModelContextWindow && awsModelContextWindow > 0) {
		model.info.contextWindow = awsModelContextWindow
	}
	return model
}
/**
 * Resolves the fully-configured model for this handler.
 *
 * Resolution order:
 *  1. costModelConfig (non-empty id) wins outright.
 *  2. A custom ARN, when provided, determines the model; non-foundation-model
 *     ARNs are used verbatim as the Bedrock model identifier.
 *  3. Otherwise the drop-down model ID is used, optionally prefixed for
 *     global inference (takes precedence) or cross-region inference.
 * Afterwards, 1M-context tier data and service-tier pricing multipliers are
 * layered onto the model info, and reasoning params are merged in.
 */
override getModel(): {
	id: BedrockModelId | string
	info: ModelInfo
	maxTokens?: number
	temperature?: number
	reasoning?: any
	reasoningBudget?: number
} {
	if (this.costModelConfig?.id?.trim().length > 0) {
		// Get model params for cost model config
		const params = getModelParams({
			format: "anthropic",
			modelId: this.costModelConfig.id,
			model: this.costModelConfig.info,
			settings: this.options,
			defaultTemperature: BEDROCK_DEFAULT_TEMPERATURE,
		})
		return { ...this.costModelConfig, ...params }
	}
	let modelConfig = undefined
	// If custom ARN is provided, use it
	if (this.options.awsCustomArn) {
		modelConfig = this.getModelById(this.arnInfo.modelId, this.arnInfo.modelType)
		//If the user entered an ARN for a foundation-model they've done the same thing as picking from our list of options.
		//We leave the model data matching the same as if a drop-down input method was used by not overwriting the model ID with the user input ARN
		//Otherwise the ARN is not a foundation-model resource type that ARN should be used as the identifier in Bedrock interactions
		if (this.arnInfo.modelType !== "foundation-model") modelConfig.id = this.options.awsCustomArn
	} else {
		//a model was selected from the drop down
		modelConfig = this.getModelById(this.options.apiModelId as string)
		// Apply Global Inference prefix if enabled and supported (takes precedence over cross-region)
		const baseIdForGlobal = this.parseBaseModelId(modelConfig.id)
		if (
			this.options.awsUseGlobalInference &&
			BEDROCK_GLOBAL_INFERENCE_MODEL_IDS.includes(baseIdForGlobal as any)
		) {
			modelConfig.id = `global.${baseIdForGlobal}`
		}
		// Otherwise, add cross-region inference prefix if enabled
		else if (this.options.awsUseCrossRegionInference && this.options.awsRegion) {
			const prefix = AwsBedrockHandler.getPrefixForRegion(this.options.awsRegion)
			if (prefix) {
				modelConfig.id = `${prefix}${modelConfig.id}`
			}
		}
	}
	// Check if 1M context is enabled for supported Claude 4 models
	// Use parseBaseModelId to handle cross-region inference prefixes
	const baseModelId = this.parseBaseModelId(modelConfig.id)
	if (BEDROCK_1M_CONTEXT_MODEL_IDS.includes(baseModelId as any) && this.options.awsBedrock1MContext) {
		// Update context window and pricing to 1M tier when 1M context beta is
		// enabled; falls back to current values when the tier omits a price.
		const tier = modelConfig.info.tiers?.[0]
		modelConfig.info = {
			...modelConfig.info,
			contextWindow: tier?.contextWindow ?? 1_000_000,
			inputPrice: tier?.inputPrice ?? modelConfig.info.inputPrice,
			outputPrice: tier?.outputPrice ?? modelConfig.info.outputPrice,
			cacheWritesPrice: tier?.cacheWritesPrice ?? modelConfig.info.cacheWritesPrice,
			cacheReadsPrice: tier?.cacheReadsPrice ?? modelConfig.info.cacheReadsPrice,
		}
	}
	// Get model params including reasoning configuration
	const params = getModelParams({
		format: "anthropic",
		modelId: modelConfig.id,
		model: modelConfig.info,
		settings: this.options,
		defaultTemperature: BEDROCK_DEFAULT_TEMPERATURE,
	})
	// Apply service tier pricing if specified and model supports it
	const baseModelIdForTier = this.parseBaseModelId(modelConfig.id)
	if (this.options.awsBedrockServiceTier && BEDROCK_SERVICE_TIER_MODEL_IDS.includes(baseModelIdForTier as any)) {
		const pricingMultiplier = BEDROCK_SERVICE_TIER_PRICING[this.options.awsBedrockServiceTier]
		if (pricingMultiplier && pricingMultiplier !== 1.0) {
			// Apply pricing multiplier to all price fields (undefined prices stay undefined)
			modelConfig.info = {
				...modelConfig.info,
				inputPrice: modelConfig.info.inputPrice
					? modelConfig.info.inputPrice * pricingMultiplier
					: undefined,
				outputPrice: modelConfig.info.outputPrice
					? modelConfig.info.outputPrice * pricingMultiplier
					: undefined,
				cacheWritesPrice: modelConfig.info.cacheWritesPrice
					? modelConfig.info.cacheWritesPrice * pricingMultiplier
					: undefined,
				cacheReadsPrice: modelConfig.info.cacheReadsPrice
					? modelConfig.info.cacheReadsPrice * pricingMultiplier
					: undefined,
			}
		}
	}
	// Don't override maxTokens/contextWindow here; handled in getModelById (and includes user overrides)
	return { ...modelConfig, ...params } as {
		id: BedrockModelId | string
		info: ModelInfo
		maxTokens?: number
		temperature?: number
		reasoning?: any
		reasoningBudget?: number
	}
}
- /************************************************************************************
- *
- * CACHE
- *
- *************************************************************************************/
// Store previous cache point placements for maintaining consistency across
// consecutive messages. Keyed by conversation ID; values are the placements
// produced by the cache strategy in convertToBedrockConverseMessages.
// NOTE(review): no eviction is visible in this file, so entries persist for
// the handler's lifetime — confirm whether that is acceptable for long sessions.
private previousCachePointPlacements: { [conversationId: string]: any[] } = {}
- private supportsAwsPromptCache(modelConfig: { id: BedrockModelId | string; info: ModelInfo }): boolean | undefined {
- // Check if the model supports prompt cache
- // The cachableFields property is not part of the ModelInfo type in schemas
- // but it's used in the bedrockModels object in shared/api.ts
- return (
- modelConfig?.info?.supportsPromptCache &&
- // Use optional chaining and type assertion to access cachableFields
- (modelConfig?.info as any)?.cachableFields &&
- (modelConfig?.info as any)?.cachableFields?.length > 0
- )
- }
- /**
- * Removes any existing cachePoint nodes from content blocks
- */
- private removeCachePoints(content: any): any {
- if (Array.isArray(content)) {
- return content.map((block) => {
- // Use destructuring to remove cachePoint property
- const { cachePoint: _, ...rest } = block
- return rest
- })
- }
- return content
- }
- /************************************************************************************
- *
- * NATIVE TOOLS
- *
- *************************************************************************************/
/**
 * Convert OpenAI tool definitions to Bedrock Converse format.
 * Transforms JSON Schema to draft 2020-12 compliant format required by Claude models.
 * @param tools Array of OpenAI ChatCompletionTool definitions
 * @returns Array of Bedrock Tool definitions (non-function tools are dropped)
 */
private convertToolsForBedrock(tools: OpenAI.Chat.ChatCompletionTool[]): Tool[] {
	const bedrockTools: Tool[] = []
	for (const tool of tools) {
		// Only function-type tools are representable as Bedrock toolSpecs.
		if (tool.type !== "function") {
			continue
		}
		bedrockTools.push({
			toolSpec: {
				name: tool.function.name,
				description: tool.function.description,
				inputSchema: {
					// Normalize schema to JSON Schema draft 2020-12 compliant format.
					// This converts type: ["T", "null"] to anyOf: [{type: "T"}, {type: "null"}]
					json: normalizeToolSchema(tool.function.parameters as Record<string, unknown>),
				},
			},
		} as Tool)
	}
	return bedrockTools
}
/**
 * Convert OpenAI tool_choice to Bedrock ToolChoice format.
 * @param toolChoice OpenAI tool_choice parameter
 * @returns Bedrock ToolChoice configuration; undefined means "omit tools"
 *          (Bedrock has no explicit "none")
 */
private convertToolChoiceForBedrock(
	toolChoice: OpenAI.Chat.ChatCompletionCreateParams["tool_choice"],
): ToolChoice | undefined {
	const autoChoice = { auto: {} } as ToolChoice
	// Default to auto - model decides whether to use tools
	if (!toolChoice) {
		return autoChoice
	}
	if (typeof toolChoice === "string") {
		if (toolChoice === "none") {
			return undefined // Bedrock doesn't have "none", just omit tools
		}
		if (toolChoice === "required") {
			return { any: {} } as ToolChoice // Model must use at least one tool
		}
		// "auto" and any unrecognized string both fall back to auto.
		return autoChoice
	}
	// Handle object form { type: "function", function: { name: string } }
	if (typeof toolChoice === "object" && "function" in toolChoice) {
		return {
			tool: {
				name: toolChoice.function.name,
			},
		} as ToolChoice
	}
	return autoChoice
}
- /************************************************************************************
- *
- * AMAZON REGIONS
- *
- *************************************************************************************/
- private static getPrefixForRegion(region: string): string | undefined {
- // Use AWS recommended inference profile prefixes
- // Array is pre-sorted by pattern length (descending) to ensure more specific patterns match first
- for (const [regionPattern, inferenceProfile] of AWS_INFERENCE_PROFILE_MAPPING) {
- if (region.startsWith(regionPattern)) {
- return inferenceProfile
- }
- }
- return undefined
- }
- private static isSystemInferenceProfile(prefix: string): boolean {
- // Check if the prefix is defined in AWS_INFERENCE_PROFILE_MAPPING
- for (const [_, inferenceProfile] of AWS_INFERENCE_PROFILE_MAPPING) {
- if (prefix === inferenceProfile) {
- return true
- }
- }
- return false
- }
- /************************************************************************************
- *
- * ERROR HANDLING
- *
- *************************************************************************************/
/**
 * Error type definitions for Bedrock API errors.
 *
 * Each entry pairs lowercase substring patterns (matched against both the
 * error message and error name by getErrorType) with a user-facing message
 * template rendered by formatErrorMessage. Placeholders like {errorMessage}
 * are substituted there. The template strings are user-facing text — do not
 * reflow or re-indent them.
 */
private static readonly ERROR_TYPES: Record<
	string,
	{
		patterns: string[] // Strings to match in lowercase error message or name
		messageTemplate: string // Template with placeholders like {region}, {modelId}, etc.
		logLevel: "error" | "warn" | "info" // Log level for this error type
	}
> = {
	ACCESS_DENIED: {
		patterns: ["access", "denied", "permission"],
		messageTemplate: `You don't have access to the model specified.
Please verify:
1. Try cross-region inference if you're using a foundation model
2. If using an ARN, verify the ARN is correct and points to a valid model
3. Your AWS credentials have permission to access this model (check IAM policies)
4. The region in the ARN matches the region where the model is deployed
5. If using a provisioned model, ensure it's active and not in a failed state`,
		logLevel: "error",
	},
	NOT_FOUND: {
		patterns: ["not found", "does not exist"],
		messageTemplate: `The specified ARN does not exist or is invalid. Please check:
1. The ARN format is correct (arn:aws:bedrock:region:account-id:resource-type/resource-name)
2. The model exists in the specified region
3. The account ID in the ARN is correct`,
		logLevel: "error",
	},
	// Broad patterns ("rate", "limit", "busy"); getErrorType checks the more
	// specific types before this one.
	THROTTLING: {
		patterns: [
			"throttl",
			"rate",
			"limit",
			"bedrock is unable to process your request", // Amazon Bedrock specific throttling message
			"please wait",
			"quota exceeded",
			"service unavailable",
			"busy",
			"overloaded",
			"too many requests",
			"request limit",
			"concurrent requests",
		],
		messageTemplate: `Request was throttled or rate limited. Please try:
1. Reducing the frequency of requests
2. If using a provisioned model, check its throughput settings
3. Contact AWS support to request a quota increase if needed
`,
		logLevel: "error",
	},
	TOO_MANY_TOKENS: {
		patterns: ["too many tokens", "token limit exceeded", "context length", "maximum context length"],
		messageTemplate: `"Too many tokens" error detected.
Possible Causes:
1. Input exceeds model's context window limit
2. Rate limiting (too many tokens per minute)
3. Quota exceeded for token usage
4. Other token-related service limitations
Suggestions:
1. Reduce the size of your input
2. Split your request into smaller chunks
3. Use a model with a larger context window
4. If rate limited, reduce request frequency
5. Check your Amazon Bedrock quotas and limits
`,
		logLevel: "error",
	},
	SERVICE_QUOTA_EXCEEDED: {
		patterns: ["service quota exceeded", "service quota", "quota exceeded for model"],
		messageTemplate: `Service quota exceeded. This error indicates you've reached AWS service limits.
Please try:
1. Contact AWS support to request a quota increase
2. Reduce request frequency temporarily
3. Check your Amazon Bedrock quotas in the AWS console
4. Consider using a different model or region with available capacity
`,
		logLevel: "error",
	},
	MODEL_NOT_READY: {
		patterns: ["model not ready", "model is not ready", "provisioned throughput not ready", "model loading"],
		messageTemplate: `Model is not ready or still loading. This can happen with:
1. Provisioned throughput models that are still initializing
2. Custom models that are being loaded
3. Models that are temporarily unavailable
Please try:
1. Wait a few minutes and retry
2. Check the model status in Amazon Bedrock console
3. Verify the model is properly provisioned
`,
		logLevel: "error",
	},
	INTERNAL_SERVER_ERROR: {
		patterns: ["internal server error", "internal error", "server error", "service error"],
		messageTemplate: `Amazon Bedrock internal server error. This is a temporary service issue.
Please try:
1. Retry the request after a brief delay
2. If the error persists, check AWS service health
3. Contact AWS support if the issue continues
`,
		logLevel: "error",
	},
	ON_DEMAND_NOT_SUPPORTED: {
		// NOTE(review): the pattern deliberately contains a typographic
		// apostrophe (’), presumably matching the service's exact wording — do
		// not "fix" it to a straight quote without verifying the real message.
		patterns: ["with on-demand throughput isn’t supported."],
		messageTemplate: `
1. Try enabling cross-region inference in settings.
2. Or, create an inference profile and then leverage the "Use custom ARN..." option of the model selector in settings.`,
		logLevel: "error",
	},
	ABORT: {
		patterns: ["aborterror"], // This will match error.name.toLowerCase() for AbortError
		messageTemplate: `Request was aborted: The operation timed out or was manually cancelled. Please try again or check your network connection.`,
		logLevel: "info",
	},
	INVALID_ARN_FORMAT: {
		patterns: ["invalid_arn_format:", "invalid arn format"],
		messageTemplate: `Invalid ARN format. ARN should follow the pattern: arn:aws:bedrock:region:account-id:resource-type/resource-name`,
		logLevel: "error",
	},
	VALIDATION_ERROR: {
		patterns: [
			"input tag",
			"does not match any of the expected tags",
			"field required",
			"validation",
			"invalid parameter",
		],
		messageTemplate: `Parameter validation error: {errorMessage}
This error indicates that the request parameters don't match Amazon Bedrock's expected format.
Common causes:
1. Extended thinking parameter format is incorrect
2. Model-specific parameters are not supported by this model
3. API parameter structure has changed
Please check:
- Model supports the requested features (extended thinking, etc.)
- Parameter format matches Amazon Bedrock specification
- Model ID is correct for the requested features`,
		logLevel: "error",
	},
	// Default/generic error
	GENERIC: {
		patterns: [], // Empty patterns array means this is the default
		messageTemplate: `Unknown Error: {errorMessage}`,
		logLevel: "error",
	},
}
- /**
- * Determines the error type based on the error message or name
- */
- private getErrorType(error: unknown): string {
- if (!(error instanceof Error)) {
- return "GENERIC"
- }
- // Check for HTTP 429 status code (Too Many Requests)
- if ((error as any).status === 429 || (error as any).$metadata?.httpStatusCode === 429) {
- return "THROTTLING"
- }
- // Check for Amazon Bedrock specific throttling exception names
- if ((error as any).name === "ThrottlingException" || (error as any).__type === "ThrottlingException") {
- return "THROTTLING"
- }
- const errorMessage = error.message.toLowerCase()
- const errorName = error.name.toLowerCase()
- // Check each error type's patterns in order of specificity (most specific first)
- const errorTypeOrder = [
- "SERVICE_QUOTA_EXCEEDED", // Most specific - check before THROTTLING
- "MODEL_NOT_READY",
- "TOO_MANY_TOKENS",
- "INTERNAL_SERVER_ERROR",
- "ON_DEMAND_NOT_SUPPORTED",
- "NOT_FOUND",
- "ACCESS_DENIED",
- "THROTTLING", // Less specific - check after more specific patterns
- ]
- for (const errorType of errorTypeOrder) {
- const definition = AwsBedrockHandler.ERROR_TYPES[errorType]
- if (!definition) continue
- // If any pattern matches in either message or name, return this error type
- if (definition.patterns.some((pattern) => errorMessage.includes(pattern) || errorName.includes(pattern))) {
- return errorType
- }
- }
- // Default to generic error
- return "GENERIC"
- }
/**
 * Formats an error message based on the error type and context.
 *
 * Renders the ERROR_TYPES message template for the given type, substituting
 * {errorMessage}, {errorName}, {modelId}, {contextWindow} and {regionInfo}
 * placeholders. Unmatched placeholders are left as-is; variables are only
 * populated when `error` is an Error instance (regionInfo always is).
 */
private formatErrorMessage(error: unknown, errorType: string, _isStreamContext: boolean): string {
	const definition = AwsBedrockHandler.ERROR_TYPES[errorType] || AwsBedrockHandler.ERROR_TYPES.GENERIC
	let template = definition.messageTemplate
	// Prepare template variables
	const templateVars: Record<string, string> = {}
	if (error instanceof Error) {
		templateVars.errorMessage = error.message
		templateVars.errorName = error.name
		const modelConfig = this.getModel()
		templateVars.modelId = modelConfig.id
		templateVars.contextWindow = String(modelConfig.info.contextWindow || "unknown")
	}
	// Add context-specific template variables.
	// NOTE(review): in AWS SDK v3 `client.config.region` is typically an async
	// provider function, so calling it here may yield a Promise that renders as
	// "[object Promise]" in the message — confirm and resolve upstream if so.
	const region =
		typeof this?.client?.config?.region === "function"
			? this?.client?.config?.region()
			: this?.client?.config?.region
	templateVars.regionInfo = `(${region})`
	// Replace template variables (global replace; empty string for falsy values)
	for (const [key, value] of Object.entries(templateVars)) {
		template = template.replace(new RegExp(`{${key}}`, "g"), value || "")
	}
	return template
}
- /**
- * Handles Bedrock API errors and generates appropriate error messages
- * @param error The error that occurred
- * @param isStreamContext Whether the error occurred in a streaming context (true) or not (false)
- * @returns Error message string for non-streaming context or array of stream chunks for streaming context
- */
- private handleBedrockError(
- error: unknown,
- isStreamContext: boolean,
- ): string | Array<{ type: string; text?: string; inputTokens?: number; outputTokens?: number }> {
- // Determine error type
- const errorType = this.getErrorType(error)
- // Format error message
- const errorMessage = this.formatErrorMessage(error, errorType, isStreamContext)
- // Log the error
- const definition = AwsBedrockHandler.ERROR_TYPES[errorType]
- const logMethod = definition.logLevel
- const contextName = isStreamContext ? "createMessage" : "completePrompt"
- logger[logMethod](`${errorType} error in ${contextName}`, {
- ctx: "bedrock",
- customArn: this.options.awsCustomArn,
- errorType,
- errorMessage: error instanceof Error ? error.message : String(error),
- ...(error instanceof Error && error.stack ? { errorStack: error.stack } : {}),
- ...(this.client?.config?.region ? { clientRegion: this.client.config.region } : {}),
- })
- // Return appropriate response based on isStreamContext
- if (isStreamContext) {
- return [
- { type: "text", text: `Error: ${errorMessage}` },
- { type: "usage", inputTokens: 0, outputTokens: 0 },
- ]
- } else {
- // For non-streaming context, add the expected prefix
- return `Bedrock completion error: ${errorMessage}`
- }
- }
/**
 * Returns the thinking signature captured from the last Bedrock Converse API response.
 * Claude models with extended thinking return a cryptographic signature in the
 * reasoning content delta, which must be round-tripped back for multi-turn
 * conversations with tool use (Anthropic API requirement).
 *
 * @returns The captured signature, or undefined when the last response carried none.
 */
getThoughtSignature(): string | undefined {
	return this.lastThoughtSignature
}
/**
 * Returns any redacted thinking blocks captured from the last Bedrock response.
 * Anthropic returns these when safety filters trigger on the model's internal
 * reasoning. They contain opaque binary data (base64-encoded) that must be
 * passed back verbatim for proper reasoning continuity.
 *
 * @returns The captured blocks, or undefined when none were captured
 *          (callers never see an empty array).
 */
getRedactedThinkingBlocks(): Array<{ type: "redacted_thinking"; data: string }> | undefined {
	return this.lastRedactedThinkingBlocks.length > 0 ? this.lastRedactedThinkingBlocks : undefined
}
- }
|