// bedrock.ts
  1. import {
  2. BedrockRuntimeClient,
  3. ConverseStreamCommand,
  4. ConverseCommand,
  5. BedrockRuntimeClientConfig,
  6. ContentBlock,
  7. Message,
  8. SystemContentBlock,
  9. Tool,
  10. ToolConfiguration,
  11. ToolChoice,
  12. } from "@aws-sdk/client-bedrock-runtime"
  13. import OpenAI from "openai"
  14. import { fromIni } from "@aws-sdk/credential-providers"
  15. import { Anthropic } from "@anthropic-ai/sdk"
  16. import {
  17. type ModelInfo,
  18. type ProviderSettings,
  19. type BedrockModelId,
  20. type BedrockServiceTier,
  21. bedrockDefaultModelId,
  22. bedrockModels,
  23. bedrockDefaultPromptRouterModelId,
  24. BEDROCK_DEFAULT_TEMPERATURE,
  25. BEDROCK_MAX_TOKENS,
  26. BEDROCK_DEFAULT_CONTEXT,
  27. AWS_INFERENCE_PROFILE_MAPPING,
  28. BEDROCK_1M_CONTEXT_MODEL_IDS,
  29. BEDROCK_GLOBAL_INFERENCE_MODEL_IDS,
  30. BEDROCK_SERVICE_TIER_MODEL_IDS,
  31. BEDROCK_SERVICE_TIER_PRICING,
  32. ApiProviderError,
  33. } from "@roo-code/types"
  34. import { TelemetryService } from "@roo-code/telemetry"
  35. import { ApiStream } from "../transform/stream"
  36. import { BaseProvider } from "./base-provider"
  37. import { logger } from "../../utils/logging"
  38. import { Package } from "../../shared/package"
  39. import { MultiPointStrategy } from "../transform/cache-strategy/multi-point-strategy"
  40. import { ModelInfo as CacheModelInfo } from "../transform/cache-strategy/types"
  41. import { convertToBedrockConverseMessages as sharedConverter } from "../transform/bedrock-converse-format"
  42. import { getModelParams } from "../transform/model-params"
  43. import { shouldUseReasoningBudget } from "../../shared/api"
  44. import { normalizeToolSchema } from "../../utils/json-schema"
  45. import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
  46. /************************************************************************************
  47. *
  48. * TYPES
  49. *
  50. *************************************************************************************/
// Inference parameters sent in the Converse API's `inferenceConfig` field.
interface BedrockInferenceConfig {
	// Hard cap on the number of tokens the model may generate for this request.
	maxTokens: number
	// Sampling temperature; optional so the model/router default can apply when omitted.
	temperature?: number
}
// Model-specific parameters passed through Converse's `additionalModelRequestFields`.
// Includes Anthropic extended-thinking configuration and beta feature flags.
interface BedrockAdditionalModelFields {
	// Anthropic extended thinking: when present, thinking is enabled with the given token budget.
	thinking?: {
		type: "enabled"
		budget_tokens: number
	}
	// Anthropic beta flags (e.g. 1M-context, fine-grained tool streaming) forwarded to the model.
	anthropic_beta?: string[]
	[key: string]: any // Index signature keeps this assignable to the SDK's DocumentType
}
// Request payload for the Bedrock Converse / ConverseStream commands.
interface BedrockPayload {
	// Model id or full ARN (custom ARNs and prompt routers are supported).
	modelId: BedrockModelId | string
	// Conversation turns in Bedrock Converse message format.
	messages: Message[]
	// System prompt blocks (may include cache points when prompt caching is enabled).
	system?: SystemContentBlock[]
	inferenceConfig: BedrockInferenceConfig
	// Set at top level when extended thinking is enabled (see createMessage).
	anthropic_version?: string
	additionalModelRequestFields?: BedrockAdditionalModelFields
	// Tool definitions and tool-choice policy for tool-use requests.
	toolConfig?: ToolConfiguration
}
// Extended payload type that includes service_tier as a top-level parameter.
// AWS Bedrock service tiers (STANDARD, FLEX, PRIORITY) are specified at the top level,
// NOT inside additionalModelRequestFields.
// https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
type BedrockPayloadWithServiceTier = BedrockPayload & {
	service_tier?: BedrockServiceTier
}
// Content-block-start event shape. The AWS SDK has shipped several variants of
// this structure, so all observed layouts are modeled here to avoid `as any`.
interface ContentBlockStartEvent {
	// Classic structure: initial content carried under `start`.
	start?: {
		text?: string
		thinking?: string
		// Tool invocation opener: id and tool name (arguments arrive in deltas).
		toolUse?: {
			toolUseId?: string
			name?: string
		}
	}
	// Zero-based index of the content block within the message.
	contentBlockIndex?: number
	// Alternative snake_case structure used by some AWS SDK versions.
	content_block?: {
		type?: string
		thinking?: string
	}
	// Official AWS SDK structure for reasoning (as documented).
	contentBlock?: {
		type?: string
		thinking?: string
		reasoningContent?: {
			text?: string
		}
		// Tool use block start
		toolUse?: {
			toolUseId?: string
			name?: string
		}
	}
}
// Incremental content-block update: text, reasoning, or tool-argument fragments.
interface ContentBlockDeltaEvent {
	delta?: {
		// Plain assistant text fragment.
		text?: string
		// Thinking fragment (older SDK structure; see `type === "thinking_delta"`).
		thinking?: string
		// Discriminator for alternative delta structures (e.g. "thinking_delta").
		type?: string
		// AWS SDK structure for reasoning content deltas.
		// Includes text (reasoning), signature (verification token), and redactedContent (safety-filtered).
		reasoningContent?: {
			text?: string
			signature?: string
			redactedContent?: Uint8Array
		}
		// Tool use input delta: a fragment of the JSON-encoded tool arguments.
		toolUse?: {
			input?: string
		}
	}
	// Index of the content block this delta belongs to.
	contentBlockIndex?: number
}
// Union-style event envelope for ConverseStream chunks, based on the AWS SDK.
// Exactly one of the optional top-level members is expected per event.
export interface StreamEvent {
	// Start of an assistant message.
	messageStart?: {
		role?: string
	}
	// End of the message, with the model's stop reason.
	messageStop?: {
		stopReason?: "end_turn" | "tool_use" | "max_tokens" | "stop_sequence"
		additionalModelResponseFields?: Record<string, unknown>
	}
	contentBlockStart?: ContentBlockStartEvent
	contentBlockDelta?: ContentBlockDeltaEvent
	// Token accounting and latency, emitted at stream end.
	metadata?: {
		usage?: {
			inputTokens: number
			outputTokens: number
			totalTokens?: number // Made optional since we don't use it
			// Cache token counts — both naming conventions are seen in the wild.
			cacheReadInputTokens?: number
			cacheWriteInputTokens?: number
			cacheReadInputTokenCount?: number
			cacheWriteInputTokenCount?: number
		}
		metrics?: {
			latencyMs: number
		}
	}
	// Trace emitted when a prompt router forwarded the request to a concrete model.
	trace?: {
		promptRouter?: {
			// ARN/id of the model the router actually invoked.
			invokedModelId?: string
			usage?: {
				inputTokens: number
				outputTokens: number
				totalTokens?: number // Made optional since we don't use it
				// Cache token counts — both naming conventions are seen in the wild.
				cacheReadTokens?: number
				cacheWriteTokens?: number
				cacheReadInputTokenCount?: number
				cacheWriteInputTokenCount?: number
			}
		}
	}
}
// Token usage extracted from stream metadata. All fields optional because
// different models/SDK versions report different subsets and naming styles.
export type UsageType = {
	inputTokens?: number
	outputTokens?: number
	// Cache counters: the *InputTokens and *InputTokenCount spellings are
	// alternates; consumers check both (see createMessage).
	cacheReadInputTokens?: number
	cacheWriteInputTokens?: number
	cacheReadInputTokenCount?: number
	cacheWriteInputTokenCount?: number
}
  184. /************************************************************************************
  185. *
  186. * PROVIDER
  187. *
  188. *************************************************************************************/
/**
 * API handler for AWS Bedrock built on the Converse / ConverseStream APIs.
 * Supports plain model ids and custom ARNs (including prompt routers),
 * prompt caching, extended thinking, tool use, and several AWS auth schemes.
 */
export class AwsBedrockHandler extends BaseProvider implements SingleCompletionHandler {
	protected options: ProviderSettings
	private client: BedrockRuntimeClient
	// Parsed result of this.parseArn() for a custom ARN; shape is dynamic — TODO(review): give this a real type.
	private arnInfo: any
	private readonly providerName = "Bedrock"
	// Extended-thinking signature captured from reasoningContent.signature deltas;
	// must be round-tripped on multi-turn tool-use conversations.
	private lastThoughtSignature: string | undefined
	// Redacted (safety-filtered) thinking blocks captured from the stream, stored base64-encoded.
	private lastRedactedThinkingBlocks: Array<{ type: "redacted_thinking"; data: string }> = []
  196. constructor(options: ProviderSettings) {
  197. super()
  198. this.options = options
  199. let region = this.options.awsRegion
  200. // process the various user input options, be opinionated about the intent of the options
  201. // and determine the model to use during inference and for cost calculations
  202. // There are variations on ARN strings that can be entered making the conditional logic
  203. // more involved than the non-ARN branch of logic
  204. if (this.options.awsCustomArn) {
  205. this.arnInfo = this.parseArn(this.options.awsCustomArn, region)
  206. if (!this.arnInfo.isValid) {
  207. logger.error("Invalid ARN format", {
  208. ctx: "bedrock",
  209. errorMessage: this.arnInfo.errorMessage,
  210. })
  211. // Throw a consistent error with a prefix that can be detected by callers
  212. const errorMessage =
  213. this.arnInfo.errorMessage ||
  214. "Invalid ARN format. ARN should follow the pattern: arn:aws:bedrock:region:account-id:resource-type/resource-name"
  215. throw new Error("INVALID_ARN_FORMAT:" + errorMessage)
  216. }
  217. if (this.arnInfo.region && this.arnInfo.region !== this.options.awsRegion) {
  218. // Log if there's a region mismatch between the ARN and the region selected by the user
  219. // We will use the ARNs region, so execution can continue, but log an info statement.
  220. // Log a warning if there's a region mismatch between the ARN and the region selected by the user
  221. // We will use the ARNs region, so execution can continue, but log an info statement.
  222. logger.info(this.arnInfo.errorMessage, {
  223. ctx: "bedrock",
  224. selectedRegion: this.options.awsRegion,
  225. arnRegion: this.arnInfo.region,
  226. })
  227. this.options.awsRegion = this.arnInfo.region
  228. }
  229. this.options.apiModelId = this.arnInfo.modelId
  230. if (this.arnInfo.awsUseCrossRegionInference) this.options.awsUseCrossRegionInference = true
  231. }
  232. if (!this.options.modelTemperature) {
  233. this.options.modelTemperature = BEDROCK_DEFAULT_TEMPERATURE
  234. }
  235. this.costModelConfig = this.getModel()
  236. const clientConfig: BedrockRuntimeClientConfig = {
  237. userAgentAppId: `RooCode#${Package.version}`,
  238. region: this.options.awsRegion,
  239. // Add the endpoint configuration when specified and enabled
  240. ...(this.options.awsBedrockEndpoint &&
  241. this.options.awsBedrockEndpointEnabled && { endpoint: this.options.awsBedrockEndpoint }),
  242. }
  243. if (this.options.awsUseApiKey && this.options.awsApiKey) {
  244. // Use API key/token-based authentication if enabled and API key is set
  245. clientConfig.token = { token: this.options.awsApiKey }
  246. clientConfig.authSchemePreference = ["httpBearerAuth"] // Otherwise there's no end of credential problems.
  247. clientConfig.requestHandler = {
  248. // This should be the default anyway, but without setting something
  249. // this provider fails to work with LiteLLM passthrough.
  250. requestTimeout: 0,
  251. }
  252. } else if (this.options.awsUseProfile && this.options.awsProfile) {
  253. // Use profile-based credentials if enabled and profile is set
  254. clientConfig.credentials = fromIni({
  255. profile: this.options.awsProfile,
  256. ignoreCache: true,
  257. })
  258. } else if (this.options.awsAccessKey && this.options.awsSecretKey) {
  259. // Use direct credentials if provided
  260. clientConfig.credentials = {
  261. accessKeyId: this.options.awsAccessKey,
  262. secretAccessKey: this.options.awsSecretKey,
  263. ...(this.options.awsSessionToken ? { sessionToken: this.options.awsSessionToken } : {}),
  264. }
  265. }
  266. this.client = new BedrockRuntimeClient(clientConfig)
  267. }
  268. // Helper to guess model info from custom modelId string if not in bedrockModels
  269. private guessModelInfoFromId(modelId: string): Partial<ModelInfo> {
  270. // Define a mapping for model ID patterns and their configurations
  271. const modelConfigMap: Record<string, Partial<ModelInfo>> = {
  272. "claude-4": {
  273. maxTokens: 8192,
  274. contextWindow: 200_000,
  275. supportsImages: true,
  276. supportsPromptCache: true,
  277. },
  278. "claude-3-7": {
  279. maxTokens: 8192,
  280. contextWindow: 200_000,
  281. supportsImages: true,
  282. supportsPromptCache: true,
  283. },
  284. "claude-3-5": {
  285. maxTokens: 8192,
  286. contextWindow: 200_000,
  287. supportsImages: true,
  288. supportsPromptCache: true,
  289. },
  290. "claude-4-opus": {
  291. maxTokens: 4096,
  292. contextWindow: 200_000,
  293. supportsImages: true,
  294. supportsPromptCache: true,
  295. },
  296. "claude-3-opus": {
  297. maxTokens: 4096,
  298. contextWindow: 200_000,
  299. supportsImages: true,
  300. supportsPromptCache: true,
  301. },
  302. "claude-3-haiku": {
  303. maxTokens: 4096,
  304. contextWindow: 200_000,
  305. supportsImages: true,
  306. supportsPromptCache: true,
  307. },
  308. }
  309. // Match the model ID to a configuration
  310. const id = modelId.toLowerCase()
  311. for (const [pattern, config] of Object.entries(modelConfigMap)) {
  312. if (id.includes(pattern)) {
  313. return config
  314. }
  315. }
  316. // Default fallback
  317. return {
  318. maxTokens: BEDROCK_MAX_TOKENS,
  319. contextWindow: BEDROCK_DEFAULT_CONTEXT,
  320. supportsImages: false,
  321. supportsPromptCache: false,
  322. }
  323. }
/**
 * Streams a chat completion via Bedrock's ConverseStream API.
 *
 * Builds the Converse payload (messages, system prompt with optional cache
 * points, inference config, thinking config, beta flags, tool config, service
 * tier), sends it with a 10-minute abort timeout, and translates SDK stream
 * events into ApiStream chunks: "usage", "reasoning", "text", and
 * "tool_call_partial".
 *
 * @param systemPrompt System prompt text.
 * @param messages Conversation history in Anthropic message format.
 * @param metadata Optional request metadata; `thinking` can explicitly enable
 *                 extended thinking with a token budget.
 * @throws Throttling errors are rethrown immediately (no chunks) so the caller's
 *         retry/backoff logic can engage; other errors yield error chunks and
 *         then rethrow an enhanced Error preserving `name`, `status`, `$metadata`.
 */
override async *createMessage(
	systemPrompt: string,
	messages: Anthropic.Messages.MessageParam[],
	metadata?: ApiHandlerCreateMessageMetadata & {
		thinking?: {
			enabled: boolean
			maxTokens?: number
			maxThinkingTokens?: number
		}
	},
): ApiStream {
	const modelConfig = this.getModel()
	// Prompt caching requires both the user setting and model support.
	const usePromptCache = Boolean(this.options.awsUsePromptCache && this.supportsAwsPromptCache(modelConfig))
	// Stable-ish id derived from the first message; used by the cache strategy
	// to correlate cache points across requests in the same conversation.
	const conversationId =
		messages.length > 0
			? `conv_${messages[0].role}_${
					typeof messages[0].content === "string"
						? messages[0].content.substring(0, 20)
						: "complex_content"
				}`
			: "default_conversation"
	const formatted = this.convertToBedrockConverseMessages(
		messages,
		systemPrompt,
		usePromptCache,
		modelConfig.info,
		conversationId,
	)
	let additionalModelRequestFields: BedrockAdditionalModelFields | undefined
	let thinkingEnabled = false
	// Determine if thinking should be enabled
	// metadata?.thinking?.enabled: Explicitly enabled through API metadata (direct request)
	// shouldUseReasoningBudget(): Enabled through user settings (enableReasoningEffort = true)
	const isThinkingExplicitlyEnabled = metadata?.thinking?.enabled
	const isThinkingEnabledBySettings =
		shouldUseReasoningBudget({ model: modelConfig.info, settings: this.options }) &&
		modelConfig.reasoning &&
		modelConfig.reasoningBudget
	if ((isThinkingExplicitlyEnabled || isThinkingEnabledBySettings) && modelConfig.info.supportsReasoningBudget) {
		thinkingEnabled = true
		additionalModelRequestFields = {
			thinking: {
				type: "enabled",
				// Budget priority: explicit metadata > model config > 4096 floor.
				budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096,
			},
		}
		logger.info("Extended thinking enabled for Bedrock request", {
			ctx: "bedrock",
			modelId: modelConfig.id,
			thinking: additionalModelRequestFields.thinking,
		})
	}
	const inferenceConfig: BedrockInferenceConfig = {
		maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
		temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
	}
	// Check if 1M context is enabled for supported Claude 4 models.
	// parseBaseModelId strips cross-region inference prefixes first.
	const baseModelId = this.parseBaseModelId(modelConfig.id)
	const is1MContextEnabled =
		BEDROCK_1M_CONTEXT_MODEL_IDS.includes(baseModelId as any) && this.options.awsBedrock1MContext
	// Determine if service tier should be applied (checked later when building payload)
	const useServiceTier =
		this.options.awsBedrockServiceTier && BEDROCK_SERVICE_TIER_MODEL_IDS.includes(baseModelId as any)
	if (useServiceTier) {
		logger.info("Service tier specified for Bedrock request", {
			ctx: "bedrock",
			modelId: modelConfig.id,
			serviceTier: this.options.awsBedrockServiceTier,
		})
	}
	// Collect anthropic_beta flags required by the selected features.
	const anthropicBetas: string[] = []
	// 1M-token context window beta.
	if (is1MContextEnabled) {
		anthropicBetas.push("context-1m-2025-08-07")
	}
	// Fine-grained tool streaming beta for Claude models — enables proper
	// tool-use streaming for Anthropic models on Bedrock.
	if (baseModelId.includes("claude")) {
		anthropicBetas.push("fine-grained-tool-streaming-2025-05-14")
	}
	// Apply anthropic_beta to additionalModelRequestFields if any betas are needed.
	if (anthropicBetas.length > 0) {
		if (!additionalModelRequestFields) {
			additionalModelRequestFields = {} as BedrockAdditionalModelFields
		}
		additionalModelRequestFields.anthropic_beta = anthropicBetas
	}
	const toolConfig: ToolConfiguration = {
		tools: this.convertToolsForBedrock(metadata?.tools ?? []),
		toolChoice: this.convertToolChoiceForBedrock(metadata?.tool_choice),
	}
	// Build payload with optional service_tier at top level.
	// Service tier is a top-level parameter per AWS documentation, NOT inside additionalModelRequestFields:
	// https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
	const payload: BedrockPayloadWithServiceTier = {
		modelId: modelConfig.id,
		messages: formatted.messages,
		system: formatted.system,
		inferenceConfig,
		...(additionalModelRequestFields && { additionalModelRequestFields }),
		// anthropic_version is required at top level when thinking is enabled.
		...(thinkingEnabled && { anthropic_version: "bedrock-2023-05-31" }),
		toolConfig,
		...(useServiceTier && { service_tier: this.options.awsBedrockServiceTier }),
	}
	// Abort the request if the stream has not completed within 10 minutes.
	const controller = new AbortController()
	let timeoutId: NodeJS.Timeout | undefined
	try {
		timeoutId = setTimeout(
			() => {
				controller.abort()
			},
			10 * 60 * 1000,
		)
		const command = new ConverseStreamCommand(payload)
		const response = await this.client.send(command, {
			abortSignal: controller.signal,
		})
		if (!response.stream) {
			clearTimeout(timeoutId)
			throw new Error("No stream available in the response")
		}
		// Reset per-request thinking state before consuming the stream.
		this.lastThoughtSignature = undefined
		this.lastRedactedThinkingBlocks = []
		for await (const chunk of response.stream) {
			// Parse the chunk as JSON if it's a string (for tests)
			let streamEvent: StreamEvent
			try {
				streamEvent = typeof chunk === "string" ? JSON.parse(chunk) : (chunk as unknown as StreamEvent)
			} catch (e) {
				// Malformed chunks are logged and skipped rather than aborting the stream.
				logger.error("Failed to parse stream event", {
					ctx: "bedrock",
					error: e instanceof Error ? e : String(e),
					chunk: typeof chunk === "string" ? chunk : "binary data",
				})
				continue
			}
			// Handle metadata (usage) events first.
			if (streamEvent.metadata?.usage) {
				const usage = (streamEvent.metadata?.usage || {}) as UsageType
				// Check both field naming conventions for cache tokens.
				const cacheReadTokens = usage.cacheReadInputTokens || usage.cacheReadInputTokenCount || 0
				const cacheWriteTokens = usage.cacheWriteInputTokens || usage.cacheWriteInputTokenCount || 0
				// Always include all available token information.
				yield {
					type: "usage",
					inputTokens: usage.inputTokens || 0,
					outputTokens: usage.outputTokens || 0,
					cacheReadTokens: cacheReadTokens,
					cacheWriteTokens: cacheWriteTokens,
				}
				continue
			}
			if (streamEvent?.trace?.promptRouter?.invokedModelId) {
				try {
					// Update the in-use model info to be based on the invoked model id
					// for the router, so that pricing, context window, caching etc. have
					// usable values. The model *id* is kept as the router's id so that
					// subsequent requests are still sent through the router.
					let invokedArnInfo = this.parseArn(streamEvent.trace.promptRouter.invokedModelId)
					let invokedModel = this.getModelById(invokedArnInfo.modelId as string, invokedArnInfo.modelType)
					if (invokedModel) {
						invokedModel.id = modelConfig.id
						this.costModelConfig = invokedModel
					}
					// Handle usage reported by the prompt router itself.
					if (streamEvent?.trace?.promptRouter?.usage) {
						const routerUsage = streamEvent.trace.promptRouter.usage
						// Check both field naming conventions for cache tokens.
						const cacheReadTokens =
							routerUsage.cacheReadTokens || routerUsage.cacheReadInputTokenCount || 0
						const cacheWriteTokens =
							routerUsage.cacheWriteTokens || routerUsage.cacheWriteInputTokenCount || 0
						yield {
							type: "usage",
							inputTokens: routerUsage.inputTokens || 0,
							outputTokens: routerUsage.outputTokens || 0,
							cacheReadTokens: cacheReadTokens,
							cacheWriteTokens: cacheWriteTokens,
						}
					}
				} catch (error) {
					logger.error("Error handling Bedrock invokedModelId", {
						ctx: "bedrock",
						error: error instanceof Error ? error : String(error),
					})
				} finally {
					// Always move to the next event, even if router handling threw.
					// eslint-disable-next-line no-unsafe-finally
					continue
				}
			}
			// Message start carries no content — skip.
			if (streamEvent.messageStart) {
				continue
			}
			// Handle content block starts.
			if (streamEvent.contentBlockStart) {
				const cbStart = streamEvent.contentBlockStart
				// Reasoning block (official AWS SDK structure).
				if (cbStart.contentBlock?.reasoningContent) {
					// Separate consecutive reasoning blocks with a newline.
					if (cbStart.contentBlockIndex && cbStart.contentBlockIndex > 0) {
						yield { type: "reasoning", text: "\n" }
					}
					yield {
						type: "reasoning",
						text: cbStart.contentBlock.reasoningContent.text || "",
					}
				}
				// Thinking block — handle both possible AWS SDK structures:
				// cbStart.contentBlock (newer) and cbStart.content_block (alternative).
				else if (cbStart.contentBlock?.type === "thinking" || cbStart.content_block?.type === "thinking") {
					const contentBlock = cbStart.contentBlock || cbStart.content_block
					if (cbStart.contentBlockIndex && cbStart.contentBlockIndex > 0) {
						yield { type: "reasoning", text: "\n" }
					}
					if (contentBlock?.thinking) {
						yield {
							type: "reasoning",
							text: contentBlock.thinking,
						}
					}
				}
				// Tool use block start: emit id/name now; arguments stream in deltas.
				else if (cbStart.start?.toolUse || cbStart.contentBlock?.toolUse) {
					const toolUse = cbStart.start?.toolUse || cbStart.contentBlock?.toolUse
					if (toolUse) {
						yield {
							type: "tool_call_partial",
							index: cbStart.contentBlockIndex ?? 0,
							id: toolUse.toolUseId,
							name: toolUse.name,
							arguments: undefined,
						}
					}
				} else if (cbStart.start?.text) {
					// Plain text carried on the block start itself.
					yield {
						type: "text",
						text: cbStart.start.text,
					}
				}
				continue
			}
			// Handle content deltas.
			if (streamEvent.contentBlockDelta) {
				const cbDelta = streamEvent.contentBlockDelta
				const delta = cbDelta.delta
				// Process reasoning and text content deltas.
				// Multiple structures are supported for AWS SDK compatibility:
				// - delta.reasoningContent.text: AWS docs structure for reasoning
				// - delta.thinking: alternative structure for thinking content
				// - delta.text: standard text content
				// - delta.toolUse.input: tool input arguments
				if (delta) {
					// Reasoning text (AWS SDK structure).
					if (delta.reasoningContent?.text) {
						yield {
							type: "reasoning",
							text: delta.reasoningContent.text,
						}
						continue
					}
					// Capture the thinking signature from reasoningContent.signature delta.
					// Bedrock Converse API sends the signature as a separate delta after all
					// reasoning text deltas. This signature must be round-tripped back for
					// multi-turn conversations with tool use (Anthropic API requirement).
					if (delta.reasoningContent?.signature) {
						this.lastThoughtSignature = delta.reasoningContent.signature
						continue
					}
					// Capture redacted thinking content (opaque binary data from safety-filtered reasoning).
					// Anthropic returns this when extended thinking content is filtered. It must be
					// passed back verbatim in multi-turn conversations for proper reasoning continuity.
					if (delta.reasoningContent?.redactedContent) {
						const redactedContent = delta.reasoningContent.redactedContent
						this.lastRedactedThinkingBlocks.push({
							type: "redacted_thinking",
							data: Buffer.from(redactedContent).toString("base64"),
						})
						continue
					}
					// Tool argument fragment for the block at cbDelta.contentBlockIndex.
					if (delta.toolUse?.input) {
						yield {
							type: "tool_call_partial",
							index: cbDelta.contentBlockIndex ?? 0,
							id: undefined,
							name: undefined,
							arguments: delta.toolUse.input,
						}
						continue
					}
					// Alternative thinking structure (fallback for older SDK versions).
					if (delta.type === "thinking_delta" && delta.thinking) {
						yield {
							type: "reasoning",
							text: delta.thinking,
						}
					} else if (delta.text) {
						yield {
							type: "text",
							text: delta.text,
						}
					}
				}
				continue
			}
			// Message stop carries no content we surface — skip.
			if (streamEvent.messageStop) {
				continue
			}
		}
		// Clear timeout after stream completes.
		clearTimeout(timeoutId)
	} catch (error: unknown) {
		// Clear timeout on error.
		clearTimeout(timeoutId)
		// Capture error in telemetry before processing.
		const errorMessage = error instanceof Error ? error.message : String(error)
		const apiError = new ApiProviderError(errorMessage, this.providerName, modelConfig.id, "createMessage")
		TelemetryService.instance.captureException(apiError)
		// Check if this is a throttling error that should trigger retry logic.
		const errorType = this.getErrorType(error)
		// For throttling errors, throw immediately without yielding chunks.
		// This allows the retry mechanism in attemptApiRequest() to catch and handle it.
		// The retry logic in Task.ts expects errors to be thrown on the first
		// chunk for proper exponential backoff behavior.
		if (errorType === "THROTTLING") {
			if (error instanceof Error) {
				throw error
			} else {
				throw new Error("Throttling error occurred")
			}
		}
		// For non-throttling errors, use the standard error handling with chunks.
		const errorChunks = this.handleBedrockError(error, true) // true for streaming context
		// Yield each chunk individually to ensure type compatibility.
		for (const chunk of errorChunks) {
			yield chunk as any // Cast to any to bypass type checking since we know the structure is correct
		}
		// Re-throw with enhanced error message for retry system.
		const enhancedErrorMessage = this.formatErrorMessage(error, this.getErrorType(error), true)
		if (error instanceof Error) {
			const enhancedError = new Error(enhancedErrorMessage)
			// Preserve important properties from the original error.
			enhancedError.name = error.name
			// Validate and preserve status property.
			if ("status" in error && typeof (error as any).status === "number") {
				;(enhancedError as any).status = (error as any).status
			}
			// Validate and preserve $metadata property.
			if (
				"$metadata" in error &&
				typeof (error as any).$metadata === "object" &&
				(error as any).$metadata !== null
			) {
				;(enhancedError as any).$metadata = (error as any).$metadata
			}
			throw enhancedError
		} else {
			throw new Error("An unknown error occurred")
		}
	}
}
  694. async completePrompt(prompt: string): Promise<string> {
  695. try {
  696. const modelConfig = this.getModel()
  697. // For completePrompt, thinking is typically not used, but we should still check
  698. // if thinking was somehow enabled in the model config
  699. const thinkingEnabled =
  700. shouldUseReasoningBudget({ model: modelConfig.info, settings: this.options }) &&
  701. modelConfig.reasoning &&
  702. modelConfig.reasoningBudget
  703. const inferenceConfig: BedrockInferenceConfig = {
  704. maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
  705. temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
  706. }
  707. // For completePrompt, use a unique conversation ID based on the prompt
  708. const conversationId = `prompt_${prompt.substring(0, 20)}`
  709. const payload = {
  710. modelId: modelConfig.id,
  711. messages: this.convertToBedrockConverseMessages(
  712. [
  713. {
  714. role: "user",
  715. content: prompt,
  716. },
  717. ],
  718. undefined,
  719. false,
  720. modelConfig.info,
  721. conversationId,
  722. ).messages,
  723. inferenceConfig,
  724. }
  725. const command = new ConverseCommand(payload)
  726. const response = await this.client.send(command)
  727. if (
  728. response?.output?.message?.content &&
  729. response.output.message.content.length > 0 &&
  730. response.output.message.content[0].text &&
  731. response.output.message.content[0].text.trim().length > 0
  732. ) {
  733. try {
  734. return response.output.message.content[0].text
  735. } catch (parseError) {
  736. logger.error("Failed to parse Bedrock response", {
  737. ctx: "bedrock",
  738. error: parseError instanceof Error ? parseError : String(parseError),
  739. })
  740. }
  741. }
  742. return ""
  743. } catch (error) {
  744. // Capture error in telemetry
  745. const model = this.getModel()
  746. const telemetryErrorMessage = error instanceof Error ? error.message : String(error)
  747. const apiError = new ApiProviderError(telemetryErrorMessage, this.providerName, model.id, "completePrompt")
  748. TelemetryService.instance.captureException(apiError)
  749. // Use the extracted error handling method for all errors
  750. const errorResult = this.handleBedrockError(error, false) // false for non-streaming context
  751. // Since we're in a non-streaming context, we know the result is a string
  752. const errorMessage = errorResult as string
  753. // Create enhanced error for retry system
  754. const enhancedError = new Error(errorMessage)
  755. if (error instanceof Error) {
  756. // Preserve important properties from the original error
  757. enhancedError.name = error.name
  758. // Validate and preserve status property
  759. if ("status" in error && typeof (error as any).status === "number") {
  760. ;(enhancedError as any).status = (error as any).status
  761. }
  762. // Validate and preserve $metadata property
  763. if (
  764. "$metadata" in error &&
  765. typeof (error as any).$metadata === "object" &&
  766. (error as any).$metadata !== null
  767. ) {
  768. ;(enhancedError as any).$metadata = (error as any).$metadata
  769. }
  770. }
  771. throw enhancedError
  772. }
  773. }
  774. /**
  775. * Convert Anthropic messages to Bedrock Converse format
  776. */
  777. private convertToBedrockConverseMessages(
  778. anthropicMessages: Anthropic.Messages.MessageParam[] | { role: string; content: string }[],
  779. systemMessage?: string,
  780. usePromptCache: boolean = false,
  781. modelInfo?: any,
  782. conversationId?: string, // Optional conversation ID to track cache points across messages
  783. ): { system: SystemContentBlock[]; messages: Message[] } {
  784. // First convert messages using shared converter for proper image handling
  785. const convertedMessages = sharedConverter(anthropicMessages as Anthropic.Messages.MessageParam[])
  786. // If prompt caching is disabled, return the converted messages directly
  787. if (!usePromptCache) {
  788. return {
  789. system: systemMessage ? [{ text: systemMessage } as SystemContentBlock] : [],
  790. messages: convertedMessages,
  791. }
  792. }
  793. // Convert model info to expected format for cache strategy
  794. const cacheModelInfo: CacheModelInfo = {
  795. maxTokens: modelInfo?.maxTokens || 8192,
  796. contextWindow: modelInfo?.contextWindow || 200_000,
  797. supportsPromptCache: modelInfo?.supportsPromptCache || false,
  798. maxCachePoints: modelInfo?.maxCachePoints || 0,
  799. minTokensPerCachePoint: modelInfo?.minTokensPerCachePoint || 50,
  800. cachableFields: modelInfo?.cachableFields || [],
  801. }
  802. // Get previous cache point placements for this conversation if available
  803. const previousPlacements =
  804. conversationId && this.previousCachePointPlacements[conversationId]
  805. ? this.previousCachePointPlacements[conversationId]
  806. : undefined
  807. // Create config for cache strategy
  808. const config = {
  809. modelInfo: cacheModelInfo,
  810. systemPrompt: systemMessage,
  811. messages: anthropicMessages as Anthropic.Messages.MessageParam[],
  812. usePromptCache,
  813. previousCachePointPlacements: previousPlacements,
  814. }
  815. // Get cache point placements
  816. let strategy = new MultiPointStrategy(config)
  817. const cacheResult = strategy.determineOptimalCachePoints()
  818. // Store cache point placements for future use if conversation ID is provided
  819. if (conversationId && cacheResult.messageCachePointPlacements) {
  820. this.previousCachePointPlacements[conversationId] = cacheResult.messageCachePointPlacements
  821. }
  822. // Apply cache points to the properly converted messages
  823. const messagesWithCache = convertedMessages.map((msg, index) => {
  824. const placement = cacheResult.messageCachePointPlacements?.find((p) => p.index === index)
  825. if (placement) {
  826. return {
  827. ...msg,
  828. content: [...(msg.content || []), { cachePoint: { type: "default" } } as ContentBlock],
  829. }
  830. }
  831. return msg
  832. })
  833. return {
  834. system: cacheResult.system,
  835. messages: messagesWithCache,
  836. }
  837. }
  838. /************************************************************************************
  839. *
  840. * MODEL IDENTIFICATION
  841. *
  842. *************************************************************************************/
// Model configuration used for cost/usage attribution. Starts empty (id: "");
// when populated with a non-empty id it takes precedence in getModel().
// Presumably set when a prompt-router response reports the actually invoked
// model (see getModelById's router handling) — TODO confirm against the
// code that assigns it.
private costModelConfig: { id: BedrockModelId | string; info: ModelInfo } = {
	id: "",
	info: { maxTokens: 0, contextWindow: 0, supportsPromptCache: false, supportsImages: false },
}
  847. private parseArn(arn: string, region?: string) {
  848. /*
  849. * VIA Roo analysis: platform-independent Regex. It's designed to parse Amazon Bedrock ARNs and doesn't rely on any platform-specific features
  850. * like file path separators, line endings, or case sensitivity behaviors. The forward slashes in the regex are properly escaped and
  851. * represent literal characters in the AWS ARN format, not filesystem paths. This regex will function consistently across Windows,
  852. * macOS, Linux, and any other operating system where JavaScript runs.
  853. *
  854. * Supports any AWS partition (aws, aws-us-gov, aws-cn, or future partitions).
  855. * The partition is not captured since we don't need to use it.
  856. *
  857. * This matches ARNs like:
  858. * - Foundation Model: arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-v2
  859. * - GovCloud Inference Profile: arn:aws-us-gov:bedrock:us-gov-west-1:123456789012:inference-profile/us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0
  860. * - Prompt Router: arn:aws:bedrock:us-west-2:123456789012:prompt-router/anthropic-claude
  861. * - Inference Profile: arn:aws:bedrock:us-west-2:123456789012:inference-profile/anthropic.claude-v2
  862. * - Cross Region Inference Profile: arn:aws:bedrock:us-west-2:123456789012:inference-profile/us.anthropic.claude-3-5-sonnet-20241022-v2:0
  863. * - Custom Model (Provisioned Throughput): arn:aws:bedrock:us-west-2:123456789012:provisioned-model/my-custom-model
  864. * - Imported Model: arn:aws:bedrock:us-west-2:123456789012:imported-model/my-imported-model
  865. *
  866. * match[0] - The entire matched string
  867. * match[1] - The region (e.g., "us-east-1", "us-gov-west-1")
  868. * match[2] - The account ID (can be empty string for AWS-managed resources)
  869. * match[3] - The resource type (e.g., "foundation-model")
  870. * match[4] - The resource ID (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
  871. */
  872. const arnRegex = /^arn:[^:]+:(?:bedrock|sagemaker):([^:]+):([^:]*):(?:([^\/]+)\/([\w\.\-:]+)|([^\/]+))$/
  873. let match = arn.match(arnRegex)
  874. if (match && match[1] && match[3] && match[4]) {
  875. // Create the result object
  876. const result: {
  877. isValid: boolean
  878. region?: string
  879. modelType?: string
  880. modelId?: string
  881. errorMessage?: string
  882. crossRegionInference: boolean
  883. } = {
  884. isValid: true,
  885. crossRegionInference: false, // Default to false
  886. }
  887. result.modelType = match[3]
  888. const originalModelId = match[4]
  889. result.modelId = this.parseBaseModelId(originalModelId)
  890. // Extract the region from the first capture group
  891. const arnRegion = match[1]
  892. result.region = arnRegion
  893. // Check if the original model ID had a region prefix
  894. if (originalModelId && result.modelId !== originalModelId) {
  895. // If the model ID changed after parsing, it had a region prefix
  896. let prefix = originalModelId.replace(result.modelId, "")
  897. result.crossRegionInference = AwsBedrockHandler.isSystemInferenceProfile(prefix)
  898. }
  899. // Check if region in ARN matches provided region (if specified)
  900. if (region && arnRegion !== region) {
  901. result.errorMessage = `Region mismatch: The region in your ARN (${arnRegion}) does not match your selected region (${region}). This may cause access issues. The provider will use the region from the ARN.`
  902. result.region = arnRegion
  903. }
  904. return result
  905. }
  906. // If we get here, the regex didn't match
  907. return {
  908. isValid: false,
  909. region: undefined,
  910. modelType: undefined,
  911. modelId: undefined,
  912. errorMessage: "Invalid ARN format. ARN should follow the Amazon Bedrock ARN pattern.",
  913. crossRegionInference: false,
  914. }
  915. }
  916. //This strips any region prefix that used on cross-region model inference ARNs
  917. private parseBaseModelId(modelId: string): string {
  918. if (!modelId) {
  919. return modelId
  920. }
  921. // Remove AWS cross-region inference profile prefixes
  922. // as defined in AWS_INFERENCE_PROFILE_MAPPING
  923. for (const [_, inferenceProfile] of AWS_INFERENCE_PROFILE_MAPPING) {
  924. if (modelId.startsWith(inferenceProfile)) {
  925. // Remove the inference profile prefix from the model ID
  926. return modelId.substring(inferenceProfile.length)
  927. }
  928. }
  929. // Also strip Global Inference profile prefix if present
  930. if (modelId.startsWith("global.")) {
  931. return modelId.substring("global.".length)
  932. }
  933. // Return the model ID as-is for all other cases
  934. return modelId
  935. }
  936. //Prompt Router responses come back in a different sequence and the model used is in the response and must be fetched by name
  937. getModelById(modelId: string, modelType?: string): { id: BedrockModelId | string; info: ModelInfo } {
  938. // Try to find the model in bedrockModels
  939. const baseModelId = this.parseBaseModelId(modelId) as BedrockModelId
  940. let model
  941. if (baseModelId in bedrockModels) {
  942. //Do a deep copy of the model info so that later in the code the model id and maxTokens can be set.
  943. // The bedrockModels array is a constant and updating the model ID from the returned invokedModelID value
  944. // in a prompt router response isn't possible on the constant.
  945. model = { id: baseModelId, info: JSON.parse(JSON.stringify(bedrockModels[baseModelId])) }
  946. } else if (modelType && modelType.includes("router")) {
  947. model = {
  948. id: bedrockDefaultPromptRouterModelId,
  949. info: JSON.parse(JSON.stringify(bedrockModels[bedrockDefaultPromptRouterModelId])),
  950. }
  951. } else {
  952. // Use heuristics for model info, then allow overrides from ProviderSettings
  953. const guessed = this.guessModelInfoFromId(modelId)
  954. model = {
  955. id: bedrockDefaultModelId,
  956. info: {
  957. ...JSON.parse(JSON.stringify(bedrockModels[bedrockDefaultModelId])),
  958. ...guessed,
  959. },
  960. }
  961. }
  962. // Always allow user to override detected/guessed maxTokens and contextWindow
  963. if (this.options.modelMaxTokens && this.options.modelMaxTokens > 0) {
  964. model.info.maxTokens = this.options.modelMaxTokens
  965. }
  966. if (this.options.awsModelContextWindow && this.options.awsModelContextWindow > 0) {
  967. model.info.contextWindow = this.options.awsModelContextWindow
  968. }
  969. return model
  970. }
/**
 * Resolves the fully-configured model for this handler.
 *
 * Resolution order:
 * 1. If a cost model config has been populated (non-empty id), it wins outright.
 * 2. Otherwise the model comes either from a custom ARN or from the dropdown
 *    selection, with Global Inference taking precedence over cross-region
 *    inference prefixing.
 * 3. Finally, 1M-context tier and service-tier pricing adjustments are applied
 *    on top, and reasoning parameters are merged in via getModelParams.
 *
 * @returns Model id/info plus resolved sampling and reasoning parameters
 */
override getModel(): {
	id: BedrockModelId | string
	info: ModelInfo
	maxTokens?: number
	temperature?: number
	reasoning?: any
	reasoningBudget?: number
} {
	// A populated cost model config (e.g. from a prompt-router response) short-circuits resolution
	if (this.costModelConfig?.id?.trim().length > 0) {
		// Get model params for cost model config
		const params = getModelParams({
			format: "anthropic",
			modelId: this.costModelConfig.id,
			model: this.costModelConfig.info,
			settings: this.options,
			defaultTemperature: BEDROCK_DEFAULT_TEMPERATURE,
		})
		return { ...this.costModelConfig, ...params }
	}
	let modelConfig = undefined
	// If custom ARN is provided, use it
	if (this.options.awsCustomArn) {
		modelConfig = this.getModelById(this.arnInfo.modelId, this.arnInfo.modelType)
		// If the user entered an ARN for a foundation-model they've done the same thing as picking from our list of options.
		// We leave the model data matching the same as if a drop-down input method was used by not overwriting the model ID with the user input ARN.
		// Otherwise the ARN is not a foundation-model resource type and that ARN should be used as the identifier in Bedrock interactions.
		if (this.arnInfo.modelType !== "foundation-model") modelConfig.id = this.options.awsCustomArn
	} else {
		// A model was selected from the drop down
		modelConfig = this.getModelById(this.options.apiModelId as string)
		// Apply Global Inference prefix if enabled and supported (takes precedence over cross-region)
		const baseIdForGlobal = this.parseBaseModelId(modelConfig.id)
		if (
			this.options.awsUseGlobalInference &&
			BEDROCK_GLOBAL_INFERENCE_MODEL_IDS.includes(baseIdForGlobal as any)
		) {
			modelConfig.id = `global.${baseIdForGlobal}`
		}
		// Otherwise, add cross-region inference prefix if enabled
		else if (this.options.awsUseCrossRegionInference && this.options.awsRegion) {
			const prefix = AwsBedrockHandler.getPrefixForRegion(this.options.awsRegion)
			if (prefix) {
				modelConfig.id = `${prefix}${modelConfig.id}`
			}
		}
	}
	// Check if 1M context is enabled for supported Claude 4 models.
	// Use parseBaseModelId to handle cross-region inference prefixes.
	const baseModelId = this.parseBaseModelId(modelConfig.id)
	if (BEDROCK_1M_CONTEXT_MODEL_IDS.includes(baseModelId as any) && this.options.awsBedrock1MContext) {
		// Update context window and pricing to 1M tier when 1M context beta is enabled
		const tier = modelConfig.info.tiers?.[0]
		modelConfig.info = {
			...modelConfig.info,
			contextWindow: tier?.contextWindow ?? 1_000_000,
			inputPrice: tier?.inputPrice ?? modelConfig.info.inputPrice,
			outputPrice: tier?.outputPrice ?? modelConfig.info.outputPrice,
			cacheWritesPrice: tier?.cacheWritesPrice ?? modelConfig.info.cacheWritesPrice,
			cacheReadsPrice: tier?.cacheReadsPrice ?? modelConfig.info.cacheReadsPrice,
		}
	}
	// Get model params including reasoning configuration
	const params = getModelParams({
		format: "anthropic",
		modelId: modelConfig.id,
		model: modelConfig.info,
		settings: this.options,
		defaultTemperature: BEDROCK_DEFAULT_TEMPERATURE,
	})
	// Apply service tier pricing if specified and model supports it
	const baseModelIdForTier = this.parseBaseModelId(modelConfig.id)
	if (this.options.awsBedrockServiceTier && BEDROCK_SERVICE_TIER_MODEL_IDS.includes(baseModelIdForTier as any)) {
		const pricingMultiplier = BEDROCK_SERVICE_TIER_PRICING[this.options.awsBedrockServiceTier]
		if (pricingMultiplier && pricingMultiplier !== 1.0) {
			// Apply pricing multiplier to all price fields
			modelConfig.info = {
				...modelConfig.info,
				inputPrice: modelConfig.info.inputPrice
					? modelConfig.info.inputPrice * pricingMultiplier
					: undefined,
				outputPrice: modelConfig.info.outputPrice
					? modelConfig.info.outputPrice * pricingMultiplier
					: undefined,
				cacheWritesPrice: modelConfig.info.cacheWritesPrice
					? modelConfig.info.cacheWritesPrice * pricingMultiplier
					: undefined,
				cacheReadsPrice: modelConfig.info.cacheReadsPrice
					? modelConfig.info.cacheReadsPrice * pricingMultiplier
					: undefined,
			}
		}
	}
	// Don't override maxTokens/contextWindow here; handled in getModelById (and includes user overrides)
	return { ...modelConfig, ...params } as {
		id: BedrockModelId | string
		info: ModelInfo
		maxTokens?: number
		temperature?: number
		reasoning?: any
		reasoningBudget?: number
	}
}
  1073. /************************************************************************************
  1074. *
  1075. * CACHE
  1076. *
  1077. *************************************************************************************/
// Store previous cache point placements for maintaining consistency across consecutive messages.
// Keyed by conversation ID; values are the placement records produced by the
// cache strategy in convertToBedrockConverseMessages and fed back into it on
// the next turn of the same conversation.
private previousCachePointPlacements: { [conversationId: string]: any[] } = {}
  1080. private supportsAwsPromptCache(modelConfig: { id: BedrockModelId | string; info: ModelInfo }): boolean | undefined {
  1081. // Check if the model supports prompt cache
  1082. // The cachableFields property is not part of the ModelInfo type in schemas
  1083. // but it's used in the bedrockModels object in shared/api.ts
  1084. return (
  1085. modelConfig?.info?.supportsPromptCache &&
  1086. // Use optional chaining and type assertion to access cachableFields
  1087. (modelConfig?.info as any)?.cachableFields &&
  1088. (modelConfig?.info as any)?.cachableFields?.length > 0
  1089. )
  1090. }
  1091. /**
  1092. * Removes any existing cachePoint nodes from content blocks
  1093. */
  1094. private removeCachePoints(content: any): any {
  1095. if (Array.isArray(content)) {
  1096. return content.map((block) => {
  1097. // Use destructuring to remove cachePoint property
  1098. const { cachePoint: _, ...rest } = block
  1099. return rest
  1100. })
  1101. }
  1102. return content
  1103. }
  1104. /************************************************************************************
  1105. *
  1106. * NATIVE TOOLS
  1107. *
  1108. *************************************************************************************/
  1109. /**
  1110. * Convert OpenAI tool definitions to Bedrock Converse format
  1111. * Transforms JSON Schema to draft 2020-12 compliant format required by Claude models.
  1112. * @param tools Array of OpenAI ChatCompletionTool definitions
  1113. * @returns Array of Bedrock Tool definitions
  1114. */
  1115. private convertToolsForBedrock(tools: OpenAI.Chat.ChatCompletionTool[]): Tool[] {
  1116. return tools
  1117. .filter((tool) => tool.type === "function")
  1118. .map(
  1119. (tool) =>
  1120. ({
  1121. toolSpec: {
  1122. name: tool.function.name,
  1123. description: tool.function.description,
  1124. inputSchema: {
  1125. // Normalize schema to JSON Schema draft 2020-12 compliant format
  1126. // This converts type: ["T", "null"] to anyOf: [{type: "T"}, {type: "null"}]
  1127. json: normalizeToolSchema(tool.function.parameters as Record<string, unknown>),
  1128. },
  1129. },
  1130. }) as Tool,
  1131. )
  1132. }
  1133. /**
  1134. * Convert OpenAI tool_choice to Bedrock ToolChoice format
  1135. * @param toolChoice OpenAI tool_choice parameter
  1136. * @returns Bedrock ToolChoice configuration
  1137. */
  1138. private convertToolChoiceForBedrock(
  1139. toolChoice: OpenAI.Chat.ChatCompletionCreateParams["tool_choice"],
  1140. ): ToolChoice | undefined {
  1141. if (!toolChoice) {
  1142. // Default to auto - model decides whether to use tools
  1143. return { auto: {} } as ToolChoice
  1144. }
  1145. if (typeof toolChoice === "string") {
  1146. switch (toolChoice) {
  1147. case "none":
  1148. return undefined // Bedrock doesn't have "none", just omit tools
  1149. case "auto":
  1150. return { auto: {} } as ToolChoice
  1151. case "required":
  1152. return { any: {} } as ToolChoice // Model must use at least one tool
  1153. default:
  1154. return { auto: {} } as ToolChoice
  1155. }
  1156. }
  1157. // Handle object form { type: "function", function: { name: string } }
  1158. if (typeof toolChoice === "object" && "function" in toolChoice) {
  1159. return {
  1160. tool: {
  1161. name: toolChoice.function.name,
  1162. },
  1163. } as ToolChoice
  1164. }
  1165. return { auto: {} } as ToolChoice
  1166. }
  1167. /************************************************************************************
  1168. *
  1169. * AMAZON REGIONS
  1170. *
  1171. *************************************************************************************/
  1172. private static getPrefixForRegion(region: string): string | undefined {
  1173. // Use AWS recommended inference profile prefixes
  1174. // Array is pre-sorted by pattern length (descending) to ensure more specific patterns match first
  1175. for (const [regionPattern, inferenceProfile] of AWS_INFERENCE_PROFILE_MAPPING) {
  1176. if (region.startsWith(regionPattern)) {
  1177. return inferenceProfile
  1178. }
  1179. }
  1180. return undefined
  1181. }
  1182. private static isSystemInferenceProfile(prefix: string): boolean {
  1183. // Check if the prefix is defined in AWS_INFERENCE_PROFILE_MAPPING
  1184. for (const [_, inferenceProfile] of AWS_INFERENCE_PROFILE_MAPPING) {
  1185. if (prefix === inferenceProfile) {
  1186. return true
  1187. }
  1188. }
  1189. return false
  1190. }
  1191. /************************************************************************************
  1192. *
  1193. * ERROR HANDLING
  1194. *
  1195. *************************************************************************************/
/**
 * Error type definitions for Bedrock API errors.
 *
 * Each entry pairs lowercase substrings (matched against the error's message
 * and name by getErrorType) with a user-facing message template and a log
 * level. Templates may contain placeholders like {errorMessage}, {errorName},
 * {modelId}, {contextWindow}, {regionInfo} which are substituted by
 * formatErrorMessage. GENERIC (empty patterns list) is the fallback entry.
 */
private static readonly ERROR_TYPES: Record<
	string,
	{
		patterns: string[] // Strings to match in lowercase error message or name
		messageTemplate: string // Template with placeholders like {region}, {modelId}, etc.
		logLevel: "error" | "warn" | "info" // Log level for this error type
	}
> = {
	ACCESS_DENIED: {
		patterns: ["access", "denied", "permission"],
		messageTemplate: `You don't have access to the model specified.
Please verify:
1. Try cross-region inference if you're using a foundation model
2. If using an ARN, verify the ARN is correct and points to a valid model
3. Your AWS credentials have permission to access this model (check IAM policies)
4. The region in the ARN matches the region where the model is deployed
5. If using a provisioned model, ensure it's active and not in a failed state`,
		logLevel: "error",
	},
	NOT_FOUND: {
		patterns: ["not found", "does not exist"],
		messageTemplate: `The specified ARN does not exist or is invalid. Please check:
1. The ARN format is correct (arn:aws:bedrock:region:account-id:resource-type/resource-name)
2. The model exists in the specified region
3. The account ID in the ARN is correct`,
		logLevel: "error",
	},
	THROTTLING: {
		patterns: [
			"throttl",
			"rate",
			"limit",
			"bedrock is unable to process your request", // Amazon Bedrock specific throttling message
			"please wait",
			"quota exceeded",
			"service unavailable",
			"busy",
			"overloaded",
			"too many requests",
			"request limit",
			"concurrent requests",
		],
		messageTemplate: `Request was throttled or rate limited. Please try:
1. Reducing the frequency of requests
2. If using a provisioned model, check its throughput settings
3. Contact AWS support to request a quota increase if needed
`,
		logLevel: "error",
	},
	TOO_MANY_TOKENS: {
		patterns: ["too many tokens", "token limit exceeded", "context length", "maximum context length"],
		messageTemplate: `"Too many tokens" error detected.
Possible Causes:
1. Input exceeds model's context window limit
2. Rate limiting (too many tokens per minute)
3. Quota exceeded for token usage
4. Other token-related service limitations
Suggestions:
1. Reduce the size of your input
2. Split your request into smaller chunks
3. Use a model with a larger context window
4. If rate limited, reduce request frequency
5. Check your Amazon Bedrock quotas and limits
`,
		logLevel: "error",
	},
	SERVICE_QUOTA_EXCEEDED: {
		patterns: ["service quota exceeded", "service quota", "quota exceeded for model"],
		messageTemplate: `Service quota exceeded. This error indicates you've reached AWS service limits.
Please try:
1. Contact AWS support to request a quota increase
2. Reduce request frequency temporarily
3. Check your Amazon Bedrock quotas in the AWS console
4. Consider using a different model or region with available capacity
`,
		logLevel: "error",
	},
	MODEL_NOT_READY: {
		patterns: ["model not ready", "model is not ready", "provisioned throughput not ready", "model loading"],
		messageTemplate: `Model is not ready or still loading. This can happen with:
1. Provisioned throughput models that are still initializing
2. Custom models that are being loaded
3. Models that are temporarily unavailable
Please try:
1. Wait a few minutes and retry
2. Check the model status in Amazon Bedrock console
3. Verify the model is properly provisioned
`,
		logLevel: "error",
	},
	INTERNAL_SERVER_ERROR: {
		patterns: ["internal server error", "internal error", "server error", "service error"],
		messageTemplate: `Amazon Bedrock internal server error. This is a temporary service issue.
Please try:
1. Retry the request after a brief delay
2. If the error persists, check AWS service health
3. Contact AWS support if the issue continues
`,
		logLevel: "error",
	},
	ON_DEMAND_NOT_SUPPORTED: {
		// Note: the curly apostrophe matches the exact text AWS returns
		patterns: ["with on-demand throughput isn’t supported."],
		messageTemplate: `
1. Try enabling cross-region inference in settings.
2. Or, create an inference profile and then leverage the "Use custom ARN..." option of the model selector in settings.`,
		logLevel: "error",
	},
	ABORT: {
		patterns: ["aborterror"], // This will match error.name.toLowerCase() for AbortError
		messageTemplate: `Request was aborted: The operation timed out or was manually cancelled. Please try again or check your network connection.`,
		logLevel: "info",
	},
	INVALID_ARN_FORMAT: {
		patterns: ["invalid_arn_format:", "invalid arn format"],
		messageTemplate: `Invalid ARN format. ARN should follow the pattern: arn:aws:bedrock:region:account-id:resource-type/resource-name`,
		logLevel: "error",
	},
	VALIDATION_ERROR: {
		patterns: [
			"input tag",
			"does not match any of the expected tags",
			"field required",
			"validation",
			"invalid parameter",
		],
		messageTemplate: `Parameter validation error: {errorMessage}
This error indicates that the request parameters don't match Amazon Bedrock's expected format.
Common causes:
1. Extended thinking parameter format is incorrect
2. Model-specific parameters are not supported by this model
3. API parameter structure has changed
Please check:
- Model supports the requested features (extended thinking, etc.)
- Parameter format matches Amazon Bedrock specification
- Model ID is correct for the requested features`,
		logLevel: "error",
	},
	// Default/generic error
	GENERIC: {
		patterns: [], // Empty patterns array means this is the default
		messageTemplate: `Unknown Error: {errorMessage}`,
		logLevel: "error",
	},
}
  1343. /**
  1344. * Determines the error type based on the error message or name
  1345. */
  1346. private getErrorType(error: unknown): string {
  1347. if (!(error instanceof Error)) {
  1348. return "GENERIC"
  1349. }
  1350. // Check for HTTP 429 status code (Too Many Requests)
  1351. if ((error as any).status === 429 || (error as any).$metadata?.httpStatusCode === 429) {
  1352. return "THROTTLING"
  1353. }
  1354. // Check for Amazon Bedrock specific throttling exception names
  1355. if ((error as any).name === "ThrottlingException" || (error as any).__type === "ThrottlingException") {
  1356. return "THROTTLING"
  1357. }
  1358. const errorMessage = error.message.toLowerCase()
  1359. const errorName = error.name.toLowerCase()
  1360. // Check each error type's patterns in order of specificity (most specific first)
  1361. const errorTypeOrder = [
  1362. "SERVICE_QUOTA_EXCEEDED", // Most specific - check before THROTTLING
  1363. "MODEL_NOT_READY",
  1364. "TOO_MANY_TOKENS",
  1365. "INTERNAL_SERVER_ERROR",
  1366. "ON_DEMAND_NOT_SUPPORTED",
  1367. "NOT_FOUND",
  1368. "ACCESS_DENIED",
  1369. "THROTTLING", // Less specific - check after more specific patterns
  1370. ]
  1371. for (const errorType of errorTypeOrder) {
  1372. const definition = AwsBedrockHandler.ERROR_TYPES[errorType]
  1373. if (!definition) continue
  1374. // If any pattern matches in either message or name, return this error type
  1375. if (definition.patterns.some((pattern) => errorMessage.includes(pattern) || errorName.includes(pattern))) {
  1376. return errorType
  1377. }
  1378. }
  1379. // Default to generic error
  1380. return "GENERIC"
  1381. }
  1382. /**
  1383. * Formats an error message based on the error type and context
  1384. */
  1385. private formatErrorMessage(error: unknown, errorType: string, _isStreamContext: boolean): string {
  1386. const definition = AwsBedrockHandler.ERROR_TYPES[errorType] || AwsBedrockHandler.ERROR_TYPES.GENERIC
  1387. let template = definition.messageTemplate
  1388. // Prepare template variables
  1389. const templateVars: Record<string, string> = {}
  1390. if (error instanceof Error) {
  1391. templateVars.errorMessage = error.message
  1392. templateVars.errorName = error.name
  1393. const modelConfig = this.getModel()
  1394. templateVars.modelId = modelConfig.id
  1395. templateVars.contextWindow = String(modelConfig.info.contextWindow || "unknown")
  1396. }
  1397. // Add context-specific template variables
  1398. const region =
  1399. typeof this?.client?.config?.region === "function"
  1400. ? this?.client?.config?.region()
  1401. : this?.client?.config?.region
  1402. templateVars.regionInfo = `(${region})`
  1403. // Replace template variables
  1404. for (const [key, value] of Object.entries(templateVars)) {
  1405. template = template.replace(new RegExp(`{${key}}`, "g"), value || "")
  1406. }
  1407. return template
  1408. }
  1409. /**
  1410. * Handles Bedrock API errors and generates appropriate error messages
  1411. * @param error The error that occurred
  1412. * @param isStreamContext Whether the error occurred in a streaming context (true) or not (false)
  1413. * @returns Error message string for non-streaming context or array of stream chunks for streaming context
  1414. */
  1415. private handleBedrockError(
  1416. error: unknown,
  1417. isStreamContext: boolean,
  1418. ): string | Array<{ type: string; text?: string; inputTokens?: number; outputTokens?: number }> {
  1419. // Determine error type
  1420. const errorType = this.getErrorType(error)
  1421. // Format error message
  1422. const errorMessage = this.formatErrorMessage(error, errorType, isStreamContext)
  1423. // Log the error
  1424. const definition = AwsBedrockHandler.ERROR_TYPES[errorType]
  1425. const logMethod = definition.logLevel
  1426. const contextName = isStreamContext ? "createMessage" : "completePrompt"
  1427. logger[logMethod](`${errorType} error in ${contextName}`, {
  1428. ctx: "bedrock",
  1429. customArn: this.options.awsCustomArn,
  1430. errorType,
  1431. errorMessage: error instanceof Error ? error.message : String(error),
  1432. ...(error instanceof Error && error.stack ? { errorStack: error.stack } : {}),
  1433. ...(this.client?.config?.region ? { clientRegion: this.client.config.region } : {}),
  1434. })
  1435. // Return appropriate response based on isStreamContext
  1436. if (isStreamContext) {
  1437. return [
  1438. { type: "text", text: `Error: ${errorMessage}` },
  1439. { type: "usage", inputTokens: 0, outputTokens: 0 },
  1440. ]
  1441. } else {
  1442. // For non-streaming context, add the expected prefix
  1443. return `Bedrock completion error: ${errorMessage}`
  1444. }
  1445. }
  1446. /**
  1447. * Returns the thinking signature captured from the last Bedrock Converse API response.
  1448. * Claude models with extended thinking return a cryptographic signature in the
  1449. * reasoning content delta, which must be round-tripped back for multi-turn
  1450. * conversations with tool use (Anthropic API requirement).
  1451. */
  1452. getThoughtSignature(): string | undefined {
  1453. return this.lastThoughtSignature
  1454. }
  1455. /**
  1456. * Returns any redacted thinking blocks captured from the last Bedrock response.
  1457. * Anthropic returns these when safety filters trigger on the model's internal
  1458. * reasoning. They contain opaque binary data (base64-encoded) that must be
  1459. * passed back verbatim for proper reasoning continuity.
  1460. */
  1461. getRedactedThinkingBlocks(): Array<{ type: "redacted_thinking"; data: string }> | undefined {
  1462. return this.lastRedactedThinkingBlocks.length > 0 ? this.lastRedactedThinkingBlocks : undefined
  1463. }
  1464. }