|
|
@@ -29,8 +29,6 @@ import { logger } from "../../utils/logging"
|
|
|
import { MultiPointStrategy } from "../transform/cache-strategy/multi-point-strategy"
|
|
|
import { ModelInfo as CacheModelInfo } from "../transform/cache-strategy/types"
|
|
|
import { convertToBedrockConverseMessages as sharedConverter } from "../transform/bedrock-converse-format"
|
|
|
-import { getModelParams } from "../transform/model-params"
|
|
|
-import { shouldUseReasoningBudget } from "../../shared/api"
|
|
|
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
|
|
|
|
|
|
/************************************************************************************
|
|
|
@@ -42,63 +40,8 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ".
|
|
|
// Define interface for Bedrock inference config
|
|
|
interface BedrockInferenceConfig {
|
|
|
maxTokens: number
|
|
|
- temperature?: number
|
|
|
- topP?: number
|
|
|
-}
|
|
|
-
|
|
|
-// Define interface for Bedrock thinking configuration
|
|
|
-interface BedrockThinkingConfig {
|
|
|
- thinking: {
|
|
|
- type: "enabled"
|
|
|
- budget_tokens: number
|
|
|
- }
|
|
|
- [key: string]: any // Add index signature to be compatible with DocumentType
|
|
|
-}
|
|
|
-
|
|
|
-// Define interface for Bedrock payload
|
|
|
-interface BedrockPayload {
|
|
|
- modelId: BedrockModelId | string
|
|
|
- messages: Message[]
|
|
|
- system?: SystemContentBlock[]
|
|
|
- inferenceConfig: BedrockInferenceConfig
|
|
|
- anthropic_version?: string
|
|
|
- additionalModelRequestFields?: BedrockThinkingConfig
|
|
|
-}
|
|
|
-
|
|
|
-// Define specific types for content block events to avoid 'as any' usage
|
|
|
-// These handle the multiple possible structures returned by AWS SDK
|
|
|
-interface ContentBlockStartEvent {
|
|
|
- start?: {
|
|
|
- text?: string
|
|
|
- thinking?: string
|
|
|
- }
|
|
|
- contentBlockIndex?: number
|
|
|
- // Alternative structure used by some AWS SDK versions
|
|
|
- content_block?: {
|
|
|
- type?: string
|
|
|
- thinking?: string
|
|
|
- }
|
|
|
- // Official AWS SDK structure for reasoning (as documented)
|
|
|
- contentBlock?: {
|
|
|
- type?: string
|
|
|
- thinking?: string
|
|
|
- reasoningContent?: {
|
|
|
- text?: string
|
|
|
- }
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-interface ContentBlockDeltaEvent {
|
|
|
- delta?: {
|
|
|
- text?: string
|
|
|
- thinking?: string
|
|
|
- type?: string
|
|
|
- // AWS SDK structure for reasoning content deltas
|
|
|
- reasoningContent?: {
|
|
|
- text?: string
|
|
|
- }
|
|
|
- }
|
|
|
- contentBlockIndex?: number
|
|
|
+ temperature: number
|
|
|
+ topP: number
|
|
|
}
|
|
|
|
|
|
// Define types for stream events based on AWS SDK
|
|
|
@@ -110,8 +53,18 @@ export interface StreamEvent {
|
|
|
stopReason?: "end_turn" | "tool_use" | "max_tokens" | "stop_sequence"
|
|
|
additionalModelResponseFields?: Record<string, unknown>
|
|
|
}
|
|
|
- contentBlockStart?: ContentBlockStartEvent
|
|
|
- contentBlockDelta?: ContentBlockDeltaEvent
|
|
|
+ contentBlockStart?: {
|
|
|
+ start?: {
|
|
|
+ text?: string
|
|
|
+ }
|
|
|
+ contentBlockIndex?: number
|
|
|
+ }
|
|
|
+ contentBlockDelta?: {
|
|
|
+ delta?: {
|
|
|
+ text?: string
|
|
|
+ }
|
|
|
+ contentBlockIndex?: number
|
|
|
+ }
|
|
|
metadata?: {
|
|
|
usage?: {
|
|
|
inputTokens: number
|
|
|
@@ -302,17 +255,13 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
|
|
|
override async *createMessage(
|
|
|
systemPrompt: string,
|
|
|
messages: Anthropic.Messages.MessageParam[],
|
|
|
- metadata?: ApiHandlerCreateMessageMetadata & {
|
|
|
- thinking?: {
|
|
|
- enabled: boolean
|
|
|
- maxTokens?: number
|
|
|
- maxThinkingTokens?: number
|
|
|
- }
|
|
|
- },
|
|
|
+ metadata?: ApiHandlerCreateMessageMetadata,
|
|
|
): ApiStream {
|
|
|
- const modelConfig = this.getModel()
|
|
|
+ let modelConfig = this.getModel()
|
|
|
+ // Handle cross-region inference
|
|
|
const usePromptCache = Boolean(this.options.awsUsePromptCache && this.supportsAwsPromptCache(modelConfig))
|
|
|
|
|
|
+ // Generate a conversation ID based on the first few messages to maintain cache consistency
|
|
|
const conversationId =
|
|
|
messages.length > 0
|
|
|
? `conv_${messages[0].role}_${
|
|
|
@@ -322,6 +271,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
|
|
|
}`
|
|
|
: "default_conversation"
|
|
|
|
|
|
+ // Convert messages to Bedrock format, passing the model info and conversation ID
|
|
|
const formatted = this.convertToBedrockConverseMessages(
|
|
|
messages,
|
|
|
systemPrompt,
|
|
|
@@ -330,50 +280,18 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
|
|
|
conversationId,
|
|
|
)
|
|
|
|
|
|
- let additionalModelRequestFields: BedrockThinkingConfig | undefined
|
|
|
- let thinkingEnabled = false
|
|
|
-
|
|
|
- // Determine if thinking should be enabled
|
|
|
- // metadata?.thinking?.enabled: Explicitly enabled through API metadata (direct request)
|
|
|
- // shouldUseReasoningBudget(): Enabled through user settings (enableReasoningEffort = true)
|
|
|
- const isThinkingExplicitlyEnabled = metadata?.thinking?.enabled
|
|
|
- const isThinkingEnabledBySettings =
|
|
|
- shouldUseReasoningBudget({ model: modelConfig.info, settings: this.options }) &&
|
|
|
- modelConfig.reasoning &&
|
|
|
- modelConfig.reasoningBudget
|
|
|
-
|
|
|
- if ((isThinkingExplicitlyEnabled || isThinkingEnabledBySettings) && modelConfig.info.supportsReasoningBudget) {
|
|
|
- thinkingEnabled = true
|
|
|
- additionalModelRequestFields = {
|
|
|
- thinking: {
|
|
|
- type: "enabled",
|
|
|
- budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096,
|
|
|
- },
|
|
|
- }
|
|
|
- logger.info("Extended thinking enabled for Bedrock request", {
|
|
|
- ctx: "bedrock",
|
|
|
- modelId: modelConfig.id,
|
|
|
- thinking: additionalModelRequestFields.thinking,
|
|
|
- })
|
|
|
- }
|
|
|
-
|
|
|
+ // Construct the payload
|
|
|
const inferenceConfig: BedrockInferenceConfig = {
|
|
|
- maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
|
|
|
- temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
|
|
|
+ maxTokens: modelConfig.info.maxTokens as number,
|
|
|
+ temperature: this.options.modelTemperature as number,
|
|
|
+ topP: 0.1,
|
|
|
}
|
|
|
|
|
|
- if (!thinkingEnabled) {
|
|
|
- inferenceConfig.topP = 0.1
|
|
|
- }
|
|
|
-
|
|
|
- const payload: BedrockPayload = {
|
|
|
+ const payload = {
|
|
|
modelId: modelConfig.id,
|
|
|
messages: formatted.messages,
|
|
|
system: formatted.system,
|
|
|
inferenceConfig,
|
|
|
- ...(additionalModelRequestFields && { additionalModelRequestFields }),
|
|
|
- // Add anthropic_version when using thinking features
|
|
|
- ...(thinkingEnabled && { anthropic_version: "bedrock-2023-05-31" }),
|
|
|
}
|
|
|
|
|
|
// Create AbortController with 10 minute timeout
|
|
|
@@ -479,74 +397,19 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
|
|
|
}
|
|
|
|
|
|
// Handle content blocks
|
|
|
- if (streamEvent.contentBlockStart) {
|
|
|
- const cbStart = streamEvent.contentBlockStart
|
|
|
-
|
|
|
- // Check if this is a reasoning block (official AWS SDK structure)
|
|
|
- if (cbStart.contentBlock?.reasoningContent) {
|
|
|
- if (cbStart.contentBlockIndex && cbStart.contentBlockIndex > 0) {
|
|
|
- yield { type: "reasoning", text: "\n" }
|
|
|
- }
|
|
|
- yield {
|
|
|
- type: "reasoning",
|
|
|
- text: cbStart.contentBlock.reasoningContent.text || "",
|
|
|
- }
|
|
|
- }
|
|
|
- // Check for thinking block - handle both possible AWS SDK structures
|
|
|
- // cbStart.contentBlock: newer/official structure
|
|
|
- // cbStart.content_block: alternative structure seen in some AWS SDK versions
|
|
|
- else if (cbStart.contentBlock?.type === "thinking" || cbStart.content_block?.type === "thinking") {
|
|
|
- const contentBlock = cbStart.contentBlock || cbStart.content_block
|
|
|
- if (cbStart.contentBlockIndex && cbStart.contentBlockIndex > 0) {
|
|
|
- yield { type: "reasoning", text: "\n" }
|
|
|
- }
|
|
|
- if (contentBlock?.thinking) {
|
|
|
- yield {
|
|
|
- type: "reasoning",
|
|
|
- text: contentBlock.thinking,
|
|
|
- }
|
|
|
- }
|
|
|
- } else if (cbStart.start?.text) {
|
|
|
- yield {
|
|
|
- type: "text",
|
|
|
- text: cbStart.start.text,
|
|
|
- }
|
|
|
+ if (streamEvent.contentBlockStart?.start?.text) {
|
|
|
+ yield {
|
|
|
+ type: "text",
|
|
|
+ text: streamEvent.contentBlockStart.start.text,
|
|
|
}
|
|
|
continue
|
|
|
}
|
|
|
|
|
|
// Handle content deltas
|
|
|
- if (streamEvent.contentBlockDelta) {
|
|
|
- const cbDelta = streamEvent.contentBlockDelta
|
|
|
- const delta = cbDelta.delta
|
|
|
-
|
|
|
- // Process reasoning and text content deltas
|
|
|
- // Multiple structures are supported for AWS SDK compatibility:
|
|
|
- // - delta.reasoningContent.text: official AWS docs structure for reasoning
|
|
|
- // - delta.thinking: alternative structure for thinking content
|
|
|
- // - delta.text: standard text content
|
|
|
- if (delta) {
|
|
|
- // Check for reasoningContent property (official AWS SDK structure)
|
|
|
- if (delta.reasoningContent?.text) {
|
|
|
- yield {
|
|
|
- type: "reasoning",
|
|
|
- text: delta.reasoningContent.text,
|
|
|
- }
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- // Handle alternative thinking structure (fallback for older SDK versions)
|
|
|
- if (delta.type === "thinking_delta" && delta.thinking) {
|
|
|
- yield {
|
|
|
- type: "reasoning",
|
|
|
- text: delta.thinking,
|
|
|
- }
|
|
|
- } else if (delta.text) {
|
|
|
- yield {
|
|
|
- type: "text",
|
|
|
- text: delta.text,
|
|
|
- }
|
|
|
- }
|
|
|
+ if (streamEvent.contentBlockDelta?.delta?.text) {
|
|
|
+ yield {
|
|
|
+ type: "text",
|
|
|
+ text: streamEvent.contentBlockDelta.delta.text,
|
|
|
}
|
|
|
continue
|
|
|
}
|
|
|
@@ -581,17 +444,10 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
|
|
|
try {
|
|
|
const modelConfig = this.getModel()
|
|
|
|
|
|
- // For completePrompt, thinking is typically not used, but we should still check
|
|
|
- // if thinking was somehow enabled in the model config
|
|
|
- const thinkingEnabled =
|
|
|
- shouldUseReasoningBudget({ model: modelConfig.info, settings: this.options }) &&
|
|
|
- modelConfig.reasoning &&
|
|
|
- modelConfig.reasoningBudget
|
|
|
-
|
|
|
const inferenceConfig: BedrockInferenceConfig = {
|
|
|
- maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
|
|
|
- temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
|
|
|
- ...(thinkingEnabled ? {} : { topP: 0.1 }), // Only set topP when thinking is NOT enabled
|
|
|
+ maxTokens: modelConfig.info.maxTokens as number,
|
|
|
+ temperature: this.options.modelTemperature as number,
|
|
|
+ topP: 0.1,
|
|
|
}
|
|
|
|
|
|
// For completePrompt, use a unique conversation ID based on the prompt
|
|
|
@@ -866,24 +722,9 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
|
|
|
return model
|
|
|
}
|
|
|
|
|
|
- override getModel(): {
|
|
|
- id: BedrockModelId | string
|
|
|
- info: ModelInfo
|
|
|
- maxTokens?: number
|
|
|
- temperature?: number
|
|
|
- reasoning?: any
|
|
|
- reasoningBudget?: number
|
|
|
- } {
|
|
|
+ override getModel(): { id: BedrockModelId | string; info: ModelInfo } {
|
|
|
if (this.costModelConfig?.id?.trim().length > 0) {
|
|
|
- // Get model params for cost model config
|
|
|
- const params = getModelParams({
|
|
|
- format: "anthropic",
|
|
|
- modelId: this.costModelConfig.id,
|
|
|
- model: this.costModelConfig.info,
|
|
|
- settings: this.options,
|
|
|
- defaultTemperature: BEDROCK_DEFAULT_TEMPERATURE,
|
|
|
- })
|
|
|
- return { ...this.costModelConfig, ...params }
|
|
|
+ return this.costModelConfig
|
|
|
}
|
|
|
|
|
|
let modelConfig = undefined
|
|
|
@@ -911,24 +752,8 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // Get model params including reasoning configuration
|
|
|
- const params = getModelParams({
|
|
|
- format: "anthropic",
|
|
|
- modelId: modelConfig.id,
|
|
|
- model: modelConfig.info,
|
|
|
- settings: this.options,
|
|
|
- defaultTemperature: BEDROCK_DEFAULT_TEMPERATURE,
|
|
|
- })
|
|
|
-
|
|
|
// Don't override maxTokens/contextWindow here; handled in getModelById (and includes user overrides)
|
|
|
- return { ...modelConfig, ...params } as {
|
|
|
- id: BedrockModelId | string
|
|
|
- info: ModelInfo
|
|
|
- maxTokens?: number
|
|
|
- temperature?: number
|
|
|
- reasoning?: any
|
|
|
- reasoningBudget?: number
|
|
|
- }
|
|
|
+ return modelConfig as { id: BedrockModelId | string; info: ModelInfo }
|
|
|
}
|
|
|
|
|
|
/************************************************************************************
|
|
|
@@ -1080,33 +905,10 @@ Suggestions:
|
|
|
messageTemplate: `Invalid ARN format. ARN should follow the pattern: arn:aws:bedrock:region:account-id:resource-type/resource-name`,
|
|
|
logLevel: "error",
|
|
|
},
|
|
|
- VALIDATION_ERROR: {
|
|
|
- patterns: [
|
|
|
- "input tag",
|
|
|
- "does not match any of the expected tags",
|
|
|
- "field required",
|
|
|
- "validation",
|
|
|
- "invalid parameter",
|
|
|
- ],
|
|
|
- messageTemplate: `Parameter validation error: {errorMessage}
|
|
|
-
|
|
|
-This error indicates that the request parameters don't match AWS Bedrock's expected format.
|
|
|
-
|
|
|
-Common causes:
|
|
|
-1. Extended thinking parameter format is incorrect
|
|
|
-2. Model-specific parameters are not supported by this model
|
|
|
-3. API parameter structure has changed
|
|
|
-
|
|
|
-Please check:
|
|
|
-- Model supports the requested features (extended thinking, etc.)
|
|
|
-- Parameter format matches AWS Bedrock specification
|
|
|
-- Model ID is correct for the requested features`,
|
|
|
- logLevel: "error",
|
|
|
- },
|
|
|
// Default/generic error
|
|
|
GENERIC: {
|
|
|
patterns: [], // Empty patterns array means this is the default
|
|
|
- messageTemplate: `Unknown Error: {errorMessage}`,
|
|
|
+ messageTemplate: `Unknown Error`,
|
|
|
logLevel: "error",
|
|
|
},
|
|
|
}
|