Prechádzať zdrojové kódy

feat: add AWS Bedrock service tier support (#9955)

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
Co-authored-by: Roo Code <[email protected]>
Co-authored-by: Matt Rubens <[email protected]>
roomote[bot] 3 týždňov pred
rodič
commit
9f3122fe28

+ 1 - 0
packages/types/src/provider-settings.ts

@@ -230,6 +230,7 @@ const bedrockSchema = apiModelIdProviderModelSchema.extend({
 	awsBedrockEndpointEnabled: z.boolean().optional(),
 	awsBedrockEndpoint: z.string().optional(),
 	awsBedrock1MContext: z.boolean().optional(), // Enable 'context-1m-2025-08-07' beta for 1M context window.
+	awsBedrockServiceTier: z.enum(["STANDARD", "FLEX", "PRIORITY"]).optional(), // AWS Bedrock service tier selection
 })
 
 const vertexSchema = apiModelIdProviderModelSchema.extend({

+ 28 - 0
packages/types/src/providers/bedrock.ts

@@ -577,3 +577,31 @@ export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
 	"anthropic.claude-haiku-4-5-20251001-v1:0",
 	"anthropic.claude-opus-4-5-20251101-v1:0",
 ] as const
+
+// Service tier identifiers that can be selected for Amazon Bedrock inference requests
+export type BedrockServiceTier = "STANDARD" | "FLEX" | "PRIORITY"
+
+// Base model IDs for which a service tier can be requested, taken from the AWS documentation:
+// https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
+export const BEDROCK_SERVICE_TIER_MODEL_IDS = [
+	// Amazon Nova models
+	"amazon.nova-lite-v1:0",
+	"amazon.nova-2-lite-v1:0",
+	"amazon.nova-pro-v1:0",
+	"amazon.nova-pro-latency-optimized-v1:0",
+	// DeepSeek models
+	"deepseek.r1-v1:0",
+	// Qwen models
+	"qwen.qwen3-next-80b-a3b",
+	"qwen.qwen3-coder-480b-a35b-v1:0",
+	// OpenAI GPT-OSS models
+	"openai.gpt-oss-20b-1:0",
+	"openai.gpt-oss-120b-1:0",
+] as const
+
+// Price multiplier for each service tier, relative to a model's base (STANDARD) price
+export const BEDROCK_SERVICE_TIER_PRICING = {
+	STANDARD: 1.0, // Base price
+	FLEX: 0.5, // 50% discount from standard
+	PRIORITY: 1.75, // 75% premium over standard
+} as const

+ 242 - 1
src/api/providers/__tests__/bedrock.spec.ts

@@ -25,7 +25,7 @@ vi.mock("@aws-sdk/client-bedrock-runtime", () => {
 
 import { AwsBedrockHandler } from "../bedrock"
 import { ConverseStreamCommand, BedrockRuntimeClient } from "@aws-sdk/client-bedrock-runtime"
-import { BEDROCK_1M_CONTEXT_MODEL_IDS } from "@roo-code/types"
+import { BEDROCK_1M_CONTEXT_MODEL_IDS, BEDROCK_SERVICE_TIER_MODEL_IDS, bedrockModels } from "@roo-code/types"
 
 import type { Anthropic } from "@anthropic-ai/sdk"
 
@@ -755,4 +755,245 @@ describe("AwsBedrockHandler", () => {
 			expect(commandArg.modelId).toBe(`us.${BEDROCK_1M_CONTEXT_MODEL_IDS[0]}`)
 		})
 	})
+
+	describe("service tier feature", () => {
+		const supportedModelId = BEDROCK_SERVICE_TIER_MODEL_IDS[0] // amazon.nova-lite-v1:0
+
+		beforeEach(() => {
+			mockConverseStreamCommand.mockReset()
+		})
+
+		describe("pricing multipliers in getModel()", () => {
+			it("should apply FLEX tier pricing with 50% discount", () => {
+				const handler = new AwsBedrockHandler({
+					apiModelId: supportedModelId,
+					awsAccessKey: "test",
+					awsSecretKey: "test",
+					awsRegion: "us-east-1",
+					awsBedrockServiceTier: "FLEX",
+				})
+
+				const model = handler.getModel()
+				const baseModel = bedrockModels[supportedModelId as keyof typeof bedrockModels] as {
+					inputPrice: number
+					outputPrice: number
+				}
+
+				// FLEX: both prices are expected to equal the bedrockModels entry times 0.5 (50% discount)
+				expect(model.info.inputPrice).toBe(baseModel.inputPrice * 0.5)
+				expect(model.info.outputPrice).toBe(baseModel.outputPrice * 0.5)
+			})
+
+			it("should apply PRIORITY tier pricing with 75% premium", () => {
+				const handler = new AwsBedrockHandler({
+					apiModelId: supportedModelId,
+					awsAccessKey: "test",
+					awsSecretKey: "test",
+					awsRegion: "us-east-1",
+					awsBedrockServiceTier: "PRIORITY",
+				})
+
+				const model = handler.getModel()
+				const baseModel = bedrockModels[supportedModelId as keyof typeof bedrockModels] as {
+					inputPrice: number
+					outputPrice: number
+				}
+
+				// PRIORITY: both prices are expected to equal the bedrockModels entry times 1.75 (75% premium)
+				expect(model.info.inputPrice).toBe(baseModel.inputPrice * 1.75)
+				expect(model.info.outputPrice).toBe(baseModel.outputPrice * 1.75)
+			})
+
+			it("should not modify pricing for STANDARD tier", () => {
+				const handler = new AwsBedrockHandler({
+					apiModelId: supportedModelId,
+					awsAccessKey: "test",
+					awsSecretKey: "test",
+					awsRegion: "us-east-1",
+					awsBedrockServiceTier: "STANDARD",
+				})
+
+				const model = handler.getModel()
+				const baseModel = bedrockModels[supportedModelId as keyof typeof bedrockModels] as {
+					inputPrice: number
+					outputPrice: number
+				}
+
+				// STANDARD: prices are expected to match the bedrockModels entry unchanged
+				expect(model.info.inputPrice).toBe(baseModel.inputPrice)
+				expect(model.info.outputPrice).toBe(baseModel.outputPrice)
+			})
+
+			it("should not apply service tier pricing for unsupported models", () => {
+				const unsupportedModelId = "anthropic.claude-3-5-sonnet-20241022-v2:0"
+				const handler = new AwsBedrockHandler({
+					apiModelId: unsupportedModelId,
+					awsAccessKey: "test",
+					awsSecretKey: "test",
+					awsRegion: "us-east-1",
+					awsBedrockServiceTier: "FLEX", // FLEX requested for a model ID outside the service-tier list
+				})
+
+				const model = handler.getModel()
+				const baseModel = bedrockModels[unsupportedModelId as keyof typeof bedrockModels] as {
+					inputPrice: number
+					outputPrice: number
+				}
+
+				// The requested tier must be ignored: prices stay at the bedrockModels values
+				expect(model.info.inputPrice).toBe(baseModel.inputPrice)
+				expect(model.info.outputPrice).toBe(baseModel.outputPrice)
+			})
+		})
+
+		describe("service_tier parameter in API requests", () => {
+			it("should include service_tier as top-level parameter for supported models", async () => {
+				const handler = new AwsBedrockHandler({
+					apiModelId: supportedModelId,
+					awsAccessKey: "test",
+					awsSecretKey: "test",
+					awsRegion: "us-east-1",
+					awsBedrockServiceTier: "PRIORITY",
+				})
+
+				const messages: Anthropic.Messages.MessageParam[] = [
+					{
+						role: "user",
+						content: "Test message",
+					},
+				]
+
+				const generator = handler.createMessage("", messages)
+				await generator.next() // Advance the generator once; the command is expected to be constructed by then
+
+				// Inspect the argument passed to the mocked ConverseStreamCommand constructor.
+				// Per AWS documentation, service_tier must be a top-level parameter, not inside additionalModelRequestFields
+				// https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
+				expect(mockConverseStreamCommand).toHaveBeenCalled()
+				const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any
+
+				// The configured tier is expected as a top-level payload field
+				expect(commandArg.service_tier).toBe("PRIORITY")
+				// ...and must not be duplicated inside additionalModelRequestFields (checked only when that object exists)
+				if (commandArg.additionalModelRequestFields) {
+					expect(commandArg.additionalModelRequestFields.service_tier).toBeUndefined()
+				}
+			})
+
+			it("should include service_tier FLEX as top-level parameter", async () => {
+				const handler = new AwsBedrockHandler({
+					apiModelId: supportedModelId,
+					awsAccessKey: "test",
+					awsSecretKey: "test",
+					awsRegion: "us-east-1",
+					awsBedrockServiceTier: "FLEX",
+				})
+
+				const messages: Anthropic.Messages.MessageParam[] = [
+					{
+						role: "user",
+						content: "Test message",
+					},
+				]
+
+				const generator = handler.createMessage("", messages)
+				await generator.next() // Advance the generator once; the command is expected to be constructed by then
+
+				expect(mockConverseStreamCommand).toHaveBeenCalled()
+				const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any
+
+				// The configured tier is expected as a top-level payload field
+				expect(commandArg.service_tier).toBe("FLEX")
+				// ...and must not be duplicated inside additionalModelRequestFields (checked only when that object exists)
+				if (commandArg.additionalModelRequestFields) {
+					expect(commandArg.additionalModelRequestFields.service_tier).toBeUndefined()
+				}
+			})
+
+			it("should NOT include service_tier for unsupported models", async () => {
+				const unsupportedModelId = "anthropic.claude-3-5-sonnet-20241022-v2:0"
+				const handler = new AwsBedrockHandler({
+					apiModelId: unsupportedModelId,
+					awsAccessKey: "test",
+					awsSecretKey: "test",
+					awsRegion: "us-east-1",
+					awsBedrockServiceTier: "PRIORITY", // PRIORITY requested for a model ID outside the service-tier list
+				})
+
+				const messages: Anthropic.Messages.MessageParam[] = [
+					{
+						role: "user",
+						content: "Test message",
+					},
+				]
+
+				const generator = handler.createMessage("", messages)
+				await generator.next() // Advance the generator once; the command is expected to be constructed by then
+
+				expect(mockConverseStreamCommand).toHaveBeenCalled()
+				const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any
+
+				// For an unsupported model the tier must be absent both at top level and in additionalModelRequestFields
+				expect(commandArg.service_tier).toBeUndefined()
+				if (commandArg.additionalModelRequestFields) {
+					expect(commandArg.additionalModelRequestFields.service_tier).toBeUndefined()
+				}
+			})
+
+			it("should NOT include service_tier when not specified", async () => {
+				const handler = new AwsBedrockHandler({
+					apiModelId: supportedModelId,
+					awsAccessKey: "test",
+					awsSecretKey: "test",
+					awsRegion: "us-east-1",
+					// awsBedrockServiceTier intentionally omitted
+				})
+
+				const messages: Anthropic.Messages.MessageParam[] = [
+					{
+						role: "user",
+						content: "Test message",
+					},
+				]
+
+				const generator = handler.createMessage("", messages)
+				await generator.next() // Advance the generator once; the command is expected to be constructed by then
+
+				expect(mockConverseStreamCommand).toHaveBeenCalled()
+				const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any
+
+				// Without a configured tier the field must be absent both at top level and in additionalModelRequestFields
+				expect(commandArg.service_tier).toBeUndefined()
+				if (commandArg.additionalModelRequestFields) {
+					expect(commandArg.additionalModelRequestFields.service_tier).toBeUndefined()
+				}
+			})
+		})
+
+		describe("service tier with cross-region inference", () => {
+			it("should apply service tier pricing with cross-region inference prefix", () => {
+				const handler = new AwsBedrockHandler({
+					apiModelId: supportedModelId,
+					awsAccessKey: "test",
+					awsSecretKey: "test",
+					awsRegion: "us-east-1",
+					awsUseCrossRegionInference: true,
+					awsBedrockServiceTier: "FLEX",
+				})
+
+				const model = handler.getModel()
+				const baseModel = bedrockModels[supportedModelId as keyof typeof bedrockModels] as {
+					inputPrice: number
+					outputPrice: number
+				}
+
+				// With cross-region inference enabled in us-east-1 the returned ID is expected to carry the "us." prefix
+				expect(model.id).toBe(`us.${supportedModelId}`)
+
+				// The FLEX multiplier (0.5) is still expected on the prices despite the prefixed ID
+				expect(model.info.inputPrice).toBe(baseModel.inputPrice * 0.5)
+				expect(model.info.outputPrice).toBe(baseModel.outputPrice * 0.5)
+			})
+		})
+	})
 })

+ 51 - 1
src/api/providers/bedrock.ts

@@ -18,6 +18,7 @@ import {
 	type ModelInfo,
 	type ProviderSettings,
 	type BedrockModelId,
+	type BedrockServiceTier,
 	bedrockDefaultModelId,
 	bedrockModels,
 	bedrockDefaultPromptRouterModelId,
@@ -27,6 +28,8 @@ import {
 	AWS_INFERENCE_PROFILE_MAPPING,
 	BEDROCK_1M_CONTEXT_MODEL_IDS,
 	BEDROCK_GLOBAL_INFERENCE_MODEL_IDS,
+	BEDROCK_SERVICE_TIER_MODEL_IDS,
+	BEDROCK_SERVICE_TIER_PRICING,
 } from "@roo-code/types"
 
 import { ApiStream } from "../transform/stream"
@@ -74,6 +77,13 @@ interface BedrockPayload {
 	toolConfig?: ToolConfiguration
 }
 
+// BedrockPayload extended with an optional top-level `service_tier` field (STANDARD, FLEX or PRIORITY).
+// NOTE(review): the SDK's Converse input appears to model this as `serviceTier: { type }` - confirm a raw `service_tier` is actually sent.
+// https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
+type BedrockPayloadWithServiceTier = BedrockPayload & {
+	service_tier?: BedrockServiceTier
+}
+
 // Define specific types for content block events to avoid 'as any' usage
 // These handle the multiple possible structures returned by AWS SDK
 interface ContentBlockStartEvent {
@@ -433,6 +443,17 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 			additionalModelRequestFields.anthropic_beta = anthropicBetas
 		}
 
+		// Determine if service tier should be applied (checked later when building payload)
+		const useServiceTier =
+			this.options.awsBedrockServiceTier && BEDROCK_SERVICE_TIER_MODEL_IDS.includes(baseModelId as any)
+		if (useServiceTier) {
+			logger.info("Service tier specified for Bedrock request", {
+				ctx: "bedrock",
+				modelId: modelConfig.id,
+				serviceTier: this.options.awsBedrockServiceTier,
+			})
+		}
+
 		// Build tool configuration if native tools are enabled
 		let toolConfig: ToolConfiguration | undefined
 		if (useNativeTools && metadata?.tools) {
@@ -442,7 +463,10 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 			}
 		}
 
-		const payload: BedrockPayload = {
+		// Build payload with optional service_tier at top level
+		// Service tier is a top-level parameter per AWS documentation, NOT inside additionalModelRequestFields
+		// https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
+		const payload: BedrockPayloadWithServiceTier = {
 			modelId: modelConfig.id,
 			messages: formatted.messages,
 			system: formatted.system,
@@ -451,6 +475,8 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 			// Add anthropic_version at top level when using thinking features
 			...(thinkingEnabled && { anthropic_version: "bedrock-2023-05-31" }),
 			...(toolConfig && { toolConfig }),
+			// Add service_tier as a top-level parameter (not inside additionalModelRequestFields)
+			...(useServiceTier && { service_tier: this.options.awsBedrockServiceTier }),
 		}
 
 		// Create AbortController with 10 minute timeout
@@ -1089,6 +1115,30 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 			defaultTemperature: BEDROCK_DEFAULT_TEMPERATURE,
 		})
 
+		// Apply service tier pricing when a tier is configured and the parsed base model ID is in the supported list
+		const baseModelIdForTier = this.parseBaseModelId(modelConfig.id)
+		if (this.options.awsBedrockServiceTier && BEDROCK_SERVICE_TIER_MODEL_IDS.includes(baseModelIdForTier as any)) {
+			const pricingMultiplier = BEDROCK_SERVICE_TIER_PRICING[this.options.awsBedrockServiceTier]
+			if (pricingMultiplier && pricingMultiplier !== 1.0) {
+				// Scale each defined price by the multiplier; compare against undefined (not truthiness) so a price of 0 stays 0
+				modelConfig.info = {
+					...modelConfig.info,
+					inputPrice: modelConfig.info.inputPrice !== undefined
+						? modelConfig.info.inputPrice * pricingMultiplier
+						: undefined,
+					outputPrice: modelConfig.info.outputPrice !== undefined
+						? modelConfig.info.outputPrice * pricingMultiplier
+						: undefined,
+					cacheWritesPrice: modelConfig.info.cacheWritesPrice !== undefined
+						? modelConfig.info.cacheWritesPrice * pricingMultiplier
+						: undefined,
+					cacheReadsPrice: modelConfig.info.cacheReadsPrice !== undefined
+						? modelConfig.info.cacheReadsPrice * pricingMultiplier
+						: undefined,
+				}
+			}
+		}
+
 		// Don't override maxTokens/contextWindow here; handled in getModelById (and includes user overrides)
 		return { ...modelConfig, ...params } as {
 			id: BedrockModelId | string

+ 49 - 0
webview-ui/src/components/settings/providers/Bedrock.tsx

@@ -5,9 +5,11 @@ import { VSCodeTextField } from "@vscode/webview-ui-toolkit/react"
 import {
 	type ProviderSettings,
 	type ModelInfo,
+	type BedrockServiceTier,
 	BEDROCK_REGIONS,
 	BEDROCK_1M_CONTEXT_MODEL_IDS,
 	BEDROCK_GLOBAL_INFERENCE_MODEL_IDS,
+	BEDROCK_SERVICE_TIER_MODEL_IDS,
 } from "@roo-code/types"
 
 import { useAppTranslation } from "@src/i18n/TranslationContext"
@@ -35,6 +37,10 @@ export const Bedrock = ({ apiConfiguration, setApiConfigurationField, selectedMo
 		!!apiConfiguration?.apiModelId &&
 		BEDROCK_GLOBAL_INFERENCE_MODEL_IDS.includes(apiConfiguration.apiModelId as any)
 
+	// Check if the selected model supports service tiers
+	const supportsServiceTiers =
+		!!apiConfiguration?.apiModelId && BEDROCK_SERVICE_TIER_MODEL_IDS.includes(apiConfiguration.apiModelId as any)
+
 	// Update the endpoint enabled state when the configuration changes
 	useEffect(() => {
 		setAwsEndpointSelected(!!apiConfiguration?.awsBedrockEndpointEnabled)
@@ -150,6 +156,49 @@ export const Bedrock = ({ apiConfiguration, setApiConfigurationField, selectedMo
 					</SelectContent>
 				</Select>
 			</div>
+			{supportsServiceTiers && (
+				<div>
+					<label className="block font-medium mb-1">{t("settings:providers.awsServiceTier")}</label>
+					<Select
+						value={apiConfiguration?.awsBedrockServiceTier || "STANDARD"}
+						onValueChange={(value) =>
+							setApiConfigurationField("awsBedrockServiceTier", value as BedrockServiceTier)
+						}>
+						<SelectTrigger className="w-full">
+							<SelectValue placeholder={t("settings:common.select")} />
+						</SelectTrigger>
+						<SelectContent>
+							<SelectItem value="STANDARD">
+								<div>
+									<div>{t("settings:providers.awsServiceTierStandard")}</div>
+									<div className="text-xs text-vscode-descriptionForeground">
+										{t("settings:providers.awsServiceTierStandardDesc")}
+									</div>
+								</div>
+							</SelectItem>
+							<SelectItem value="FLEX">
+								<div>
+									<div>{t("settings:providers.awsServiceTierFlex")}</div>
+									<div className="text-xs text-vscode-descriptionForeground">
+										{t("settings:providers.awsServiceTierFlexDesc")}
+									</div>
+								</div>
+							</SelectItem>
+							<SelectItem value="PRIORITY">
+								<div>
+									<div>{t("settings:providers.awsServiceTierPriority")}</div>
+									<div className="text-xs text-vscode-descriptionForeground">
+										{t("settings:providers.awsServiceTierPriorityDesc")}
+									</div>
+								</div>
+							</SelectItem>
+						</SelectContent>
+					</Select>
+					<div className="text-sm text-vscode-descriptionForeground mt-1">
+						{t("settings:providers.awsServiceTierNote")}
+					</div>
+				</div>
+			)}
 			{supportsGlobalInference && (
 				<Checkbox
 					checked={apiConfiguration?.awsUseGlobalInference || false}

+ 8 - 0
webview-ui/src/i18n/locales/ca/settings.json

@@ -369,6 +369,14 @@
 		"awsRegion": "Regió d'AWS",
 		"awsCrossRegion": "Utilitzar inferència entre regions",
 		"awsGlobalInference": "Utilitzar la inferència global (selecció automàtica de la regió òptima d'AWS)",
+		"awsServiceTier": "Nivell de servei",
+		"awsServiceTierStandard": "Standard",
+		"awsServiceTierStandardDesc": "Rendiment i cost equilibrats",
+		"awsServiceTierFlex": "Flex (50% de descompte)",
+		"awsServiceTierFlexDesc": "Cost menor, latència més alta per a tasques no crítiques",
+		"awsServiceTierPriority": "Priority (75% de sobrecost)",
+		"awsServiceTierPriorityDesc": "Rendiment més ràpid per a aplicacions crítiques",
+		"awsServiceTierNote": "Els nivells de servei afecten els preus i el rendiment. Flex ofereix un 50% de descompte amb latència més alta, Priority ofereix un rendiment un 25% millor amb un 75% de sobrecost.",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "Utilitzar punt final VPC personalitzat",
 			"vpcEndpointUrlPlaceholder": "Introduïu l'URL del punt final VPC (opcional)",

+ 8 - 0
webview-ui/src/i18n/locales/de/settings.json

@@ -369,6 +369,14 @@
 		"awsRegion": "AWS Region",
 		"awsCrossRegion": "Regionsübergreifende Inferenz verwenden",
 		"awsGlobalInference": "Globale Inferenz verwenden (optimale AWS-Region automatisch auswählen)",
+		"awsServiceTier": "Service-Stufe",
+		"awsServiceTierStandard": "Standard",
+		"awsServiceTierStandardDesc": "Ausgewogene Leistung und Kosten",
+		"awsServiceTierFlex": "Flex (50% Rabatt)",
+		"awsServiceTierFlexDesc": "Niedrigere Kosten, höhere Latenz für unkritische Aufgaben",
+		"awsServiceTierPriority": "Priorität (75% Aufschlag)",
+		"awsServiceTierPriorityDesc": "Schnellste Leistung für geschäftskritische Anwendungen",
+		"awsServiceTierNote": "Service-Stufen beeinflussen Preisgestaltung und Leistung. Flex bietet 50% Rabatt mit höherer Latenz, Priorität bietet 25% bessere Leistung mit 75% Aufschlag.",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "Benutzerdefinierten VPC-Endpunkt verwenden",
 			"vpcEndpointUrlPlaceholder": "VPC-Endpunkt-URL eingeben (optional)",

+ 8 - 0
webview-ui/src/i18n/locales/en/settings.json

@@ -378,6 +378,14 @@
 		"awsRegion": "AWS Region",
 		"awsCrossRegion": "Use cross-region inference",
 		"awsGlobalInference": "Use Global inference (auto-select optimal AWS Region)",
+		"awsServiceTier": "Service Tier",
+		"awsServiceTierStandard": "Standard",
+		"awsServiceTierStandardDesc": "Balanced performance and cost",
+		"awsServiceTierFlex": "Flex (50% discount)",
+		"awsServiceTierFlexDesc": "Lower cost, higher latency for non-critical tasks",
+		"awsServiceTierPriority": "Priority (75% premium)",
+		"awsServiceTierPriorityDesc": "Fastest performance for mission-critical applications",
+		"awsServiceTierNote": "Service tiers affect pricing and performance. Flex offers a 50% discount with higher latency; Priority offers 25% better performance at a 75% premium.",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "Use custom VPC endpoint",
 			"vpcEndpointUrlPlaceholder": "Enter VPC Endpoint URL (optional)",

+ 8 - 0
webview-ui/src/i18n/locales/es/settings.json

@@ -369,6 +369,14 @@
 		"awsRegion": "Región de AWS",
 		"awsCrossRegion": "Usar inferencia entre regiones",
 		"awsGlobalInference": "Usar inferencia global (selección automática de la región óptima de AWS)",
+		"awsServiceTier": "Nivel de servicio",
+		"awsServiceTierStandard": "Estándar",
+		"awsServiceTierStandardDesc": "Rendimiento y costo equilibrados",
+		"awsServiceTierFlex": "Flexible (50% de descuento)",
+		"awsServiceTierFlexDesc": "Menor costo, mayor latencia para tareas no críticas",
+		"awsServiceTierPriority": "Prioridad (75% de prima)",
+		"awsServiceTierPriorityDesc": "Rendimiento más rápido para aplicaciones críticas",
+		"awsServiceTierNote": "Los niveles de servicio afectan los precios y rendimiento. Flexible ofrece 50% de descuento con mayor latencia, Prioridad ofrece 25% mejor rendimiento con 75% de prima.",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "Usar punto de conexión VPC personalizado",
 			"vpcEndpointUrlPlaceholder": "Ingrese URL del punto de conexión VPC (opcional)",

+ 8 - 0
webview-ui/src/i18n/locales/fr/settings.json

@@ -369,6 +369,14 @@
 		"awsRegion": "Région AWS",
 		"awsCrossRegion": "Utiliser l'inférence inter-régions",
 		"awsGlobalInference": "Utiliser l'inférence globale (sélection automatique de la région AWS optimale)",
+		"awsServiceTier": "Niveau de service",
+		"awsServiceTierStandard": "Standard",
+		"awsServiceTierStandardDesc": "Performances et coûts équilibrés",
+		"awsServiceTierFlex": "Flexible (50% de réduction)",
+		"awsServiceTierFlexDesc": "Coût inférieur, latence plus élevée pour les tâches non critiques",
+		"awsServiceTierPriority": "Priorité (75% de prime)",
+		"awsServiceTierPriorityDesc": "Performances optimales pour les applications critiques",
+		"awsServiceTierNote": "Les niveaux de service affectent les prix et les performances. Flexible offre une réduction de 50% avec une latence plus élevée, Priorité offre 25% de meilleures performances avec une prime de 75%.",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "Utiliser un point de terminaison VPC personnalisé",
 			"vpcEndpointUrlPlaceholder": "Entrer l'URL du point de terminaison VPC (optionnel)",

+ 8 - 0
webview-ui/src/i18n/locales/hi/settings.json

@@ -369,6 +369,14 @@
 		"awsRegion": "AWS क्षेत्र",
 		"awsCrossRegion": "क्रॉस-क्षेत्र अनुमान का उपयोग करें",
 		"awsGlobalInference": "वैश्विक अनुमान का उपयोग करें (स्वचालित रूप से श्रेष्ठ AWS क्षेत्र चुनें)",
+		"awsServiceTier": "सेवा स्तर",
+		"awsServiceTierStandard": "Standard",
+		"awsServiceTierStandardDesc": "संतुलित प्रदर्शन और लागत",
+		"awsServiceTierFlex": "Flex (50% छूट)",
+		"awsServiceTierFlexDesc": "कम लागत, गैर-महत्वपूर्ण कार्यों के लिए उच्च विलंबता",
+		"awsServiceTierPriority": "Priority (75% प्रीमियम)",
+		"awsServiceTierPriorityDesc": "मिशन-क्रिटिकल एप्लिकेशन के लिए सबसे तेज़ प्रदर्शन",
+		"awsServiceTierNote": "सेवा स्तर मूल्य निर्धारण और प्रदर्शन को प्रभावित करते हैं। Flex 50% छूट के साथ उच्च विलंबता प्रदान करता है, Priority 75% प्रीमियम के साथ 25% बेहतर प्रदर्शन प्रदान करता है।",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "कस्टम VPC एंडपॉइंट का उपयोग करें",
 			"vpcEndpointUrlPlaceholder": "VPC एंडपॉइंट URL दर्ज करें (वैकल्पिक)",

+ 8 - 0
webview-ui/src/i18n/locales/id/settings.json

@@ -373,6 +373,14 @@
 		"awsRegion": "AWS Region",
 		"awsCrossRegion": "Gunakan cross-region inference",
 		"awsGlobalInference": "Gunakan inferensi Global (pilih Wilayah AWS optimal secara otomatis)",
+		"awsServiceTier": "Tingkat Layanan",
+		"awsServiceTierStandard": "Standard",
+		"awsServiceTierStandardDesc": "Performa dan biaya yang seimbang",
+		"awsServiceTierFlex": "Flex (diskon 50%)",
+		"awsServiceTierFlexDesc": "Biaya lebih rendah, latensi lebih tinggi untuk tugas non-kritikal",
+		"awsServiceTierPriority": "Priority (premium 75%)",
+		"awsServiceTierPriorityDesc": "Performa tercepat untuk aplikasi mission-critical",
+		"awsServiceTierNote": "Tingkat layanan memengaruhi harga dan performa. Flex menawarkan diskon 50% dengan latensi lebih tinggi, Priority menawarkan performa 25% lebih baik dengan premium 75%.",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "Gunakan VPC endpoint kustom",
 			"vpcEndpointUrlPlaceholder": "Masukkan VPC Endpoint URL (opsional)",

+ 8 - 0
webview-ui/src/i18n/locales/it/settings.json

@@ -369,6 +369,14 @@
 		"awsRegion": "Regione AWS",
 		"awsCrossRegion": "Usa inferenza cross-regione",
 		"awsGlobalInference": "Usa l'inferenza globale (selezione automatica della regione AWS ottimale)",
+		"awsServiceTier": "Livello di servizio",
+		"awsServiceTierStandard": "Standard",
+		"awsServiceTierStandardDesc": "Prestazioni e costo equilibrati",
+		"awsServiceTierFlex": "Flex (sconto del 50%)",
+		"awsServiceTierFlexDesc": "Costo inferiore, latenza più alta per attività non critiche",
+		"awsServiceTierPriority": "Priority (sovrapprezzo del 75%)",
+		"awsServiceTierPriorityDesc": "Prestazioni più veloci per applicazioni mission-critical",
+		"awsServiceTierNote": "I livelli di servizio influenzano i prezzi e le prestazioni. Flex offre uno sconto del 50% con latenza più alta, Priority offre prestazioni migliori del 25% con un sovrapprezzo del 75%.",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "Usa endpoint VPC personalizzato",
 			"vpcEndpointUrlPlaceholder": "Inserisci URL endpoint VPC (opzionale)",

+ 8 - 0
webview-ui/src/i18n/locales/ja/settings.json

@@ -369,6 +369,14 @@
 		"awsRegion": "AWSリージョン",
 		"awsCrossRegion": "クロスリージョン推論を使用",
 		"awsGlobalInference": "グローバル推論を使用する(最適なAWSリージョンを自動選択)",
+		"awsServiceTier": "サービスティア",
+		"awsServiceTierStandard": "Standard",
+		"awsServiceTierStandardDesc": "バランスの取れたパフォーマンスとコスト",
+		"awsServiceTierFlex": "Flex(50%割引)",
+		"awsServiceTierFlexDesc": "低コスト・高レイテンシ。非クリティカルなタスク向け",
+		"awsServiceTierPriority": "Priority(75%プレミアム)",
+		"awsServiceTierPriorityDesc": "ミッションクリティカルなアプリケーション向けの最速パフォーマンス",
+		"awsServiceTierNote": "サービスティアは価格とパフォーマンスに影響します。Flexは50%割引でレイテンシが高く、Priorityは25%優れたパフォーマンスで75%のプレミアムです。",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "カスタムVPCエンドポイントを使用",
 			"vpcEndpointUrlPlaceholder": "VPCエンドポイントURLを入力(任意)",

+ 8 - 0
webview-ui/src/i18n/locales/ko/settings.json

@@ -369,6 +369,14 @@
 		"awsRegion": "AWS 리전",
 		"awsCrossRegion": "교차 리전 추론 사용",
 		"awsGlobalInference": "글로벌 추론 사용(최적의 AWS 리전 자동 선택)",
+		"awsServiceTier": "서비스 계층",
+		"awsServiceTierStandard": "Standard",
+		"awsServiceTierStandardDesc": "균형 잡힌 성능 및 비용",
+		"awsServiceTierFlex": "Flex (50% 할인)",
+		"awsServiceTierFlexDesc": "낮은 비용, 중요하지 않은 작업을 위한 높은 지연 시간",
+		"awsServiceTierPriority": "Priority (75% 프리미엄)",
+		"awsServiceTierPriorityDesc": "미션 크리티컬 애플리케이션을 위한 최고 성능",
+		"awsServiceTierNote": "서비스 계층은 가격 및 성능에 영향을 미칩니다. Flex는 50% 할인으로 높은 지연 시간을 제공하고, Priority는 75% 프리미엄으로 25% 향상된 성능을 제공합니다.",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "사용자 지정 VPC 엔드포인트 사용",
 			"vpcEndpointUrlPlaceholder": "VPC 엔드포인트 URL 입력 (선택사항)",

+ 8 - 0
webview-ui/src/i18n/locales/nl/settings.json

@@ -369,6 +369,14 @@
 		"awsRegion": "AWS-regio",
 		"awsCrossRegion": "Gebruik cross-region inference",
 		"awsGlobalInference": "Gebruik wereldwijde inferentie (automatische selectie van optimale AWS-regio)",
+		"awsServiceTier": "Servicelaag",
+		"awsServiceTierStandard": "Standard",
+		"awsServiceTierStandardDesc": "Gebalanceerde prestaties en kosten",
+		"awsServiceTierFlex": "Flex (50% korting)",
+		"awsServiceTierFlexDesc": "Lagere kosten, hogere latency voor niet-kritieke taken",
+		"awsServiceTierPriority": "Priority (75% premie)",
+		"awsServiceTierPriorityDesc": "Snelste prestaties voor kritieke toepassingen",
+		"awsServiceTierNote": "Servicelagen beïnvloeden prijzen en prestaties. Flex biedt 50% korting met hogere latency, Priority biedt 25% betere prestaties met 75% premie.",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "Aangepast VPC-eindpunt gebruiken",
 			"vpcEndpointUrlPlaceholder": "Voer VPC-eindpunt URL in (optioneel)",

+ 8 - 0
webview-ui/src/i18n/locales/pl/settings.json

@@ -369,6 +369,14 @@
 		"awsRegion": "Region AWS",
 		"awsCrossRegion": "Użyj wnioskowania międzyregionalnego",
 		"awsGlobalInference": "Użyj globalnej inferencji (automatyczny wybór optymalnego regionu AWS)",
+		"awsServiceTier": "Warstwa usługi",
+		"awsServiceTierStandard": "Standard",
+		"awsServiceTierStandardDesc": "Zrównoważona wydajność i koszt",
+		"awsServiceTierFlex": "Flex (50% zniżka)",
+		"awsServiceTierFlexDesc": "Niższy koszt, wyższe opóźnienie dla zadań niekrytycznych",
+		"awsServiceTierPriority": "Priority (75% premia)",
+		"awsServiceTierPriorityDesc": "Najszybsza wydajność dla aplikacji krytycznych",
+		"awsServiceTierNote": "Warstwy usługi wpływają na ceny i wydajność. Flex oferuje 50% zniżkę z wyższym opóźnieniem, Priority oferuje 25% lepszą wydajność z 75% premią.",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "Użyj niestandardowego punktu końcowego VPC",
 			"vpcEndpointUrlPlaceholder": "Wprowadź URL punktu końcowego VPC (opcjonalnie)",

+ 8 - 0
webview-ui/src/i18n/locales/pt-BR/settings.json

@@ -369,6 +369,14 @@
 		"awsRegion": "Região AWS",
 		"awsCrossRegion": "Usar inferência entre regiões",
 		"awsGlobalInference": "Usar inferência global (selecionar automaticamente a região ideal da AWS)",
+		"awsServiceTier": "Nível de serviço",
+		"awsServiceTierStandard": "Padrão",
+		"awsServiceTierStandardDesc": "Desempenho e custo equilibrados",
+		"awsServiceTierFlex": "Flex (50% de desconto)",
+		"awsServiceTierFlexDesc": "Custo mais baixo, latência mais alta para tarefas não críticas",
+		"awsServiceTierPriority": "Priority (75% de acréscimo)",
+		"awsServiceTierPriorityDesc": "Desempenho mais rápido para aplicações críticas",
+		"awsServiceTierNote": "Os níveis de serviço afetam preços e desempenho. Flex oferece 50% de desconto com latência mais alta, Priority oferece desempenho 25% melhor com 75% de acréscimo.",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "Usar endpoint VPC personalizado",
 			"vpcEndpointUrlPlaceholder": "Digite a URL do endpoint VPC (opcional)",

+ 8 - 0
webview-ui/src/i18n/locales/ru/settings.json

@@ -369,6 +369,14 @@
 		"awsRegion": "Регион AWS",
 		"awsCrossRegion": "Использовать кросс-региональный вывод",
 		"awsGlobalInference": "Использовать глобальный вывод (автоматический выбор оптимального региона AWS)",
+		"awsServiceTier": "Уровень обслуживания",
+		"awsServiceTierStandard": "Стандартный",
+		"awsServiceTierStandardDesc": "Сбалансированная производительность и стоимость",
+		"awsServiceTierFlex": "Гибкий (50% скидка)",
+		"awsServiceTierFlexDesc": "Более низкая стоимость, более высокая задержка для некритичных задач",
+		"awsServiceTierPriority": "Приоритетный (75% надбавка)",
+		"awsServiceTierPriorityDesc": "Максимальная производительность для критичных приложений",
+		"awsServiceTierNote": "Уровни обслуживания влияют на цены и производительность. Гибкий предлагает скидку 50% при более высокой задержке, Приоритетный — производительность на 25% выше при надбавке 75%.",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "Использовать пользовательскую конечную точку VPC",
 			"vpcEndpointUrlPlaceholder": "Введите URL конечной точки VPC (опционально)",

+ 8 - 0
webview-ui/src/i18n/locales/tr/settings.json

@@ -369,6 +369,14 @@
 		"awsRegion": "AWS Bölgesi",
 		"awsCrossRegion": "Bölgeler arası çıkarım kullan",
 		"awsGlobalInference": "Genel çıkarımı kullan (en uygun AWS Bölgesini otomatik seç)",
+		"awsServiceTier": "Hizmet seviyesi",
+		"awsServiceTierStandard": "Standart",
+		"awsServiceTierStandardDesc": "Dengeli performans ve maliyet",
+		"awsServiceTierFlex": "Esnek (%50 indirim)",
+		"awsServiceTierFlexDesc": "Daha düşük maliyet, kritik olmayan görevler için daha yüksek gecikme",
+		"awsServiceTierPriority": "Öncelik (%75 ek ücret)",
+		"awsServiceTierPriorityDesc": "İş açısından kritik uygulamalar için en hızlı performans",
+		"awsServiceTierNote": "Hizmet seviyeleri fiyatlandırmayı ve performansı etkiler. Esnek %50 indirim sunarken daha yüksek gecikmeye sahiptir, Öncelik %25 daha iyi performans sunarken %75 ek ücrete sahiptir.",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "Özel VPC uç noktası kullan",
 			"vpcEndpointUrlPlaceholder": "VPC uç noktası URL'sini girin (isteğe bağlı)",

+ 8 - 0
webview-ui/src/i18n/locales/vi/settings.json

@@ -369,6 +369,14 @@
 		"awsRegion": "Vùng AWS",
 		"awsCrossRegion": "Sử dụng suy luận liên vùng",
 		"awsGlobalInference": "Sử dụng suy luận toàn cầu (tự động chọn Khu vực AWS tối ưu)",
+		"awsServiceTier": "Cấp độ dịch vụ",
+		"awsServiceTierStandard": "Tiêu chuẩn",
+		"awsServiceTierStandardDesc": "Hiệu suất và chi phí cân bằng",
+		"awsServiceTierFlex": "Linh hoạt (giảm giá 50%)",
+		"awsServiceTierFlexDesc": "Chi phí thấp hơn, độ trễ cao hơn cho các tác vụ không quan trọng",
+		"awsServiceTierPriority": "Ưu tiên (phí thêm 75%)",
+		"awsServiceTierPriorityDesc": "Hiệu suất nhanh nhất cho các ứng dụng quan trọng",
+		"awsServiceTierNote": "Các cấp độ dịch vụ ảnh hưởng đến giá và hiệu suất. Linh hoạt cung cấp giảm giá 50% với độ trễ cao hơn, Ưu tiên cung cấp hiệu suất tốt hơn 25% với phí thêm 75%.",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "Sử dụng điểm cuối VPC tùy chỉnh",
 			"vpcEndpointUrlPlaceholder": "Nhập URL điểm cuối VPC (tùy chọn)",

+ 8 - 0
webview-ui/src/i18n/locales/zh-CN/settings.json

@@ -369,6 +369,14 @@
 		"awsRegion": "AWS 区域",
 		"awsCrossRegion": "使用跨区域推理",
 		"awsGlobalInference": "使用全局推理(自动选择最佳 AWS 区域)",
+		"awsServiceTier": "服务层级",
+		"awsServiceTierStandard": "Standard",
+		"awsServiceTierStandardDesc": "性能和成本均衡",
+		"awsServiceTierFlex": "Flex(50% 折扣)",
+		"awsServiceTierFlexDesc": "成本更低、延迟更高,适用于非关键任务",
+		"awsServiceTierPriority": "Priority(75% 费用溢价)",
+		"awsServiceTierPriorityDesc": "为关键业务应用提供最快性能",
+		"awsServiceTierNote": "服务层级会影响定价和性能。Flex 提供 50% 折扣但延迟较高,Priority 性能提升 25% 但费用溢价 75%。",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "使用自定义 VPC 端点",
 			"vpcEndpointUrlPlaceholder": "输入 VPC 端点 URL(可选)",

+ 8 - 0
webview-ui/src/i18n/locales/zh-TW/settings.json

@@ -369,6 +369,14 @@
 		"awsRegion": "AWS 區域",
 		"awsCrossRegion": "使用跨區域推論",
 		"awsGlobalInference": "使用全域推論 (自動選取最佳 AWS 區域)",
+		"awsServiceTier": "服務層級",
+		"awsServiceTierStandard": "Standard",
+		"awsServiceTierStandardDesc": "效能和成本均衡",
+		"awsServiceTierFlex": "Flex (50% 折扣)",
+		"awsServiceTierFlexDesc": "成本較低、延遲較高,適用於非關鍵工作",
+		"awsServiceTierPriority": "Priority (75% 溢價)",
+		"awsServiceTierPriorityDesc": "為關鍵業務應用提供最快效能",
+		"awsServiceTierNote": "服務層級會影響定價和效能。Flex 提供 50% 折扣但延遲較高,Priority 效能提升 25% 但費用溢價 75%。",
 		"awsBedrockVpc": {
 			"useCustomVpcEndpoint": "使用自訂 VPC 端點",
 			"vpcEndpointUrlPlaceholder": "輸入 VPC 端點 URL(選填)",