5 months ago · 571d1a4833
--- a/packages/types/src/model.ts
+++ b/packages/types/src/model.ts
@@ -28,6 +28,13 @@ export const verbosityLevelsSchema = z.enum(verbosityLevels)
 
															 export type VerbosityLevel = z.infer<typeof verbosityLevelsSchema>
														
 
															+/**
														
 
															+ * Service tiers (OpenAI Responses API): the values that may be sent as a request's `service_tier`.
														
 
															+ */
														
 
															+export const serviceTiers = ["default", "flex", "priority"] as const
														
 
															+export const serviceTierSchema = z.enum(serviceTiers)
														
 
															+export type ServiceTier = z.infer<typeof serviceTierSchema>
														
 
															+
														
 
															 /**
														
 
															  * ModelParameter
														
 
															  */
														
@@ -69,9 +76,15 @@ export const modelInfoSchema = z.object({
 
															 	minTokensPerCachePoint: z.number().optional(),
														
 
															 	maxCachePoints: z.number().optional(),
														
 
															 	cachableFields: z.array(z.string()).optional(),
														
 
															+	/**
														
 
															+	 * Service tiers with pricing information.
														
 
															+	 * Each tier can have a name (for OpenAI service tiers) and pricing overrides.
														
 
															+	 * The top-level input/output/cache* fields represent the default/standard tier.
														
 
															+	 */
														
 
															 	tiers: z
														
 
															 		.array(
														
 
															 			z.object({
														
 
															+				name: serviceTierSchema.optional(), // Service tier name (flex, priority, etc.)
														
 
															 				contextWindow: z.number(),
														
 
															 				inputPrice: z.number().optional(),
														
 
															 				outputPrice: z.number().optional(),
														
--- a/packages/types/src/provider-settings.ts
+++ b/packages/types/src/provider-settings.ts
@@ -1,6 +1,6 @@
 
															 import { z } from "zod"
														
 
															-import { modelInfoSchema, reasoningEffortWithMinimalSchema, verbosityLevelsSchema } from "./model.js"
														
 
															+import { modelInfoSchema, reasoningEffortWithMinimalSchema, verbosityLevelsSchema, serviceTierSchema } from "./model.js"
														
 
															 import { codebaseIndexProviderSchema } from "./codebase-index.js"
														
 
															 import {
														
 
															 	anthropicModels,
														
@@ -225,6 +225,9 @@ const geminiCliSchema = apiModelIdProviderModelSchema.extend({
 
															 const openAiNativeSchema = apiModelIdProviderModelSchema.extend({
														
 
															 	openAiNativeApiKey: z.string().optional(),
														
 
															 	openAiNativeBaseUrl: z.string().optional(),
														
 
															+	// OpenAI Responses API service tier for openai-native provider only.
														
 
															+	// UI should only expose this when the selected model supports flex/priority.
														
 
															+	openAiNativeServiceTier: serviceTierSchema.optional(),
														
 
															 })
														
 
															 const mistralSchema = apiModelIdProviderModelSchema.extend({
														
--- a/packages/types/src/providers/openai.ts
+++ b/packages/types/src/providers/openai.ts
@@ -32,6 +32,10 @@ export const openAiNativeModels = {
 
															 		// supportsVerbosity is a new capability; ensure ModelInfo includes it
														
 
															 		supportsVerbosity: true,
														
 
															 		supportsTemperature: false,
														
 
															+		tiers: [
														
 
															+			{ name: "flex", contextWindow: 400000, inputPrice: 0.625, outputPrice: 5.0, cacheReadsPrice: 0.0625 },
														
 
															+			{ name: "priority", contextWindow: 400000, inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 },
														
 
															+		],
														
 
															 	},
														
 
															 	"gpt-5-mini-2025-08-07": {
														
 
															 		maxTokens: 128000,
														
@@ -46,6 +50,10 @@ export const openAiNativeModels = {
 
															 		description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
														
 
															 		supportsVerbosity: true,
														
 
															 		supportsTemperature: false,
														
 
															+		tiers: [
														
 
															+			{ name: "flex", contextWindow: 400000, inputPrice: 0.125, outputPrice: 1.0, cacheReadsPrice: 0.0125 },
														
 
															+			{ name: "priority", contextWindow: 400000, inputPrice: 0.45, outputPrice: 3.6, cacheReadsPrice: 0.045 },
														
 
															+		],
														
 
															 	},
														
 
															 	"gpt-5-nano-2025-08-07": {
														
 
															 		maxTokens: 128000,
														
@@ -60,6 +68,7 @@ export const openAiNativeModels = {
 
															 		description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
														
 
															 		supportsVerbosity: true,
														
 
															 		supportsTemperature: false,
														
 
															+		tiers: [{ name: "flex", contextWindow: 400000, inputPrice: 0.025, outputPrice: 0.2, cacheReadsPrice: 0.0025 }],
														
 
															 	},
														
 
															 	"gpt-4.1": {
														
 
															 		maxTokens: 32_768,
														
@@ -70,6 +79,9 @@ export const openAiNativeModels = {
 
															 		outputPrice: 8,
														
 
															 		cacheReadsPrice: 0.5,
														
 
															 		supportsTemperature: true,
														
 
															+		tiers: [
														
 
															+			{ name: "priority", contextWindow: 1_047_576, inputPrice: 3.5, outputPrice: 14.0, cacheReadsPrice: 0.875 },
														
 
															+		],
														
 
															 	},
														
 
															 	"gpt-4.1-mini": {
														
 
															 		maxTokens: 32_768,
														
@@ -80,6 +92,9 @@ export const openAiNativeModels = {
 
															 		outputPrice: 1.6,
														
 
															 		cacheReadsPrice: 0.1,
														
 
															 		supportsTemperature: true,
														
 
															+		tiers: [
														
 
															+			{ name: "priority", contextWindow: 1_047_576, inputPrice: 0.7, outputPrice: 2.8, cacheReadsPrice: 0.175 },
														
 
															+		],
														
 
															 	},
														
 
															 	"gpt-4.1-nano": {
														
 
															 		maxTokens: 32_768,
														
@@ -90,6 +105,9 @@ export const openAiNativeModels = {
 
															 		outputPrice: 0.4,
														
 
															 		cacheReadsPrice: 0.025,
														
 
															 		supportsTemperature: true,
														
 
															+		tiers: [
														
 
															+			{ name: "priority", contextWindow: 1_047_576, inputPrice: 0.2, outputPrice: 0.8, cacheReadsPrice: 0.05 },
														
 
															+		],
														
 
															 	},
														
 
															 	o3: {
														
 
															 		maxTokens: 100_000,
														
@@ -102,6 +120,10 @@ export const openAiNativeModels = {
 
															 		supportsReasoningEffort: true,
														
 
															 		reasoningEffort: "medium",
														
 
															 		supportsTemperature: false,
														
 
															+		tiers: [
														
 
															+			{ name: "flex", contextWindow: 200_000, inputPrice: 1.0, outputPrice: 4.0, cacheReadsPrice: 0.25 },
														
 
															+			{ name: "priority", contextWindow: 200_000, inputPrice: 3.5, outputPrice: 14.0, cacheReadsPrice: 0.875 },
														
 
															+		],
														
 
															 	},
														
 
															 	"o3-high": {
														
 
															 		maxTokens: 100_000,
														
@@ -136,6 +158,10 @@ export const openAiNativeModels = {
 
															 		supportsReasoningEffort: true,
														
 
															 		reasoningEffort: "medium",
														
 
															 		supportsTemperature: false,
														
 
															+		tiers: [
														
 
															+			{ name: "flex", contextWindow: 200_000, inputPrice: 0.55, outputPrice: 2.2, cacheReadsPrice: 0.138 },
														
 
															+			{ name: "priority", contextWindow: 200_000, inputPrice: 2.0, outputPrice: 8.0, cacheReadsPrice: 0.5 },
														
 
															+		],
														
 
															 	},
														
 
															 	"o4-mini-high": {
														
 
															 		maxTokens: 100_000,
														
@@ -232,6 +258,9 @@ export const openAiNativeModels = {
 
															 		outputPrice: 10,
														
 
															 		cacheReadsPrice: 1.25,
														
 
															 		supportsTemperature: true,
														
 
															+		tiers: [
														
 
															+			{ name: "priority", contextWindow: 128_000, inputPrice: 4.25, outputPrice: 17.0, cacheReadsPrice: 2.125 },
														
 
															+		],
														
 
															 	},
														
 
															 	"gpt-4o-mini": {
														
 
															 		maxTokens: 16_384,
														
@@ -242,6 +271,9 @@ export const openAiNativeModels = {
 
															 		outputPrice: 0.6,
														
 
															 		cacheReadsPrice: 0.075,
														
 
															 		supportsTemperature: true,
														
 
															+		tiers: [
														
 
															+			{ name: "priority", contextWindow: 128_000, inputPrice: 0.25, outputPrice: 1.0, cacheReadsPrice: 0.125 },
														
 
															+		],
														
 
															 	},
														
 
															 	"codex-mini-latest": {
														
 
															 		maxTokens: 16_384,
														
--- a/src/api/providers/openai-native.ts
+++ b/src/api/providers/openai-native.ts
@@ -11,6 +11,7 @@ import {
 
															 	type ReasoningEffort,
														
 
															 	type VerbosityLevel,
														
 
															 	type ReasoningEffortWithMinimal,
														
 
															+	type ServiceTier,
														
 
															 } from "@roo-code/types"
														
 
															 import type { ApiHandlerOptions } from "../../shared/api"
														
@@ -36,6 +37,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
															 	private lastResponseId: string | undefined
														
 
															 	private responseIdPromise: Promise<string | undefined> | undefined
														
 
															 	private responseIdResolver: ((value: string | undefined) => void) | undefined
														
 
															+	// Resolved service tier from Responses API (actual tier used by OpenAI)
														
 
															+	private lastServiceTier: ServiceTier | undefined
														
 
															 	// Event types handled by the shared event processor to avoid duplication
														
 
															 	private readonly coreHandledEventTypes = new Set<string>([
														
@@ -90,10 +93,15 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
															 		const cacheReadTokens =
														
 
															 			usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? cachedFromDetails ?? 0
														
 
															+		// Resolve effective tier: prefer actual tier from response; otherwise requested tier
														
 
															+		const effectiveTier =
														
 
															+			this.lastServiceTier || (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
														
 
															+		const effectiveInfo = this.applyServiceTierPricing(model.info, effectiveTier)
														
 
															+
														
 
															 		// Pass total input tokens directly to calculateApiCostOpenAI
														
 
															 		// The function handles subtracting both cache reads and writes internally (see shared/cost.ts:46)
														
 
															 		const totalCost = calculateApiCostOpenAI(
														
 
															-			model.info,
														
 
															+			effectiveInfo,
														
 
															 			totalInputTokens,
														
 
															 			totalOutputTokens,
														
 
															 			cacheWriteTokens,
														
@@ -146,6 +154,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
															 		messages: Anthropic.Messages.MessageParam[],
														
 
															 		metadata?: ApiHandlerCreateMessageMetadata,
														
 
															 	): ApiStream {
														
 
															+		// Reset resolved tier for this request; will be set from response if present
														
 
															+		this.lastServiceTier = undefined
														
 
															+
														
 
															 		// Use Responses API for ALL models
														
 
															 		const { verbosity, reasoning } = this.getModel()
														
@@ -233,8 +244,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
															 			previous_response_id?: string
														
 
															 			store?: boolean
														
 
															 			instructions?: string
														
 
															+			service_tier?: ServiceTier
														
 
															 		}
														
 
															+		// Validate requested tier against model support; if not supported, omit.
														
 
															+		const requestedTier = (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
														
 
															+		const allowedTierNames = new Set(model.info.tiers?.map((t) => t.name).filter(Boolean) || [])
														
 
															+
														
 
															 		const body: Gpt5RequestBody = {
														
 
															 			model: model.id,
														
 
															 			input: formattedInput,
														
@@ -262,6 +278,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
															 			// Use the per-request reserved output computed by Roo (params.maxTokens from getModelParams).
														
 
															 			...(model.maxTokens ? { max_output_tokens: model.maxTokens } : {}),
														
 
															 			...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }),
														
 
															+			// Include tier when selected and supported by the model, or when explicitly "default"
														
 
															+			...(requestedTier &&
														
 
															+				(requestedTier === "default" || allowedTierNames.has(requestedTier)) && {
														
 
															+					service_tier: requestedTier,
														
 
															+				}),
														
 
															 		}
														
 
															 		// Include text.verbosity only when the model explicitly supports it
														
@@ -636,6 +657,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
															 							if (parsed.response?.id) {
														
 
															 								this.resolveResponseId(parsed.response.id)
														
 
															 							}
														
 
															+							// Capture resolved service tier if present
														
 
															+							if (parsed.response?.service_tier) {
														
 
															+								this.lastServiceTier = parsed.response.service_tier as ServiceTier
														
 
															+							}
														
 
															 							// Delegate standard event types to the shared processor to avoid duplication
														
 
															 							if (parsed?.type && this.coreHandledEventTypes.has(parsed.type)) {
														
@@ -927,6 +952,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
															 								if (parsed.response?.id) {
														
 
															 									this.resolveResponseId(parsed.response.id)
														
 
															 								}
														
 
															+								// Capture resolved service tier if present
														
 
															+								if (parsed.response?.service_tier) {
														
 
															+									this.lastServiceTier = parsed.response.service_tier as ServiceTier
														
 
															+								}
														
 
															 								// Check if the done event contains the complete output (as a fallback)
														
 
															 								if (
														
@@ -1051,6 +1080,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
															 		if (event?.response?.id) {
														
 
															 			this.resolveResponseId(event.response.id)
														
 
															 		}
														
 
															+		// Capture resolved service tier when available
														
 
															+		if (event?.response?.service_tier) {
														
 
															+			this.lastServiceTier = event.response.service_tier as ServiceTier
														
 
															+		}
														
 
															 		// Handle known streaming text deltas
														
 
															 		if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") {
														
@@ -1141,6 +1174,26 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
															 		return info.reasoningEffort as ReasoningEffortWithMinimal | undefined
														
 
															 	}
														
 
															+	/**
														
 
															+	 * Selects the ModelInfo used for cost calculation under `tier`. When `tier` is omitted, is "default",
														
 
															+	 * or has no same-named entry in `info.tiers`, `info` itself is returned. Otherwise a shallow copy is
														
 
															+	 * returned whose four price fields use the tier entry's value unless it is null or undefined.
														
 
															+	 */
														
 
															+	private applyServiceTierPricing(info: ModelInfo, tier?: ServiceTier): ModelInfo {
														
 
															+		const wantsOverride = Boolean(tier) && tier !== "default"
														
 
															+		const match = wantsOverride ? info.tiers?.find((entry) => entry.name === tier) : undefined
														
 
															+		if (!match) return info
														
 
															+
														
 
															+		const { inputPrice, outputPrice, cacheReadsPrice, cacheWritesPrice } = match
														
 
															+		return {
														
 
															+			...info,
														
 
															+			inputPrice: inputPrice ?? info.inputPrice,
														
 
															+			outputPrice: outputPrice ?? info.outputPrice,
														
 
															+			cacheReadsPrice: cacheReadsPrice ?? info.cacheReadsPrice,
														
 
															+			cacheWritesPrice: cacheWritesPrice ?? info.cacheWritesPrice,
														
 
															+		}
														
 
															+	}
														
 
															+
														
 
															 	// Removed isResponsesApiModel method as ALL models now use the Responses API
														
 
															 	override getModel() {
														
@@ -1214,6 +1267,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
															 				store: false, // Don't store prompt completions
														
 
															 			}
														
 
															+			// Include service tier if selected and supported
														
 
															+			const requestedTier = (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
														
 
															+			const allowedTierNames = new Set(model.info.tiers?.map((t) => t.name).filter(Boolean) || [])
														
 
															+			if (requestedTier && (requestedTier === "default" || allowedTierNames.has(requestedTier))) {
														
 
															+				requestBody.service_tier = requestedTier
														
 
															+			}
														
 
															+
														
 
															 			// Add reasoning if supported
														
 
															 			if (reasoningEffort) {
														
 
															 				requestBody.reasoning = {
														
--- a/webview-ui/src/components/settings/ApiOptions.tsx
+++ b/webview-ui/src/components/settings/ApiOptions.tsx
@@ -514,7 +514,11 @@ const ApiOptions = ({
 
															 			)}
														
 
															 			{selectedProvider === "openai-native" && (
														
 
															-				<OpenAI apiConfiguration={apiConfiguration} setApiConfigurationField={setApiConfigurationField} />
														
 
															+				<OpenAI
														
 
															+					apiConfiguration={apiConfiguration}
														
 
															+					setApiConfigurationField={setApiConfigurationField}
														
 
															+					selectedModelInfo={selectedModelInfo}
														
 
															+				/>
														
 
															 			)}
														
 
															 			{selectedProvider === "mistral" && (
														
--- a/webview-ui/src/components/settings/ModelInfoView.tsx
+++ b/webview-ui/src/components/settings/ModelInfoView.tsx
@@ -25,7 +25,13 @@ export const ModelInfoView = ({
 
															 }: ModelInfoViewProps) => {
														
 
															 	const { t } = useAppTranslation()
														
 
															-	const infoItems = [
														
 
															+	// Show tiered pricing table for OpenAI Native when model supports non-standard tiers
														
 
															+	const allowedTierNames =
														
 
															+		modelInfo?.tiers?.filter((t) => t.name === "flex" || t.name === "priority")?.map((t) => t.name) ?? []
														
 
															+	const shouldShowTierPricingTable = apiProvider === "openai-native" && allowedTierNames.length > 0
														
 
															+	const fmt = (n?: number) => (typeof n === "number" ? `${formatPrice(n)}` : "—")
														
 
															+
														
 
															+	const baseInfoItems = [
														
 
															 		typeof modelInfo?.contextWindow === "number" && modelInfo.contextWindow > 0 && (
														
 
															 			<>
														
 
															 				<span className="font-medium">{t("settings:modelInfo.contextWindow")}</span>{" "}
														
@@ -53,6 +59,21 @@ export const ModelInfoView = ({
 
															 			supportsLabel={t("settings:modelInfo.supportsPromptCache")}
														
 
															 			doesNotSupportLabel={t("settings:modelInfo.noPromptCache")}
														
 
															 		/>,
														
 
															+		apiProvider === "gemini" && (
														
 
															+			<span className="italic">
														
 
															+				{selectedModelId.includes("pro-preview")
														
 
															+					? t("settings:modelInfo.gemini.billingEstimate")
														
 
															+					: t("settings:modelInfo.gemini.freeRequests", {
														
 
															+							count: selectedModelId && selectedModelId.includes("flash") ? 15 : 2,
														
 
															+						})}{" "}
														
 
															+				<VSCodeLink href="https://ai.google.dev/pricing" className="text-sm">
														
 
															+					{t("settings:modelInfo.gemini.pricingDetails")}
														
 
															+				</VSCodeLink>
														
 
															+			</span>
														
 
															+		),
														
 
															+	].filter(Boolean)
														
 
															+
														
 
															+	const priceInfoItems = [
														
 
															 		modelInfo?.inputPrice !== undefined && modelInfo.inputPrice > 0 && (
														
 
															 			<>
														
 
															 				<span className="font-medium">{t("settings:modelInfo.inputPrice")}:</span>{" "}
														
@@ -77,20 +98,10 @@ export const ModelInfoView = ({
 
															 				{formatPrice(modelInfo.cacheWritesPrice || 0)} / 1M tokens
														
 
															 			</>
														
 
															 		),
														
 
															-		apiProvider === "gemini" && (
														
 
															-			<span className="italic">
														
 
															-				{selectedModelId.includes("pro-preview")
														
 
															-					? t("settings:modelInfo.gemini.billingEstimate")
														
 
															-					: t("settings:modelInfo.gemini.freeRequests", {
														
 
															-							count: selectedModelId && selectedModelId.includes("flash") ? 15 : 2,
														
 
															-						})}{" "}
														
 
															-				<VSCodeLink href="https://ai.google.dev/pricing" className="text-sm">
														
 
															-					{t("settings:modelInfo.gemini.pricingDetails")}
														
 
															-				</VSCodeLink>
														
 
															-			</span>
														
 
															-		),
														
 
															 	].filter(Boolean)
														
 
															+	const infoItems = shouldShowTierPricingTable ? baseInfoItems : [...baseInfoItems, ...priceInfoItems]
														
 
															+
														
 
															 	return (
														
 
															 		<>
														
 
															 			{modelInfo?.description && (
														
@@ -106,6 +117,86 @@ export const ModelInfoView = ({
 
															 					<div key={index}>{item}</div>
														
 
															 				))}
														
 
															 			</div>
														
 
															+
														
 
															+			{shouldShowTierPricingTable && (
														
 
															+				<div className="mt-2">
														
 
															+					<div className="text-xs text-vscode-descriptionForeground mb-1">
														
 
															+						{t("settings:serviceTier.pricingTableTitle")}
														
 
															+					</div>
														
 
															+					<div className="border border-vscode-dropdown-border rounded-xs overflow-hidden">
														
 
															+						<table className="w-full text-sm">
														
 
															+							<thead className="bg-vscode-dropdown-background">
														
 
															+								<tr>
														
 
															+									<th className="text-left px-3 py-1.5">{t("settings:serviceTier.columns.tier")}</th>
														
 
															+									<th className="text-right px-3 py-1.5">
														
 
															+										{t("settings:serviceTier.columns.input")}
														
 
															+									</th>
														
 
															+									<th className="text-right px-3 py-1.5">
														
 
															+										{t("settings:serviceTier.columns.output")}
														
 
															+									</th>
														
 
															+									<th className="text-right px-3 py-1.5">
														
 
															+										{t("settings:serviceTier.columns.cacheReads")}
														
 
															+									</th>
														
 
															+								</tr>
														
 
															+							</thead>
														
 
															+							<tbody>
														
 
															+								<tr className="border-t border-vscode-dropdown-border/60">
														
 
															+									<td className="px-3 py-1.5">{t("settings:serviceTier.standard")}</td>
														
 
															+									<td className="px-3 py-1.5 text-right">{fmt(modelInfo?.inputPrice)}</td>
														
 
															+									<td className="px-3 py-1.5 text-right">{fmt(modelInfo?.outputPrice)}</td>
														
 
															+									<td className="px-3 py-1.5 text-right">{fmt(modelInfo?.cacheReadsPrice)}</td>
														
 
															+								</tr>
														
 
															+								{allowedTierNames.includes("flex") && (
														
 
															+									<tr className="border-t border-vscode-dropdown-border/60">
														
 
															+										<td className="px-3 py-1.5">{t("settings:serviceTier.flex")}</td>
														
 
															+										<td className="px-3 py-1.5 text-right">
														
 
															+											{fmt(
														
 
															+												modelInfo?.tiers?.find((t) => t.name === "flex")?.inputPrice ??
														
 
															+													modelInfo?.inputPrice,
														
 
															+											)}
														
 
															+										</td>
														
 
															+										<td className="px-3 py-1.5 text-right">
														
 
															+											{fmt(
														
 
															+												modelInfo?.tiers?.find((t) => t.name === "flex")?.outputPrice ??
														
 
															+													modelInfo?.outputPrice,
														
 
															+											)}
														
 
															+										</td>
														
 
															+										<td className="px-3 py-1.5 text-right">
														
 
															+											{fmt(
														
 
															+												modelInfo?.tiers?.find((t) => t.name === "flex")?.cacheReadsPrice ??
														
 
															+													modelInfo?.cacheReadsPrice,
														
 
															+											)}
														
 
															+										</td>
														
 
															+									</tr>
														
 
															+								)}
														
 
															+								{allowedTierNames.includes("priority") && (
														
 
															+									<tr className="border-t border-vscode-dropdown-border/60">
														
 
															+										<td className="px-3 py-1.5">{t("settings:serviceTier.priority")}</td>
														
 
															+										<td className="px-3 py-1.5 text-right">
														
 
															+											{fmt(
														
 
															+												modelInfo?.tiers?.find((t) => t.name === "priority")?.inputPrice ??
														
 
															+													modelInfo?.inputPrice,
														
 
															+											)}
														
 
															+										</td>
														
 
															+										<td className="px-3 py-1.5 text-right">
														
 
															+											{fmt(
														
 
															+												modelInfo?.tiers?.find((t) => t.name === "priority")?.outputPrice ??
														
 
															+													modelInfo?.outputPrice,
														
 
															+											)}
														
 
															+										</td>
														
 
															+										<td className="px-3 py-1.5 text-right">
														
 
															+											{fmt(
														
 
															+												modelInfo?.tiers?.find((t) => t.name === "priority")?.cacheReadsPrice ??
														
 
															+													modelInfo?.cacheReadsPrice,
														
 
															+											)}
														
 
															+										</td>
														
 
															+									</tr>
														
 
															+								)}
														
 
															+							</tbody>
														
 
															+						</table>
														
 
															+					</div>
														
 
															+				</div>
														
 
															+			)}
														
 
															 		</>
														
 
															 	)
														
 
															 }
														
--- a/webview-ui/src/components/settings/providers/OpenAI.tsx
+++ b/webview-ui/src/components/settings/providers/OpenAI.tsx
@@ -2,19 +2,21 @@ import { useCallback, useState } from "react"
 
															 import { Checkbox } from "vscrui"
														
 
															 import { VSCodeTextField } from "@vscode/webview-ui-toolkit/react"
														
 
															-import type { ProviderSettings } from "@roo-code/types"
														
 
															+import type { ModelInfo, ProviderSettings } from "@roo-code/types"
														
 
															 import { useAppTranslation } from "@src/i18n/TranslationContext"
														
 
															 import { VSCodeButtonLink } from "@src/components/common/VSCodeButtonLink"
														
 
															+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue, StandardTooltip } from "@src/components/ui"
														
 
															 import { inputEventTransform } from "../transforms"
														
 
															 type OpenAIProps = {
														
 
															 	apiConfiguration: ProviderSettings
														
 
															 	setApiConfigurationField: (field: keyof ProviderSettings, value: ProviderSettings[keyof ProviderSettings]) => void
														
 
															+	selectedModelInfo?: ModelInfo // Optional model metadata; its `tiers` names decide which service-tier options are rendered.
														
 
															 }
														
 
															-export const OpenAI = ({ apiConfiguration, setApiConfigurationField }: OpenAIProps) => {
														
 
															+export const OpenAI = ({ apiConfiguration, setApiConfigurationField, selectedModelInfo }: OpenAIProps) => {
														
 
															 	const { t } = useAppTranslation()
														
 
															 	const [openAiNativeBaseUrlSelected, setOpenAiNativeBaseUrlSelected] = useState(
														
@@ -72,6 +74,44 @@ export const OpenAI = ({ apiConfiguration, setApiConfigurationField }: OpenAIPro
 
															 					{t("settings:providers.getOpenAiApiKey")}
														
 
															 				</VSCodeButtonLink>
														
 
															 			)}
														
 
															+
														
 
															+			{(() => {
														
 
															+				// Render the selector only when the model lists a "flex" or "priority" tier; otherwise render nothing.
														
 
															+				const tierNames = selectedModelInfo?.tiers?.map((tier) => tier.name) ?? []
														
 
															+				const allowedTiers = tierNames.filter((name) => name === "flex" || name === "priority")
														
 
															+				if (allowedTiers.length === 0) return null
														
 
															+
														
 
															+				return (
														
 
															+					<div className="flex flex-col gap-1 mt-2" data-testid="openai-service-tier">
														
 
															+						<div className="flex items-center gap-1">
														
 
															+							<label className="block font-medium mb-1">{t("settings:serviceTier.label")}</label>
														
 
															+							<StandardTooltip content={t("settings:serviceTier.tooltip")}>
														
 
															+								<i className="codicon codicon-info text-vscode-descriptionForeground text-xs" />
														
 
															+							</StandardTooltip>
														
 
															+						</div>
														
 
															+						{/* "default" is always offered and is the displayed value when no tier has been saved. */}
														
 
															+						<Select
														
 
															+							value={apiConfiguration.openAiNativeServiceTier || "default"}
														
 
															+							onValueChange={(value) =>
														
 
															+								setApiConfigurationField(
														
 
															+									"openAiNativeServiceTier",
														
 
															+									value as ProviderSettings["openAiNativeServiceTier"],
														
 
															+								)
														
 
															+							}>
														
 
															+							<SelectTrigger className="w-full">
														
 
															+								<SelectValue placeholder={t("settings:common.select")} />
														
 
															+							</SelectTrigger>
														
 
															+							<SelectContent>
														
 
															+								<SelectItem value="default">{t("settings:serviceTier.standard")}</SelectItem>
														
 
															+								{allowedTiers.includes("flex") && <SelectItem value="flex">{t("settings:serviceTier.flex")}</SelectItem>}
														
 
															+								{allowedTiers.includes("priority") && (
														
 
															+									<SelectItem value="priority">{t("settings:serviceTier.priority")}</SelectItem>
														
 
															+								)}
														
 
															+							</SelectContent>
														
 
															+						</Select>
														
 
															+					</div>
														
 
															+				)
														
 
															+			})()}
														
 
															 		</>
														
 
															 	)
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/ca/settings.json
+++ b/webview-ui/src/i18n/locales/ca/settings.json
@@ -858,5 +858,19 @@
 
															 	"includeMaxOutputTokensDescription": "Enviar el paràmetre de tokens màxims de sortida a les sol·licituds API. Alguns proveïdors poden no admetre això.",
														
 
															 	"limitMaxTokensDescription": "Limitar el nombre màxim de tokens en la resposta",
														
 
															 	"maxOutputTokensLabel": "Tokens màxims de sortida",
														
 
															-	"maxTokensGenerateDescription": "Tokens màxims a generar en la resposta"
														
 
															+	"maxTokensGenerateDescription": "Tokens màxims a generar en la resposta",
														
 
															+	"serviceTier": {
														
 
															+		"label": "Nivell de servei",
														
 
															+		"tooltip": "Per a un processament més ràpid de les sol·licituds de l'API, proveu el nivell de servei de processament prioritari. Per a preus més baixos amb una latència més alta, proveu el nivell de processament flexible.",
														
 
															+		"standard": "Estàndard",
														
 
															+		"flex": "Flex",
														
 
															+		"priority": "Prioritat",
														
 
															+		"pricingTableTitle": "Preus per nivell de servei (preu per 1M de tokens)",
														
 
															+		"columns": {
														
 
															+			"tier": "Nivell",
														
 
															+			"input": "Entrada",
														
 
															+			"output": "Sortida",
														
 
															+			"cacheReads": "Lectures de memòria cau"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/de/settings.json
+++ b/webview-ui/src/i18n/locales/de/settings.json
@@ -858,5 +858,19 @@
 
															 	"includeMaxOutputTokensDescription": "Senden Sie den Parameter für maximale Ausgabe-Tokens in API-Anfragen. Einige Anbieter unterstützen dies möglicherweise nicht.",
														
 
															 	"limitMaxTokensDescription": "Begrenze die maximale Anzahl von Tokens in der Antwort",
														
 
															 	"maxOutputTokensLabel": "Maximale Ausgabe-Tokens",
														
 
															-	"maxTokensGenerateDescription": "Maximale Tokens, die in der Antwort generiert werden"
														
 
															+	"maxTokensGenerateDescription": "Maximale Tokens, die in der Antwort generiert werden",
														
 
															+	"serviceTier": {
														
 
															+		"label": "Service-Stufe",
														
 
															+		"tooltip": "Für eine schnellere Verarbeitung von API-Anfragen, probiere die Prioritäts-Verarbeitungsstufe. Für niedrigere Preise bei höherer Latenz, probiere die Flex-Verarbeitungsstufe.",
														
 
															+		"standard": "Standard",
														
 
															+		"flex": "Flex",
														
 
															+		"priority": "Priorität",
														
 
															+		"pricingTableTitle": "Preise nach Service-Stufe (Preis pro 1 Mio. Token)",
														
 
															+		"columns": {
														
 
															+			"tier": "Stufe",
														
 
															+			"input": "Eingabe",
														
 
															+			"output": "Ausgabe",
														
 
															+			"cacheReads": "Cache-Lesevorgänge"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/en/settings.json
+++ b/webview-ui/src/i18n/locales/en/settings.json
@@ -857,5 +857,19 @@
 
															 	"includeMaxOutputTokensDescription": "Send max output tokens parameter in API requests. Some providers may not support this.",
														
 
															 	"limitMaxTokensDescription": "Limit the maximum number of tokens in the response",
														
 
															 	"maxOutputTokensLabel": "Max output tokens",
														
 
															-	"maxTokensGenerateDescription": "Maximum tokens to generate in response"
														
 
															+	"maxTokensGenerateDescription": "Maximum tokens to generate in response",
														
 
															+	"serviceTier": {
														
 
															+		"label": "Service tier",
														
 
															+		"tooltip": "For faster processing of API requests, try the priority processing service tier. For lower prices with higher latency, try the flex processing tier.",
														
 
															+		"standard": "Standard",
														
 
															+		"flex": "Flex",
														
 
															+		"priority": "Priority",
														
 
															+		"pricingTableTitle": "Pricing by service tier (price per 1M tokens)",
														
 
															+		"columns": {
														
 
															+			"tier": "Tier",
														
 
															+			"input": "Input",
														
 
															+			"output": "Output",
														
 
															+			"cacheReads": "Cache reads"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/es/settings.json
+++ b/webview-ui/src/i18n/locales/es/settings.json
@@ -858,5 +858,19 @@
 
															 	"includeMaxOutputTokensDescription": "Enviar parámetro de tokens máximos de salida en solicitudes API. Algunos proveedores pueden no soportar esto.",
														
 
															 	"limitMaxTokensDescription": "Limitar el número máximo de tokens en la respuesta",
														
 
															 	"maxOutputTokensLabel": "Tokens máximos de salida",
														
 
															-	"maxTokensGenerateDescription": "Tokens máximos a generar en la respuesta"
														
 
															+	"maxTokensGenerateDescription": "Tokens máximos a generar en la respuesta",
														
 
															+	"serviceTier": {
														
 
															+		"label": "Nivel de servicio",
														
 
															+		"tooltip": "Para un procesamiento más rápido de las solicitudes de API, prueba el nivel de servicio de procesamiento prioritario. Para precios más bajos con mayor latencia, prueba el nivel de procesamiento flexible.",
														
 
															+		"standard": "Estándar",
														
 
															+		"flex": "Flexible",
														
 
															+		"priority": "Prioridad",
														
 
															+		"pricingTableTitle": "Precios por nivel de servicio (precio por 1M de tokens)",
														
 
															+		"columns": {
														
 
															+			"tier": "Nivel",
														
 
															+			"input": "Entrada",
														
 
															+			"output": "Salida",
														
 
															+			"cacheReads": "Lecturas de caché"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/fr/settings.json
+++ b/webview-ui/src/i18n/locales/fr/settings.json
@@ -858,5 +858,19 @@
 
															 	"includeMaxOutputTokensDescription": "Envoyer le paramètre de tokens de sortie maximum dans les requêtes API. Certains fournisseurs peuvent ne pas supporter cela.",
														
 
															 	"limitMaxTokensDescription": "Limiter le nombre maximum de tokens dans la réponse",
														
 
															 	"maxOutputTokensLabel": "Tokens de sortie maximum",
														
 
															-	"maxTokensGenerateDescription": "Tokens maximum à générer dans la réponse"
														
 
															+	"maxTokensGenerateDescription": "Tokens maximum à générer dans la réponse",
														
 
															+	"serviceTier": {
														
 
															+		"label": "Niveau de service",
														
 
															+		"tooltip": "Pour un traitement plus rapide des demandes d'API, essayez le niveau de service de traitement prioritaire. Pour des prix plus bas avec une latence plus élevée, essayez le niveau de traitement flexible.",
														
 
															+		"standard": "Standard",
														
 
															+		"flex": "Flexible",
														
 
															+		"priority": "Priorité",
														
 
															+		"pricingTableTitle": "Tarification par niveau de service (prix par 1M de tokens)",
														
 
															+		"columns": {
														
 
															+			"tier": "Niveau",
														
 
															+			"input": "Entrée",
														
 
															+			"output": "Sortie",
														
 
															+			"cacheReads": "Lectures du cache"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/hi/settings.json
+++ b/webview-ui/src/i18n/locales/hi/settings.json
@@ -859,5 +859,19 @@
 
															 	"includeMaxOutputTokensDescription": "API अनुरोधों में अधिकतम आउटपुट टोकन पैरामीटर भेजें। कुछ प्रदाता इसका समर्थन नहीं कर सकते हैं।",
														
 
															 	"limitMaxTokensDescription": "प्रतिक्रिया में टोकन की अधिकतम संख्या सीमित करें",
														
 
															 	"maxOutputTokensLabel": "अधिकतम आउटपुट टोकन",
														
 
															-	"maxTokensGenerateDescription": "प्रतिक्रिया में उत्पन्न करने के लिए अधिकतम टोकन"
														
 
															+	"maxTokensGenerateDescription": "प्रतिक्रिया में उत्पन्न करने के लिए अधिकतम टोकन",
														
 
															+	"serviceTier": {
														
 
															+		"label": "सेवा स्तर",
														
 
															+		"tooltip": "API अनुरोधों के तेज़ प्रसंस्करण के लिए, प्राथमिकता प्रसंस्करण सेवा स्तर का प्रयास करें। उच्च विलंबता के साथ कम कीमतों के लिए, फ्लेक्स प्रसंस्करण स्तर का प्रयास करें।",
														
 
															+		"standard": "मानक",
														
 
															+		"flex": "फ्लेक्स",
														
 
															+		"priority": "प्राथमिकता",
														
 
															+		"pricingTableTitle": "सेवा स्तर के अनुसार मूल्य निर्धारण (प्रति 1M टोकन मूल्य)",
														
 
															+		"columns": {
														
 
															+			"tier": "स्तर",
														
 
															+			"input": "इनपुट",
														
 
															+			"output": "आउटपुट",
														
 
															+			"cacheReads": "कैश रीड"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/id/settings.json
+++ b/webview-ui/src/i18n/locales/id/settings.json
@@ -888,5 +888,19 @@
 
															 	"includeMaxOutputTokensDescription": "Kirim parameter token output maksimum dalam permintaan API. Beberapa provider mungkin tidak mendukung ini.",
														
 
															 	"limitMaxTokensDescription": "Batasi jumlah maksimum token dalam respons",
														
 
															 	"maxOutputTokensLabel": "Token output maksimum",
														
 
															-	"maxTokensGenerateDescription": "Token maksimum untuk dihasilkan dalam respons"
														
 
															+	"maxTokensGenerateDescription": "Token maksimum untuk dihasilkan dalam respons",
														
 
															+	"serviceTier": {
														
 
															+		"label": "Tingkat layanan",
														
 
															+		"tooltip": "Untuk pemrosesan permintaan API yang lebih cepat, coba tingkat layanan pemrosesan prioritas. Untuk harga lebih rendah dengan latensi lebih tinggi, coba tingkat pemrosesan fleksibel.",
														
 
															+		"standard": "Standar",
														
 
															+		"flex": "Fleksibel",
														
 
															+		"priority": "Prioritas",
														
 
															+		"pricingTableTitle": "Harga berdasarkan tingkat layanan (harga per 1 juta token)",
														
 
															+		"columns": {
														
 
															+			"tier": "Tingkat",
														
 
															+			"input": "Input",
														
 
															+			"output": "Output",
														
 
															+			"cacheReads": "Pembacaan cache"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/it/settings.json
+++ b/webview-ui/src/i18n/locales/it/settings.json
@@ -859,5 +859,19 @@
 
															 	"includeMaxOutputTokensDescription": "Invia il parametro dei token di output massimi nelle richieste API. Alcuni provider potrebbero non supportarlo.",
														
 
															 	"limitMaxTokensDescription": "Limita il numero massimo di token nella risposta",
														
 
															 	"maxOutputTokensLabel": "Token di output massimi",
														
 
															-	"maxTokensGenerateDescription": "Token massimi da generare nella risposta"
														
 
															+	"maxTokensGenerateDescription": "Token massimi da generare nella risposta",
														
 
															+	"serviceTier": {
														
 
															+		"label": "Livello di servizio",
														
 
															+		"tooltip": "Per un'elaborazione più rapida delle richieste API, prova il livello di servizio di elaborazione prioritaria. Per prezzi più bassi con una latenza maggiore, prova il livello di elaborazione flessibile.",
														
 
															+		"standard": "Standard",
														
 
															+		"flex": "Flessibile",
														
 
															+		"priority": "Priorità",
														
 
															+		"pricingTableTitle": "Prezzi per livello di servizio (prezzo per 1 milione di token)",
														
 
															+		"columns": {
														
 
															+			"tier": "Livello",
														
 
															+			"input": "Input",
														
 
															+			"output": "Output",
														
 
															+			"cacheReads": "Letture cache"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/ja/settings.json
+++ b/webview-ui/src/i18n/locales/ja/settings.json
@@ -859,5 +859,19 @@
 
															 	"includeMaxOutputTokensDescription": "APIリクエストで最大出力トークンパラメータを送信します。一部のプロバイダーはこれをサポートしていない場合があります。",
														
 
															 	"limitMaxTokensDescription": "レスポンスの最大トークン数を制限する",
														
 
															 	"maxOutputTokensLabel": "最大出力トークン",
														
 
															-	"maxTokensGenerateDescription": "レスポンスで生成する最大トークン数"
														
 
															+	"maxTokensGenerateDescription": "レスポンスで生成する最大トークン数",
														
 
															+	"serviceTier": {
														
 
															+		"label": "サービスティア",
														
 
															+		"tooltip": "APIリクエストをより速く処理するには、優先処理サービスティアをお試しください。レイテンシは高くなりますが、より低価格で利用するには、フレックス処理ティアをお試しください。",
														
 
															+		"standard": "標準",
														
 
															+		"flex": "フレックス",
														
 
															+		"priority": "優先",
														
 
															+		"pricingTableTitle": "サービスティア別料金（100万トークンあたりの価格）",
														
 
															+		"columns": {
														
 
															+			"tier": "ティア",
														
 
															+			"input": "入力",
														
 
															+			"output": "出力",
														
 
															+			"cacheReads": "キャッシュ読み取り"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/ko/settings.json
+++ b/webview-ui/src/i18n/locales/ko/settings.json
@@ -859,5 +859,19 @@
 
															 	"includeMaxOutputTokensDescription": "API 요청에서 최대 출력 토큰 매개변수를 전송합니다. 일부 제공업체는 이를 지원하지 않을 수 있습니다.",
														
 
															 	"limitMaxTokensDescription": "응답에서 최대 토큰 수 제한",
														
 
															 	"maxOutputTokensLabel": "최대 출력 토큰",
														
 
															-	"maxTokensGenerateDescription": "응답에서 생성할 최대 토큰 수"
														
 
															+	"maxTokensGenerateDescription": "응답에서 생성할 최대 토큰 수",
														
 
															+	"serviceTier": {
														
 
															+		"label": "서비스 등급",
														
 
															+		"tooltip": "API 요청을 더 빠르게 처리하려면 우선 처리 서비스 등급을 사용해 보세요. 지연 시간이 길어지는 대신 더 낮은 가격을 원하시면 플렉스 처리 등급을 사용해 보세요.",
														
 
															+		"standard": "표준",
														
 
															+		"flex": "플렉스",
														
 
															+		"priority": "우선",
														
 
															+		"pricingTableTitle": "서비스 등급별 가격 (100만 토큰당 가격)",
														
 
															+		"columns": {
														
 
															+			"tier": "등급",
														
 
															+			"input": "입력",
														
 
															+			"output": "출력",
														
 
															+			"cacheReads": "캐시 읽기"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/nl/settings.json
+++ b/webview-ui/src/i18n/locales/nl/settings.json
@@ -859,5 +859,19 @@
 
															 	"includeMaxOutputTokensDescription": "Stuur maximale output tokens parameter in API-verzoeken. Sommige providers ondersteunen dit mogelijk niet.",
														
 
															 	"limitMaxTokensDescription": "Beperk het maximale aantal tokens in het antwoord",
														
 
															 	"maxOutputTokensLabel": "Maximale output tokens",
														
 
															-	"maxTokensGenerateDescription": "Maximale tokens om te genereren in het antwoord"
														
 
															+	"maxTokensGenerateDescription": "Maximale tokens om te genereren in het antwoord",
														
 
															+	"serviceTier": {
														
 
															+		"label": "Serviceniveau",
														
 
															+		"tooltip": "Voor snellere verwerking van API-verzoeken, probeer het prioriteitsverwerkingsniveau. Voor lagere prijzen met hogere latentie, probeer het flexverwerkingsniveau.",
														
 
															+		"standard": "Standaard",
														
 
															+		"flex": "Flex",
														
 
															+		"priority": "Prioriteit",
														
 
															+		"pricingTableTitle": "Prijzen per serviceniveau (prijs per 1M tokens)",
														
 
															+		"columns": {
														
 
															+			"tier": "Niveau",
														
 
															+			"input": "Invoer",
														
 
															+			"output": "Uitvoer",
														
 
															+			"cacheReads": "Cache-leesbewerkingen"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/pl/settings.json
+++ b/webview-ui/src/i18n/locales/pl/settings.json
@@ -859,5 +859,19 @@
 
															 	"includeMaxOutputTokensDescription": "Wyślij parametr maksymalnych tokenów wyjściowych w żądaniach API. Niektórzy dostawcy mogą tego nie obsługiwać.",
														
 
															 	"limitMaxTokensDescription": "Ogranicz maksymalną liczbę tokenów w odpowiedzi",
														
 
															 	"maxOutputTokensLabel": "Maksymalne tokeny wyjściowe",
														
 
															-	"maxTokensGenerateDescription": "Maksymalne tokeny do wygenerowania w odpowiedzi"
														
 
															+	"maxTokensGenerateDescription": "Maksymalne tokeny do wygenerowania w odpowiedzi",
														
 
															+	"serviceTier": {
														
 
															+		"label": "Poziom usług",
														
 
															+		"tooltip": "Aby szybciej przetwarzać żądania API, wypróbuj priorytetowy poziom usług. Aby uzyskać niższe ceny przy wyższej latencji, wypróbuj elastyczny poziom usług.",
														
 
															+		"standard": "Standardowy",
														
 
															+		"flex": "Elastyczny",
														
 
															+		"priority": "Priorytetowy",
														
 
															+		"pricingTableTitle": "Cennik według poziomu usług (cena za 1 mln tokenów)",
														
 
															+		"columns": {
														
 
															+			"tier": "Poziom",
														
 
															+			"input": "Wejście",
														
 
															+			"output": "Wyjście",
														
 
															+			"cacheReads": "Odczyty z pamięci podręcznej"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/pt-BR/settings.json
+++ b/webview-ui/src/i18n/locales/pt-BR/settings.json
@@ -859,5 +859,19 @@
 
															 	"includeMaxOutputTokensDescription": "Enviar parâmetro de tokens máximos de saída nas solicitações de API. Alguns provedores podem não suportar isso.",
														
 
															 	"limitMaxTokensDescription": "Limitar o número máximo de tokens na resposta",
														
 
															 	"maxOutputTokensLabel": "Tokens máximos de saída",
														
 
															-	"maxTokensGenerateDescription": "Tokens máximos para gerar na resposta"
														
 
															+	"maxTokensGenerateDescription": "Tokens máximos para gerar na resposta",
														
 
															+	"serviceTier": {
														
 
															+		"label": "Nível de serviço",
														
 
															+		"tooltip": "Para um processamento mais rápido das solicitações de API, experimente o nível de serviço de processamento prioritário. Para preços mais baixos com maior latência, experimente o nível de processamento flexível.",
														
 
															+		"standard": "Padrão",
														
 
															+		"flex": "Flexível",
														
 
															+		"priority": "Prioritário",
														
 
															+		"pricingTableTitle": "Preços por nível de serviço (preço por 1 milhão de tokens)",
														
 
															+		"columns": {
														
 
															+			"tier": "Nível",
														
 
															+			"input": "Entrada",
														
 
															+			"output": "Saída",
														
 
															+			"cacheReads": "Leituras de cache"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/ru/settings.json
+++ b/webview-ui/src/i18n/locales/ru/settings.json
@@ -859,5 +859,19 @@
 
															 	"includeMaxOutputTokensDescription": "Отправлять параметр максимальных выходных токенов в API-запросах. Некоторые провайдеры могут не поддерживать это.",
														
 
															 	"limitMaxTokensDescription": "Ограничить максимальное количество токенов в ответе",
														
 
															 	"maxOutputTokensLabel": "Максимальные выходные токены",
														
 
															-	"maxTokensGenerateDescription": "Максимальные токены для генерации в ответе"
														
 
															+	"maxTokensGenerateDescription": "Максимальные токены для генерации в ответе",
														
 
															+	"serviceTier": {
														
 
															+		"label": "Уровень обслуживания",
														
 
															+		"tooltip": "Для более быстрой обработки запросов API попробуйте уровень обслуживания с приоритетной обработкой. Для более низких цен с более высокой задержкой попробуйте уровень гибкой обработки.",
														
 
															+		"standard": "Стандартный",
														
 
															+		"flex": "Гибкий",
														
 
															+		"priority": "Приоритетный",
														
 
															+		"pricingTableTitle": "Цены по уровням обслуживания (цена за 1 млн токенов)",
														
 
															+		"columns": {
														
 
															+			"tier": "Уровень",
														
 
															+			"input": "Вход",
														
 
															+			"output": "Выход",
														
 
															+			"cacheReads": "Чтения из кэша"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/tr/settings.json
+++ b/webview-ui/src/i18n/locales/tr/settings.json
@@ -859,5 +859,19 @@
 
															 	"includeMaxOutputTokensDescription": "API isteklerinde maksimum çıktı token parametresini gönder. Bazı sağlayıcılar bunu desteklemeyebilir.",
														
 
															 	"limitMaxTokensDescription": "Yanıttaki maksimum token sayısını sınırla",
														
 
															 	"maxOutputTokensLabel": "Maksimum çıktı tokenları",
														
 
															-	"maxTokensGenerateDescription": "Yanıtta oluşturulacak maksimum token sayısı"
														
 
															+	"maxTokensGenerateDescription": "Yanıtta oluşturulacak maksimum token sayısı",
														
 
															+	"serviceTier": {
														
 
															+		"label": "Hizmet seviyesi",
														
 
															+		"tooltip": "Daha hızlı API isteği işleme için öncelikli işleme hizmeti seviyesini deneyin. Daha yüksek gecikme süresiyle daha düşük fiyatlar için esnek işleme seviyesini deneyin.",
														
 
															+		"standard": "Standart",
														
 
															+		"flex": "Esnek",
														
 
															+		"priority": "Öncelik",
														
 
															+		"pricingTableTitle": "Hizmet seviyesine göre fiyatlandırma (1 milyon token başına fiyat)",
														
 
															+		"columns": {
														
 
															+			"tier": "Seviye",
														
 
															+			"input": "Giriş",
														
 
															+			"output": "Çıkış",
														
 
															+			"cacheReads": "Önbellek okumaları"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/vi/settings.json
+++ b/webview-ui/src/i18n/locales/vi/settings.json
@@ -859,5 +859,19 @@
 
															 	"includeMaxOutputTokensDescription": "Gửi tham số token đầu ra tối đa trong các yêu cầu API. Một số nhà cung cấp có thể không hỗ trợ điều này.",
														
 
															 	"limitMaxTokensDescription": "Giới hạn số lượng token tối đa trong phản hồi",
														
 
															 	"maxOutputTokensLabel": "Token đầu ra tối đa",
														
 
															-	"maxTokensGenerateDescription": "Token tối đa để tạo trong phản hồi"
														
 
															+	"maxTokensGenerateDescription": "Token tối đa để tạo trong phản hồi",
														
 
															+	"serviceTier": {
														
 
															+		"label": "Cấp độ dịch vụ",
														
 
															+		"tooltip": "Để xử lý các yêu cầu API nhanh hơn, hãy thử cấp độ dịch vụ xử lý ưu tiên. Để có giá thấp hơn với độ trễ cao hơn, hãy thử cấp độ xử lý linh hoạt.",
														
 
															+		"standard": "Tiêu chuẩn",
														
 
															+		"flex": "Linh hoạt",
														
 
															+		"priority": "Ưu tiên",
														
 
															+		"pricingTableTitle": "Giá theo cấp độ dịch vụ (giá mỗi 1 triệu token)",
														
 
															+		"columns": {
														
 
															+			"tier": "Cấp độ",
														
 
															+			"input": "Đầu vào",
														
 
															+			"output": "Đầu ra",
														
 
															+			"cacheReads": "Lượt đọc bộ nhớ đệm"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/zh-CN/settings.json
+++ b/webview-ui/src/i18n/locales/zh-CN/settings.json
@@ -859,5 +859,19 @@
 
															 	"includeMaxOutputTokensDescription": "在 API 请求中发送最大输出 Token 参数。某些提供商可能不支持此功能。",
														
 
															 	"limitMaxTokensDescription": "限制响应中的最大 Token 数量",
														
 
															 	"maxOutputTokensLabel": "最大输出 Token 数",
														
 
															-	"maxTokensGenerateDescription": "响应中生成的最大 Token 数"
														
 
															+	"maxTokensGenerateDescription": "响应中生成的最大 Token 数",
														
 
															+	"serviceTier": {
														
 
															+		"label": "服务等级",
														
 
															+		"tooltip": "为加快 API 请求处理速度，请尝试优先处理服务等级。为获得更低价格但延迟较高，请尝试灵活处理等级。",
														
 
															+		"standard": "标准",
														
 
															+		"flex": "灵活",
														
 
															+		"priority": "优先",
														
 
															+		"pricingTableTitle": "按服务等级定价 (每百万 Token 价格)",
														
 
															+		"columns": {
														
 
															+			"tier": "等级",
														
 
															+			"input": "输入",
														
 
															+			"output": "输出",
														
 
															+			"cacheReads": "缓存读取"
														
 
															+		}
														
 
															+	}
														
 
															 }
														
--- a/webview-ui/src/i18n/locales/zh-TW/settings.json
+++ b/webview-ui/src/i18n/locales/zh-TW/settings.json
@@ -859,5 +859,19 @@
 
															 	"includeMaxOutputTokensDescription": "在 API 請求中傳送最大輸出 Token 參數。某些提供商可能不支援此功能。",
														
 
															 	"limitMaxTokensDescription": "限制回應中的最大 Token 數量",
														
 
															 	"maxOutputTokensLabel": "最大輸出 Token 數",
														
 
															-	"maxTokensGenerateDescription": "回應中產生的最大 Token 數"
														
 
															+	"maxTokensGenerateDescription": "回應中產生的最大 Token 數",
														
 
															+	"serviceTier": {
														
 
															+		"label": "服務層級",
														
 
															+		"tooltip": "若需更快的 API 請求處理，請嘗試優先處理服務層級。若需較低價格但延遲較高，請嘗試彈性處理層級。",
														
 
															+		"standard": "標準",
														
 
															+		"flex": "彈性",
														
 
															+		"priority": "優先",
														
 
															+		"pricingTableTitle": "按服務層級定價（每百萬 Token 價格）",
														
 
															+		"columns": {
														
 
															+			"tier": "層級",
														
 
															+			"input": "輸入",
														
 
															+			"output": "輸出",
														
 
															+			"cacheReads": "快取讀取"
														
 
															+		}
														
 
															+	}
														
 
															 }