Explorar el Código

feat(pricing): support codex fast service tier billing

ding113 hace 1 mes
padre
commit
289fe7b4a7

+ 2 - 1
messages/en/settings/prices.json

@@ -216,6 +216,7 @@
     "pinAction": "Pin as local price",
     "pinSuccess": "Pinned {provider} pricing as local model price",
     "pinFailed": "Failed to pin provider pricing",
-    "pinned": "Pinned"
+    "pinned": "Pinned",
+    "priority": "Priority"
   }
 }

+ 2 - 1
messages/ja/settings/prices.json

@@ -215,6 +215,7 @@
     "pinAction": "ローカル価格として固定",
     "pinSuccess": "{provider} の価格をローカルモデル価格として固定しました",
     "pinFailed": "プロバイダー価格の固定に失敗しました",
-    "pinned": "固定済み"
+    "pinned": "固定済み",
+    "priority": "優先"
   }
 }

+ 2 - 1
messages/ru/settings/prices.json

@@ -215,6 +215,7 @@
     "pinAction": "Закрепить как локальную цену",
     "pinSuccess": "Цена {provider} закреплена как локальная цена модели",
     "pinFailed": "Не удалось закрепить цену провайдера",
-    "pinned": "Закреплено"
+    "pinned": "Закреплено",
+    "priority": "Приоритет"
   }
 }

+ 2 - 1
messages/zh-CN/settings/prices.json

@@ -216,6 +216,7 @@
     "pinAction": "固化为本地价格",
     "pinSuccess": "已将 {provider} 价格固化为本地模型价格",
     "pinFailed": "固化供应商价格失败",
-    "pinned": "已固化"
+    "pinned": "已固化",
+    "priority": "优先"
   }
 }

+ 2 - 1
messages/zh-TW/settings/prices.json

@@ -215,6 +215,7 @@
     "pinAction": "固化為本地價格",
     "pinSuccess": "已將 {provider} 價格固化為本地模型價格",
     "pinFailed": "固化供應商價格失敗",
-    "pinned": "已固化"
+    "pinned": "已固化",
+    "priority": "優先"
   }
 }

+ 17 - 1
src/app/[locale]/dashboard/logs/_components/error-details-dialog/components/MetadataTab.tsx

@@ -20,7 +20,10 @@ import { Link } from "@/i18n/routing";
 import { cn, formatTokenAmount } from "@/lib/utils";
 import { formatCurrency } from "@/lib/utils/currency";
 import { formatProviderTimeline } from "@/lib/utils/provider-chain-formatter";
-import { getPricingResolutionSpecialSetting } from "@/lib/utils/special-settings";
+import {
+  getPricingResolutionSpecialSetting,
+  hasPriorityServiceTierSpecialSetting,
+} from "@/lib/utils/special-settings";
 import type { MetadataTabProps } from "../types";
 
 export function MetadataTab({
@@ -54,6 +57,7 @@ export function MetadataTab({
   const pricingSourceLabel = pricingResolution
     ? t(`billingDetails.pricingSource.${pricingResolution.source}`)
     : null;
+  const hasPriorityServiceTier = hasPriorityServiceTierSpecialSetting(specialSettings);
 
   const handleCopyTimeline = () => {
     if (!providerChain) return;
@@ -245,6 +249,18 @@ export function MetadataTab({
                 </div>
               )}
 
+              {hasPriorityServiceTier ? (
+                <div className="flex justify-between items-center col-span-2">
+                  <span className="text-muted-foreground">{t("billingDetails.fast")}:</span>
+                  <Badge
+                    variant="outline"
+                    className="text-xs bg-orange-50 text-orange-700 border-orange-200 dark:bg-orange-950/30 dark:text-orange-300 dark:border-orange-800"
+                  >
+                    {t("billingDetails.fastPriority")}
+                  </Badge>
+                </div>
+              ) : null}
+
               {pricingResolution && pricingSourceLabel ? (
                 <>
                   <div className="flex justify-between col-span-2">

+ 17 - 1
src/app/[locale]/dashboard/logs/_components/error-details-dialog/components/SummaryTab.tsx

@@ -20,7 +20,10 @@ import { Button } from "@/components/ui/button";
 import { Link } from "@/i18n/routing";
 import { cn, formatTokenAmount } from "@/lib/utils";
 import { formatCurrency } from "@/lib/utils/currency";
-import { getPricingResolutionSpecialSetting } from "@/lib/utils/special-settings";
+import {
+  getPricingResolutionSpecialSetting,
+  hasPriorityServiceTierSpecialSetting,
+} from "@/lib/utils/special-settings";
 import { getFake200ReasonKey } from "../../fake200-reason";
 import {
   calculateOutputRate,
@@ -72,6 +75,7 @@ export function SummaryTab({
   const pricingSourceLabel = pricingResolution
     ? t(`billingDetails.pricingSource.${pricingResolution.source}`)
     : null;
+  const hasPriorityServiceTier = hasPriorityServiceTierSpecialSetting(specialSettings);
   const isFake200PostStreamFailure =
     typeof errorMessage === "string" && errorMessage.startsWith("FAKE_200_");
   const fake200Code =
@@ -358,6 +362,18 @@ export function SummaryTab({
                 </div>
               )}
 
+              {hasPriorityServiceTier ? (
+                <div className="flex justify-between items-center col-span-2">
+                  <span className="text-muted-foreground">{t("billingDetails.fast")}:</span>
+                  <Badge
+                    variant="outline"
+                    className="text-xs bg-orange-50 text-orange-700 border-orange-200 dark:bg-orange-950/30 dark:text-orange-300 dark:border-orange-800"
+                  >
+                    {t("billingDetails.fastPriority")}
+                  </Badge>
+                </div>
+              ) : null}
+
               {pricingResolution && pricingSourceLabel ? (
                 <>
                   <div className="flex justify-between col-span-2">

+ 22 - 3
src/app/[locale]/settings/prices/_components/provider-pricing-dialog.tsx

@@ -139,14 +139,26 @@ export function ProviderPricingDialog({ price, trigger, onSuccess }: ProviderPri
                     <div className="font-mono">
                       {formatTokenPrice(providerPricing.input_cost_per_token as number | undefined)}
                     </div>
+                    {typeof providerPricing.input_cost_per_token_priority === "number" ? (
+                      <div className="font-mono text-xs text-orange-600 dark:text-orange-400">
+                        {t("providerPricing.priority")}: {formatTokenPrice(
+                          providerPricing.input_cost_per_token_priority as number | undefined
+                        )}
+                      </div>
+                    ) : null}
                   </div>
                   <div>
                     <div className="text-muted-foreground">{t("providerPricing.output")}</div>
                     <div className="font-mono">
-                      {formatTokenPrice(
-                        providerPricing.output_cost_per_token as number | undefined
-                      )}
+                      {formatTokenPrice(providerPricing.output_cost_per_token as number | undefined)}
                     </div>
+                    {typeof providerPricing.output_cost_per_token_priority === "number" ? (
+                      <div className="font-mono text-xs text-orange-600 dark:text-orange-400">
+                        {t("providerPricing.priority")}: {formatTokenPrice(
+                          providerPricing.output_cost_per_token_priority as number | undefined
+                        )}
+                      </div>
+                    ) : null}
                   </div>
                   <div>
                     <div className="text-muted-foreground">{t("providerPricing.cacheRead")}</div>
@@ -155,6 +167,13 @@ export function ProviderPricingDialog({ price, trigger, onSuccess }: ProviderPri
                         providerPricing.cache_read_input_token_cost as number | undefined
                       )}
                     </div>
+                    {typeof providerPricing.cache_read_input_token_cost_priority === "number" ? (
+                      <div className="font-mono text-xs text-orange-600 dark:text-orange-400">
+                        {t("providerPricing.priority")}: {formatTokenPrice(
+                          providerPricing.cache_read_input_token_cost_priority as number | undefined
+                        )}
+                      </div>
+                    ) : null}
                   </div>
                 </div>
               </div>

+ 141 - 22
src/app/v1/_lib/proxy/response-handler.ts

@@ -139,6 +139,94 @@ function ensurePricingResolutionSpecialSetting(
   });
 }
 
+/**
+ * Reads the `service_tier` value the client put on a Codex request.
+ *
+ * @param session - Proxy session whose provider type and request message are inspected.
+ * @returns The request's `service_tier` when it is a string; `null` when the provider
+ *   type is not "codex" or the field is missing / not a string.
+ */
+function getRequestedCodexServiceTier(session: ProxySession): string | null {
+  const isCodexProvider = session.provider?.providerType === "codex";
+  if (!isCodexProvider) return null;
+
+  const { service_tier: requestedTier } = session.request.message as Record<string, unknown>;
+  if (typeof requestedTier !== "string") return null;
+  return requestedTier;
+}
+
+/**
+ * Extracts the `service_tier` reported inside a response body.
+ *
+ * The text is first tried as a single JSON document; if that yields nothing and the
+ * text looks like SSE, every parsed event is scanned instead. In both modes the
+ * top-level `service_tier` and the nested `response.service_tier` are considered,
+ * and the last non-blank string seen wins.
+ *
+ * @param responseText - Raw response body (JSON document or SSE stream text).
+ * @returns The trimmed service tier, or `null` when none is present.
+ */
+export function parseServiceTierFromResponseText(responseText: string): string | null {
+  // Returns the last non-blank tier found in `payload`, or `fallback` if there is none.
+  const lastTierIn = (payload: Record<string, unknown>, fallback: string | null): string | null => {
+    let tier = fallback;
+    const candidates: unknown[] = [payload.service_tier];
+    const nested = payload.response;
+    if (nested && typeof nested === "object") {
+      candidates.push((nested as Record<string, unknown>).service_tier);
+    }
+    for (const candidate of candidates) {
+      if (typeof candidate !== "string") continue;
+      const trimmed = candidate.trim();
+      if (trimmed) tier = trimmed;
+    }
+    return tier;
+  };
+
+  let detectedTier: string | null = null;
+
+  try {
+    const root: unknown = JSON.parse(responseText);
+    const isPlainObject = Boolean(root) && typeof root === "object" && !Array.isArray(root);
+    if (isPlainObject) {
+      detectedTier = lastTierIn(root as Record<string, unknown>, detectedTier);
+    }
+  } catch {
+    // Not a single JSON document; the SSE scan below gets its chance instead.
+  }
+
+  if (detectedTier) return detectedTier;
+  if (!isSSEText(responseText)) return detectedTier;
+
+  for (const { data } of parseSSEData(responseText)) {
+    if (!data || typeof data !== "object") continue;
+    detectedTier = lastTierIn(data as Record<string, unknown>, detectedTier);
+  }
+
+  return detectedTier;
+}
+
+/**
+ * Decides whether priority-tier billing applies to this request.
+ *
+ * The tier reported by the response takes precedence; the tier requested by the
+ * client is consulted only when the response reported none.
+ *
+ * @param session - Proxy session used to look up the requested Codex service tier.
+ * @param actualServiceTier - Tier parsed from the response, or `null` if absent.
+ * @returns `true` when the deciding tier is exactly "priority".
+ */
+function isPriorityServiceTierApplied(
+  session: ProxySession,
+  actualServiceTier: string | null
+): boolean {
+  const decidingTier = actualServiceTier ?? getRequestedCodexServiceTier(session);
+  return decidingTier === "priority";
+}
+
+/**
+ * Records a single `codex_service_tier_result` special setting on the session.
+ *
+ * Does nothing for non-Codex providers or when such a setting is already present.
+ *
+ * @param session - Proxy session that receives the special setting.
+ * @param actualServiceTier - Tier parsed from the response, or `null` if absent.
+ */
+function ensureCodexServiceTierResultSpecialSetting(
+  session: ProxySession,
+  actualServiceTier: string | null
+): void {
+  if (session.provider?.providerType !== "codex") return;
+
+  const requestedServiceTier = getRequestedCodexServiceTier(session);
+  const effectivePriority = isPriorityServiceTierApplied(session, actualServiceTier);
+
+  const alreadyRecorded = session
+    .getSpecialSettings()
+    ?.some((setting) => setting.type === "codex_service_tier_result");
+  if (alreadyRecorded) return;
+
+  // The setting counts as a hit when priority applies or any tier was seen at all.
+  const tierWasObserved = requestedServiceTier != null || actualServiceTier != null;
+
+  session.addSpecialSetting({
+    type: "codex_service_tier_result",
+    scope: "response",
+    hit: effectivePriority || tierWasObserved,
+    requestedServiceTier,
+    actualServiceTier,
+    effectivePriority,
+  });
+}
+
 type FinalizeDeferredStreamingResult = {
   /**
    * “内部结算用”的状态码。
@@ -761,6 +849,12 @@ export class ProxyResponseHandler {
         const usageResult = parseUsageFromResponseText(responseText, provider.providerType);
         usageRecord = usageResult.usageRecord;
         usageMetrics = usageResult.usageMetrics;
+        const actualServiceTier = parseServiceTierFromResponseText(responseText);
+        ensureCodexServiceTierResultSpecialSetting(session, actualServiceTier);
+        const priorityServiceTierApplied = isPriorityServiceTierApplied(
+          session,
+          actualServiceTier
+        );
 
         if (usageMetrics) {
           usageMetrics = normalizeUsageWithSwap(
@@ -808,11 +902,12 @@ export class ProxyResponseHandler {
             usageMetrics,
             provider,
             provider.costMultiplier,
-            session.getContext1mApplied()
+            session.getContext1mApplied(),
+            priorityServiceTierApplied
           );
 
           // 追踪消费到 Redis(用于限流)
-          await trackCostToRedis(session, usageMetrics);
+          await trackCostToRedis(session, usageMetrics, priorityServiceTierApplied);
         }
 
         // Calculate cost for session tracking (with multiplier) and Langfuse (raw)
@@ -829,7 +924,8 @@ export class ProxyResponseHandler {
                   usageMetrics,
                   resolvedPricing.priceData,
                   provider.costMultiplier,
-                  session.getContext1mApplied()
+                  session.getContext1mApplied(),
+                  priorityServiceTierApplied
                 );
                 if (cost.gt(0)) {
                   costUsdStr = cost.toString();
@@ -840,7 +936,8 @@ export class ProxyResponseHandler {
                     usageMetrics,
                     resolvedPricing.priceData,
                     1.0,
-                    session.getContext1mApplied()
+                    session.getContext1mApplied(),
+                    priorityServiceTierApplied
                   );
                   if (rawCost.gt(0)) {
                     rawCostUsdStr = rawCost.toString();
@@ -853,7 +950,8 @@ export class ProxyResponseHandler {
                   costBreakdown = calculateRequestCostBreakdown(
                     usageMetrics,
                     resolvedPricing.priceData,
-                    session.getContext1mApplied()
+                    session.getContext1mApplied(),
+                    priorityServiceTierApplied
                   );
                 } catch {
                   /* non-critical */
@@ -1228,8 +1326,7 @@ export class ProxyResponseHandler {
                 headBufferedBytes += bytes;
               }
             } else {
-              inTailMode = true;
-              pushToTail();
+              pushChunk(text, bytes);
             }
           };
           const decoder = new TextDecoder();
@@ -1617,7 +1714,7 @@ export class ProxyResponseHandler {
     const statusCode = response.status;
 
     // 使用 AsyncTaskManager 管理后台处理任务
-    const taskId = `stream-${messageContext.id}`;
+    const taskId = `stream-${messageContext?.id || `unknown-${Date.now()}`}`;
     const abortController = new AbortController();
 
     // ⭐ 提升 idleTimeoutId 到外部作用域,以便客户端断开时能清除
@@ -1744,6 +1841,13 @@ export class ProxyResponseHandler {
         const usageResult = parseUsageFromResponseText(allContent, provider.providerType);
         usageForCost = usageResult.usageMetrics;
 
+        const actualServiceTier = parseServiceTierFromResponseText(allContent);
+        ensureCodexServiceTierResultSpecialSetting(session, actualServiceTier);
+        const priorityServiceTierApplied = isPriorityServiceTierApplied(
+          session,
+          actualServiceTier
+        );
+
         if (usageForCost) {
           usageForCost = normalizeUsageWithSwap(
             usageForCost,
@@ -1785,11 +1889,12 @@ export class ProxyResponseHandler {
           usageForCost,
           provider,
           provider.costMultiplier,
-          session.getContext1mApplied()
+          session.getContext1mApplied(),
+          priorityServiceTierApplied
         );
 
         // 追踪消费到 Redis(用于限流)
-        await trackCostToRedis(session, usageForCost);
+        await trackCostToRedis(session, usageForCost, priorityServiceTierApplied);
 
         // Calculate cost for session tracking (with multiplier) and Langfuse (raw)
         let costUsdStr: string | undefined;
@@ -1805,7 +1910,8 @@ export class ProxyResponseHandler {
                   usageForCost,
                   resolvedPricing.priceData,
                   provider.costMultiplier,
-                  session.getContext1mApplied()
+                  session.getContext1mApplied(),
+                  priorityServiceTierApplied
                 );
                 if (cost.gt(0)) {
                   costUsdStr = cost.toString();
@@ -1816,7 +1922,8 @@ export class ProxyResponseHandler {
                     usageForCost,
                     resolvedPricing.priceData,
                     1.0,
-                    session.getContext1mApplied()
+                    session.getContext1mApplied(),
+                    priorityServiceTierApplied
                   );
                   if (rawCost.gt(0)) {
                     rawCostUsdStr = rawCost.toString();
@@ -1829,7 +1936,8 @@ export class ProxyResponseHandler {
                   costBreakdown = calculateRequestCostBreakdown(
                     usageForCost,
                     resolvedPricing.priceData,
-                    session.getContext1mApplied()
+                    session.getContext1mApplied(),
+                    priorityServiceTierApplied
                   );
                 } catch {
                   /* non-critical */
@@ -2219,7 +2327,7 @@ export class ProxyResponseHandler {
   }
 }
 
-function extractUsageMetrics(value: unknown): UsageMetrics | null {
+export function extractUsageMetrics(value: unknown): UsageMetrics | null {
   if (!value || typeof value !== "object") {
     return null;
   }
@@ -2775,7 +2883,8 @@ async function updateRequestCostFromUsage(
   usage: UsageMetrics | null,
   provider: Provider | null,
   costMultiplier: number = 1.0,
-  context1mApplied: boolean = false
+  context1mApplied: boolean = false,
+  priorityServiceTierApplied: boolean = false
 ): Promise<void> {
   if (!usage) {
     logger.warn("[CostCalculation] No usage data, skipping cost update", {
@@ -2841,7 +2950,8 @@ async function updateRequestCostFromUsage(
       usage,
       resolvedPricing.priceData,
       costMultiplier,
-      context1mApplied
+      context1mApplied,
+      priorityServiceTierApplied
     );
 
     logger.info("[CostCalculation] Cost calculated successfully", {
@@ -2899,6 +3009,9 @@ export async function finalizeRequestStats(
 
   const providerIdForPersistence = providerIdOverride ?? session.provider?.id;
   const { usageMetrics } = parseUsageFromResponseText(responseText, provider.providerType);
+  const actualServiceTier = parseServiceTierFromResponseText(responseText);
+  ensureCodexServiceTierResultSpecialSetting(session, actualServiceTier);
+  const priorityServiceTierApplied = isPriorityServiceTierApplied(session, actualServiceTier);
   if (!usageMetrics) {
     await updateMessageRequestDetails(messageContext.id, {
       statusCode: statusCode,
@@ -2930,11 +3043,12 @@ export async function finalizeRequestStats(
     normalizedUsage,
     provider,
     provider.costMultiplier,
-    session.getContext1mApplied()
+    session.getContext1mApplied(),
+    priorityServiceTierApplied
   );
 
   // 5. 追踪消费到 Redis(用于限流)
-  await trackCostToRedis(session, normalizedUsage);
+  await trackCostToRedis(session, normalizedUsage, priorityServiceTierApplied);
 
   // 6. 更新 session usage
   if (session.sessionId) {
@@ -2948,7 +3062,8 @@ export async function finalizeRequestStats(
             normalizedUsage,
             resolvedPricing.priceData,
             provider.costMultiplier,
-            session.getContext1mApplied()
+            session.getContext1mApplied(),
+            priorityServiceTierApplied
           );
           if (cost.gt(0)) {
             costUsdStr = cost.toString();
@@ -3001,7 +3116,11 @@ export async function finalizeRequestStats(
 /**
  * 追踪消费到 Redis(用于限流)
  */
-async function trackCostToRedis(session: ProxySession, usage: UsageMetrics | null): Promise<void> {
+async function trackCostToRedis(
+  session: ProxySession,
+  usage: UsageMetrics | null,
+  priorityServiceTierApplied: boolean = false
+): Promise<void> {
   if (!usage || !session.sessionId) return;
 
   try {
@@ -3015,7 +3134,6 @@ async function trackCostToRedis(session: ProxySession, usage: UsageMetrics | nul
     const modelName = session.request.model;
     if (!modelName) return;
 
-    // 计算成本(应用倍率)- 使用 session 缓存避免重复查询
     const resolvedPricing = await session.getResolvedPricingByBillingSource(provider);
     if (!resolvedPricing) return;
 
@@ -3025,7 +3143,8 @@ async function trackCostToRedis(session: ProxySession, usage: UsageMetrics | nul
       usage,
       resolvedPricing.priceData,
       provider.costMultiplier,
-      session.getContext1mApplied()
+      session.getContext1mApplied(),
+      priorityServiceTierApplied
     );
     if (cost.lte(0)) return;
 

+ 54 - 20
src/lib/utils/cost-calculation.ts

@@ -137,15 +137,24 @@ export interface CostBreakdown {
 export function calculateRequestCostBreakdown(
   usage: UsageMetrics,
   priceData: ModelPriceData,
-  context1mApplied: boolean = false
+  context1mApplied: boolean = false,
+  priorityServiceTierApplied: boolean = false
 ): CostBreakdown {
   let inputBucket = new Decimal(0);
   let outputBucket = new Decimal(0);
   let cacheCreationBucket = new Decimal(0);
   let cacheReadBucket = new Decimal(0);
 
-  const inputCostPerToken = priceData.input_cost_per_token;
-  const outputCostPerToken = priceData.output_cost_per_token;
+  const baseInputCostPerToken = priceData.input_cost_per_token;
+  const baseOutputCostPerToken = priceData.output_cost_per_token;
+  const inputCostPerToken =
+    priorityServiceTierApplied && typeof priceData.input_cost_per_token_priority === "number"
+      ? priceData.input_cost_per_token_priority
+      : baseInputCostPerToken;
+  const outputCostPerToken =
+    priorityServiceTierApplied && typeof priceData.output_cost_per_token_priority === "number"
+      ? priceData.output_cost_per_token_priority
+      : baseOutputCostPerToken;
   const inputCostPerRequest = priceData.input_cost_per_request;
 
   // Per-request cost -> input bucket
@@ -162,19 +171,22 @@ export function calculateRequestCostBreakdown(
 
   const cacheCreation5mCost =
     priceData.cache_creation_input_token_cost ??
-    (inputCostPerToken != null ? inputCostPerToken * 1.25 : undefined);
+    (baseInputCostPerToken != null ? baseInputCostPerToken * 1.25 : undefined);
 
   const cacheCreation1hCost =
     priceData.cache_creation_input_token_cost_above_1hr ??
-    (inputCostPerToken != null ? inputCostPerToken * 2 : undefined) ??
+    (baseInputCostPerToken != null ? baseInputCostPerToken * 2 : undefined) ??
     cacheCreation5mCost;
 
   const cacheReadCost =
-    priceData.cache_read_input_token_cost ??
-    (inputCostPerToken != null
-      ? inputCostPerToken * 0.1
-      : outputCostPerToken != null
-        ? outputCostPerToken * 0.1
+    (priorityServiceTierApplied &&
+    typeof priceData.cache_read_input_token_cost_priority === "number"
+      ? priceData.cache_read_input_token_cost_priority
+      : priceData.cache_read_input_token_cost) ??
+    (baseInputCostPerToken != null
+      ? baseInputCostPerToken * 0.1
+      : baseOutputCostPerToken != null
+        ? baseOutputCostPerToken * 0.1
         : undefined);
 
   // Derive cache creation tokens by TTL
@@ -225,6 +237,7 @@ export function calculateRequestCostBreakdown(
   } else if (
     longContextThresholdExceeded &&
     context1mApplied &&
+    !priorityServiceTierApplied &&
     inputCostPerToken != null &&
     usage.input_tokens != null
   ) {
@@ -242,6 +255,7 @@ export function calculateRequestCostBreakdown(
   } else if (
     longContextThresholdExceeded &&
     context1mApplied &&
+    !priorityServiceTierApplied &&
     outputCostPerToken != null &&
     usage.output_tokens != null
   ) {
@@ -267,6 +281,7 @@ export function calculateRequestCostBreakdown(
   } else if (
     longContextThresholdExceeded &&
     context1mApplied &&
+    !priorityServiceTierApplied &&
     cacheCreation5mCost != null &&
     cache5mTokens != null
   ) {
@@ -280,6 +295,7 @@ export function calculateRequestCostBreakdown(
   // Cache creation 1h -> cache_creation bucket
   if (
     longContextThresholdExceeded &&
+    hasRealCacheCreationBase &&
     cacheCreation1hAboveThreshold != null &&
     cache1hTokens != null
   ) {
@@ -289,6 +305,7 @@ export function calculateRequestCostBreakdown(
   } else if (
     longContextThresholdExceeded &&
     context1mApplied &&
+    !priorityServiceTierApplied &&
     cacheCreation1hCost != null &&
     cache1hTokens != null
   ) {
@@ -351,12 +368,21 @@ export function calculateRequestCost(
   usage: UsageMetrics,
   priceData: ModelPriceData,
   multiplier: number = 1.0,
-  context1mApplied: boolean = false
+  context1mApplied: boolean = false,
+  priorityServiceTierApplied: boolean = false
 ): Decimal {
   const segments: Decimal[] = [];
 
-  const inputCostPerToken = priceData.input_cost_per_token;
-  const outputCostPerToken = priceData.output_cost_per_token;
+  const baseInputCostPerToken = priceData.input_cost_per_token;
+  const baseOutputCostPerToken = priceData.output_cost_per_token;
+  const inputCostPerToken =
+    priorityServiceTierApplied && typeof priceData.input_cost_per_token_priority === "number"
+      ? priceData.input_cost_per_token_priority
+      : baseInputCostPerToken;
+  const outputCostPerToken =
+    priorityServiceTierApplied && typeof priceData.output_cost_per_token_priority === "number"
+      ? priceData.output_cost_per_token_priority
+      : baseOutputCostPerToken;
   const inputCostPerRequest = priceData.input_cost_per_request;
 
   if (
@@ -372,19 +398,22 @@ export function calculateRequestCost(
 
   const cacheCreation5mCost =
     priceData.cache_creation_input_token_cost ??
-    (inputCostPerToken != null ? inputCostPerToken * 1.25 : undefined);
+    (baseInputCostPerToken != null ? baseInputCostPerToken * 1.25 : undefined);
 
   const cacheCreation1hCost =
     priceData.cache_creation_input_token_cost_above_1hr ??
-    (inputCostPerToken != null ? inputCostPerToken * 2 : undefined) ??
+    (baseInputCostPerToken != null ? baseInputCostPerToken * 2 : undefined) ??
     cacheCreation5mCost;
 
   const cacheReadCost =
-    priceData.cache_read_input_token_cost ??
-    (inputCostPerToken != null
-      ? inputCostPerToken * 0.1
-      : outputCostPerToken != null
-        ? outputCostPerToken * 0.1
+    (priorityServiceTierApplied &&
+    typeof priceData.cache_read_input_token_cost_priority === "number"
+      ? priceData.cache_read_input_token_cost_priority
+      : priceData.cache_read_input_token_cost) ??
+    (baseInputCostPerToken != null
+      ? baseInputCostPerToken * 0.1
+      : baseOutputCostPerToken != null
+        ? baseOutputCostPerToken * 0.1
         : undefined);
 
   // Derive cache creation tokens by TTL
@@ -434,6 +463,7 @@ export function calculateRequestCost(
   } else if (
     longContextThresholdExceeded &&
     context1mApplied &&
+    !priorityServiceTierApplied &&
     inputCostPerToken != null &&
     usage.input_tokens != null
   ) {
@@ -450,6 +480,7 @@ export function calculateRequestCost(
   } else if (
     longContextThresholdExceeded &&
     context1mApplied &&
+    !priorityServiceTierApplied &&
     outputCostPerToken != null &&
     usage.output_tokens != null
   ) {
@@ -475,6 +506,7 @@ export function calculateRequestCost(
   } else if (
     longContextThresholdExceeded &&
     context1mApplied &&
+    !priorityServiceTierApplied &&
     cacheCreation5mCost != null &&
     cache5mTokens != null
   ) {
@@ -488,6 +520,7 @@ export function calculateRequestCost(
   // 缓存创建费用(1小时 TTL):优先级 explicit long-context > context1m fallback > 普通
   if (
     longContextThresholdExceeded &&
+    hasRealCacheCreationBase &&
     cacheCreation1hAboveThreshold != null &&
     cache1hTokens != null
   ) {
@@ -495,6 +528,7 @@ export function calculateRequestCost(
   } else if (
     longContextThresholdExceeded &&
     context1mApplied &&
+    !priorityServiceTierApplied &&
     cacheCreation1hCost != null &&
     cache1hTokens != null
   ) {

+ 35 - 0
src/lib/utils/special-settings.ts

@@ -110,11 +110,20 @@ function buildSettingKey(setting: SpecialSetting): string {
     case "pricing_resolution":
       return JSON.stringify([
         setting.type,
+        setting.hit,
         setting.modelName,
         setting.resolvedModelName,
         setting.resolvedPricingProviderKey,
         setting.source,
       ]);
+    case "codex_service_tier_result":
+      return JSON.stringify([
+        setting.type,
+        setting.hit,
+        setting.requestedServiceTier,
+        setting.actualServiceTier,
+        setting.effectivePriority,
+      ]);
     default: {
       // 兜底:保证即使未来扩展类型也不会导致运行时崩溃
       const _exhaustive: never = setting;
@@ -182,6 +191,17 @@ export function hasPriorityServiceTierSpecialSetting(
     return false;
   }
 
+  const codexServiceTierResult = specialSettings.find(
+    (setting): setting is Extract<SpecialSetting, { type: "codex_service_tier_result" }> =>
+      setting.type === "codex_service_tier_result"
+  );
+  if (codexServiceTierResult) {
+    if (codexServiceTierResult.actualServiceTier != null) {
+      return codexServiceTierResult.actualServiceTier === "priority";
+    }
+    return codexServiceTierResult.effectivePriority;
+  }
+
   return specialSettings.some(
     (setting) =>
       setting.type === "provider_parameter_override" &&
@@ -192,6 +212,21 @@ export function hasPriorityServiceTierSpecialSetting(
   );
 }
 
+/**
+ * Finds the first `codex_service_tier_result` entry in a special-settings list.
+ *
+ * @param specialSettings - Settings list to search; may be absent, `null`, or empty.
+ * @returns The first matching setting, or `null` when the input is not an array or
+ *   contains no such entry.
+ */
+export function getPriorityServiceTierSpecialSetting(
+  specialSettings?: SpecialSetting[] | null
+): Extract<SpecialSetting, { type: "codex_service_tier_result" }> | null {
+  if (!Array.isArray(specialSettings)) return null;
+
+  for (const setting of specialSettings) {
+    if (setting.type === "codex_service_tier_result") return setting;
+  }
+
+  return null;
+}
+
 export function getPricingResolutionSpecialSetting(
   specialSettings?: SpecialSetting[] | null
 ): Extract<SpecialSetting, { type: "pricing_resolution" }> | null {

+ 11 - 1
src/types/special-settings.ts

@@ -17,7 +17,8 @@ export type SpecialSetting =
   | AnthropicCacheTtlHeaderOverrideSpecialSetting
   | AnthropicContext1mHeaderOverrideSpecialSetting
   | GeminiGoogleSearchOverrideSpecialSetting
-  | PricingResolutionSpecialSetting;
+  | PricingResolutionSpecialSetting
+  | CodexServiceTierResultSpecialSetting;
 
 export type SpecialSettingChangeValue = string | number | boolean | null;
 
@@ -215,3 +216,12 @@ export type PricingResolutionSpecialSetting = {
     | "single_provider_top_level"
     | "official_fallback";
 };
+
+export type CodexServiceTierResultSpecialSetting = { // Outcome of Codex service-tier detection for one response.
+  type: "codex_service_tier_result"; // Discriminant tag within the SpecialSetting union.
+  scope: "response"; // This setting is always response-scoped.
+  hit: boolean; // The response handler sets this when priority applied or any tier (requested/actual) was seen.
+  requestedServiceTier: string | null; // `service_tier` from the request message; null if not a string.
+  actualServiceTier: string | null; // `service_tier` parsed from the response text; null if none was found.
+  effectivePriority: boolean; // True when the actual tier (or, if absent, the requested tier) is "priority".
+};

+ 180 - 2
tests/integration/billing-model-source.test.ts

@@ -142,12 +142,14 @@ function createSession({
   sessionId,
   messageId,
   providerOverrides,
+  requestMessage,
 }: {
   originalModel: string;
   redirectedModel: string;
   sessionId: string;
   messageId: number;
   providerOverrides?: Record<string, unknown>;
+  requestMessage?: Record<string, unknown>;
 }): ProxySession {
   const session = new (
     ProxySession as unknown as {
@@ -169,7 +171,7 @@ function createSession({
     requestUrl: new URL("http://localhost/v1/messages"),
     headers: new Headers(),
     headerLog: "",
-    request: { message: {}, log: "(test)", model: redirectedModel },
+    request: { message: requestMessage ?? {}, log: "(test)", model: redirectedModel },
     userAgent: null,
     context: {},
     clientAbortSignal: null,
@@ -220,11 +222,15 @@ function createSession({
   return session;
 }
 
-function createNonStreamResponse(usage: { input_tokens: number; output_tokens: number }): Response {
+function createNonStreamResponse(
+  usage: { input_tokens: number; output_tokens: number },
+  extras?: Record<string, unknown>
+): Response {
   return new Response(
     JSON.stringify({
       type: "message",
       usage,
+      ...(extras ?? {}),
     }),
     {
       status: 200,
@@ -434,6 +440,178 @@ describe("Billing model source - Redis session cost vs DB cost", () => {
     expect(dbCosts[0]).toBe("50");
     expect(sessionCosts[0]).toBe("50");
   });
+
+  it("codex fast: uses priority pricing when response reports service_tier=priority", async () => {
+    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected"));
+    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
+    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
+    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
+    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
+    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);
+
+    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
+      if (modelName === "gpt-5.4") {
+        return makePriceRecord(modelName, {
+          mode: "responses",
+          model_family: "gpt",
+          litellm_provider: "chatgpt",
+          pricing: {
+            openai: {
+              input_cost_per_token: 1,
+              output_cost_per_token: 10,
+              input_cost_per_token_priority: 2,
+              output_cost_per_token_priority: 20,
+            },
+          },
+        });
+      }
+      return null;
+    });
+
+    const dbCosts: string[] = [];
+    vi.mocked(updateMessageRequestCost).mockImplementation(async (_id: number, costUsd: unknown) => {
+      dbCosts.push(String(costUsd));
+    });
+
+    const sessionCosts: string[] = [];
+    vi.mocked(SessionManager.updateSessionUsage).mockImplementation(
+      async (_sessionId: string, payload: Record<string, unknown>) => {
+        if (typeof payload.costUsd === "string") {
+          sessionCosts.push(payload.costUsd);
+        }
+      }
+    );
+
+    const session = createSession({
+      originalModel: "gpt-5.4",
+      redirectedModel: "gpt-5.4",
+      sessionId: "sess-gpt54-priority-actual",
+      messageId: 3200,
+      providerOverrides: {
+        name: "ChatGPT",
+        url: "https://chatgpt.com/backend-api/codex",
+        providerType: "codex",
+      },
+      requestMessage: { service_tier: "default" },
+    });
+
+    const response = createNonStreamResponse(
+      { input_tokens: 2, output_tokens: 3 },
+      { service_tier: "priority" }
+    );
+    await ProxyResponseHandler.dispatch(session, response);
+    await drainAsyncTasks();
+
+    expect(dbCosts[0]).toBe("64");
+    expect(sessionCosts[0]).toBe("64");
+  });
+
+  it("codex fast: falls back to requested priority pricing when response omits service_tier", async () => {
+    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected"));
+    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
+    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
+    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
+    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
+    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);
+
+    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
+      if (modelName === "gpt-5.4") {
+        return makePriceRecord(modelName, {
+          mode: "responses",
+          model_family: "gpt",
+          litellm_provider: "chatgpt",
+          pricing: {
+            openai: {
+              input_cost_per_token: 1,
+              output_cost_per_token: 10,
+              input_cost_per_token_priority: 2,
+              output_cost_per_token_priority: 20,
+            },
+          },
+        });
+      }
+      return null;
+    });
+
+    const dbCosts: string[] = [];
+    vi.mocked(updateMessageRequestCost).mockImplementation(async (_id: number, costUsd: unknown) => {
+      dbCosts.push(String(costUsd));
+    });
+
+    const session = createSession({
+      originalModel: "gpt-5.4",
+      redirectedModel: "gpt-5.4",
+      sessionId: "sess-gpt54-priority-requested",
+      messageId: 3201,
+      providerOverrides: {
+        name: "ChatGPT",
+        url: "https://chatgpt.com/backend-api/codex",
+        providerType: "codex",
+      },
+      requestMessage: { service_tier: "priority" },
+    });
+
+    const response = createNonStreamResponse({ input_tokens: 2, output_tokens: 3 });
+    await ProxyResponseHandler.dispatch(session, response);
+    await drainAsyncTasks();
+
+    expect(dbCosts[0]).toBe("64");
+  });
+
+  it("codex fast: does not use priority pricing when response explicitly reports non-priority tier", async () => {
+    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected"));
+    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
+    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
+    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
+    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
+    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);
+
+    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
+      if (modelName === "gpt-5.4") {
+        return makePriceRecord(modelName, {
+          mode: "responses",
+          model_family: "gpt",
+          litellm_provider: "chatgpt",
+          pricing: {
+            openai: {
+              input_cost_per_token: 1,
+              output_cost_per_token: 10,
+              input_cost_per_token_priority: 2,
+              output_cost_per_token_priority: 20,
+            },
+          },
+        });
+      }
+      return null;
+    });
+
+    const dbCosts: string[] = [];
+    vi.mocked(updateMessageRequestCost).mockImplementation(async (_id: number, costUsd: unknown) => {
+      dbCosts.push(String(costUsd));
+    });
+
+    const session = createSession({
+      originalModel: "gpt-5.4",
+      redirectedModel: "gpt-5.4",
+      sessionId: "sess-gpt54-priority-downgraded",
+      messageId: 3202,
+      providerOverrides: {
+        name: "ChatGPT",
+        url: "https://chatgpt.com/backend-api/codex",
+        providerType: "codex",
+      },
+      requestMessage: { service_tier: "priority" },
+    });
+
+    const response = createNonStreamResponse(
+      { input_tokens: 2, output_tokens: 3 },
+      { service_tier: "default" }
+    );
+    await ProxyResponseHandler.dispatch(session, response);
+    await drainAsyncTasks();
+
+    expect(dbCosts[0]).toBe("32");
+  });
 });
 
 describe("价格表缺失/查询失败:不计费放行", () => {

+ 21 - 0
tests/unit/lib/cost-calculation-long-context.test.ts

@@ -22,4 +22,25 @@ describe("calculateRequestCost long-context", () => {
 
     expect(Number(cost.toString())).toBe(3.75);
   });
+
+  test("does not charge 1h cache long-context price when base cache creation price is missing", () => {
+    const cost = calculateRequestCost(
+      {
+        input_tokens: 250000,
+        cache_creation_1h_input_tokens: 1000,
+      },
+      {
+        mode: "chat",
+        model_family: "gpt",
+        input_cost_per_token: 0.0000025,
+        output_cost_per_token: 0.000015,
+        cache_creation_input_token_cost_above_1hr_above_272k_tokens: 0.5,
+      },
+      1,
+      false,
+      false
+    );
+
+    expect(Number(cost.toString())).toBe(0.63);
+  });
 });

+ 46 - 0
tests/unit/lib/cost-calculation-priority.test.ts

@@ -0,0 +1,46 @@
+import { describe, expect, test } from "vitest";
+import { calculateRequestCost } from "@/lib/utils/cost-calculation";
+import type { ModelPriceData } from "@/types/model-price";
+
+function makePriceData(overrides: Partial<ModelPriceData> = {}): ModelPriceData {
+  return {
+    mode: "responses",
+    input_cost_per_token: 1,
+    output_cost_per_token: 10,
+    cache_read_input_token_cost: 0.1,
+    input_cost_per_token_priority: 2,
+    output_cost_per_token_priority: 20,
+    cache_read_input_token_cost_priority: 0.2,
+    ...overrides,
+  };
+}
+
+describe("calculateRequestCost priority service tier", () => {
+  test("uses priority pricing fields when priority service tier is applied", () => {
+    const cost = calculateRequestCost(
+      { input_tokens: 2, output_tokens: 3, cache_read_input_tokens: 5 },
+      makePriceData(),
+      1,
+      false,
+      true
+    );
+
+    expect(Number(cost.toString())).toBe(65);
+  });
+
+  test("falls back to regular pricing when priority fields are absent", () => {
+    const cost = calculateRequestCost(
+      { input_tokens: 2, output_tokens: 3, cache_read_input_tokens: 5 },
+      makePriceData({
+        input_cost_per_token_priority: undefined,
+        output_cost_per_token_priority: undefined,
+        cache_read_input_token_cost_priority: undefined,
+      }),
+      1,
+      false,
+      true
+    );
+
+    expect(Number(cost.toString())).toBe(32.5);
+  });
+});

+ 46 - 1
tests/unit/lib/utils/special-settings.test.ts

@@ -1,6 +1,9 @@
 import { describe, expect, test } from "vitest";
 import type { SpecialSetting } from "@/types/special-settings";
-import { buildUnifiedSpecialSettings } from "@/lib/utils/special-settings";
+import {
+  buildUnifiedSpecialSettings,
+  hasPriorityServiceTierSpecialSetting,
+} from "@/lib/utils/special-settings";
 
 describe("buildUnifiedSpecialSettings", () => {
   test("无任何输入时应返回 null", () => {
@@ -159,3 +162,45 @@ describe("buildUnifiedSpecialSettings", () => {
     expect(settings?.filter((s) => s.type === "guard_intercept").length).toBe(1);
   });
 });
+
+describe("hasPriorityServiceTierSpecialSetting", () => {
+  test("returns true when codex actual service tier is priority", () => {
+    expect(
+      hasPriorityServiceTierSpecialSetting([
+        {
+          type: "codex_service_tier_result",
+          scope: "response",
+          hit: true,
+          requestedServiceTier: "default",
+          actualServiceTier: "priority",
+          effectivePriority: true,
+        },
+      ])
+    ).toBe(true);
+  });
+
+  test("returns false when codex actual service tier is non-priority even if request was priority", () => {
+    expect(
+      hasPriorityServiceTierSpecialSetting([
+        {
+          type: "provider_parameter_override",
+          scope: "provider",
+          providerId: 1,
+          providerName: "p",
+          providerType: "codex",
+          hit: true,
+          changed: true,
+          changes: [{ path: "service_tier", before: null, after: "priority", changed: true }],
+        },
+        {
+          type: "codex_service_tier_result",
+          scope: "response",
+          hit: true,
+          requestedServiceTier: "priority",
+          actualServiceTier: "default",
+          effectivePriority: false,
+        },
+      ])
+    ).toBe(false);
+  });
+});