Просмотр исходного кода

feat(observability): add hedge and client abort tracking to provider chain

Implements comprehensive observability for hedge (speculative execution) and client abort scenarios:

Backend (forwarder.ts):
- Record hedge_triggered when threshold timer fires and alternative provider launches
- Record hedge_winner when a provider wins the hedge race (first byte received)
- Record hedge_loser_cancelled when a provider loses and gets aborted
- Record client_abort when client disconnects (replaces generic system_error)

Frontend (provider-chain-popover.tsx, LogicTraceTab.tsx):
- Add icons and status colors for all 4 new reason types
- Correctly treat hedge_triggered as informational (not counted as an actual request)
- Display hedge flow with GitBranch/CheckCircle/XCircle/MinusCircle icons

Langfuse (trace-proxy-request.ts):
- Add hedge_winner to SUCCESS_REASONS set
- Add client_abort to ERROR_REASONS set
- Create hedge-trigger event observation with WARNING level

i18n:
- Add translations for 4 new reasons across 5 languages (en, zh-CN, zh-TW, ja, ru)
- Include timeline, description, and reason label translations

Tests:
- Add 22 new tests covering all new reason types
- Test isActualRequest(), getItemStatus(), isSuccessReason(), isErrorReason()

Related improvements:
- Add isProviderFinalized() utility to detect when provider info is reliable
- Show in-progress state in logs table and big-screen for unfinalized requests
- Prevent displaying stale provider names during hedge/fallback transitions

Co-Authored-By: Claude Opus 4.6 <[email protected]>
ding113 1 месяц назад
Родитель
Коммит
60a0fb2014

+ 17 - 3
messages/en/provider-chain.json

@@ -41,7 +41,11 @@
     "http2Fallback": "HTTP/2 Fallback",
     "clientError": "Client Error",
     "endpointPoolExhausted": "Endpoint Pool Exhausted",
-    "vendorTypeAllTimeout": "Vendor-Type All Endpoints Timeout"
+    "vendorTypeAllTimeout": "Vendor-Type All Endpoints Timeout",
+    "hedgeTriggered": "Hedge Triggered",
+    "hedgeWinner": "Hedge Winner",
+    "hedgeLoserCancelled": "Hedge Loser (Cancelled)",
+    "clientAbort": "Client Aborted"
   },
   "reasons": {
     "request_success": "Success",
@@ -56,7 +60,11 @@
     "initial_selection": "Initial Selection",
     "endpoint_pool_exhausted": "Endpoint Pool Exhausted",
     "vendor_type_all_timeout": "Vendor-Type All Endpoints Timeout",
-    "client_restriction_filtered": "Client Restricted"
+    "client_restriction_filtered": "Client Restricted",
+    "hedge_triggered": "Hedge Triggered",
+    "hedge_winner": "Hedge Winner",
+    "hedge_loser_cancelled": "Hedge Loser (Cancelled)",
+    "client_abort": "Client Aborted"
   },
   "filterReasons": {
     "rate_limited": "Rate Limited",
@@ -222,7 +230,13 @@
     "strictBlockNoEndpoints": "Strict mode: no endpoint candidates available, provider skipped without fallback",
     "strictBlockSelectorError": "Strict mode: endpoint selector encountered an error, provider skipped without fallback",
     "vendorTypeAllTimeout": "Vendor-Type All Endpoints Timeout (524)",
-    "vendorTypeAllTimeoutNote": "All endpoints for this vendor-type timed out. Vendor-type circuit breaker triggered."
+    "vendorTypeAllTimeoutNote": "All endpoints for this vendor-type timed out. Vendor-type circuit breaker triggered.",
+    "hedgeTriggered": "Hedge Threshold Exceeded (launching alternative)",
+    "hedgeWinner": "Hedge Race Winner (first byte received first)",
+    "hedgeLoserCancelled": "Hedge Race Loser (request cancelled)",
+    "clientAbort": "Client Disconnected (request aborted)",
+    "hedgeRace": "Hedge Race",
+    "hedgeThresholdExceeded": "First-byte timeout exceeded, alternative provider launched"
   },
   "selectionMethods": {
     "session_reuse": "Session Reuse",

+ 17 - 3
messages/ja/provider-chain.json

@@ -41,7 +41,11 @@
     "http2Fallback": "HTTP/2 フォールバック",
     "clientError": "クライアントエラー",
     "endpointPoolExhausted": "エンドポイントプール枯渇",
-    "vendorTypeAllTimeout": "ベンダータイプ全エンドポイントタイムアウト"
+    "vendorTypeAllTimeout": "ベンダータイプ全エンドポイントタイムアウト",
+    "hedgeTriggered": "Hedge 発動",
+    "hedgeWinner": "Hedge 競争勝者",
+    "hedgeLoserCancelled": "Hedge 競争敗者(キャンセル)",
+    "clientAbort": "クライアント中断"
   },
   "reasons": {
     "request_success": "成功",
@@ -56,7 +60,11 @@
     "initial_selection": "初期選択",
     "endpoint_pool_exhausted": "エンドポイントプール枯渇",
     "vendor_type_all_timeout": "ベンダータイプ全エンドポイントタイムアウト",
-    "client_restriction_filtered": "クライアント制限"
+    "client_restriction_filtered": "クライアント制限",
+    "hedge_triggered": "Hedge 発動",
+    "hedge_winner": "Hedge 競争勝者",
+    "hedge_loser_cancelled": "Hedge 競争敗者(キャンセル)",
+    "client_abort": "クライアント中断"
   },
   "filterReasons": {
     "rate_limited": "レート制限",
@@ -222,7 +230,13 @@
     "strictBlockNoEndpoints": "厳格モード:利用可能なエンドポイント候補がないため、フォールバックなしでプロバイダーをスキップ",
     "strictBlockSelectorError": "厳格モード:エンドポイントセレクターでエラーが発生したため、フォールバックなしでプロバイダーをスキップ",
     "vendorTypeAllTimeout": "ベンダータイプ全エンドポイントタイムアウト(524)",
-    "vendorTypeAllTimeoutNote": "このベンダータイプの全エンドポイントがタイムアウトしました。ベンダータイプサーキットブレーカーが発動しました。"
+    "vendorTypeAllTimeoutNote": "このベンダータイプの全エンドポイントがタイムアウトしました。ベンダータイプサーキットブレーカーが発動しました。",
+    "hedgeTriggered": "Hedge 閾値超過(代替プロバイダーを起動中)",
+    "hedgeWinner": "Hedge 競争勝者(最初にファーストバイトを受信)",
+    "hedgeLoserCancelled": "Hedge 競争敗者(リクエストキャンセル)",
+    "clientAbort": "クライアント切断(リクエスト中断)",
+    "hedgeRace": "Hedge 競争",
+    "hedgeThresholdExceeded": "ファーストバイトタイムアウト超過、代替プロバイダーを起動"
   },
   "selectionMethods": {
     "session_reuse": "セッション再利用",

+ 17 - 3
messages/ru/provider-chain.json

@@ -41,7 +41,11 @@
     "http2Fallback": "Откат HTTP/2",
     "clientError": "Ошибка клиента",
     "endpointPoolExhausted": "Пул конечных точек исчерпан",
-    "vendorTypeAllTimeout": "Тайм-аут всех конечных точек"
+    "vendorTypeAllTimeout": "Тайм-аут всех конечных точек",
+    "hedgeTriggered": "Hedge запущен",
+    "hedgeWinner": "Победитель Hedge-гонки",
+    "hedgeLoserCancelled": "Проигравший Hedge-гонки (отменён)",
+    "clientAbort": "Клиент прервал запрос"
   },
   "reasons": {
     "request_success": "Успешно",
@@ -56,7 +60,11 @@
     "initial_selection": "Первоначальный выбор",
     "endpoint_pool_exhausted": "Пул конечных точек исчерпан",
     "vendor_type_all_timeout": "Тайм-аут всех конечных точек типа поставщика",
-    "client_restriction_filtered": "Клиент ограничен"
+    "client_restriction_filtered": "Клиент ограничен",
+    "hedge_triggered": "Hedge запущен",
+    "hedge_winner": "Победитель Hedge-гонки",
+    "hedge_loser_cancelled": "Проигравший Hedge-гонки (отменён)",
+    "client_abort": "Клиент прервал запрос"
   },
   "filterReasons": {
     "rate_limited": "Ограничение скорости",
@@ -222,7 +230,13 @@
     "strictBlockNoEndpoints": "Строгий режим: нет доступных кандидатов конечных точек, провайдер пропущен без отката",
     "strictBlockSelectorError": "Строгий режим: ошибка селектора конечных точек, провайдер пропущен без отката",
     "vendorTypeAllTimeout": "Тайм-аут всех конечных точек типа поставщика (524)",
-    "vendorTypeAllTimeoutNote": "Все конечные точки этого типа поставщика превысили тайм-аут. Активирован размыкатель типа поставщика."
+    "vendorTypeAllTimeoutNote": "Все конечные точки этого типа поставщика превысили тайм-аут. Активирован размыкатель типа поставщика.",
+    "hedgeTriggered": "Порог Hedge превышен (запускается альтернативный провайдер)",
+    "hedgeWinner": "Победитель Hedge-гонки (первый получил начальный байт)",
+    "hedgeLoserCancelled": "Проигравший Hedge-гонки (запрос отменён)",
+    "clientAbort": "Клиент отключился (запрос прерван)",
+    "hedgeRace": "Hedge-гонка",
+    "hedgeThresholdExceeded": "Тайм-аут первого байта превышен, запущен альтернативный провайдер"
   },
   "selectionMethods": {
     "session_reuse": "Повторное использование сессии",

+ 17 - 3
messages/zh-CN/provider-chain.json

@@ -41,7 +41,11 @@
     "http2Fallback": "HTTP/2 回退",
     "clientError": "客户端错误",
     "endpointPoolExhausted": "端点池耗尽",
-    "vendorTypeAllTimeout": "供应商类型全端点超时"
+    "vendorTypeAllTimeout": "供应商类型全端点超时",
+    "hedgeTriggered": "Hedge 已触发",
+    "hedgeWinner": "Hedge 竞速赢家",
+    "hedgeLoserCancelled": "Hedge 竞速输家(已取消)",
+    "clientAbort": "客户端中断"
   },
   "reasons": {
     "request_success": "成功",
@@ -56,7 +60,11 @@
     "initial_selection": "首次选择",
     "endpoint_pool_exhausted": "端点池耗尽",
     "vendor_type_all_timeout": "供应商类型全端点超时",
-    "client_restriction_filtered": "客户端受限"
+    "client_restriction_filtered": "客户端受限",
+    "hedge_triggered": "Hedge 已触发",
+    "hedge_winner": "Hedge 竞速赢家",
+    "hedge_loser_cancelled": "Hedge 竞速输家(已取消)",
+    "client_abort": "客户端中断"
   },
   "filterReasons": {
     "rate_limited": "速率限制",
@@ -222,7 +230,13 @@
     "strictBlockNoEndpoints": "严格模式:无可用端点候选,跳过该供应商且不降级",
     "strictBlockSelectorError": "严格模式:端点选择器发生错误,跳过该供应商且不降级",
     "vendorTypeAllTimeout": "供应商类型全端点超时(524)",
-    "vendorTypeAllTimeoutNote": "该供应商类型的所有端点均超时,已触发供应商类型临时熔断。"
+    "vendorTypeAllTimeoutNote": "该供应商类型的所有端点均超时,已触发供应商类型临时熔断。",
+    "hedgeTriggered": "Hedge 阈值超出(正在启动备选供应商)",
+    "hedgeWinner": "Hedge 竞速赢家(最先收到首字节)",
+    "hedgeLoserCancelled": "Hedge 竞速输家(请求已取消)",
+    "clientAbort": "客户端已断开连接(请求中断)",
+    "hedgeRace": "Hedge 竞速",
+    "hedgeThresholdExceeded": "首字节超时,已启动备选供应商"
   },
   "selectionMethods": {
     "session_reuse": "会话复用",

+ 17 - 3
messages/zh-TW/provider-chain.json

@@ -41,7 +41,11 @@
     "http2Fallback": "HTTP/2 回退",
     "clientError": "客戶端錯誤",
     "endpointPoolExhausted": "端點池耗盡",
-    "vendorTypeAllTimeout": "供應商類型全端點逾時"
+    "vendorTypeAllTimeout": "供應商類型全端點逾時",
+    "hedgeTriggered": "Hedge 已觸發",
+    "hedgeWinner": "Hedge 競速贏家",
+    "hedgeLoserCancelled": "Hedge 競速輸家(已取消)",
+    "clientAbort": "客戶端中斷"
   },
   "reasons": {
     "request_success": "成功",
@@ -56,7 +60,11 @@
     "initial_selection": "首次選擇",
     "endpoint_pool_exhausted": "端點池耗盡",
     "vendor_type_all_timeout": "供應商類型全端點逾時",
-    "client_restriction_filtered": "客戶端受限"
+    "client_restriction_filtered": "客戶端受限",
+    "hedge_triggered": "Hedge 已觸發",
+    "hedge_winner": "Hedge 競速贏家",
+    "hedge_loser_cancelled": "Hedge 競速輸家(已取消)",
+    "client_abort": "客戶端中斷"
   },
   "filterReasons": {
     "rate_limited": "速率限制",
@@ -222,7 +230,13 @@
     "strictBlockNoEndpoints": "嚴格模式:無可用端點候選,跳過該供應商且不降級",
     "strictBlockSelectorError": "嚴格模式:端點選擇器發生錯誤,跳過該供應商且不降級",
     "vendorTypeAllTimeout": "供應商類型全端點逾時(524)",
-    "vendorTypeAllTimeoutNote": "該供應商類型的所有端點均逾時,已觸發供應商類型臨時熔斷。"
+    "vendorTypeAllTimeoutNote": "該供應商類型的所有端點均逾時,已觸發供應商類型臨時熔斷。",
+    "hedgeTriggered": "Hedge 閾值超出(正在啟動備選供應商)",
+    "hedgeWinner": "Hedge 競速贏家(最先收到首位元組)",
+    "hedgeLoserCancelled": "Hedge 競速輸家(請求已取消)",
+    "clientAbort": "客戶端已斷開連接(請求中斷)",
+    "hedgeRace": "Hedge 競速",
+    "hedgeThresholdExceeded": "首位元組逾時,已啟動備選供應商"
   },
   "selectionMethods": {
     "session_reuse": "會話複用",

+ 5 - 2
src/actions/dashboard-realtime.ts

@@ -207,14 +207,17 @@ export async function getDashboardRealtimeData(): Promise<ActionResult<Dashboard
       // - 如果有 durationMs(已完成的请求),使用实际值
       // - 如果没有(进行中的请求),计算从开始到现在的耗时
       const latency = item.durationMs ?? now - item.startTime;
+      // Provider/status are unreliable before finalization (may change due to fallback/hedge).
+      // Use durationMs or statusCode as a finalization signal.
+      const isFinalized = item.statusCode != null || item.durationMs != null;
 
       return {
         id: item.sessionId ?? `req-${item.id}`, // 使用 sessionId,如果没有则用请求ID
         user: item.userName,
         model: item.originalModel ?? item.model ?? "Unknown", // 优先使用计费模型
-        provider: item.providerName ?? "Unknown",
+        provider: isFinalized ? (item.providerName ?? "Unknown") : "",
         latency,
-        status: item.statusCode ?? 200,
+        status: isFinalized ? (item.statusCode ?? 200) : 0,
         cost: parseFloat(item.costUsd ?? "0"),
         startTime: item.startTime,
       };

+ 34 - 13
src/app/[locale]/dashboard/logs/_components/error-details-dialog/components/LogicTraceTab.tsx

@@ -34,7 +34,11 @@ function getRequestStatus(item: ProviderChainItem): StepStatus {
   if (item.reason === "session_reuse" || item.selectionMethod === "session_reuse") {
     return "session_reuse";
   }
-  if (item.reason === "request_success" || item.reason === "retry_success") {
+  if (
+    item.reason === "request_success" ||
+    item.reason === "retry_success" ||
+    item.reason === "hedge_winner"
+  ) {
     return "success";
   }
   if (
@@ -43,11 +47,13 @@ function getRequestStatus(item: ProviderChainItem): StepStatus {
     item.reason === "resource_not_found" ||
     item.reason === "client_error_non_retryable" ||
     item.reason === "endpoint_pool_exhausted" ||
-    item.reason === "concurrent_limit_failed"
+    item.reason === "concurrent_limit_failed" ||
+    item.reason === "hedge_loser_cancelled" ||
+    item.reason === "client_abort"
   ) {
     return "failure";
   }
-  // http2_fallback and other retry-related reasons are treated as pending/in-progress
+  // hedge_triggered, http2_fallback and other retry-related reasons are treated as pending/in-progress
   return "pending";
 }
 
@@ -710,23 +716,38 @@ export function LogicTraceTab({
               item.reason === "session_reuse" || item.selectionMethod === "session_reuse";
 
             // Determine icon based on type
+            const isHedgeTriggered = item.reason === "hedge_triggered";
+            const isHedgeLoser = item.reason === "hedge_loser_cancelled";
+            const isClientAbort = item.reason === "client_abort";
             const stepIcon = isSessionReuse
               ? Link2
-              : isRetry
-                ? RefreshCw
-                : status === "success"
-                  ? CheckCircle
-                  : status === "failure"
-                    ? XCircle
-                    : Server;
+              : isHedgeTriggered
+                ? GitBranch
+                : isHedgeLoser || isClientAbort
+                  ? XCircle
+                  : isRetry
+                    ? RefreshCw
+                    : status === "success"
+                      ? CheckCircle
+                      : status === "failure"
+                        ? XCircle
+                        : Server;
 
             // Determine title based on type
             // For session reuse flow, show simplified "Execute Request" title for the first item
             const stepTitle = isSessionReuse
               ? t("logicTrace.executeRequest")
-              : isRetry
-                ? t("logicTrace.retryAttempt", { number: item.attemptNumber ?? 1 })
-                : t("logicTrace.attemptProvider", { provider: item.name });
+              : isHedgeTriggered
+                ? tChain("timeline.hedgeTriggered")
+                : isHedgeLoser
+                  ? tChain("timeline.hedgeLoserCancelled")
+                  : isClientAbort
+                    ? tChain("timeline.clientAbort")
+                    : isRetry
+                      ? t("logicTrace.retryAttempt", { number: item.attemptNumber ?? 1 })
+                      : item.reason === "hedge_winner"
+                        ? tChain("timeline.hedgeWinner")
+                        : t("logicTrace.attemptProvider", { provider: item.name });
 
             return (
               <StepCard

+ 55 - 0
src/app/[locale]/dashboard/logs/_components/provider-chain-popover.test.tsx

@@ -409,3 +409,58 @@ describe("provider-chain-popover layout", () => {
     expect(countBadge).not.toBeUndefined();
   });
 });
+
+describe("provider-chain-popover hedge/abort reason handling", () => {
+  test("hedge_triggered is not counted as actual request", () => {
+    const html = renderWithIntl(
+      <ProviderChainPopover
+        chain={[
+          { id: 1, name: "p1", reason: "initial_selection" },
+          { id: 1, name: "p1", reason: "hedge_triggered", attemptNumber: 1 },
+          { id: 2, name: "p2", reason: "hedge_winner", statusCode: 200, attemptNumber: 2 },
+          { id: 1, name: "p1", reason: "hedge_loser_cancelled", attemptNumber: 1 },
+        ]}
+        finalProvider="p2"
+      />
+    );
+
+    // hedge_triggered is informational, not an actual request
+    // so the request count should be 2 (winner + loser), not 3
+    const document = parseHtml(html);
+    const countBadge = Array.from(document.querySelectorAll('[data-slot="badge"]')).find((node) =>
+      (node.textContent ?? "").includes("times")
+    );
+    expect(countBadge?.textContent).toContain("2");
+  });
+
+  test("hedge_winner is treated as successful provider", () => {
+    const html = renderWithIntl(
+      <ProviderChainPopover
+        chain={[
+          { id: 1, name: "p1", reason: "initial_selection" },
+          { id: 2, name: "p2", reason: "hedge_winner", statusCode: 200, attemptNumber: 2 },
+          { id: 1, name: "p1", reason: "hedge_loser_cancelled", attemptNumber: 1 },
+        ]}
+        finalProvider="p2"
+      />
+    );
+
+    // Should render without error
+    expect(html).toContain("p2");
+  });
+
+  test("client_abort is counted as actual request", () => {
+    const html = renderWithIntl(
+      <ProviderChainPopover
+        chain={[
+          { id: 1, name: "p1", reason: "initial_selection" },
+          { id: 1, name: "p1", reason: "client_abort", attemptNumber: 1 },
+        ]}
+        finalProvider="p1"
+      />
+    );
+
+    // client_abort should be counted as actual request (requestCount=1 -> single view)
+    expect(html).toContain("p1");
+  });
+});

+ 38 - 2
src/app/[locale]/dashboard/logs/_components/provider-chain-popover.tsx

@@ -4,6 +4,7 @@ import {
   AlertTriangle,
   CheckCircle,
   ChevronRight,
+  GitBranch,
   InfoIcon,
   Link2,
   MinusCircle,
@@ -35,6 +36,7 @@ interface ProviderChainPopoverProps {
  */
 function isActualRequest(item: ProviderChainItem): boolean {
   if (item.reason === "client_restriction_filtered") return false;
+  if (item.reason === "hedge_triggered") return false;
 
   if (item.reason === "concurrent_limit_failed") return true;
 
@@ -43,6 +45,9 @@ function isActualRequest(item: ProviderChainItem): boolean {
   if (item.reason === "endpoint_pool_exhausted") return true;
   if (item.reason === "vendor_type_all_timeout") return true;
   if (item.reason === "client_error_non_retryable") return true;
+  if (item.reason === "hedge_winner") return true;
+  if (item.reason === "hedge_loser_cancelled") return true;
+  if (item.reason === "client_abort") return true;
   if ((item.reason === "request_success" || item.reason === "retry_success") && item.statusCode) {
     return true;
   }
@@ -70,7 +75,12 @@ function getItemStatus(item: ProviderChainItem): {
   color: string;
   bgColor: string;
 } {
-  if ((item.reason === "request_success" || item.reason === "retry_success") && item.statusCode) {
+  if (
+    (item.reason === "request_success" ||
+      item.reason === "retry_success" ||
+      item.reason === "hedge_winner") &&
+    item.statusCode
+  ) {
     return {
       icon: CheckCircle,
       color: "text-emerald-600",
@@ -111,6 +121,27 @@ function getItemStatus(item: ProviderChainItem): {
       bgColor: "bg-muted/30",
     };
   }
+  if (item.reason === "hedge_triggered") {
+    return {
+      icon: GitBranch,
+      color: "text-indigo-600",
+      bgColor: "bg-indigo-50 dark:bg-indigo-950/30",
+    };
+  }
+  if (item.reason === "hedge_loser_cancelled") {
+    return {
+      icon: XCircle,
+      color: "text-slate-500",
+      bgColor: "bg-slate-50 dark:bg-slate-800/50",
+    };
+  }
+  if (item.reason === "client_abort") {
+    return {
+      icon: MinusCircle,
+      color: "text-amber-600",
+      bgColor: "bg-amber-50 dark:bg-amber-950/30",
+    };
+  }
   return {
     icon: RefreshCw,
     color: "text-slate-500",
@@ -378,7 +409,12 @@ export function ProviderChainPopover({
   // Get the successful provider's costMultiplier and groupTag
   const successfulProvider = [...chain]
     .reverse()
-    .find((item) => item.reason === "request_success" || item.reason === "retry_success");
+    .find(
+      (item) =>
+        item.reason === "request_success" ||
+        item.reason === "retry_success" ||
+        item.reason === "hedge_winner"
+    );
   const finalCostMultiplier = successfulProvider?.costMultiplier;
   const finalGroupTag = successfulProvider?.groupTag;
   const finalGroupTags = parseGroupTags(finalGroupTag);

+ 6 - 0
src/app/[locale]/dashboard/logs/_components/virtualized-logs-table.tsx

@@ -14,6 +14,7 @@ import { useVirtualizer } from "@/hooks/use-virtualizer";
 import type { LogsTableColumn } from "@/lib/column-visibility";
 import { cn, formatTokenAmount } from "@/lib/utils";
 import { copyTextToClipboard } from "@/lib/utils/clipboard";
+import { isProviderFinalized } from "@/lib/utils/provider-display";
 import type { CurrencyCode } from "@/lib/utils/currency";
 import { formatCurrency } from "@/lib/utils/currency";
 import {
@@ -419,6 +420,11 @@ export function VirtualizedLogsTable({
                             <span className="h-1.5 w-1.5 rounded-full bg-orange-600 dark:bg-orange-400" />
                             {t("logs.table.blocked")}
                           </span>
+                        ) : !isProviderFinalized(log) ? (
+                          <span className="inline-flex items-center gap-1.5 text-xs text-muted-foreground">
+                            <Loader2 className="h-3 w-3 animate-spin" />
+                            {t("logs.details.inProgress")}
+                          </span>
                         ) : (
                           <div className="flex flex-col items-start gap-0.5 min-w-0">
                             <div className="flex items-center gap-1 min-w-0 w-full overflow-hidden">

+ 7 - 5
src/app/[locale]/internal/dashboard/big-screen/page.tsx

@@ -311,7 +311,7 @@ const ActivityStream = ({
               >
                 <div className={`col-span-2 truncate font-bold text-orange-400`}>{item.user}</div>
                 <div className={`col-span-3 truncate text-gray-300`}>{item.model}</div>
-                <div className={`col-span-3 truncate text-gray-500`}>{item.provider}</div>
+                <div className={`col-span-3 truncate text-gray-500`}>{item.provider || "..."}</div>
                 <div
                   className={`col-span-2 text-right ${item.latency > 1000 ? "text-red-400" : "text-green-400"}`}
                 >
@@ -320,12 +320,14 @@ const ActivityStream = ({
                 <div className="col-span-2 text-right flex justify-end">
                   <span
                     className={`px-1.5 rounded-sm ${
-                      item.status === 200
-                        ? "bg-green-500/10 text-green-500"
-                        : "bg-red-500/10 text-red-500"
+                      item.status === 0
+                        ? "bg-yellow-500/10 text-yellow-500"
+                        : item.status === 200
+                          ? "bg-green-500/10 text-green-500"
+                          : "bg-red-500/10 text-red-500"
                     }`}
                   >
-                    {item.status}
+                    {item.status === 0 ? "..." : item.status}
                   </span>
                 </div>
               </motion.div>

+ 32 - 2
src/app/v1/_lib/proxy/forwarder.ts

@@ -1054,7 +1054,7 @@ export class ProxyForwarder {
             // 记录到决策链(标记为客户端中断)
             session.addProviderToChain(currentProvider, {
               ...endpointAudit,
-              reason: "system_error", // 使用 system_error 作为客户端中断的原因
+              reason: "client_abort",
               circuitState: getCircuitState(currentProvider.id),
               attemptNumber: attemptCount,
               errorMessage: "Client aborted request",
@@ -2906,6 +2906,13 @@ export class ProxyForwarder {
         attempt.thresholdTimer = null;
       }
       attempts.delete(attempt);
+      if (reason === "hedge_loser") {
+        attempt.session.addProviderToChain(attempt.provider, {
+          ...attempt.endpointAudit,
+          reason: "hedge_loser_cancelled",
+          attemptNumber: attempt.sequence,
+        });
+      }
       try {
         attempt.responseController?.abort(new Error(reason));
       } catch {
@@ -3016,7 +3023,7 @@ export class ProxyForwarder {
       if (errorCategory === ErrorCategory.CLIENT_ABORT) {
         session.addProviderToChain(attempt.provider, {
           ...attempt.endpointAudit,
-          reason: "system_error",
+          reason: "client_abort",
           attemptNumber: attempt.sequence,
           errorMessage: "Client aborted request",
           circuitState: getCircuitState(attempt.provider.id),
@@ -3067,6 +3074,13 @@ export class ProxyForwarder {
       }
       session.setProvider(attempt.provider);
 
+      session.addProviderToChain(attempt.provider, {
+        ...attempt.endpointAudit,
+        reason: "hedge_winner",
+        attemptNumber: attempt.sequence,
+        statusCode: attempt.response.status,
+      });
+
       abortAllAttempts(attempt, "hedge_loser");
 
       if (session.sessionId) {
@@ -3175,6 +3189,12 @@ export class ProxyForwarder {
         attempt.thresholdTimer = setTimeout(() => {
           if (settled || attempt.settled || attempt.thresholdTriggered) return;
           attempt.thresholdTriggered = true;
+          attempt.session.addProviderToChain(attempt.provider, {
+            ...attempt.endpointAudit,
+            reason: "hedge_triggered",
+            attemptNumber: attempt.sequence,
+            circuitState: getCircuitState(attempt.provider.id),
+          });
           void launchAlternative();
         }, attempt.firstByteTimeoutMs);
       }
@@ -3257,6 +3277,16 @@ export class ProxyForwarder {
           if (settled || winnerCommitted) return;
           noMoreProviders = true;
           lastError = new ProxyError("Request aborted by client", 499);
+          for (const attempt of Array.from(attempts)) {
+            if (!attempt.settled) {
+              session.addProviderToChain(attempt.provider, {
+                ...attempt.endpointAudit,
+                reason: "client_abort",
+                attemptNumber: attempt.sequence,
+                errorMessage: "Client aborted request",
+              });
+            }
+          }
           abortAllAttempts(undefined, "client_abort");
           void finishIfExhausted();
         },

+ 5 - 1
src/app/v1/_lib/proxy/session.ts

@@ -453,7 +453,11 @@ export class ProxySession {
         | "http2_fallback" // HTTP/2 协议错误,回退到 HTTP/1.1(不切换供应商、不计入熔断器)
         | "endpoint_pool_exhausted" // 端点池耗尽(strict endpoint policy 阻止了 fallback)
         | "vendor_type_all_timeout" // 供应商类型全端点超时(524),触发 vendor-type 临时熔断
-        | "client_restriction_filtered"; // 供应商因客户端限制被跳过(会话复用路径)
+        | "client_restriction_filtered" // 供应商因客户端限制被跳过(会话复用路径)
+        | "hedge_triggered" // Hedge 计时器触发,启动备选供应商
+        | "hedge_winner" // 该供应商赢得 Hedge 竞速(最先收到首字节)
+        | "hedge_loser_cancelled" // 该供应商输掉 Hedge 竞速,请求被取消
+        | "client_abort"; // 客户端在响应完成前断开连接
       selectionMethod?:
         | "session_reuse"
         | "weighted_random"

+ 97 - 0
src/lib/langfuse/trace-proxy-request.test.ts

@@ -0,0 +1,97 @@
+/**
+ * Unit tests for reason classification in trace-proxy-request.
+ *
+ * The SUCCESS_REASONS / ERROR_REASONS sets and the isSuccessReason /
+ * isErrorReason helpers are module-private in trace-proxy-request.ts, so
+ * they cannot be imported here. Instead, this file re-declares them
+ * (mirror test pattern) and checks membership on the copies. These tests
+ * therefore do NOT exercise the real module: they only pin the expected
+ * classification and must be kept in sync with the source by hand.
+ */
+import { describe, expect, test } from "vitest";
+
+// Mirror the sets from trace-proxy-request.ts for unit-level validation.
+// If the source adds/removes a reason without updating these mirrors, the test
+// suite must be updated accordingly.
+const SUCCESS_REASONS = new Set([
+  "request_success",
+  "retry_success",
+  "initial_selection",
+  "session_reuse",
+  "hedge_winner",
+]);
+
+const ERROR_REASONS = new Set([
+  "system_error",
+  "vendor_type_all_timeout",
+  "endpoint_pool_exhausted",
+  "client_abort",
+]);
+
+function isSuccessReason(reason: string | undefined): boolean {
+  return !!reason && SUCCESS_REASONS.has(reason);
+}
+
+function isErrorReason(reason: string | undefined): boolean {
+  return !!reason && ERROR_REASONS.has(reason);
+}
+
+describe("isSuccessReason", () => {
+  test("hedge_winner is a success reason", () => {
+    expect(isSuccessReason("hedge_winner")).toBe(true);
+  });
+
+  test("request_success is a success reason", () => {
+    expect(isSuccessReason("request_success")).toBe(true);
+  });
+
+  test("retry_success is a success reason", () => {
+    expect(isSuccessReason("retry_success")).toBe(true);
+  });
+
+  test("hedge_triggered is NOT a success reason", () => {
+    expect(isSuccessReason("hedge_triggered")).toBe(false);
+  });
+
+  test("hedge_loser_cancelled is NOT a success reason", () => {
+    expect(isSuccessReason("hedge_loser_cancelled")).toBe(false);
+  });
+
+  test("client_abort is NOT a success reason", () => {
+    expect(isSuccessReason("client_abort")).toBe(false);
+  });
+
+  test("undefined is NOT a success reason", () => {
+    expect(isSuccessReason(undefined)).toBe(false);
+  });
+});
+
+describe("isErrorReason", () => {
+  test("client_abort is an error reason", () => {
+    expect(isErrorReason("client_abort")).toBe(true);
+  });
+
+  test("system_error is an error reason", () => {
+    expect(isErrorReason("system_error")).toBe(true);
+  });
+
+  test("hedge_winner is NOT an error reason", () => {
+    expect(isErrorReason("hedge_winner")).toBe(false);
+  });
+
+  test("hedge_triggered is NOT an error reason", () => {
+    expect(isErrorReason("hedge_triggered")).toBe(false);
+  });
+
+  test("hedge_loser_cancelled is NOT an error reason", () => {
+    expect(isErrorReason("hedge_loser_cancelled")).toBe(false);
+  });
+
+  test("retry_failed is NOT in the error set (it is WARNING level)", () => {
+    expect(isErrorReason("retry_failed")).toBe(false);
+  });
+
+  test("undefined is NOT an error reason", () => {
+    expect(isErrorReason(undefined)).toBe(false);
+  });
+});

+ 29 - 1
src/lib/langfuse/trace-proxy-request.ts

@@ -48,6 +48,7 @@ const SUCCESS_REASONS = new Set([
   "retry_success",
   "initial_selection",
   "session_reuse",
+  "hedge_winner",
 ]);
 
 function isSuccessReason(reason: string | undefined): boolean {
@@ -58,6 +59,7 @@ const ERROR_REASONS = new Set([
   "system_error",
   "vendor_type_all_timeout",
   "endpoint_pool_exhausted",
+  "client_abort",
 ]);
 
 function isErrorReason(reason: string | undefined): boolean {
@@ -275,8 +277,34 @@ export async function traceProxyRequest(ctx: TraceContext): Promise<void> {
           guardSpan.end(forwardStartDate);
         }
 
-        // 2. Provider attempt events (one per failed chain item)
+        // 2. Provider attempt events (one per failed/hedge chain item)
         for (const item of session.getProviderChain()) {
+          // Hedge trigger: informational event (not a success or failure)
+          if (item.reason === "hedge_triggered") {
+            const hedgeObs = rootSpan.startObservation(
+              "hedge-trigger",
+              {
+                level: "WARNING" as ObservationLevel,
+                input: {
+                  providerId: item.id,
+                  providerName: item.name,
+                  attempt: item.attemptNumber,
+                },
+                output: {
+                  reason: item.reason,
+                  circuitState: item.circuitState,
+                },
+                metadata: { ...item },
+              },
+              {
+                asType: "event",
+                startTime: new Date(item.timestamp ?? session.startTime),
+              } as { asType: "event" }
+            );
+            hedgeObs.end();
+            continue;
+          }
+
           if (!isSuccessReason(item.reason)) {
             const eventObs = rootSpan.startObservation(
               "provider-attempt",

+ 66 - 0
src/lib/utils/provider-chain-formatter.test.ts

@@ -522,3 +522,69 @@ describe("unknown reason graceful degradation", () => {
     expect(timeline).toContain("timeline.unknown");
   });
 });
+
+describe("hedge and client_abort reason handling", () => {
+  test("hedge_winner with statusCode is treated as success", () => {
+    const chain: ProviderChainItem[] = [
+      { id: 1, name: "p1", reason: "hedge_triggered", timestamp: 1000, attemptNumber: 1 },
+      {
+        id: 2,
+        name: "p2",
+        reason: "hedge_winner",
+        statusCode: 200,
+        timestamp: 2000,
+        attemptNumber: 2,
+      },
+      { id: 1, name: "p1", reason: "hedge_loser_cancelled", timestamp: 2000, attemptNumber: 1 },
+    ];
+    const { timeline } = formatProviderTimeline(chain, mockT);
+    // The winning provider (p2) must be named in the rendered timeline.
+    expect(timeline).toContain("p2");
+  });
+
+  test("hedge_triggered is not an actual request", () => {
+    const item: ProviderChainItem = {
+      id: 1,
+      name: "p1",
+      reason: "hedge_triggered",
+      timestamp: 1000,
+    };
+    // NOTE(review): only checks that a description is returned; "not an actual request" is not asserted.
+    const desc = formatProviderDescription([item], mockT);
+    expect(desc).toBeDefined();
+  });
+
+  test("hedge_loser_cancelled is an actual request", () => {
+    const chain: ProviderChainItem[] = [
+      { id: 1, name: "p1", reason: "hedge_loser_cancelled", timestamp: 1000, attemptNumber: 1 },
+    ];
+    const { timeline } = formatProviderTimeline(chain, mockT);
+    expect(timeline).toContain("p1"); // the cancelled provider's name is rendered in the timeline
+  });
+
+  test("client_abort is an actual request", () => {
+    const chain: ProviderChainItem[] = [
+      { id: 1, name: "p1", reason: "client_abort", timestamp: 1000, attemptNumber: 1 },
+    ];
+    const { timeline } = formatProviderTimeline(chain, mockT);
+    expect(timeline).toContain("p1"); // the aborted provider's name is rendered in the timeline
+  });
+
+  test("formatProviderSummary handles hedge_winner chain", () => {
+    const chain: ProviderChainItem[] = [
+      { id: 1, name: "p1", reason: "initial_selection", timestamp: 1000 },
+      { id: 1, name: "p1", reason: "hedge_triggered", timestamp: 2000, attemptNumber: 1 },
+      {
+        id: 2,
+        name: "p2",
+        reason: "hedge_winner",
+        statusCode: 200,
+        timestamp: 3000,
+        attemptNumber: 2,
+      },
+      { id: 1, name: "p1", reason: "hedge_loser_cancelled", timestamp: 3000, attemptNumber: 1 },
+    ];
+    const summary = formatProviderSummary(chain, mockT);
+    expect(summary).toBeDefined(); // NOTE(review): smoke check only; the summary text is not asserted
+  });
+});

+ 24 - 3
src/lib/utils/provider-chain-formatter.ts

@@ -56,7 +56,12 @@ export function formatProbabilityCompact(probability: number | undefined | null)
  */
 function getProviderStatus(item: ProviderChainItem): "✓" | "✗" | "⚡" | "↓" | null {
   // 成功标记:必须有 statusCode 且是成功状态码
-  if ((item.reason === "request_success" || item.reason === "retry_success") && item.statusCode) {
+  if (
+    (item.reason === "request_success" ||
+      item.reason === "retry_success" ||
+      item.reason === "hedge_winner") &&
+    item.statusCode
+  ) {
     return "✓";
   }
   // 失败标记
@@ -66,10 +71,15 @@ function getProviderStatus(item: ProviderChainItem): "✓" | "✗" | "⚡" | "
     item.reason === "resource_not_found" ||
     item.reason === "client_error_non_retryable" ||
     item.reason === "endpoint_pool_exhausted" ||
-    item.reason === "vendor_type_all_timeout"
+    item.reason === "vendor_type_all_timeout" ||
+    item.reason === "client_abort"
   ) {
     return "✗";
   }
+  // Hedge 输家:取消标记
+  if (item.reason === "hedge_loser_cancelled") {
+    return "✗";
+  }
   // 并发限制失败
   if (item.reason === "concurrent_limit_failed") {
     return "⚡";
@@ -78,6 +88,10 @@ function getProviderStatus(item: ProviderChainItem): "✓" | "✗" | "⚡" | "
   if (item.reason === "http2_fallback") {
     return "↓";
   }
+  // Hedge 触发(信息性事件,不是请求结果)
+  if (item.reason === "hedge_triggered") {
+    return null;
+  }
   // 中间状态(选择成功但还没有请求结果)
   return null;
 }
@@ -96,11 +110,18 @@ function isActualRequest(item: ProviderChainItem): boolean {
     item.reason === "resource_not_found" ||
     item.reason === "client_error_non_retryable" ||
     item.reason === "endpoint_pool_exhausted" ||
-    item.reason === "vendor_type_all_timeout"
+    item.reason === "vendor_type_all_timeout" ||
+    item.reason === "client_abort"
   ) {
     return true;
   }
 
+  // Hedge 相关:winner 和 loser 都是实际请求
+  if (item.reason === "hedge_winner" || item.reason === "hedge_loser_cancelled") return true;
+
+  // Hedge 触发:信息性事件,不算实际请求
+  if (item.reason === "hedge_triggered") return false;
+
   // HTTP/2 回退:算作一次中间事件(显示但不计入失败)
   if (item.reason === "http2_fallback") return true;
 

+ 21 - 0
src/lib/utils/provider-display.ts

@@ -0,0 +1,21 @@
+/**
+ * Determine whether a request entry has been finalized.
+ *
+ * Returns true as soon as one of these checks passes (evaluated in order):
+ * - `blockedBy` is truthy (the request was blocked by a guard), OR
+ * - `providerChain` is a non-empty array (written at finalization time), OR
+ * - `statusCode` is not null/undefined, so 0 counts (set when the response completes)
+ *
+ * Before finalization, provider info is unreliable because the upstream
+ * may change due to fallback, hedge, timeout, or fake-200 detection.
+ */
+export function isProviderFinalized(entry: {
+  providerChain?: unknown[] | null;
+  statusCode?: number | null;
+  blockedBy?: string | null;
+}): boolean {
+  if (entry.blockedBy) return true;
+  if (Array.isArray(entry.providerChain) && entry.providerChain.length > 0) return true;
+  if (entry.statusCode != null) return true;
+  return false;
+}

+ 5 - 1
src/types/message.ts

@@ -35,7 +35,11 @@ export interface ProviderChainItem {
     | "http2_fallback" // HTTP/2 协议错误,回退到 HTTP/1.1(不切换供应商、不计入熔断器)
     | "endpoint_pool_exhausted" // 端点池耗尽(所有端点熔断或不可用,严格模式阻止降级)
     | "vendor_type_all_timeout" // 供应商类型全端点超时(524),触发 vendor-type 临时熔断
-    | "client_restriction_filtered"; // Provider skipped due to client restriction (neutral, no circuit breaker)
+    | "client_restriction_filtered" // Provider skipped due to client restriction (neutral, no circuit breaker)
+    | "hedge_triggered" // Hedge 计时器触发,启动备选供应商
+    | "hedge_winner" // 该供应商赢得 Hedge 竞速(最先收到首字节)
+    | "hedge_loser_cancelled" // 该供应商输掉 Hedge 竞速,请求被取消
+    | "client_abort"; // 客户端在响应完成前断开连接
 
   // === 选择方法(细化) ===
   selectionMethod?:

+ 43 - 39
tests/integration/usage-ledger.test.ts

@@ -278,45 +278,49 @@ run("usage ledger integration", () => {
   });
 
   describe("backfill", () => {
-    test("backfill copies non-warmup message_request rows when ledger rows are missing", {
-      timeout: 60_000,
-    }, async () => {
-      const userId = nextUserId();
-      const providerId = nextProviderId();
-      const keepA = await insertMessageRequestRow({
-        key: nextKey("backfill-a"),
-        userId,
-        providerId,
-        costUsd: "1.100000000000000",
-      });
-      const keepB = await insertMessageRequestRow({
-        key: nextKey("backfill-b"),
-        userId,
-        providerId,
-        costUsd: "2.200000000000000",
-      });
-      const warmup = await insertMessageRequestRow({
-        key: nextKey("backfill-warmup"),
-        userId,
-        providerId,
-        blockedBy: "warmup",
-      });
-
-      await db.delete(usageLedger).where(inArray(usageLedger.requestId, [keepA, keepB, warmup]));
-
-      const summary = await backfillUsageLedger();
-      expect(summary.totalProcessed).toBeGreaterThanOrEqual(2);
-
-      const rows = await db
-        .select({ requestId: usageLedger.requestId })
-        .from(usageLedger)
-        .where(inArray(usageLedger.requestId, [keepA, keepB, warmup]));
-      const requestIds = rows.map((row) => row.requestId);
-
-      expect(requestIds).toContain(keepA);
-      expect(requestIds).toContain(keepB);
-      expect(requestIds).not.toContain(warmup);
-    });
+    test(
+      "backfill copies non-warmup message_request rows when ledger rows are missing",
+      {
+        timeout: 60_000,
+      },
+      async () => {
+        const userId = nextUserId();
+        const providerId = nextProviderId();
+        const keepA = await insertMessageRequestRow({
+          key: nextKey("backfill-a"),
+          userId,
+          providerId,
+          costUsd: "1.100000000000000",
+        });
+        const keepB = await insertMessageRequestRow({
+          key: nextKey("backfill-b"),
+          userId,
+          providerId,
+          costUsd: "2.200000000000000",
+        });
+        const warmup = await insertMessageRequestRow({
+          key: nextKey("backfill-warmup"),
+          userId,
+          providerId,
+          blockedBy: "warmup", // the row the backfill is expected to skip (asserted at the end)
+        });
+
+        await db.delete(usageLedger).where(inArray(usageLedger.requestId, [keepA, keepB, warmup]));
+
+        const summary = await backfillUsageLedger();
+        expect(summary.totalProcessed).toBeGreaterThanOrEqual(2); // lower bound: keepA + keepB
+
+        const rows = await db
+          .select({ requestId: usageLedger.requestId })
+          .from(usageLedger)
+          .where(inArray(usageLedger.requestId, [keepA, keepB, warmup]));
+        const requestIds = rows.map((row) => row.requestId);
+
+        expect(requestIds).toContain(keepA);
+        expect(requestIds).toContain(keepB);
+        expect(requestIds).not.toContain(warmup); // the warmup row must stay out of the ledger
+      }
+    );
 
     test("backfill is idempotent when running twice", { timeout: 60_000 }, async () => {
       const requestId = await insertMessageRequestRow({

+ 54 - 0
tests/unit/lib/utils/provider-display.test.ts

@@ -0,0 +1,54 @@
+import { describe, expect, it } from "vitest";
+import { isProviderFinalized } from "@/lib/utils/provider-display";
+
+describe("isProviderFinalized", () => { // table-driven: each case pairs an entry with its expected result
+  it.each([
+    {
+      name: "null providerChain + null statusCode = not finalized",
+      entry: { providerChain: null, statusCode: null, blockedBy: null },
+      expected: false,
+    },
+    {
+      name: "empty providerChain + null statusCode = not finalized",
+      entry: { providerChain: [], statusCode: null, blockedBy: null }, // [] fails the length > 0 check
+      expected: false,
+    },
+    {
+      name: "undefined fields = not finalized",
+      entry: {},
+      expected: false,
+    },
+    {
+      name: "providerChain with items = finalized",
+      entry: { providerChain: [{ id: 1, name: "provider-a" }], statusCode: 200 },
+      expected: true,
+    },
+    {
+      name: "null providerChain + statusCode present = finalized",
+      entry: { providerChain: null, statusCode: 200 },
+      expected: true,
+    },
+    {
+      name: "statusCode 0 counts as finalized",
+      entry: { providerChain: null, statusCode: 0 }, // 0 passes the `!= null` check
+      expected: true,
+    },
+    {
+      name: "error statusCode = finalized",
+      entry: { providerChain: null, statusCode: 500 },
+      expected: true,
+    },
+    {
+      name: "blockedBy = finalized (regardless of other fields)",
+      entry: { providerChain: null, statusCode: null, blockedBy: "sensitive_word" },
+      expected: true,
+    },
+    {
+      name: "blockedBy takes priority over missing chain/status",
+      entry: { blockedBy: "rate_limit" }, // blockedBy is the first check in the function
+      expected: true,
+    },
+  ])("$name", ({ entry, expected }) => {
+    expect(isProviderFinalized(entry)).toBe(expected);
+  });
+});