Ver Fonte

feat(circuit-breaker): endpoint CB default-off + 524 decision chain audit (#773)

* feat(circuit-breaker): endpoint circuit breaker default-off + 524 decision chain audit

- Add ENABLE_ENDPOINT_CIRCUIT_BREAKER env var (default: false) to gate endpoint-level circuit breaker
- Gate isEndpointCircuitOpen, recordEndpointFailure, recordEndpointSuccess, triggerEndpointCircuitBreakerAlert behind env switch
- Add initEndpointCircuitBreaker() startup cleanup: clear stale Redis keys when feature disabled
- Gate endpoint filtering in endpoint-selector (getPreferredProviderEndpoints, getEndpointFilterStats)
- Fix 524 vendor-type timeout missing from decision chain: add chain entry with reason=vendor_type_all_timeout in forwarder
- Add vendor_type_all_timeout to ProviderChainItem reason union type (both backend session.ts and frontend message.ts)
- Add timeline rendering for vendor_type_all_timeout in provider-chain-formatter
- Replace hardcoded Chinese strings in provider-selector circuit_open details with i18n keys
- Add i18n translations for vendor_type_all_timeout and filterDetails (5 languages: zh-CN, zh-TW, en, ja, ru)
- Enhance LogicTraceTab to render filterDetails via i18n lookup with fallback
- Add endpoint_pool_exhausted and vendor_type_all_timeout to provider-chain-popover isActualRequest/getItemStatus
- Add comprehensive unit tests for all changes (endpoint-circuit-breaker, endpoint-selector, provider-chain-formatter)

* fix(i18n): fix Russian grammar errors and rate_limited translations

- Fix Russian: "конечная точкаов" -> "конечных точек" (11 occurrences)
- Fix Russian: "Ограничение стоимости" -> "Ограничение скорости" (rate_limited)
- Fix zh-CN: "费用限制" -> "速率限制" (filterDetails.rate_limited)
- Fix zh-TW: "費用限制" -> "速率限制" (filterDetails.rate_limited)
- Add initEndpointCircuitBreaker() to dev environment in instrumentation.ts
Ding há 1 mês atrás
pai
commit
aebb722706

+ 6 - 0
.env.example

@@ -88,6 +88,12 @@ STORE_SESSION_RESPONSE_BODY=true        # 是否在 Redis 中存储会话响应
 # - 启用:适用于网络稳定环境,连续网络错误也应触发熔断保护,避免持续请求不可达的供应商
 ENABLE_CIRCUIT_BREAKER_ON_NETWORK_ERRORS=false
 
+# 端点级别熔断器
+# 功能说明:控制是否启用端点级别的熔断器
+# - false (默认):禁用端点熔断器,所有启用的端点均可使用
+# - true:启用端点熔断器,连续失败的端点会被临时屏蔽(默认 3 次失败后熔断 5 分钟)
+ENABLE_ENDPOINT_CIRCUIT_BREAKER=false
+
 # 供应商缓存配置
 # 功能说明:控制是否启用供应商进程级缓存
 # - true (默认):启用缓存,30s TTL + Redis Pub/Sub 跨实例即时失效,提升供应商查询性能

+ 13 - 3
messages/en/provider-chain.json

@@ -38,7 +38,8 @@
     "concurrentLimit": "Concurrent Limit",
     "http2Fallback": "HTTP/2 Fallback",
     "clientError": "Client Error",
-    "endpointPoolExhausted": "Endpoint Pool Exhausted"
+    "endpointPoolExhausted": "Endpoint Pool Exhausted",
+    "vendorTypeAllTimeout": "Vendor-Type All Endpoints Timeout"
   },
   "reasons": {
     "request_success": "Success",
@@ -50,7 +51,8 @@
     "http2_fallback": "HTTP/2 Fallback",
     "session_reuse": "Session Reuse",
     "initial_selection": "Initial Selection",
-    "endpoint_pool_exhausted": "Endpoint Pool Exhausted"
+    "endpoint_pool_exhausted": "Endpoint Pool Exhausted",
+    "vendor_type_all_timeout": "Vendor-Type All Endpoints Timeout"
   },
   "filterReasons": {
     "rate_limited": "Rate Limited",
@@ -67,6 +69,12 @@
     "endpoint_circuit_open": "Endpoint Circuit Open",
     "endpoint_disabled": "Endpoint Disabled"
   },
+  "filterDetails": {
+    "vendor_type_circuit_open": "Vendor-type temporarily circuit-broken",
+    "circuit_open": "Circuit breaker open",
+    "circuit_half_open": "Circuit breaker half-open",
+    "rate_limited": "Rate limited"
+  },
   "details": {
     "selectionMethod": "Selection",
     "attemptNumber": "Attempt",
@@ -197,6 +205,8 @@
     "endpointStatsCircuitOpen": "Circuit-Open Endpoints: {count}",
     "endpointStatsAvailable": "Available Endpoints: {count}",
     "strictBlockNoEndpoints": "Strict mode: no endpoint candidates available, provider skipped without fallback",
-    "strictBlockSelectorError": "Strict mode: endpoint selector encountered an error, provider skipped without fallback"
+    "strictBlockSelectorError": "Strict mode: endpoint selector encountered an error, provider skipped without fallback",
+    "vendorTypeAllTimeout": "Vendor-Type All Endpoints Timeout (524)",
+    "vendorTypeAllTimeoutNote": "All endpoints for this vendor-type timed out. Vendor-type circuit breaker triggered."
   }
 }

+ 13 - 3
messages/ja/provider-chain.json

@@ -38,7 +38,8 @@
     "concurrentLimit": "同時実行制限",
     "http2Fallback": "HTTP/2 フォールバック",
     "clientError": "クライアントエラー",
-    "endpointPoolExhausted": "エンドポイントプール枯渇"
+    "endpointPoolExhausted": "エンドポイントプール枯渇",
+    "vendorTypeAllTimeout": "ベンダータイプ全エンドポイントタイムアウト"
   },
   "reasons": {
     "request_success": "成功",
@@ -50,7 +51,8 @@
     "http2_fallback": "HTTP/2 フォールバック",
     "session_reuse": "セッション再利用",
     "initial_selection": "初期選択",
-    "endpoint_pool_exhausted": "エンドポイントプール枯渇"
+    "endpoint_pool_exhausted": "エンドポイントプール枯渇",
+    "vendor_type_all_timeout": "ベンダータイプ全エンドポイントタイムアウト"
   },
   "filterReasons": {
     "rate_limited": "レート制限",
@@ -67,6 +69,12 @@
     "endpoint_circuit_open": "エンドポイントサーキットオープン",
     "endpoint_disabled": "エンドポイント無効"
   },
+  "filterDetails": {
+    "vendor_type_circuit_open": "ベンダータイプ一時サーキットブレイク",
+    "circuit_open": "サーキットブレーカーオープン",
+    "circuit_half_open": "サーキットブレーカーハーフオープン",
+    "rate_limited": "レート制限"
+  },
   "details": {
     "selectionMethod": "選択方法",
     "attemptNumber": "試行回数",
@@ -197,6 +205,8 @@
     "endpointStatsCircuitOpen": "サーキットオープンのエンドポイント: {count}",
     "endpointStatsAvailable": "利用可能なエンドポイント: {count}",
     "strictBlockNoEndpoints": "厳格モード:利用可能なエンドポイント候補がないため、フォールバックなしでプロバイダーをスキップ",
-    "strictBlockSelectorError": "厳格モード:エンドポイントセレクターでエラーが発生したため、フォールバックなしでプロバイダーをスキップ"
+    "strictBlockSelectorError": "厳格モード:エンドポイントセレクターでエラーが発生したため、フォールバックなしでプロバイダーをスキップ",
+    "vendorTypeAllTimeout": "ベンダータイプ全エンドポイントタイムアウト(524)",
+    "vendorTypeAllTimeoutNote": "このベンダータイプの全エンドポイントがタイムアウトしました。ベンダータイプサーキットブレーカーが発動しました。"
   }
 }

+ 20 - 10
messages/ru/provider-chain.json

@@ -38,7 +38,8 @@
     "concurrentLimit": "Лимит параллельных запросов",
     "http2Fallback": "Откат HTTP/2",
     "clientError": "Ошибка клиента",
-    "endpointPoolExhausted": "Пул конечная точкаов исчерпан"
+    "endpointPoolExhausted": "Пул конечных точек исчерпан",
+    "vendorTypeAllTimeout": "Тайм-аут всех конечных точек типа поставщика"
   },
   "reasons": {
     "request_success": "Успешно",
@@ -50,7 +51,8 @@
     "http2_fallback": "Откат HTTP/2",
     "session_reuse": "Повторное использование сессии",
     "initial_selection": "Первоначальный выбор",
-    "endpoint_pool_exhausted": "Пул конечная точкаов исчерпан"
+    "endpoint_pool_exhausted": "Пул конечных точек исчерпан",
+    "vendor_type_all_timeout": "Тайм-аут всех конечных точек типа поставщика"
   },
   "filterReasons": {
     "rate_limited": "Ограничение скорости",
@@ -64,9 +66,15 @@
     "model_not_supported": "Модель не поддерживается",
     "group_mismatch": "Несоответствие группы",
     "health_check_failed": "Проверка состояния не пройдена",
-    "endpoint_circuit_open": "Автомат конечная точкаа открыт",
+    "endpoint_circuit_open": "Автомат конечной точки открыт",
     "endpoint_disabled": "Эндпоинт отключен"
   },
+  "filterDetails": {
+    "vendor_type_circuit_open": "Временное размыкание типа поставщика",
+    "circuit_open": "Размыкатель открыт",
+    "circuit_half_open": "Размыкатель полуоткрыт",
+    "rate_limited": "Ограничение скорости"
+  },
   "details": {
     "selectionMethod": "Метод выбора",
     "attemptNumber": "Номер попытки",
@@ -190,13 +198,15 @@
     "ruleDescription": "Описание: {description}",
     "ruleHasOverride": "Переопределения: response={response}, statusCode={statusCode}",
     "clientErrorNote": "Эта ошибка вызвана вводом клиента, не повторяется и не учитывается в автомате защиты.",
-    "endpointPoolExhausted": "Пул конечная точкаов исчерпан (все конечная точкаы недоступны)",
-    "endpointStats": "Статистика фильтрации конечная точкаов",
-    "endpointStatsTotal": "Всего конечная точкаов: {count}",
-    "endpointStatsEnabled": "Включено конечная точкаов: {count}",
+    "endpointPoolExhausted": "Пул конечных точек исчерпан (все конечные точки недоступны)",
+    "endpointStats": "Статистика фильтрации конечных точек",
+    "endpointStatsTotal": "Всего конечных точек: {count}",
+    "endpointStatsEnabled": "Включено конечных точек: {count}",
     "endpointStatsCircuitOpen": "Эндпоинтов с открытым автоматом: {count}",
-    "endpointStatsAvailable": "Доступных конечная точкаов: {count}",
-    "strictBlockNoEndpoints": "Строгий режим: нет доступных кандидатов конечная точкаов, провайдер пропущен без отката",
-    "strictBlockSelectorError": "Строгий режим: ошибка селектора конечная точкаов, провайдер пропущен без отката"
+    "endpointStatsAvailable": "Доступных конечных точек: {count}",
+    "strictBlockNoEndpoints": "Строгий режим: нет доступных кандидатов конечных точек, провайдер пропущен без отката",
+    "strictBlockSelectorError": "Строгий режим: ошибка селектора конечных точек, провайдер пропущен без отката",
+    "vendorTypeAllTimeout": "Тайм-аут всех конечных точек типа поставщика (524)",
+    "vendorTypeAllTimeoutNote": "Все конечные точки этого типа поставщика превысили тайм-аут. Активирован размыкатель типа поставщика."
   }
 }

+ 13 - 3
messages/zh-CN/provider-chain.json

@@ -38,7 +38,8 @@
     "concurrentLimit": "并发限制",
     "http2Fallback": "HTTP/2 回退",
     "clientError": "客户端错误",
-    "endpointPoolExhausted": "端点池耗尽"
+    "endpointPoolExhausted": "端点池耗尽",
+    "vendorTypeAllTimeout": "供应商类型全端点超时"
   },
   "reasons": {
     "request_success": "成功",
@@ -50,7 +51,8 @@
     "http2_fallback": "HTTP/2 回退",
     "session_reuse": "会话复用",
     "initial_selection": "首次选择",
-    "endpoint_pool_exhausted": "端点池耗尽"
+    "endpoint_pool_exhausted": "端点池耗尽",
+    "vendor_type_all_timeout": "供应商类型全端点超时"
   },
   "filterReasons": {
     "rate_limited": "速率限制",
@@ -67,6 +69,12 @@
     "endpoint_circuit_open": "端点已熔断",
     "endpoint_disabled": "端点已禁用"
   },
+  "filterDetails": {
+    "vendor_type_circuit_open": "供应商类型临时熔断",
+    "circuit_open": "熔断器打开",
+    "circuit_half_open": "熔断器半开",
+    "rate_limited": "速率限制"
+  },
   "details": {
     "selectionMethod": "选择方式",
     "attemptNumber": "尝试次数",
@@ -197,6 +205,8 @@
     "endpointStatsCircuitOpen": "已熔断端点: {count}",
     "endpointStatsAvailable": "可用端点: {count}",
     "strictBlockNoEndpoints": "严格模式:无可用端点候选,跳过该供应商且不降级",
-    "strictBlockSelectorError": "严格模式:端点选择器发生错误,跳过该供应商且不降级"
+    "strictBlockSelectorError": "严格模式:端点选择器发生错误,跳过该供应商且不降级",
+    "vendorTypeAllTimeout": "供应商类型全端点超时(524)",
+    "vendorTypeAllTimeoutNote": "该供应商类型的所有端点均超时,已触发供应商类型临时熔断。"
   }
 }

+ 13 - 3
messages/zh-TW/provider-chain.json

@@ -38,7 +38,8 @@
     "concurrentLimit": "並發限制",
     "http2Fallback": "HTTP/2 回退",
     "clientError": "客戶端錯誤",
-    "endpointPoolExhausted": "端點池耗盡"
+    "endpointPoolExhausted": "端點池耗盡",
+    "vendorTypeAllTimeout": "供應商類型全端點逾時"
   },
   "reasons": {
     "request_success": "成功",
@@ -50,7 +51,8 @@
     "http2_fallback": "HTTP/2 回退",
     "session_reuse": "會話複用",
     "initial_selection": "首次選擇",
-    "endpoint_pool_exhausted": "端點池耗盡"
+    "endpoint_pool_exhausted": "端點池耗盡",
+    "vendor_type_all_timeout": "供應商類型全端點逾時"
   },
   "filterReasons": {
     "rate_limited": "速率限制",
@@ -67,6 +69,12 @@
     "endpoint_circuit_open": "端點已熔斷",
     "endpoint_disabled": "端點已停用"
   },
+  "filterDetails": {
+    "vendor_type_circuit_open": "供應商類型臨時熔斷",
+    "circuit_open": "熔斷器打開",
+    "circuit_half_open": "熔斷器半開",
+    "rate_limited": "速率限制"
+  },
   "details": {
     "selectionMethod": "選擇方式",
     "attemptNumber": "嘗試次數",
@@ -197,6 +205,8 @@
     "endpointStatsCircuitOpen": "已熔斷端點: {count}",
     "endpointStatsAvailable": "可用端點: {count}",
     "strictBlockNoEndpoints": "嚴格模式:無可用端點候選,跳過該供應商且不降級",
-    "strictBlockSelectorError": "嚴格模式:端點選擇器發生錯誤,跳過該供應商且不降級"
+    "strictBlockSelectorError": "嚴格模式:端點選擇器發生錯誤,跳過該供應商且不降級",
+    "vendorTypeAllTimeout": "供應商類型全端點逾時(524)",
+    "vendorTypeAllTimeoutNote": "該供應商類型的所有端點均逾時,已觸發供應商類型臨時熔斷。"
   }
 }

+ 1 - 0
scripts/deploy.ps1

@@ -503,6 +503,7 @@ ENABLE_SECURE_COOKIES=$secureCookies
 
 # Circuit Breaker Configuration
 ENABLE_CIRCUIT_BREAKER_ON_NETWORK_ERRORS=false
+ENABLE_ENDPOINT_CIRCUIT_BREAKER=false
 
 # Environment
 NODE_ENV=production

+ 1 - 0
scripts/deploy.sh

@@ -585,6 +585,7 @@ ENABLE_SECURE_COOKIES=${secure_cookies}
 
 # Circuit Breaker Configuration
 ENABLE_CIRCUIT_BREAKER_ON_NETWORK_ERRORS=false
+ENABLE_ENDPOINT_CIRCUIT_BREAKER=false
 
 # Environment
 NODE_ENV=production

+ 7 - 1
src/app/[locale]/dashboard/logs/_components/error-details-dialog/components/LogicTraceTab.tsx

@@ -353,7 +353,13 @@ export function LogicTraceTab({
                         {tChain(`filterReasons.${p.reason}`)}
                       </span>
                       {p.details && (
-                        <span className="text-muted-foreground break-all">({p.details})</span>
+                        <span className="text-muted-foreground break-all">
+                          (
+                          {tChain.has(`filterDetails.${p.details}`)
+                            ? tChain(`filterDetails.${p.details}`)
+                            : p.details}
+                          )
+                        </span>
                       )}
                     </div>
                   ))}

+ 10 - 0
src/app/[locale]/dashboard/logs/_components/provider-chain-popover.tsx

@@ -34,6 +34,9 @@ interface ProviderChainPopoverProps {
 function isActualRequest(item: ProviderChainItem): boolean {
   if (item.reason === "concurrent_limit_failed") return true;
   if (item.reason === "retry_failed" || item.reason === "system_error") return true;
+  if (item.reason === "endpoint_pool_exhausted") return true;
+  if (item.reason === "vendor_type_all_timeout") return true;
+  if (item.reason === "client_error_non_retryable") return true;
   if ((item.reason === "request_success" || item.reason === "retry_success") && item.statusCode) {
     return true;
   }
@@ -89,6 +92,13 @@ function getItemStatus(item: ProviderChainItem): {
       bgColor: "bg-orange-50 dark:bg-orange-950/30",
     };
   }
+  if (item.reason === "endpoint_pool_exhausted" || item.reason === "vendor_type_all_timeout") {
+    return {
+      icon: XCircle,
+      color: "text-rose-600",
+      bgColor: "bg-rose-50 dark:bg-rose-950/30",
+    };
+  }
   return {
     icon: RefreshCw,
     color: "text-slate-500",

+ 20 - 0
src/app/v1/_lib/proxy/forwarder.ts

@@ -1407,6 +1407,26 @@ export class ProxyForwarder {
               allEndpointAttemptsTimedOut &&
               currentProvider.providerVendorId
             ) {
+              // Record to decision chain BEFORE triggering vendor-type circuit breaker
+              session.addProviderToChain(currentProvider, {
+                ...endpointAudit,
+                reason: "vendor_type_all_timeout",
+                attemptNumber: attemptCount,
+                statusCode: 524,
+                errorMessage: errorMessage,
+                errorDetails: {
+                  provider: {
+                    id: currentProvider.id,
+                    name: currentProvider.name,
+                    statusCode: 524,
+                    statusText: proxyError.message,
+                    upstreamBody: proxyError.upstreamError?.body,
+                    upstreamParsed: proxyError.upstreamError?.parsed,
+                  },
+                  request: buildRequestDetails(session),
+                },
+              });
+
               await recordVendorTypeAllEndpointsTimeout(
                 currentProvider.providerVendorId,
                 currentProvider.providerType

+ 3 - 3
src/app/v1/_lib/proxy/provider-selector.ts

@@ -885,7 +885,7 @@ export class ProxyProviderResolver {
           id: p.id,
           name: p.name,
           reason: "circuit_open",
-          details: "供应商类型临时熔断",
+          details: "vendor_type_circuit_open",
         });
         continue;
       }
@@ -896,14 +896,14 @@ export class ProxyProviderResolver {
           id: p.id,
           name: p.name,
           reason: "circuit_open",
-          details: `熔断器${state === "open" ? "打开" : "半开"}`,
+          details: state === "open" ? "circuit_open" : "circuit_half_open",
         });
       } else {
         context.filteredProviders?.push({
           id: p.id,
           name: p.name,
           reason: "rate_limited",
-          details: "费用限制",
+          details: "rate_limited",
         });
       }
     }

+ 2 - 1
src/app/v1/_lib/proxy/session.ts

@@ -460,7 +460,8 @@ export class ProxySession {
         | "retry_with_cached_instructions" // Codex instructions 智能重试(缓存)
         | "client_error_non_retryable" // 不可重试的客户端错误(Prompt 超限、内容过滤、PDF 限制、Thinking 格式)
         | "http2_fallback" // HTTP/2 协议错误,回退到 HTTP/1.1(不切换供应商、不计入熔断器)
-        | "endpoint_pool_exhausted"; // 端点池耗尽(strict endpoint policy 阻止了 fallback)
+        | "endpoint_pool_exhausted" // 端点池耗尽(strict endpoint policy 阻止了 fallback)
+        | "vendor_type_all_timeout"; // 供应商类型全端点超时(524),触发 vendor-type 临时熔断
       selectionMethod?:
         | "session_reuse"
         | "weighted_random"

+ 20 - 0
src/instrumentation.ts

@@ -349,6 +349,16 @@ export async function register() {
         });
       }
 
+      // 初始化端点熔断器(禁用时清理残留状态)
+      try {
+        const { initEndpointCircuitBreaker } = await import("@/lib/endpoint-circuit-breaker");
+        await initEndpointCircuitBreaker();
+      } catch (error) {
+        logger.warn("[Instrumentation] Failed to initialize endpoint circuit breaker", {
+          error: error instanceof Error ? error.message : String(error),
+        });
+      }
+
       try {
         const { startEndpointProbeLogCleanup } = await import(
           "@/lib/provider-endpoints/probe-log-cleanup"
@@ -456,6 +466,16 @@ export async function register() {
           });
         }
 
+        // 初始化端点熔断器(禁用时清理残留状态)
+        try {
+          const { initEndpointCircuitBreaker } = await import("@/lib/endpoint-circuit-breaker");
+          await initEndpointCircuitBreaker();
+        } catch (error) {
+          logger.warn("[Instrumentation] Failed to initialize endpoint circuit breaker", {
+            error: error instanceof Error ? error.message : String(error),
+          });
+        }
+
         try {
           const { startEndpointProbeLogCleanup } = await import(
             "@/lib/provider-endpoints/probe-log-cleanup"

+ 4 - 0
src/lib/config/env.schema.ts

@@ -110,6 +110,10 @@ export const EnvSchema = z.object({
   LOG_LEVEL: z.enum(["fatal", "error", "warn", "info", "debug", "trace"]).default("info"),
   TZ: z.string().default("Asia/Shanghai"),
   ENABLE_CIRCUIT_BREAKER_ON_NETWORK_ERRORS: z.string().default("false").transform(booleanTransform),
+  // 端点级别熔断器开关
+  // - false (默认):禁用端点熔断器,所有端点均可使用
+  // - true:启用端点熔断器,连续失败的端点会被临时屏蔽
+  ENABLE_ENDPOINT_CIRCUIT_BREAKER: z.string().default("false").transform(booleanTransform),
   // 供应商缓存开关
   // - true (默认):启用进程级缓存,30s TTL,提升供应商查询性能
   // - false:禁用缓存,每次请求直接查询数据库

+ 65 - 0
src/lib/endpoint-circuit-breaker.ts

@@ -114,6 +114,11 @@ export async function getEndpointHealthInfo(
 }
 
 export async function isEndpointCircuitOpen(endpointId: number): Promise<boolean> {
+  const { getEnvConfig } = await import("@/lib/config/env.schema");
+  if (!getEnvConfig().ENABLE_ENDPOINT_CIRCUIT_BREAKER) {
+    return false;
+  }
+
   const health = await getOrCreateHealth(endpointId);
 
   if (health.circuitState === "closed") {
@@ -135,6 +140,11 @@ export async function isEndpointCircuitOpen(endpointId: number): Promise<boolean
 }
 
 export async function recordEndpointFailure(endpointId: number, error: Error): Promise<void> {
+  const { getEnvConfig } = await import("@/lib/config/env.schema");
+  if (!getEnvConfig().ENABLE_ENDPOINT_CIRCUIT_BREAKER) {
+    return;
+  }
+
   const health = await getOrCreateHealth(endpointId);
   const config = DEFAULT_ENDPOINT_CIRCUIT_BREAKER_CONFIG;
 
@@ -178,6 +188,11 @@ export async function recordEndpointFailure(endpointId: number, error: Error): P
 }
 
 export async function recordEndpointSuccess(endpointId: number): Promise<void> {
+  const { getEnvConfig } = await import("@/lib/config/env.schema");
+  if (!getEnvConfig().ENABLE_ENDPOINT_CIRCUIT_BREAKER) {
+    return;
+  }
+
   const health = await getOrCreateHealth(endpointId);
   const config = DEFAULT_ENDPOINT_CIRCUIT_BREAKER_CONFIG;
 
@@ -240,6 +255,11 @@ export async function triggerEndpointCircuitBreakerAlert(
   retryAt: string,
   lastError: string
 ): Promise<void> {
+  const { getEnvConfig } = await import("@/lib/config/env.schema");
+  if (!getEnvConfig().ENABLE_ENDPOINT_CIRCUIT_BREAKER) {
+    return;
+  }
+
   try {
     const { sendCircuitBreakerAlert } = await import("@/lib/notification/notifier");
 
@@ -280,3 +300,48 @@ export async function triggerEndpointCircuitBreakerAlert(
     });
   }
 }
+
+/**
+ * Startup initialization for the endpoint circuit breaker.
+ *
+ * If ENABLE_ENDPOINT_CIRCUIT_BREAKER is enabled, this returns immediately and
+ * leaves all existing state untouched. If it is disabled, leftover state is
+ * removed to ensure no stale open states block endpoints:
+ *
+ * 1. The in-memory `healthMap` and `loadedFromRedis` collections are cleared.
+ * 2. Redis keys matching `endpoint_circuit_breaker:state:*` are walked with a
+ *    cursor-based SCAN (COUNT hint of 100) and deleted batch by batch until
+ *    the cursor returns to "0".
+ *
+ * The Redis step is best-effort: it is skipped when no Redis client is
+ * available, and any error raised during it is logged as a warning rather
+ * than rethrown. An info log carrying the number of deleted keys is written
+ * only when at least one key was removed.
+ *
+ * Called once at application startup.
+ *
+ * @returns A promise that resolves once the cleanup has finished or was skipped.
+ */
+export async function initEndpointCircuitBreaker(): Promise<void> {
+  const { getEnvConfig } = await import("@/lib/config/env.schema");
+  if (getEnvConfig().ENABLE_ENDPOINT_CIRCUIT_BREAKER) {
+    return;
+  }
+
+  healthMap.clear();
+  loadedFromRedis.clear();
+
+  try {
+    const { getRedisClient } = await import("@/lib/redis/client");
+    const redis = getRedisClient();
+    if (!redis) return;
+
+    const pattern = "endpoint_circuit_breaker:state:*";
+    let cursor = "0";
+    let deletedCount = 0;
+    do {
+      const [nextCursor, keys] = await redis.scan(cursor, "MATCH", pattern, "COUNT", 100);
+      cursor = nextCursor;
+      if (keys.length > 0) {
+        await redis.del(...keys);
+        deletedCount += keys.length;
+      }
+    } while (cursor !== "0");
+
+    if (deletedCount > 0) {
+      logger.info("[EndpointCircuitBreaker] Cleared stale states on startup (feature disabled)", {
+        deletedCount,
+      });
+    }
+  } catch (error) {
+    logger.warn("[EndpointCircuitBreaker] Failed to clear stale states on startup", {
+      error: error instanceof Error ? error.message : String(error),
+    });
+  }
+}

+ 12 - 0
src/lib/provider-endpoints/endpoint-selector.ts

@@ -41,6 +41,12 @@ export async function getPreferredProviderEndpoints(input: {
     return [];
   }
 
+  // When endpoint circuit breaker is disabled, skip circuit check entirely
+  const { getEnvConfig } = await import("@/lib/config/env.schema");
+  if (!getEnvConfig().ENABLE_ENDPOINT_CIRCUIT_BREAKER) {
+    return rankProviderEndpoints(filtered);
+  }
+
   const circuitResults = await Promise.all(
     filtered.map(async (endpoint) => ({
       endpoint,
@@ -74,6 +80,12 @@ export async function getEndpointFilterStats(input: {
   const total = endpoints.length;
   const enabled = endpoints.filter((e) => e.isEnabled && !e.deletedAt).length;
 
+  // When endpoint circuit breaker is disabled, no endpoints can be circuit-open
+  const { getEnvConfig } = await import("@/lib/config/env.schema");
+  if (!getEnvConfig().ENABLE_ENDPOINT_CIRCUIT_BREAKER) {
+    return { total, enabled, circuitOpen: 0, available: enabled };
+  }
+
   const circuitResults = await Promise.all(
     endpoints
       .filter((e) => e.isEnabled && !e.deletedAt)

+ 104 - 0
src/lib/utils/provider-chain-formatter.test.ts

@@ -271,6 +271,110 @@ describe("endpoint_pool_exhausted", () => {
   });
 });
 
+// =============================================================================
+// vendor_type_all_timeout reason: summary, description and timeline formatting tests
+// =============================================================================
+
+describe("vendor_type_all_timeout", () => {
+  // ---------------------------------------------------------------------------
+  // Shared fixtures: one chain item with full errorDetails, one with item-level fields only
+  // ---------------------------------------------------------------------------
+  const vendorTypeTimeoutItem: ProviderChainItem = {
+    id: 1,
+    name: "provider-timeout",
+    reason: "vendor_type_all_timeout",
+    timestamp: 1000,
+    statusCode: 524,
+    attemptNumber: 1,
+    errorMessage: "All endpoints timed out",
+    errorDetails: {
+      provider: {
+        id: 1,
+        name: "provider-timeout",
+        statusCode: 524,
+        statusText: "Origin Time-out",
+      },
+      request: {
+        method: "POST",
+        url: "https://api.example.com/v1/messages",
+        headers: "content-type: application/json",
+      },
+    },
+  };
+
+  const vendorTypeTimeoutNoDetails: ProviderChainItem = {
+    id: 1,
+    name: "provider-timeout",
+    reason: "vendor_type_all_timeout",
+    timestamp: 1000,
+    statusCode: 524,
+    errorMessage: "All endpoints timed out",
+  };
+
+  // ---------------------------------------------------------------------------
+  // formatProviderSummary: the item must be rendered with the failure mark (U+2717)
+  // ---------------------------------------------------------------------------
+
+  describe("formatProviderSummary", () => {
+    test("renders vendor_type_all_timeout with failure mark", () => {
+      const chain: ProviderChainItem[] = [vendorTypeTimeoutItem];
+      const result = formatProviderSummary(chain, mockT);
+
+      expect(result).toContain("provider-timeout");
+      expect(result).toContain("\u2717");
+    });
+  });
+
+  // ---------------------------------------------------------------------------
+  // formatProviderDescription: the description.vendorTypeAllTimeout key must appear
+  // ---------------------------------------------------------------------------
+
+  describe("formatProviderDescription", () => {
+    test("shows vendor type all timeout label", () => {
+      const chain: ProviderChainItem[] = [vendorTypeTimeoutItem];
+      const result = formatProviderDescription(chain, mockT);
+
+      expect(result).toContain("description.vendorTypeAllTimeout");
+    });
+  });
+
+  // ---------------------------------------------------------------------------
+  // formatProviderTimeline: covered with and without errorDetails on the item
+  // ---------------------------------------------------------------------------
+
+  describe("formatProviderTimeline", () => {
+    test("renders vendor_type_all_timeout with provider, statusCode, error, and note", () => {
+      const chain: ProviderChainItem[] = [vendorTypeTimeoutItem];
+      const { timeline } = formatProviderTimeline(chain, mockT);
+
+      // Title line
+      expect(timeline).toContain("timeline.vendorTypeAllTimeout");
+      // Provider name
+      expect(timeline).toContain("timeline.provider [provider=provider-timeout]");
+      // Status code
+      expect(timeline).toContain("timeline.statusCode [code=524]");
+      // Error text comes from errorDetails.provider.statusText
+      expect(timeline).toContain("timeline.error [error=Origin Time-out]");
+      // Trailing note
+      expect(timeline).toContain("timeline.vendorTypeAllTimeoutNote");
+    });
+
+    test("renders vendor_type_all_timeout without error details", () => {
+      const chain: ProviderChainItem[] = [vendorTypeTimeoutNoDetails];
+      const { timeline } = formatProviderTimeline(chain, mockT);
+
+      // Should still render without crashing
+      expect(timeline).toContain("timeline.vendorTypeAllTimeout");
+      // Falls back to the item-level name, statusCode and errorMessage
+      expect(timeline).toContain("timeline.provider [provider=provider-timeout]");
+      expect(timeline).toContain("timeline.statusCode [code=524]");
+      expect(timeline).toContain("timeline.error [error=All endpoints timed out]");
+      // Note is always present
+      expect(timeline).toContain("timeline.vendorTypeAllTimeoutNote");
+    });
+  });
+});
+
 // =============================================================================
 // Unknown reason graceful degradation
 // =============================================================================

+ 53 - 3
src/lib/utils/provider-chain-formatter.ts

@@ -64,7 +64,8 @@ function getProviderStatus(item: ProviderChainItem): "✓" | "✗" | "⚡" | "
     item.reason === "retry_failed" ||
     item.reason === "system_error" ||
     item.reason === "client_error_non_retryable" ||
-    item.reason === "endpoint_pool_exhausted"
+    item.reason === "endpoint_pool_exhausted" ||
+    item.reason === "vendor_type_all_timeout"
   ) {
     return "✗";
   }
@@ -92,7 +93,8 @@ function isActualRequest(item: ProviderChainItem): boolean {
     item.reason === "retry_failed" ||
     item.reason === "system_error" ||
     item.reason === "client_error_non_retryable" ||
-    item.reason === "endpoint_pool_exhausted"
+    item.reason === "endpoint_pool_exhausted" ||
+    item.reason === "vendor_type_all_timeout"
   ) {
     return true;
   }
@@ -313,6 +315,8 @@ export function formatProviderDescription(
         desc += ` ${t("description.clientError")}`;
       } else if (item.reason === "endpoint_pool_exhausted") {
         desc += ` ${t("description.endpointPoolExhausted")}`;
+      } else if (item.reason === "vendor_type_all_timeout") {
+        desc += ` ${t("description.vendorTypeAllTimeout")}`;
       }
 
       desc += "\n";
@@ -408,7 +412,12 @@ export function formatProviderTimeline(
         timeline += `\n${t("timeline.filtered")}:\n`;
         for (const f of ctx.filteredProviders) {
           const icon = f.reason === "circuit_open" ? "⚡" : "💰";
-          timeline += `  ${icon} ${f.name} (${f.details || f.reason})\n`;
+          const detailsText = f.details
+            ? t(`filterDetails.${f.details}`) !== `filterDetails.${f.details}`
+              ? t(`filterDetails.${f.details}`)
+              : f.details
+            : f.reason;
+          timeline += `  ${icon} ${f.name} (${detailsText})\n`;
         }
       }
 
@@ -742,6 +751,47 @@ export function formatProviderTimeline(
       continue;
     }
 
+    // === 供应商类型全端点超时(524) ===
+    if (item.reason === "vendor_type_all_timeout") {
+      timeline += `${t("timeline.vendorTypeAllTimeout")}\n\n`;
+
+      if (item.errorDetails?.provider) {
+        const p = item.errorDetails.provider;
+        timeline += `${t("timeline.provider", { provider: p.name })}\n`;
+        timeline += `${t("timeline.statusCode", { code: p.statusCode })}\n`;
+        timeline += `${t("timeline.error", { error: p.statusText })}\n`;
+
+        if (i > 0 && item.timestamp && chain[i - 1]?.timestamp) {
+          const duration = item.timestamp - (chain[i - 1]?.timestamp || 0);
+          timeline += `${t("timeline.requestDuration", { duration })}\n`;
+        }
+
+        if (p.upstreamParsed) {
+          timeline += `\n${t("timeline.errorDetails")}:\n`;
+          timeline += JSON.stringify(p.upstreamParsed, null, 2);
+        } else if (p.upstreamBody) {
+          timeline += `\n${t("timeline.errorDetails")}:\n${p.upstreamBody}`;
+        }
+
+        if (item.errorDetails?.request) {
+          timeline += formatRequestDetails(item.errorDetails.request, t);
+        }
+      } else {
+        timeline += `${t("timeline.provider", { provider: item.name })}\n`;
+        if (item.statusCode) {
+          timeline += `${t("timeline.statusCode", { code: item.statusCode })}\n`;
+        }
+        timeline += `${t("timeline.error", { error: item.errorMessage || t("timeline.unknown") })}\n`;
+
+        if (item.errorDetails?.request) {
+          timeline += formatRequestDetails(item.errorDetails.request, t);
+        }
+      }
+
+      timeline += `\n${t("timeline.vendorTypeAllTimeoutNote")}`;
+      continue;
+    }
+
     // 并发限制失败
     if (item.reason === "concurrent_limit_failed") {
       timeline += `${t("timeline.attemptFailed", { attempt: actualAttemptNumber ?? 0 })}\n\n`;

+ 2 - 1
src/types/message.ts

@@ -33,7 +33,8 @@ export interface ProviderChainItem {
     | "retry_with_cached_instructions" // Codex instructions 智能重试(缓存)
     | "client_error_non_retryable" // 不可重试的客户端错误(Prompt 超限、内容过滤、PDF 限制、Thinking 格式)
     | "http2_fallback" // HTTP/2 协议错误,回退到 HTTP/1.1(不切换供应商、不计入熔断器)
-    | "endpoint_pool_exhausted"; // 端点池耗尽(所有端点熔断或不可用,严格模式阻止降级)
+    | "endpoint_pool_exhausted" // 端点池耗尽(所有端点熔断或不可用,严格模式阻止降级)
+    | "vendor_type_all_timeout"; // 供应商类型全端点超时(524),触发 vendor-type 临时熔断
 
   // === 选择方法(细化) ===
   selectionMethod?:

+ 202 - 10
tests/unit/lib/endpoint-circuit-breaker.test.ts

@@ -31,9 +31,6 @@ afterEach(() => {
 
 describe("endpoint-circuit-breaker", () => {
   test("达到阈值后应打开熔断;到期后进入 half-open;成功后关闭并清零", async () => {
-    vi.useFakeTimers();
-    vi.setSystemTime(new Date("2026-01-01T00:00:00.000Z"));
-
     vi.resetModules();
 
     let redisState: SavedEndpointCircuitState | null = null;
@@ -45,6 +42,9 @@ describe("endpoint-circuit-breaker", () => {
       redisState = null;
     });
 
+    vi.doMock("@/lib/config/env.schema", () => ({
+      getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: true }),
+    }));
     vi.doMock("@/lib/logger", () => ({ logger: createLoggerMock() }));
     const sendAlertMock = vi.fn(async () => {});
     vi.doMock("@/lib/notification/notifier", () => ({
@@ -56,6 +56,9 @@ describe("endpoint-circuit-breaker", () => {
       deleteEndpointCircuitState: deleteMock,
     }));
 
+    vi.useFakeTimers();
+    vi.setSystemTime(new Date("2026-01-01T00:00:00.000Z"));
+
     const {
       isEndpointCircuitOpen,
       recordEndpointFailure,
@@ -74,6 +77,10 @@ describe("endpoint-circuit-breaker", () => {
     expect(openState.failureCount).toBe(3);
     expect(openState.circuitOpenUntil).toBe(Date.now() + 300000);
 
+    // Prime env module cache: under fake timers, dynamic import() inside isEndpointCircuitOpen
+    // may fail to resolve the vi.doMock unless the module is already in the import cache.
+    await import("@/lib/config/env.schema");
+
     expect(await isEndpointCircuitOpen(1)).toBe(true);
 
     vi.advanceTimersByTime(300000 + 1);
@@ -110,14 +117,17 @@ describe("endpoint-circuit-breaker", () => {
   });
 
   test("recordEndpointSuccess: closed 且 failureCount>0 时应清零", async () => {
-    vi.useFakeTimers();
-    vi.setSystemTime(new Date("2026-01-01T00:00:00.000Z"));
-
     vi.resetModules();
 
     const saveMock = vi.fn(async () => {});
 
+    vi.doMock("@/lib/config/env.schema", () => ({
+      getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: true }),
+    }));
     vi.doMock("@/lib/logger", () => ({ logger: createLoggerMock() }));
+
+    vi.useFakeTimers();
+    vi.setSystemTime(new Date("2026-01-01T00:00:00.000Z"));
     vi.doMock("@/lib/redis/endpoint-circuit-breaker-state", () => ({
       loadEndpointCircuitState: vi.fn(async () => null),
       saveEndpointCircuitState: saveMock,
@@ -145,6 +155,9 @@ describe("endpoint-circuit-breaker", () => {
     vi.resetModules();
 
     const sendAlertMock = vi.fn(async () => {});
+    vi.doMock("@/lib/config/env.schema", () => ({
+      getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: true }),
+    }));
     vi.doMock("@/lib/logger", () => ({ logger: createLoggerMock() }));
     vi.doMock("@/lib/notification/notifier", () => ({
       sendCircuitBreakerAlert: sendAlertMock,
@@ -183,6 +196,9 @@ describe("endpoint-circuit-breaker", () => {
     vi.resetModules();
 
     const sendAlertMock = vi.fn(async () => {});
+    vi.doMock("@/lib/config/env.schema", () => ({
+      getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: true }),
+    }));
     vi.doMock("@/lib/notification/notifier", () => ({
       sendCircuitBreakerAlert: sendAlertMock,
     }));
@@ -229,9 +245,6 @@ describe("endpoint-circuit-breaker", () => {
   });
 
   test("recordEndpointFailure should NOT reset circuitOpenUntil when already open", async () => {
-    vi.useFakeTimers();
-    vi.setSystemTime(new Date("2026-01-01T00:00:00.000Z"));
-
     vi.resetModules();
 
     let redisState: SavedEndpointCircuitState | null = null;
@@ -239,6 +252,9 @@ describe("endpoint-circuit-breaker", () => {
       redisState = state;
     });
 
+    vi.doMock("@/lib/config/env.schema", () => ({
+      getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: true }),
+    }));
     vi.doMock("@/lib/logger", () => ({ logger: createLoggerMock() }));
     vi.doMock("@/lib/notification/notifier", () => ({
       sendCircuitBreakerAlert: vi.fn(async () => {}),
@@ -249,7 +265,10 @@ describe("endpoint-circuit-breaker", () => {
       deleteEndpointCircuitState: vi.fn(async () => {}),
     }));
 
-    const { recordEndpointFailure, isEndpointCircuitOpen } = await import(
+    vi.useFakeTimers();
+    vi.setSystemTime(new Date("2026-01-01T00:00:00.000Z"));
+
+    const { recordEndpointFailure, isEndpointCircuitOpen, getEndpointHealthInfo } = await import(
       "@/lib/endpoint-circuit-breaker"
     );
 
@@ -258,6 +277,15 @@ describe("endpoint-circuit-breaker", () => {
     await recordEndpointFailure(100, new Error("fail"));
     await recordEndpointFailure(100, new Error("fail"));
 
+    // Verify circuit was opened (also serves as async flush before isEndpointCircuitOpen)
+    const { health: healthSnap } = await getEndpointHealthInfo(100);
+    expect(healthSnap.circuitState).toBe("open");
+
+    // Prime the env module cache: under fake timers, the dynamic import("@/lib/config/env.schema")
+    // inside isEndpointCircuitOpen may fail to resolve the mock unless the module is already cached.
+    const envMod = await import("@/lib/config/env.schema");
+    expect(envMod.getEnvConfig().ENABLE_ENDPOINT_CIRCUIT_BREAKER).toBe(true);
+
     expect(await isEndpointCircuitOpen(100)).toBe(true);
     const originalOpenUntil = redisState!.circuitOpenUntil;
     expect(originalOpenUntil).toBe(Date.now() + 300000);
@@ -274,6 +302,9 @@ describe("endpoint-circuit-breaker", () => {
   test("getEndpointCircuitStateSync returns correct state for known and unknown endpoints", async () => {
     vi.resetModules();
 
+    vi.doMock("@/lib/config/env.schema", () => ({
+      getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: true }),
+    }));
     vi.doMock("@/lib/logger", () => ({ logger: createLoggerMock() }));
     vi.doMock("@/lib/notification/notifier", () => ({
       sendCircuitBreakerAlert: vi.fn(async () => {}),
@@ -297,4 +328,165 @@ describe("endpoint-circuit-breaker", () => {
     await recordEndpointFailure(200, new Error("c"));
     expect(getEndpointCircuitStateSync(200)).toBe("open");
   });
+
+  describe("ENABLE_ENDPOINT_CIRCUIT_BREAKER disabled", () => {
+    test("isEndpointCircuitOpen returns false when ENABLE_ENDPOINT_CIRCUIT_BREAKER=false", async () => {
+      vi.resetModules();
+      // Mocks are registered before the dynamic import below so the module sees the flag off.
+      vi.doMock("@/lib/config/env.schema", () => ({
+        getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: false }),
+      }));
+      vi.doMock("@/lib/logger", () => ({ logger: createLoggerMock() }));
+      vi.doMock("@/lib/redis/endpoint-circuit-breaker-state", () => ({
+        loadEndpointCircuitState: vi.fn(async () => null),
+        saveEndpointCircuitState: vi.fn(async () => {}),
+        deleteEndpointCircuitState: vi.fn(async () => {}),
+      }));
+
+      const { isEndpointCircuitOpen } = await import("@/lib/endpoint-circuit-breaker");
+      // With the feature disabled, every endpoint id is expected to report a closed circuit.
+      expect(await isEndpointCircuitOpen(1)).toBe(false);
+      expect(await isEndpointCircuitOpen(999)).toBe(false);
+    });
+
+    test("recordEndpointFailure is no-op when disabled", async () => {
+      vi.resetModules();
+      // saveMock replaces saveEndpointCircuitState so any persistence attempt can be detected.
+      const saveMock = vi.fn(async () => {});
+
+      vi.doMock("@/lib/config/env.schema", () => ({
+        getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: false }),
+      }));
+      vi.doMock("@/lib/logger", () => ({ logger: createLoggerMock() }));
+      vi.doMock("@/lib/redis/endpoint-circuit-breaker-state", () => ({
+        loadEndpointCircuitState: vi.fn(async () => null),
+        saveEndpointCircuitState: saveMock,
+        deleteEndpointCircuitState: vi.fn(async () => {}),
+      }));
+
+      const { recordEndpointFailure } = await import("@/lib/endpoint-circuit-breaker");
+      // NOTE(review): three failures presumably match the open threshold when enabled - confirm.
+      await recordEndpointFailure(1, new Error("boom"));
+      await recordEndpointFailure(1, new Error("boom"));
+      await recordEndpointFailure(1, new Error("boom"));
+      // A disabled breaker must not write any state.
+      expect(saveMock).not.toHaveBeenCalled();
+    });
+
+    test("recordEndpointSuccess is no-op when disabled", async () => {
+      vi.resetModules();
+      // saveMock replaces saveEndpointCircuitState so any persistence attempt can be detected.
+      const saveMock = vi.fn(async () => {});
+
+      vi.doMock("@/lib/config/env.schema", () => ({
+        getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: false }),
+      }));
+      vi.doMock("@/lib/logger", () => ({ logger: createLoggerMock() }));
+      vi.doMock("@/lib/redis/endpoint-circuit-breaker-state", () => ({
+        loadEndpointCircuitState: vi.fn(async () => null),
+        saveEndpointCircuitState: saveMock,
+        deleteEndpointCircuitState: vi.fn(async () => {}),
+      }));
+
+      const { recordEndpointSuccess } = await import("@/lib/endpoint-circuit-breaker");
+
+      await recordEndpointSuccess(1);
+      // A disabled breaker must not write any state on success either.
+      expect(saveMock).not.toHaveBeenCalled();
+    });
+
+    test("triggerEndpointCircuitBreakerAlert is no-op when disabled", async () => {
+      vi.resetModules();
+      // sendAlertMock replaces sendCircuitBreakerAlert so an outgoing alert can be detected.
+      const sendAlertMock = vi.fn(async () => {});
+
+      vi.doMock("@/lib/config/env.schema", () => ({
+        getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: false }),
+      }));
+      vi.doMock("@/lib/logger", () => ({ logger: createLoggerMock() }));
+      vi.doMock("@/lib/notification/notifier", () => ({
+        sendCircuitBreakerAlert: sendAlertMock,
+      }));
+      vi.doMock("@/lib/redis/endpoint-circuit-breaker-state", () => ({
+        loadEndpointCircuitState: vi.fn(async () => null),
+        saveEndpointCircuitState: vi.fn(async () => {}),
+        deleteEndpointCircuitState: vi.fn(async () => {}),
+      }));
+
+      const { triggerEndpointCircuitBreakerAlert } = await import("@/lib/endpoint-circuit-breaker");
+
+      await triggerEndpointCircuitBreakerAlert(
+        5,
+        3,
+        "2026-01-01T00:05:00.000Z",
+        "connection refused"
+      );
+      // No alert may be sent while the feature is disabled.
+      expect(sendAlertMock).not.toHaveBeenCalled();
+    });
+
+    test("initEndpointCircuitBreaker clears in-memory state and Redis keys when disabled", async () => {
+      vi.resetModules();
+      // Redis stub: one SCAN page with cursor "0" (presumably ends the scan loop) and two keys.
+      const redisMock = {
+        scan: vi
+          .fn()
+          .mockResolvedValueOnce([
+            "0",
+            ["endpoint_circuit_breaker:state:1", "endpoint_circuit_breaker:state:2"],
+          ]),
+        del: vi.fn(async () => {}),
+      };
+
+      vi.doMock("@/lib/config/env.schema", () => ({
+        getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: false }),
+      }));
+      vi.doMock("@/lib/logger", () => ({ logger: createLoggerMock() }));
+      vi.doMock("@/lib/redis/client", () => ({
+        getRedisClient: () => redisMock,
+      }));
+      vi.doMock("@/lib/redis/endpoint-circuit-breaker-state", () => ({
+        loadEndpointCircuitState: vi.fn(async () => null),
+        saveEndpointCircuitState: vi.fn(async () => {}),
+        deleteEndpointCircuitState: vi.fn(async () => {}),
+      }));
+
+      const { initEndpointCircuitBreaker } = await import("@/lib/endpoint-circuit-breaker");
+      await initEndpointCircuitBreaker();
+      // NOTE(review): only the Redis cleanup is asserted; in-memory clearing is not checked here.
+      expect(redisMock.scan).toHaveBeenCalled();
+      expect(redisMock.del).toHaveBeenCalledWith(
+        "endpoint_circuit_breaker:state:1",
+        "endpoint_circuit_breaker:state:2"
+      );
+    });
+
+    test("initEndpointCircuitBreaker is no-op when enabled", async () => {
+      vi.resetModules();
+      // Bare spies: with the feature enabled, init is expected never to touch Redis.
+      const redisMock = {
+        scan: vi.fn(),
+        del: vi.fn(),
+      };
+
+      vi.doMock("@/lib/config/env.schema", () => ({
+        getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: true }),
+      }));
+      vi.doMock("@/lib/logger", () => ({ logger: createLoggerMock() }));
+      vi.doMock("@/lib/redis/client", () => ({
+        getRedisClient: () => redisMock,
+      }));
+      vi.doMock("@/lib/redis/endpoint-circuit-breaker-state", () => ({
+        loadEndpointCircuitState: vi.fn(async () => null),
+        saveEndpointCircuitState: vi.fn(async () => {}),
+        deleteEndpointCircuitState: vi.fn(async () => {}),
+      }));
+
+      const { initEndpointCircuitBreaker } = await import("@/lib/endpoint-circuit-breaker");
+      await initEndpointCircuitBreaker();
+      // Enabled feature => no startup cleanup of stored circuit state.
+      expect(redisMock.scan).not.toHaveBeenCalled();
+      expect(redisMock.del).not.toHaveBeenCalled();
+    });
+  });
 });

+ 90 - 0
tests/unit/lib/provider-endpoints/endpoint-selector.test.ts

@@ -109,6 +109,9 @@ describe("provider-endpoints: endpoint-selector", () => {
     vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
       isEndpointCircuitOpen: isOpenMock,
     }));
+    vi.doMock("@/lib/config/env.schema", () => ({
+      getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: true }),
+    }));
 
     const { getPreferredProviderEndpoints, pickBestProviderEndpoint } = await import(
       "@/lib/provider-endpoints/endpoint-selector"
@@ -140,6 +143,9 @@ describe("provider-endpoints: endpoint-selector", () => {
     vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
       isEndpointCircuitOpen: isOpenMock,
     }));
+    vi.doMock("@/lib/config/env.schema", () => ({
+      getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: true }),
+    }));
 
     const { getPreferredProviderEndpoints, pickBestProviderEndpoint } = await import(
       "@/lib/provider-endpoints/endpoint-selector"
@@ -177,6 +183,9 @@ describe("getEndpointFilterStats", () => {
     vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
       isEndpointCircuitOpen: isOpenMock,
     }));
+    vi.doMock("@/lib/config/env.schema", () => ({
+      getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: true }),
+    }));
 
     const { getEndpointFilterStats } = await import("@/lib/provider-endpoints/endpoint-selector");
     const stats = await getEndpointFilterStats({ vendorId: 10, providerType: "claude" });
@@ -202,6 +211,9 @@ describe("getEndpointFilterStats", () => {
     vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
       isEndpointCircuitOpen: isOpenMock,
     }));
+    vi.doMock("@/lib/config/env.schema", () => ({
+      getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: true }),
+    }));
 
     const { getEndpointFilterStats } = await import("@/lib/provider-endpoints/endpoint-selector");
     const stats = await getEndpointFilterStats({ vendorId: 99, providerType: "codex" });
@@ -232,6 +244,9 @@ describe("getEndpointFilterStats", () => {
     vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
       isEndpointCircuitOpen: isOpenMock,
     }));
+    vi.doMock("@/lib/config/env.schema", () => ({
+      getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: true }),
+    }));
 
     const { getEndpointFilterStats } = await import("@/lib/provider-endpoints/endpoint-selector");
     const stats = await getEndpointFilterStats({ vendorId: 1, providerType: "openai-compatible" });
@@ -244,3 +259,78 @@ describe("getEndpointFilterStats", () => {
     });
   });
 });
+
+describe("ENABLE_ENDPOINT_CIRCUIT_BREAKER disabled", () => {
+  test("getPreferredProviderEndpoints skips circuit check when disabled", async () => {
+    vi.resetModules();
+    // Fixture: ids 1-3 use makeEndpoint defaults (assumed enabled); 4 is disabled, 5 has deletedAt.
+    const endpoints: ProviderEndpoint[] = [
+      makeEndpoint({ id: 1, lastProbeOk: true, sortOrder: 0, lastProbeLatencyMs: 100 }),
+      makeEndpoint({ id: 2, lastProbeOk: true, sortOrder: 1, lastProbeLatencyMs: 50 }),
+      makeEndpoint({ id: 3, lastProbeOk: false, sortOrder: 0, lastProbeLatencyMs: 10 }),
+      makeEndpoint({ id: 4, isEnabled: false }),
+      makeEndpoint({ id: 5, deletedAt: new Date(1) }),
+    ];
+    // isOpenMock always answers "open"; the test asserts below that it is never consulted.
+    const findMock = vi.fn(async () => endpoints);
+    const isOpenMock = vi.fn(async () => true);
+
+    vi.doMock("@/repository", () => ({
+      findProviderEndpointsByVendorAndType: findMock,
+    }));
+    vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
+      isEndpointCircuitOpen: isOpenMock,
+    }));
+    vi.doMock("@/lib/config/env.schema", () => ({
+      getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: false }),
+    }));
+
+    const { getPreferredProviderEndpoints } = await import(
+      "@/lib/provider-endpoints/endpoint-selector"
+    );
+
+    const result = await getPreferredProviderEndpoints({
+      vendorId: 1,
+      providerType: "claude",
+    });
+
+    expect(isOpenMock).not.toHaveBeenCalled();
+    // All enabled, non-deleted endpoints returned (id=1,2,3), ranked by sortOrder/health
+    expect(result.map((e) => e.id)).toEqual([1, 2, 3]);
+  });
+
+  test("getEndpointFilterStats returns circuitOpen=0 when disabled", async () => {
+    vi.resetModules();
+    // Fixture: two enabled endpoints (one with a failed probe), one disabled, one with deletedAt.
+    const endpoints: ProviderEndpoint[] = [
+      makeEndpoint({ id: 1, isEnabled: true, lastProbeOk: true }),
+      makeEndpoint({ id: 2, isEnabled: true, lastProbeOk: false }),
+      makeEndpoint({ id: 3, isEnabled: false }),
+      makeEndpoint({ id: 4, deletedAt: new Date(1) }),
+    ];
+    // isOpenMock always answers "open"; the test asserts below that it is never consulted.
+    const findMock = vi.fn(async () => endpoints);
+    const isOpenMock = vi.fn(async () => true);
+
+    vi.doMock("@/repository", () => ({
+      findProviderEndpointsByVendorAndType: findMock,
+    }));
+    vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
+      isEndpointCircuitOpen: isOpenMock,
+    }));
+    vi.doMock("@/lib/config/env.schema", () => ({
+      getEnvConfig: () => ({ ENABLE_ENDPOINT_CIRCUIT_BREAKER: false }),
+    }));
+
+    const { getEndpointFilterStats } = await import("@/lib/provider-endpoints/endpoint-selector");
+    const stats = await getEndpointFilterStats({ vendorId: 10, providerType: "claude" });
+
+    expect(isOpenMock).not.toHaveBeenCalled();
+    expect(stats).toEqual({
+      total: 4,
+      enabled: 2, // id=1,2 (isEnabled && !deletedAt)
+      circuitOpen: 0, // always 0 when disabled
+      available: 2, // equals enabled when disabled
+    });
+  });
+});