Jelajahi Sumber

merge: resolve conflicts with origin/dev

ding113 5 hari lalu
induk
komit
03c33f02ca
93 file diubah dengan 9220 penambahan dan 386 penghapusan
  1. 14 0
      .env.example
  2. 1 1
      .github/workflows/test.yml
  3. 1 0
      .gitignore
  4. 8 8
      Dockerfile
  5. 4 1
      README.en.md
  6. 4 1
      README.md
  7. 7 1
      docker-compose.yaml
  8. 1 1
      drizzle/0062_aromatic_taskmaster.sql
  9. 1 0
      drizzle/0064_harsh_dragon_lord.sql
  10. 0 0
      drizzle/0065_stale_vertigo.sql
  11. 9 3
      drizzle/meta/0064_snapshot.json
  12. 2976 0
      drizzle/meta/0065_snapshot.json
  13. 9 2
      drizzle/meta/_journal.json
  14. 3 2
      messages/en/dashboard.json
  15. 4 1
      messages/en/settings/providers/filter.json
  16. 6 0
      messages/en/settings/providers/form/sections.json
  17. 15 0
      messages/en/settings/providers/inlineEdit.json
  18. 7 1
      messages/en/settings/providers/list.json
  19. 3 2
      messages/ja/dashboard.json
  20. 4 1
      messages/ja/settings/providers/filter.json
  21. 6 0
      messages/ja/settings/providers/form/sections.json
  22. 15 0
      messages/ja/settings/providers/inlineEdit.json
  23. 7 1
      messages/ja/settings/providers/list.json
  24. 3 2
      messages/ru/dashboard.json
  25. 4 1
      messages/ru/settings/providers/filter.json
  26. 6 0
      messages/ru/settings/providers/form/sections.json
  27. 15 0
      messages/ru/settings/providers/inlineEdit.json
  28. 7 1
      messages/ru/settings/providers/list.json
  29. 3 2
      messages/zh-CN/dashboard.json
  30. 4 1
      messages/zh-CN/settings/providers/filter.json
  31. 6 0
      messages/zh-CN/settings/providers/form/sections.json
  32. 21 6
      messages/zh-CN/settings/providers/inlineEdit.json
  33. 7 1
      messages/zh-CN/settings/providers/list.json
  34. 3 2
      messages/zh-TW/dashboard.json
  35. 4 1
      messages/zh-TW/settings/providers/filter.json
  36. 6 0
      messages/zh-TW/settings/providers/form/sections.json
  37. 15 0
      messages/zh-TW/settings/providers/inlineEdit.json
  38. 7 1
      messages/zh-TW/settings/providers/list.json
  39. 2 1
      package.json
  40. 15 0
      scripts/copy-version-to-standalone.cjs
  41. 1 0
      scripts/deploy.ps1
  42. 1 0
      scripts/deploy.sh
  43. 2 0
      src/actions/providers.ts
  44. 16 0
      src/app/[locale]/dashboard/logs/_components/error-details-dialog.test.tsx
  45. 10 0
      src/app/[locale]/dashboard/logs/_components/error-details-dialog/components/SummaryTab.tsx
  46. 20 0
      src/app/[locale]/dashboard/logs/_components/provider-chain-popover.test.tsx
  47. 19 0
      src/app/[locale]/dashboard/logs/_components/provider-chain-popover.tsx
  48. 0 1
      src/app/[locale]/dashboard/logs/_hooks/use-lazy-filter-options.ts
  49. 4 0
      src/app/[locale]/settings/providers/_components/forms/provider-form/index.tsx
  50. 3 0
      src/app/[locale]/settings/providers/_components/forms/provider-form/provider-form-context.tsx
  51. 2 0
      src/app/[locale]/settings/providers/_components/forms/provider-form/provider-form-types.ts
  52. 40 0
      src/app/[locale]/settings/providers/_components/forms/provider-form/sections/routing-section.tsx
  53. 315 0
      src/app/[locale]/settings/providers/_components/group-edit-combobox.tsx
  54. 129 66
      src/app/[locale]/settings/providers/_components/inline-edit-popover.tsx
  55. 311 0
      src/app/[locale]/settings/providers/_components/priority-edit-popover.tsx
  56. 13 1
      src/app/[locale]/settings/providers/_components/provider-list.tsx
  57. 215 71
      src/app/[locale]/settings/providers/_components/provider-manager.tsx
  58. 298 73
      src/app/[locale]/settings/providers/_components/provider-rich-list-item.tsx
  59. 35 0
      src/app/v1/_lib/proxy/forwarder.ts
  60. 55 14
      src/app/v1/_lib/proxy/provider-selector.ts
  61. 503 90
      src/app/v1/_lib/proxy/response-handler.ts
  62. 55 0
      src/app/v1/_lib/proxy/stream-finalization.ts
  63. 124 0
      src/components/ui/drawer.tsx
  64. 1 0
      src/drizzle/schema.ts
  65. 71 3
      src/instrumentation.ts
  66. 13 8
      src/lib/auth.ts
  67. 8 0
      src/lib/config/env.schema.ts
  68. 20 0
      src/lib/hooks/use-media-query.ts
  69. 6 3
      src/lib/redis/client.ts
  70. 2 0
      src/lib/redis/pubsub.ts
  71. 408 0
      src/lib/security/api-key-auth-cache.ts
  72. 384 0
      src/lib/security/api-key-vacuum-filter.ts
  73. 9 1
      src/lib/session-manager.ts
  74. 213 0
      src/lib/utils/upstream-error-detection.test.ts
  75. 249 0
      src/lib/utils/upstream-error-detection.ts
  76. 23 0
      src/lib/vacuum-filter/random.ts
  77. 606 0
      src/lib/vacuum-filter/vacuum-filter.ts
  78. 13 4
      src/lib/validation/schemas.ts
  79. 1 0
      src/repository/_shared/transformers.ts
  80. 142 4
      src/repository/key.ts
  81. 7 0
      src/repository/provider.ts
  82. 11 2
      src/repository/user.ts
  83. 4 0
      src/types/provider.ts
  84. 44 0
      tests/unit/lib/env-store-session-response-body.test.ts
  85. 12 0
      tests/unit/lib/session-manager-redaction.test.ts
  86. 201 0
      tests/unit/proxy/provider-selector-group-priority.test.ts
  87. 465 0
      tests/unit/security/api-key-auth-cache-redis-key.test.ts
  88. 400 0
      tests/unit/security/api-key-auth-cache.test.ts
  89. 85 0
      tests/unit/security/api-key-vacuum-filter-build.test.ts
  90. 83 0
      tests/unit/security/api-key-vacuum-filter-reloading.test.ts
  91. 41 0
      tests/unit/security/api-key-vacuum-filter-shortcircuit.test.ts
  92. 161 0
      tests/unit/security/auth-validateKey-cache.test.ts
  93. 133 0
      tests/unit/vacuum-filter/vacuum-filter.test.ts

+ 14 - 0
.env.example

@@ -8,6 +8,11 @@ AUTO_MIGRATE=true
 # 数据库连接字符串(仅用于本地开发或非 Docker Compose 部署)
 DSN="postgres://user:password@host:port/db_name"
 
+# API Key Vacuum Filter(真空过滤器)
+# - true (默认):启用。用于在访问 DB 前“负向短路”无效 key,降低 DB 压力、抵御爆破
+# - false:禁用(例如:需要排查问题或节省内存时)
+ENABLE_API_KEY_VACUUM_FILTER="true"
+
 # PostgreSQL 连接池配置(postgres.js)
 # 说明:
 # - 这些值是“每个应用进程”的连接池上限;k8s 多副本时需要按副本数分摊
@@ -58,12 +63,21 @@ REDIS_TLS_REJECT_UNAUTHORIZED=true      # 是否验证 Redis TLS 证书(默认
                                         # 设置为 false 可跳过证书验证,用于自签证书或共享证书场景
                                         # 仅在 rediss:// 协议时生效
 
+# API Key 鉴权缓存(Vacuum Filter -> Redis -> DB)
+# 说明:需要 ENABLE_RATE_LIMIT=true 且配置 REDIS_URL 才会启用 Redis 缓存;否则自动回落到 DB。
+API_KEY_AUTH_CACHE_TTL_SECONDS="60"      # 鉴权缓存 TTL(秒,默认 60,最大 3600)
+ENABLE_API_KEY_REDIS_CACHE="true"        # 是否启用 API Key Redis 缓存(默认:true)
+
 # Session 配置
 SESSION_TTL=300                         # Session 过期时间(秒,默认 300 = 5 分钟)
 STORE_SESSION_MESSAGES=false            # 会话消息存储模式(默认:false)
                                         # - false:存储请求/响应体但对 message 内容脱敏 [REDACTED]
                                         # - true:原样存储 message 内容(注意隐私和存储空间影响)
                                         # 警告:启用后会增加 Redis/DB 存储空间,且包含敏感信息
+STORE_SESSION_RESPONSE_BODY=true        # 是否在 Redis 中存储会话响应体(默认:true)
+                                        # - true:存储(SSE/JSON),用于调试/定位问题(Redis 临时缓存)
+                                        # - false:不存储响应体(注意:不影响本次请求处理;仅影响后续查看 response body)
+                                        # 说明:该开关不影响内部统计读取响应体(tokens/费用统计、SSE 假 200 检测仍会进行)
 
 # 熔断器配置
 # 功能说明:控制网络错误是否计入熔断器失败计数

+ 1 - 1
.github/workflows/test.yml

@@ -210,7 +210,7 @@ jobs:
           fi
 
       - name: Create summary
-        if: github.event_name == 'pull_request'
+        if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
         uses: actions/github-script@v7
         with:
           script: |

+ 1 - 0
.gitignore

@@ -92,3 +92,4 @@ tmp/
 .trae/
 .sisyphus
 .ace-tool/
+.worktrees/

+ 8 - 8
Dockerfile

@@ -10,19 +10,19 @@ COPY --from=deps /app/node_modules ./node_modules
 COPY . .
 ENV NEXT_TELEMETRY_DISABLED=1
 ENV CI=true
-RUN bun run build
+RUN --mount=type=cache,target=/app/.next/cache bun run build
 
 FROM node:20-slim AS runner
 WORKDIR /app
 ENV NODE_ENV=production
-ENV PORT=8080
-EXPOSE 8080
+ENV PORT=3000
+EXPOSE 3000
 
 # 关键:确保复制了所有必要的文件,特别是 drizzle 文件夹
 COPY --from=builder /app/public ./public
-COPY --from=builder /app/.next ./.next
-COPY --from=builder /app/node_modules ./node_modules
-COPY --from=builder /app/package.json ./package.json
-COPY --from=builder /app/drizzle ./drizzle 
+COPY --from=builder /app/.next/standalone ./
+COPY --from=builder /app/.next/static ./.next/static
+COPY --from=builder /app/drizzle ./drizzle
+COPY --from=builder /app/VERSION ./VERSION
 
-CMD ["node", "node_modules/.bin/next", "start"]
+CMD ["node", "server.js"]

+ 4 - 1
README.en.md

@@ -276,6 +276,9 @@ Docker Compose is the **preferred deployment method** — it automatically provi
 | `REDIS_URL`                                | `redis://localhost:6379` | Redis endpoint, supports `rediss://` for TLS providers.                                              |
 | `REDIS_TLS_REJECT_UNAUTHORIZED`            | `true`                   | Validate Redis TLS certificates; set `false` to skip (for self-signed/shared certs).                 |
 | `ENABLE_RATE_LIMIT`                        | `true`                   | Toggles multi-dimensional rate limiting; Fail-Open handles Redis outages gracefully.                 |
+| `ENABLE_API_KEY_VACUUM_FILTER`             | `true`                   | Enables API Key Vacuum Filter (negative short-circuit only; set to `false/0` to disable).            |
+| `ENABLE_API_KEY_REDIS_CACHE`               | `true`                   | Enables API Key auth Redis cache (requires Redis; auto-fallback to DB on errors).                    |
+| `API_KEY_AUTH_CACHE_TTL_SECONDS`           | `60`                     | API Key auth cache TTL in seconds (default 60, max 3600).                                             |
 | `SESSION_TTL`                              | `300`                    | Session cache window (seconds) that drives vendor reuse.                                             |
 | `ENABLE_SECURE_COOKIES`                    | `true`                   | Browsers require HTTPS for Secure cookies; set to `false` when serving plain HTTP outside localhost. |
 | `ENABLE_CIRCUIT_BREAKER_ON_NETWORK_ERRORS` | `false`                  | When `true`, network errors also trip the circuit breaker for quicker isolation.                     |
@@ -283,7 +286,7 @@ Docker Compose is the **preferred deployment method** — it automatically provi
 | `APP_URL`                                  | empty                    | Populate to expose correct `servers` entries in OpenAPI docs.                                        |
 | `API_TEST_TIMEOUT_MS`                      | `15000`                  | Timeout (ms) for provider API connectivity tests. Accepts 5000-120000 for regional tuning.           |
 
-> Boolean values should be `true/false` or `1/0` without quotes; otherwise Zod may coerce strings incorrectly. See `.env.example` for the full list.
+> Boolean values support `true/false` or `1/0`. Quoting in `.env` is also fine (dotenv will strip quotes). See `.env.example` for the full list.
 
 ## ❓ FAQ
 

+ 4 - 1
README.md

@@ -286,6 +286,9 @@ Docker Compose 是**首选部署方式**,自动配置数据库、Redis 和应
 | `REDIS_URL`                                | `redis://localhost:6379` | Redis 地址,支持 `rediss://` 用于 TLS。                                      |
 | `REDIS_TLS_REJECT_UNAUTHORIZED`            | `true`                   | 是否验证 Redis TLS 证书;设为 `false` 可跳过验证(用于自签/共享证书)。      |
 | `ENABLE_RATE_LIMIT`                        | `true`                   | 控制多维限流开关;Fail-Open 策略在 Redis 不可用时自动降级。                  |
+| `ENABLE_API_KEY_VACUUM_FILTER`             | `true`                   | 是否启用 API Key 真空过滤器(仅负向短路无效 key;可设为 `false/0` 关闭用于排查/节省内存)。 |
+| `ENABLE_API_KEY_REDIS_CACHE`               | `true`                   | 是否启用 API Key 鉴权 Redis 缓存(需 Redis 可用;异常自动回落到 DB)。       |
+| `API_KEY_AUTH_CACHE_TTL_SECONDS`           | `60`                     | API Key 鉴权缓存 TTL(秒,默认 60,最大 3600)。                              |
 | `SESSION_TTL`                              | `300`                    | Session 缓存时间(秒),影响供应商复用策略。                                 |
 | `ENABLE_SECURE_COOKIES`                    | `true`                   | 仅 HTTPS 场景能设置 Secure Cookie;HTTP 访问(非 localhost)需改为 `false`。 |
 | `ENABLE_CIRCUIT_BREAKER_ON_NETWORK_ERRORS` | `false`                  | 是否将网络错误计入熔断器;开启后能更激进地阻断异常线路。                     |
@@ -293,7 +296,7 @@ Docker Compose 是**首选部署方式**,自动配置数据库、Redis 和应
 | `APP_URL`                                  | 空                       | 设置后 OpenAPI 文档 `servers` 将展示正确域名/端口。                          |
 | `API_TEST_TIMEOUT_MS`                      | `15000`                  | 供应商 API 测试超时时间(毫秒,范围 5000-120000),跨境网络可适当提高。      |
 
-> 布尔变量请直接写 `true/false` 或 `1/0`,勿加引号,避免被 Zod 转换为真值。更多字段参考 `.env.example`。
+> 布尔变量支持 `true/false` 或 `1/0`;在 `.env` 文件里写成带引号形式也没问题(dotenv 会解析并去掉引号)。更多字段参考 `.env.example`。
 
 ## ❓ FAQ
 

+ 7 - 1
docker-compose.yaml

@@ -68,7 +68,13 @@ services:
       - "${APP_PORT:-23000}:3000"
     restart: unless-stopped
     healthcheck:
-      test: ["CMD-SHELL", "curl -f http://localhost:3000/api/actions/health || exit 1"]
+      test:
+        [
+          "CMD",
+          "node",
+          "-e",
+          "fetch('http://' + (process.env.HOSTNAME || '127.0.0.1') + ':3000/api/actions/health').then((r)=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))",
+        ]
       interval: 30s
       timeout: 5s
       retries: 3

+ 1 - 1
drizzle/0062_aromatic_taskmaster.sql

@@ -1 +1 @@
-ALTER TABLE "providers" ADD COLUMN "gemini_google_search_preference" varchar(20);
+ALTER TABLE "providers" ADD COLUMN IF NOT EXISTS "gemini_google_search_preference" varchar(20);

+ 1 - 0
drizzle/0064_harsh_dragon_lord.sql

@@ -0,0 +1 @@
+ALTER TABLE "providers" ADD COLUMN IF NOT EXISTS "group_priorities" jsonb DEFAULT 'null'::jsonb;

+ 0 - 0
drizzle/0064_stale_vertigo.sql → drizzle/0065_stale_vertigo.sql


+ 9 - 3
drizzle/meta/0064_snapshot.json

@@ -1,5 +1,5 @@
 {
-  "id": "5d3e46c6-0881-4776-ae60-4b8e4d06f999",
+  "id": "9fd69a68-7794-42af-ac5f-83f874adeecf",
   "prevId": "40d9ed20-d9e3-42a4-9357-3e17e4b06ba1",
   "version": "7",
   "dialect": "postgresql",
@@ -1415,7 +1415,6 @@
             }
           ],
           "isUnique": true,
-          "where": "\"provider_endpoints\".\"deleted_at\" IS NULL",
           "concurrently": false,
           "method": "btree",
           "with": {}
@@ -1671,6 +1670,13 @@
           "notNull": true,
           "default": 0
         },
+        "group_priorities": {
+          "name": "group_priorities",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'null'::jsonb"
+        },
         "cost_multiplier": {
           "name": "cost_multiplier",
           "type": "numeric(10, 4)",
@@ -2966,4 +2972,4 @@
     "schemas": {},
     "tables": {}
   }
-}
+}

+ 2976 - 0
drizzle/meta/0065_snapshot.json

@@ -0,0 +1,2976 @@
+{
+  "id": "5d3e46c6-0881-4776-ae60-4b8e4d06f999",
+  "prevId": "9fd69a68-7794-42af-ac5f-83f874adeecf",
+  "version": "7",
+  "dialect": "postgresql",
+  "tables": {
+    "public.error_rules": {
+      "name": "error_rules",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "serial",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "pattern": {
+          "name": "pattern",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "match_type": {
+          "name": "match_type",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'regex'"
+        },
+        "category": {
+          "name": "category",
+          "type": "varchar(50)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "description": {
+          "name": "description",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "override_response": {
+          "name": "override_response",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "override_status_code": {
+          "name": "override_status_code",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "is_enabled": {
+          "name": "is_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "is_default": {
+          "name": "is_default",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "priority": {
+          "name": "priority",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 0
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_error_rules_enabled": {
+          "name": "idx_error_rules_enabled",
+          "columns": [
+            {
+              "expression": "is_enabled",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "priority",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "unique_pattern": {
+          "name": "unique_pattern",
+          "columns": [
+            {
+              "expression": "pattern",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": true,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_category": {
+          "name": "idx_category",
+          "columns": [
+            {
+              "expression": "category",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_match_type": {
+          "name": "idx_match_type",
+          "columns": [
+            {
+              "expression": "match_type",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.keys": {
+      "name": "keys",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "serial",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "key": {
+          "name": "key",
+          "type": "varchar",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "varchar",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "is_enabled": {
+          "name": "is_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": false,
+          "default": true
+        },
+        "expires_at": {
+          "name": "expires_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "can_login_web_ui": {
+          "name": "can_login_web_ui",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": false,
+          "default": false
+        },
+        "limit_5h_usd": {
+          "name": "limit_5h_usd",
+          "type": "numeric(10, 2)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "limit_daily_usd": {
+          "name": "limit_daily_usd",
+          "type": "numeric(10, 2)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "daily_reset_mode": {
+          "name": "daily_reset_mode",
+          "type": "daily_reset_mode",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'fixed'"
+        },
+        "daily_reset_time": {
+          "name": "daily_reset_time",
+          "type": "varchar(5)",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'00:00'"
+        },
+        "limit_weekly_usd": {
+          "name": "limit_weekly_usd",
+          "type": "numeric(10, 2)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "limit_monthly_usd": {
+          "name": "limit_monthly_usd",
+          "type": "numeric(10, 2)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "limit_total_usd": {
+          "name": "limit_total_usd",
+          "type": "numeric(10, 2)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "limit_concurrent_sessions": {
+          "name": "limit_concurrent_sessions",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 0
+        },
+        "provider_group": {
+          "name": "provider_group",
+          "type": "varchar(200)",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'default'"
+        },
+        "cache_ttl_preference": {
+          "name": "cache_ttl_preference",
+          "type": "varchar(10)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "deleted_at": {
+          "name": "deleted_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {
+        "idx_keys_user_id": {
+          "name": "idx_keys_user_id",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_keys_created_at": {
+          "name": "idx_keys_created_at",
+          "columns": [
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_keys_deleted_at": {
+          "name": "idx_keys_deleted_at",
+          "columns": [
+            {
+              "expression": "deleted_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.message_request": {
+      "name": "message_request",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "serial",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "provider_id": {
+          "name": "provider_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "key": {
+          "name": "key",
+          "type": "varchar",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "model": {
+          "name": "model",
+          "type": "varchar(128)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "duration_ms": {
+          "name": "duration_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "cost_usd": {
+          "name": "cost_usd",
+          "type": "numeric(21, 15)",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'0'"
+        },
+        "cost_multiplier": {
+          "name": "cost_multiplier",
+          "type": "numeric(10, 4)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "session_id": {
+          "name": "session_id",
+          "type": "varchar(64)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "request_sequence": {
+          "name": "request_sequence",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 1
+        },
+        "provider_chain": {
+          "name": "provider_chain",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "status_code": {
+          "name": "status_code",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "api_type": {
+          "name": "api_type",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "endpoint": {
+          "name": "endpoint",
+          "type": "varchar(256)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "original_model": {
+          "name": "original_model",
+          "type": "varchar(128)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "input_tokens": {
+          "name": "input_tokens",
+          "type": "bigint",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "output_tokens": {
+          "name": "output_tokens",
+          "type": "bigint",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "ttfb_ms": {
+          "name": "ttfb_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "cache_creation_input_tokens": {
+          "name": "cache_creation_input_tokens",
+          "type": "bigint",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "cache_read_input_tokens": {
+          "name": "cache_read_input_tokens",
+          "type": "bigint",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "cache_creation_5m_input_tokens": {
+          "name": "cache_creation_5m_input_tokens",
+          "type": "bigint",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "cache_creation_1h_input_tokens": {
+          "name": "cache_creation_1h_input_tokens",
+          "type": "bigint",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "cache_ttl_applied": {
+          "name": "cache_ttl_applied",
+          "type": "varchar(10)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "context_1m_applied": {
+          "name": "context_1m_applied",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": false,
+          "default": false
+        },
+        "special_settings": {
+          "name": "special_settings",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "error_message": {
+          "name": "error_message",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "error_stack": {
+          "name": "error_stack",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "error_cause": {
+          "name": "error_cause",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "blocked_by": {
+          "name": "blocked_by",
+          "type": "varchar(50)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "blocked_reason": {
+          "name": "blocked_reason",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "user_agent": {
+          "name": "user_agent",
+          "type": "varchar(512)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "messages_count": {
+          "name": "messages_count",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "deleted_at": {
+          "name": "deleted_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {
+        "idx_message_request_user_date_cost": {
+          "name": "idx_message_request_user_date_cost",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "cost_usd",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"message_request\".\"deleted_at\" IS NULL",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_message_request_user_query": {
+          "name": "idx_message_request_user_query",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"message_request\".\"deleted_at\" IS NULL",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_message_request_session_id": {
+          "name": "idx_message_request_session_id",
+          "columns": [
+            {
+              "expression": "session_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"message_request\".\"deleted_at\" IS NULL",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_message_request_session_id_prefix": {
+          "name": "idx_message_request_session_id_prefix",
+          "columns": [
+            {
+              "expression": "\"session_id\" varchar_pattern_ops",
+              "asc": true,
+              "isExpression": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"message_request\".\"deleted_at\" IS NULL AND (\"message_request\".\"blocked_by\" IS NULL OR \"message_request\".\"blocked_by\" <> 'warmup')",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_message_request_session_seq": {
+          "name": "idx_message_request_session_seq",
+          "columns": [
+            {
+              "expression": "session_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "request_sequence",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"message_request\".\"deleted_at\" IS NULL",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_message_request_endpoint": {
+          "name": "idx_message_request_endpoint",
+          "columns": [
+            {
+              "expression": "endpoint",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"message_request\".\"deleted_at\" IS NULL",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_message_request_blocked_by": {
+          "name": "idx_message_request_blocked_by",
+          "columns": [
+            {
+              "expression": "blocked_by",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"message_request\".\"deleted_at\" IS NULL",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_message_request_provider_id": {
+          "name": "idx_message_request_provider_id",
+          "columns": [
+            {
+              "expression": "provider_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_message_request_user_id": {
+          "name": "idx_message_request_user_id",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_message_request_key": {
+          "name": "idx_message_request_key",
+          "columns": [
+            {
+              "expression": "key",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_message_request_created_at": {
+          "name": "idx_message_request_created_at",
+          "columns": [
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_message_request_deleted_at": {
+          "name": "idx_message_request_deleted_at",
+          "columns": [
+            {
+              "expression": "deleted_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.model_prices": {
+      "name": "model_prices",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "serial",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "model_name": {
+          "name": "model_name",
+          "type": "varchar",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "price_data": {
+          "name": "price_data",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "source": {
+          "name": "source",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'litellm'"
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_model_prices_latest": {
+          "name": "idx_model_prices_latest",
+          "columns": [
+            {
+              "expression": "model_name",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": false,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_model_prices_model_name": {
+          "name": "idx_model_prices_model_name",
+          "columns": [
+            {
+              "expression": "model_name",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_model_prices_created_at": {
+          "name": "idx_model_prices_created_at",
+          "columns": [
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": false,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_model_prices_source": {
+          "name": "idx_model_prices_source",
+          "columns": [
+            {
+              "expression": "source",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.notification_settings": {
+      "name": "notification_settings",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "serial",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "enabled": {
+          "name": "enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "use_legacy_mode": {
+          "name": "use_legacy_mode",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "circuit_breaker_enabled": {
+          "name": "circuit_breaker_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "circuit_breaker_webhook": {
+          "name": "circuit_breaker_webhook",
+          "type": "varchar(512)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "daily_leaderboard_enabled": {
+          "name": "daily_leaderboard_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "daily_leaderboard_webhook": {
+          "name": "daily_leaderboard_webhook",
+          "type": "varchar(512)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "daily_leaderboard_time": {
+          "name": "daily_leaderboard_time",
+          "type": "varchar(10)",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'09:00'"
+        },
+        "daily_leaderboard_top_n": {
+          "name": "daily_leaderboard_top_n",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 5
+        },
+        "cost_alert_enabled": {
+          "name": "cost_alert_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "cost_alert_webhook": {
+          "name": "cost_alert_webhook",
+          "type": "varchar(512)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "cost_alert_threshold": {
+          "name": "cost_alert_threshold",
+          "type": "numeric(5, 2)",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'0.80'"
+        },
+        "cost_alert_check_interval": {
+          "name": "cost_alert_check_interval",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 60
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.notification_target_bindings": {
+      "name": "notification_target_bindings",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "serial",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "notification_type": {
+          "name": "notification_type",
+          "type": "notification_type",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "target_id": {
+          "name": "target_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "is_enabled": {
+          "name": "is_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "schedule_cron": {
+          "name": "schedule_cron",
+          "type": "varchar(100)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "schedule_timezone": {
+          "name": "schedule_timezone",
+          "type": "varchar(50)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "template_override": {
+          "name": "template_override",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "unique_notification_target_binding": {
+          "name": "unique_notification_target_binding",
+          "columns": [
+            {
+              "expression": "notification_type",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "target_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": true,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_notification_bindings_type": {
+          "name": "idx_notification_bindings_type",
+          "columns": [
+            {
+              "expression": "notification_type",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "is_enabled",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_notification_bindings_target": {
+          "name": "idx_notification_bindings_target",
+          "columns": [
+            {
+              "expression": "target_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "is_enabled",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "notification_target_bindings_target_id_webhook_targets_id_fk": {
+          "name": "notification_target_bindings_target_id_webhook_targets_id_fk",
+          "tableFrom": "notification_target_bindings",
+          "tableTo": "webhook_targets",
+          "columnsFrom": [
+            "target_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.provider_endpoint_probe_logs": {
+      "name": "provider_endpoint_probe_logs",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "serial",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "endpoint_id": {
+          "name": "endpoint_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "source": {
+          "name": "source",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'scheduled'"
+        },
+        "ok": {
+          "name": "ok",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "status_code": {
+          "name": "status_code",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "latency_ms": {
+          "name": "latency_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "error_type": {
+          "name": "error_type",
+          "type": "varchar(64)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "error_message": {
+          "name": "error_message",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_provider_endpoint_probe_logs_endpoint_created_at": {
+          "name": "idx_provider_endpoint_probe_logs_endpoint_created_at",
+          "columns": [
+            {
+              "expression": "endpoint_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": false,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_provider_endpoint_probe_logs_created_at": {
+          "name": "idx_provider_endpoint_probe_logs_created_at",
+          "columns": [
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "provider_endpoint_probe_logs_endpoint_id_provider_endpoints_id_fk": {
+          "name": "provider_endpoint_probe_logs_endpoint_id_provider_endpoints_id_fk",
+          "tableFrom": "provider_endpoint_probe_logs",
+          "tableTo": "provider_endpoints",
+          "columnsFrom": [
+            "endpoint_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.provider_endpoints": {
+      "name": "provider_endpoints",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "serial",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "vendor_id": {
+          "name": "vendor_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "provider_type": {
+          "name": "provider_type",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'claude'"
+        },
+        "url": {
+          "name": "url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "label": {
+          "name": "label",
+          "type": "varchar(200)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "sort_order": {
+          "name": "sort_order",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 0
+        },
+        "is_enabled": {
+          "name": "is_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "last_probed_at": {
+          "name": "last_probed_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "last_probe_ok": {
+          "name": "last_probe_ok",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "last_probe_status_code": {
+          "name": "last_probe_status_code",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "last_probe_latency_ms": {
+          "name": "last_probe_latency_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "last_probe_error_type": {
+          "name": "last_probe_error_type",
+          "type": "varchar(64)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "last_probe_error_message": {
+          "name": "last_probe_error_message",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "deleted_at": {
+          "name": "deleted_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {
+        "uniq_provider_endpoints_vendor_type_url": {
+          "name": "uniq_provider_endpoints_vendor_type_url",
+          "columns": [
+            {
+              "expression": "vendor_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "provider_type",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "url",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": true,
+          "where": "\"provider_endpoints\".\"deleted_at\" IS NULL",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_provider_endpoints_vendor_type": {
+          "name": "idx_provider_endpoints_vendor_type",
+          "columns": [
+            {
+              "expression": "vendor_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "provider_type",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"provider_endpoints\".\"deleted_at\" IS NULL",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_provider_endpoints_enabled": {
+          "name": "idx_provider_endpoints_enabled",
+          "columns": [
+            {
+              "expression": "is_enabled",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "vendor_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "provider_type",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"provider_endpoints\".\"deleted_at\" IS NULL",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_provider_endpoints_created_at": {
+          "name": "idx_provider_endpoints_created_at",
+          "columns": [
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_provider_endpoints_deleted_at": {
+          "name": "idx_provider_endpoints_deleted_at",
+          "columns": [
+            {
+              "expression": "deleted_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "provider_endpoints_vendor_id_provider_vendors_id_fk": {
+          "name": "provider_endpoints_vendor_id_provider_vendors_id_fk",
+          "tableFrom": "provider_endpoints",
+          "tableTo": "provider_vendors",
+          "columnsFrom": [
+            "vendor_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.provider_vendors": {
+      "name": "provider_vendors",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "serial",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "website_domain": {
+          "name": "website_domain",
+          "type": "varchar(255)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "display_name": {
+          "name": "display_name",
+          "type": "varchar(200)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "website_url": {
+          "name": "website_url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "favicon_url": {
+          "name": "favicon_url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "uniq_provider_vendors_website_domain": {
+          "name": "uniq_provider_vendors_website_domain",
+          "columns": [
+            {
+              "expression": "website_domain",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": true,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_provider_vendors_created_at": {
+          "name": "idx_provider_vendors_created_at",
+          "columns": [
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.providers": {
+      "name": "providers",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "serial",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "varchar",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "description": {
+          "name": "description",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "url": {
+          "name": "url",
+          "type": "varchar",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "key": {
+          "name": "key",
+          "type": "varchar",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "provider_vendor_id": {
+          "name": "provider_vendor_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "is_enabled": {
+          "name": "is_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "weight": {
+          "name": "weight",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 1
+        },
+        "priority": {
+          "name": "priority",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 0
+        },
+        "cost_multiplier": {
+          "name": "cost_multiplier",
+          "type": "numeric(10, 4)",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'1.0'"
+        },
+        "group_tag": {
+          "name": "group_tag",
+          "type": "varchar(50)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "provider_type": {
+          "name": "provider_type",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'claude'"
+        },
+        "preserve_client_ip": {
+          "name": "preserve_client_ip",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "model_redirects": {
+          "name": "model_redirects",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "allowed_models": {
+          "name": "allowed_models",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'null'::jsonb"
+        },
+        "join_claude_pool": {
+          "name": "join_claude_pool",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": false,
+          "default": false
+        },
+        "codex_instructions_strategy": {
+          "name": "codex_instructions_strategy",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'auto'"
+        },
+        "mcp_passthrough_type": {
+          "name": "mcp_passthrough_type",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'none'"
+        },
+        "mcp_passthrough_url": {
+          "name": "mcp_passthrough_url",
+          "type": "varchar(512)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "limit_5h_usd": {
+          "name": "limit_5h_usd",
+          "type": "numeric(10, 2)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "limit_daily_usd": {
+          "name": "limit_daily_usd",
+          "type": "numeric(10, 2)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "daily_reset_mode": {
+          "name": "daily_reset_mode",
+          "type": "daily_reset_mode",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'fixed'"
+        },
+        "daily_reset_time": {
+          "name": "daily_reset_time",
+          "type": "varchar(5)",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'00:00'"
+        },
+        "limit_weekly_usd": {
+          "name": "limit_weekly_usd",
+          "type": "numeric(10, 2)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "limit_monthly_usd": {
+          "name": "limit_monthly_usd",
+          "type": "numeric(10, 2)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "limit_total_usd": {
+          "name": "limit_total_usd",
+          "type": "numeric(10, 2)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "total_cost_reset_at": {
+          "name": "total_cost_reset_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "limit_concurrent_sessions": {
+          "name": "limit_concurrent_sessions",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 0
+        },
+        "max_retry_attempts": {
+          "name": "max_retry_attempts",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "circuit_breaker_failure_threshold": {
+          "name": "circuit_breaker_failure_threshold",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 5
+        },
+        "circuit_breaker_open_duration": {
+          "name": "circuit_breaker_open_duration",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 1800000
+        },
+        "circuit_breaker_half_open_success_threshold": {
+          "name": "circuit_breaker_half_open_success_threshold",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 2
+        },
+        "proxy_url": {
+          "name": "proxy_url",
+          "type": "varchar(512)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "proxy_fallback_to_direct": {
+          "name": "proxy_fallback_to_direct",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": false,
+          "default": false
+        },
+        "first_byte_timeout_streaming_ms": {
+          "name": "first_byte_timeout_streaming_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 0
+        },
+        "streaming_idle_timeout_ms": {
+          "name": "streaming_idle_timeout_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 0
+        },
+        "request_timeout_non_streaming_ms": {
+          "name": "request_timeout_non_streaming_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 0
+        },
+        "website_url": {
+          "name": "website_url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "favicon_url": {
+          "name": "favicon_url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "cache_ttl_preference": {
+          "name": "cache_ttl_preference",
+          "type": "varchar(10)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "context_1m_preference": {
+          "name": "context_1m_preference",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "codex_reasoning_effort_preference": {
+          "name": "codex_reasoning_effort_preference",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "codex_reasoning_summary_preference": {
+          "name": "codex_reasoning_summary_preference",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "codex_text_verbosity_preference": {
+          "name": "codex_text_verbosity_preference",
+          "type": "varchar(10)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "codex_parallel_tool_calls_preference": {
+          "name": "codex_parallel_tool_calls_preference",
+          "type": "varchar(10)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "anthropic_max_tokens_preference": {
+          "name": "anthropic_max_tokens_preference",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "anthropic_thinking_budget_preference": {
+          "name": "anthropic_thinking_budget_preference",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "gemini_google_search_preference": {
+          "name": "gemini_google_search_preference",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "tpm": {
+          "name": "tpm",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 0
+        },
+        "rpm": {
+          "name": "rpm",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 0
+        },
+        "rpd": {
+          "name": "rpd",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 0
+        },
+        "cc": {
+          "name": "cc",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 0
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "deleted_at": {
+          "name": "deleted_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "group_priorities": {
+          "name": "group_priorities",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'null'::jsonb"
+        }
+      },
+      "indexes": {
+        "idx_providers_enabled_priority": {
+          "name": "idx_providers_enabled_priority",
+          "columns": [
+            {
+              "expression": "is_enabled",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "priority",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "weight",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"providers\".\"deleted_at\" IS NULL",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_providers_group": {
+          "name": "idx_providers_group",
+          "columns": [
+            {
+              "expression": "group_tag",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"providers\".\"deleted_at\" IS NULL",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_providers_created_at": {
+          "name": "idx_providers_created_at",
+          "columns": [
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_providers_deleted_at": {
+          "name": "idx_providers_deleted_at",
+          "columns": [
+            {
+              "expression": "deleted_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_providers_vendor_type": {
+          "name": "idx_providers_vendor_type",
+          "columns": [
+            {
+              "expression": "provider_vendor_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "provider_type",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"providers\".\"deleted_at\" IS NULL",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "providers_provider_vendor_id_provider_vendors_id_fk": {
+          "name": "providers_provider_vendor_id_provider_vendors_id_fk",
+          "tableFrom": "providers",
+          "tableTo": "provider_vendors",
+          "columnsFrom": [
+            "provider_vendor_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "restrict",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.request_filters": {
+      "name": "request_filters",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "serial",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "varchar(100)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "description": {
+          "name": "description",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "scope": {
+          "name": "scope",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "action": {
+          "name": "action",
+          "type": "varchar(30)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "match_type": {
+          "name": "match_type",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "target": {
+          "name": "target",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "replacement": {
+          "name": "replacement",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "priority": {
+          "name": "priority",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 0
+        },
+        "is_enabled": {
+          "name": "is_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "binding_type": {
+          "name": "binding_type",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'global'"
+        },
+        "provider_ids": {
+          "name": "provider_ids",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "group_tags": {
+          "name": "group_tags",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_request_filters_enabled": {
+          "name": "idx_request_filters_enabled",
+          "columns": [
+            {
+              "expression": "is_enabled",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "priority",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_request_filters_scope": {
+          "name": "idx_request_filters_scope",
+          "columns": [
+            {
+              "expression": "scope",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_request_filters_action": {
+          "name": "idx_request_filters_action",
+          "columns": [
+            {
+              "expression": "action",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_request_filters_binding": {
+          "name": "idx_request_filters_binding",
+          "columns": [
+            {
+              "expression": "is_enabled",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "binding_type",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.sensitive_words": {
+      "name": "sensitive_words",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "serial",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "word": {
+          "name": "word",
+          "type": "varchar(255)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "match_type": {
+          "name": "match_type",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'contains'"
+        },
+        "description": {
+          "name": "description",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "is_enabled": {
+          "name": "is_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_sensitive_words_enabled": {
+          "name": "idx_sensitive_words_enabled",
+          "columns": [
+            {
+              "expression": "is_enabled",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "match_type",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_sensitive_words_created_at": {
+          "name": "idx_sensitive_words_created_at",
+          "columns": [
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.system_settings": {
+      "name": "system_settings",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "serial",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "site_title": {
+          "name": "site_title",
+          "type": "varchar(128)",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'Claude Code Hub'"
+        },
+        "allow_global_usage_view": {
+          "name": "allow_global_usage_view",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "currency_display": {
+          "name": "currency_display",
+          "type": "varchar(10)",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'USD'"
+        },
+        "billing_model_source": {
+          "name": "billing_model_source",
+          "type": "varchar(20)",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'original'"
+        },
+        "timezone": {
+          "name": "timezone",
+          "type": "varchar(64)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "enable_auto_cleanup": {
+          "name": "enable_auto_cleanup",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": false,
+          "default": false
+        },
+        "cleanup_retention_days": {
+          "name": "cleanup_retention_days",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 30
+        },
+        "cleanup_schedule": {
+          "name": "cleanup_schedule",
+          "type": "varchar(50)",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'0 2 * * *'"
+        },
+        "cleanup_batch_size": {
+          "name": "cleanup_batch_size",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 10000
+        },
+        "enable_client_version_check": {
+          "name": "enable_client_version_check",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "verbose_provider_error": {
+          "name": "verbose_provider_error",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "enable_http2": {
+          "name": "enable_http2",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "intercept_anthropic_warmup_requests": {
+          "name": "intercept_anthropic_warmup_requests",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "enable_thinking_signature_rectifier": {
+          "name": "enable_thinking_signature_rectifier",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "enable_thinking_budget_rectifier": {
+          "name": "enable_thinking_budget_rectifier",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "enable_codex_session_id_completion": {
+          "name": "enable_codex_session_id_completion",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "enable_claude_metadata_user_id_injection": {
+          "name": "enable_claude_metadata_user_id_injection",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "enable_response_fixer": {
+          "name": "enable_response_fixer",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "response_fixer_config": {
+          "name": "response_fixer_config",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'{\"fixTruncatedJson\":true,\"fixSseFormat\":true,\"fixEncoding\":true,\"maxJsonDepth\":200,\"maxFixSize\":1048576}'::jsonb"
+        },
+        "quota_db_refresh_interval_seconds": {
+          "name": "quota_db_refresh_interval_seconds",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 10
+        },
+        "quota_lease_percent_5h": {
+          "name": "quota_lease_percent_5h",
+          "type": "numeric(5, 4)",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'0.05'"
+        },
+        "quota_lease_percent_daily": {
+          "name": "quota_lease_percent_daily",
+          "type": "numeric(5, 4)",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'0.05'"
+        },
+        "quota_lease_percent_weekly": {
+          "name": "quota_lease_percent_weekly",
+          "type": "numeric(5, 4)",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'0.05'"
+        },
+        "quota_lease_percent_monthly": {
+          "name": "quota_lease_percent_monthly",
+          "type": "numeric(5, 4)",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'0.05'"
+        },
+        "quota_lease_cap_usd": {
+          "name": "quota_lease_cap_usd",
+          "type": "numeric(10, 2)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.users": {
+      "name": "users",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "serial",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "varchar",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "description": {
+          "name": "description",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "role": {
+          "name": "role",
+          "type": "varchar",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'user'"
+        },
+        "rpm_limit": {
+          "name": "rpm_limit",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "daily_limit_usd": {
+          "name": "daily_limit_usd",
+          "type": "numeric(10, 2)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "provider_group": {
+          "name": "provider_group",
+          "type": "varchar(200)",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'default'"
+        },
+        "tags": {
+          "name": "tags",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'[]'::jsonb"
+        },
+        "limit_5h_usd": {
+          "name": "limit_5h_usd",
+          "type": "numeric(10, 2)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "limit_weekly_usd": {
+          "name": "limit_weekly_usd",
+          "type": "numeric(10, 2)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "limit_monthly_usd": {
+          "name": "limit_monthly_usd",
+          "type": "numeric(10, 2)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "limit_total_usd": {
+          "name": "limit_total_usd",
+          "type": "numeric(10, 2)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "limit_concurrent_sessions": {
+          "name": "limit_concurrent_sessions",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "daily_reset_mode": {
+          "name": "daily_reset_mode",
+          "type": "daily_reset_mode",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'fixed'"
+        },
+        "daily_reset_time": {
+          "name": "daily_reset_time",
+          "type": "varchar(5)",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'00:00'"
+        },
+        "is_enabled": {
+          "name": "is_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "expires_at": {
+          "name": "expires_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "allowed_clients": {
+          "name": "allowed_clients",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'[]'::jsonb"
+        },
+        "allowed_models": {
+          "name": "allowed_models",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'[]'::jsonb"
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "deleted_at": {
+          "name": "deleted_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {
+        "idx_users_active_role_sort": {
+          "name": "idx_users_active_role_sort",
+          "columns": [
+            {
+              "expression": "deleted_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "role",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"users\".\"deleted_at\" IS NULL",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_users_enabled_expires_at": {
+          "name": "idx_users_enabled_expires_at",
+          "columns": [
+            {
+              "expression": "is_enabled",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "expires_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"users\".\"deleted_at\" IS NULL",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_users_created_at": {
+          "name": "idx_users_created_at",
+          "columns": [
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_users_deleted_at": {
+          "name": "idx_users_deleted_at",
+          "columns": [
+            {
+              "expression": "deleted_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.webhook_targets": {
+      "name": "webhook_targets",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "serial",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "varchar(100)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "provider_type": {
+          "name": "provider_type",
+          "type": "webhook_provider_type",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "webhook_url": {
+          "name": "webhook_url",
+          "type": "varchar(1024)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "telegram_bot_token": {
+          "name": "telegram_bot_token",
+          "type": "varchar(256)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "telegram_chat_id": {
+          "name": "telegram_chat_id",
+          "type": "varchar(64)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "dingtalk_secret": {
+          "name": "dingtalk_secret",
+          "type": "varchar(256)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "custom_template": {
+          "name": "custom_template",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "custom_headers": {
+          "name": "custom_headers",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "proxy_url": {
+          "name": "proxy_url",
+          "type": "varchar(512)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "proxy_fallback_to_direct": {
+          "name": "proxy_fallback_to_direct",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": false,
+          "default": false
+        },
+        "is_enabled": {
+          "name": "is_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "last_test_at": {
+          "name": "last_test_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "last_test_result": {
+          "name": "last_test_result",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    }
+  },
+  "enums": {
+    "public.daily_reset_mode": {
+      "name": "daily_reset_mode",
+      "schema": "public",
+      "values": [
+        "fixed",
+        "rolling"
+      ]
+    },
+    "public.notification_type": {
+      "name": "notification_type",
+      "schema": "public",
+      "values": [
+        "circuit_breaker",
+        "daily_leaderboard",
+        "cost_alert"
+      ]
+    },
+    "public.webhook_provider_type": {
+      "name": "webhook_provider_type",
+      "schema": "public",
+      "values": [
+        "wechat",
+        "feishu",
+        "dingtalk",
+        "telegram",
+        "custom"
+      ]
+    }
+  },
+  "schemas": {},
+  "sequences": {},
+  "roles": {},
+  "policies": {},
+  "views": {},
+  "_meta": {
+    "columns": {},
+    "schemas": {},
+    "tables": {}
+  }
+}

+ 9 - 2
drizzle/meta/_journal.json

@@ -453,9 +453,16 @@
     {
       "idx": 64,
       "version": "7",
+      "when": 1770598056381,
+      "tag": "0064_harsh_dragon_lord",
+      "breakpoints": true
+    },
+    {
+      "idx": 65,
+      "version": "7",
       "when": 1770607089556,
-      "tag": "0064_stale_vertigo",
+      "tag": "0065_stale_vertigo",
       "breakpoints": true
     }
   ]
-}
+}

+ 3 - 2
messages/en/dashboard.json

@@ -243,6 +243,7 @@
         "billingRedirected": "billing: redirected"
       },
       "errorMessage": "Error Message",
+      "fake200ForwardedNotice": "Note: For streaming requests, this failure may be detected only after the stream ends; the response content may already have been forwarded to the client.",
       "filteredProviders": "Filtered Providers",
       "providerChain": {
         "title": "Provider Decision Chain Timeline",
@@ -493,7 +494,7 @@
       "providers": "Providers",
       "models": "Models",
       "noDetailedData": "No detailed data available",
-      "storageTip": "No detailed data found. To view request details, please check if the environment variable STORE_SESSION_MESSAGES is set to true. Note: Enabling this increases Redis memory usage and may include sensitive information.",
+      "storageTip": "No detailed data found. Possible reasons: Redis is disabled or unavailable (requires REDIS_URL and ENABLE_RATE_LIMIT=true), the data has expired (SESSION_TTL, default 300s), or response body storage is disabled (STORE_SESSION_RESPONSE_BODY=false, affects the response body only). To store unredacted messages, set STORE_SESSION_MESSAGES=true.",
       "clientInfo": "Client Info",
       "requestHeaders": "Request Headers",
       "requestBody": "Request Body",
@@ -571,7 +572,7 @@
       "fetchFailed": "Fetch Failed",
       "unknownError": "Unknown Error",
       "storageNotEnabled": "Not Stored",
-      "storageNotEnabledHint": "Tip: Set environment variable STORE_SESSION_MESSAGES=true to enable messages storage"
+      "storageNotEnabledHint": "Tip: Check REDIS_URL and ENABLE_RATE_LIMIT=true (session details cache). To store unredacted messages, set STORE_SESSION_MESSAGES=true."
     },
     "errors": {
       "copyFailed": "Copy Failed"

+ 4 - 1
messages/en/settings/providers/filter.json

@@ -9,5 +9,8 @@
     "active": "Active",
     "all": "Any status",
     "inactive": "Inactive"
-  }
+  },
+  "mobileFilter": "Filter",
+  "mobileFilterCount": "Filter ({count})",
+  "resetFilters": "Reset Filters"
 }

+ 6 - 0
messages/en/settings/providers/form/sections.json

@@ -296,6 +296,12 @@
         "label": "Provider Group",
         "placeholder": "e.g. premium, economy"
       },
+      "groupPriorities": {
+        "label": "Per-Group Priority",
+        "desc": "Override global priority for specific groups. Leave empty to use the global priority above.",
+        "placeholder": "Use global priority",
+        "noGroups": "Set a group tag first to configure per-group priorities"
+      },
       "priority": {
         "desc": "Lower value = higher priority (0 is highest). The system only chooses from the highest priority tier. Suggested: primary=0, standby=1, emergency=2",
         "label": "Priority",

+ 15 - 0
messages/en/settings/providers/inlineEdit.json

@@ -1,12 +1,27 @@
 {
+  "addGroup": "Add group",
   "cancel": "Cancel",
   "costMultiplierInvalid": "Please enter a non-negative number",
   "costMultiplierLabel": "Cost Multiplier",
+  "createGroup": "Create \"{name}\"",
+  "editGroups": "Edit Groups",
+  "globalPriority": "Global Priority",
+  "groupPriorityLabel": "Per-Group Priority",
+  "groupPriorityPlaceholder": "Use global",
+  "groupSaveError": "Failed to save group changes",
+  "groupValidation": {
+    "empty": "Group name cannot be empty",
+    "noComma": "Group name cannot contain commas",
+    "tooLong": "Group name cannot exceed 50 characters"
+  },
+  "noGroupsAvailable": "No groups available",
   "priorityInvalid": "Please enter an integer >= 0",
   "priorityLabel": "Priority",
   "save": "Save",
+  "saving": "Saving...",
   "saveFailed": "Save failed",
   "saveSuccess": "Saved successfully",
+  "searchGroups": "Search groups...",
   "weightInvalid": "Please enter an integer between 1 and 100",
   "weightLabel": "Weight"
 }

+ 7 - 1
messages/en/settings/providers/list.json

@@ -33,5 +33,11 @@
   "unknownError": "Unknown error",
   "viewFullKey": "View Complete API Key",
   "viewFullKeyDesc": "Please keep it safe and don't share it with others",
-  "weight": "Weight"
+  "weight": "Weight",
+  "actions": "Actions",
+  "actionClone": "Clone",
+  "actionResetCircuit": "Reset Circuit",
+  "actionResetUsage": "Reset Usage",
+  "actionDelete": "Delete",
+  "selectProvider": "Select {name}"
 }

+ 3 - 2
messages/ja/dashboard.json

@@ -243,6 +243,7 @@
         "billingRedirected": "課金: 実際"
       },
       "errorMessage": "エラーメッセージ",
+      "fake200ForwardedNotice": "注意:ストリーミング要求では、失敗判定がストリーム終了後になる場合があります。応答内容は既にクライアントへ転送されている可能性があります。",
       "filteredProviders": "フィルタされたプロバイダー",
       "providerChain": {
         "title": "プロバイダー決定チェーンタイムライン",
@@ -493,7 +494,7 @@
       "providers": "プロバイダー",
       "models": "モデル",
       "noDetailedData": "詳細データなし",
-      "storageTip": "詳細データが見つかりません。リクエストの詳細を表示するには、環境変数 STORE_SESSION_MESSAGES が true に設定されているか確認してください。注意:有効にすると Redis のメモリ使用量が増加し、機密情報が含まれる可能性があります。",
+      "storageTip": "詳細データが見つかりません。原因の例:Redis が未設定/利用不可 (REDIS_URL + ENABLE_RATE_LIMIT=true)、データの期限切れ (SESSION_TTL、既定 300 秒)、または応答本文の保存を無効化 (STORE_SESSION_RESPONSE_BODY=false、応答本文のみ)。未マスクの messages を保存するには STORE_SESSION_MESSAGES=true を設定してください。",
       "clientInfo": "クライアント情報",
       "requestHeaders": "リクエストヘッダー",
       "requestBody": "リクエストボディ",
@@ -571,7 +572,7 @@
       "fetchFailed": "取得失敗",
       "unknownError": "不明なエラー",
       "storageNotEnabled": "未保存",
-      "storageNotEnabledHint": "ヒント: メッセージの保存を有効にするには、環境変数 STORE_SESSION_MESSAGES=true を設定してください"
+      "storageNotEnabledHint": "ヒント: REDIS_URL と ENABLE_RATE_LIMIT=true を確認してください (セッション詳細キャッシュ)。未マスクの messages を保存するには STORE_SESSION_MESSAGES=true を設定してください"
     },
     "errors": {
       "copyFailed": "コピー失敗"

+ 4 - 1
messages/ja/settings/providers/filter.json

@@ -9,5 +9,8 @@
     "active": "有効",
     "all": "すべてのステータス",
     "inactive": "無効"
-  }
+  },
+  "mobileFilter": "フィルター",
+  "mobileFilterCount": "フィルター ({count})",
+  "resetFilters": "フィルターをリセット"
 }

+ 6 - 0
messages/ja/settings/providers/form/sections.json

@@ -297,6 +297,12 @@
         "label": "プロバイダーグループ",
         "placeholder": "例: premium, economy"
       },
+      "groupPriorities": {
+        "label": "グループ別優先度",
+        "desc": "特定のグループに個別の優先度を設定します。空欄の場合は上記のグローバル優先度を使用します。",
+        "placeholder": "グローバル優先度を使用",
+        "noGroups": "グループ別優先度を設定するには、先にグループタグを設定してください"
+      },
       "priority": {
         "desc": "値が小さいほど優先度が高くなります(0 が最も高い)。システムは最も高い優先度のプロバイダーのみから選択します。推奨: メイン=0、予備=1、緊急=2",
         "label": "優先度",

+ 15 - 0
messages/ja/settings/providers/inlineEdit.json

@@ -1,12 +1,27 @@
 {
+  "addGroup": "グループを追加",
   "cancel": "キャンセル",
   "costMultiplierInvalid": "0以上の数値を入力してください",
   "costMultiplierLabel": "コスト倍率",
+  "createGroup": "\"{name}\" を作成",
+  "editGroups": "グループを編集",
+  "globalPriority": "グローバル優先度",
+  "groupPriorityLabel": "グループ別優先度",
+  "groupPriorityPlaceholder": "グローバル値を使用",
+  "groupSaveError": "グループの保存に失敗しました",
+  "groupValidation": {
+    "empty": "グループ名を空にすることはできません",
+    "noComma": "グループ名にカンマを含めることはできません",
+    "tooLong": "グループ名は50文字以内にしてください"
+  },
+  "noGroupsAvailable": "利用可能なグループがありません",
   "priorityInvalid": "0 以上の整数を入力してください",
   "priorityLabel": "優先度",
   "save": "保存する",
+  "saving": "保存中...",
   "saveFailed": "保存に失敗しました",
   "saveSuccess": "保存に成功しました",
+  "searchGroups": "グループを検索...",
   "weightInvalid": "1〜100 の整数を入力してください",
   "weightLabel": "重み"
 }

+ 7 - 1
messages/ja/settings/providers/list.json

@@ -33,5 +33,11 @@
   "unknownError": "不明なエラー",
   "viewFullKey": "完全な API キーを表示",
   "viewFullKeyDesc": "安全に保管し、他人と共有しないでください",
-  "weight": "重み"
+  "weight": "重み",
+  "actions": "アクション",
+  "actionClone": "クローン",
+  "actionResetCircuit": "サーキットリセット",
+  "actionResetUsage": "使用量リセット",
+  "actionDelete": "削除",
+  "selectProvider": "{name} を選択"
 }

+ 3 - 2
messages/ru/dashboard.json

@@ -243,6 +243,7 @@
         "billingRedirected": "оплата: факт."
       },
       "errorMessage": "Сообщение об ошибке",
+      "fake200ForwardedNotice": "Примечание: для потоковых запросов эта ошибка может быть обнаружена только после завершения потока; содержимое ответа могло уже быть передано клиенту.",
       "filteredProviders": "Отфильтрованные поставщики",
       "providerChain": {
         "title": "Хронология цепочки решений поставщика",
@@ -493,7 +494,7 @@
       "providers": "Поставщики",
       "models": "Модели",
       "noDetailedData": "Подробные данные отсутствуют",
-      "storageTip": "Подробные данные не найдены. Чтобы просмотреть детали запроса, проверьте, установлена ли переменная окружения STORE_SESSION_MESSAGES в значение true. Примечание: включение увеличит использование памяти Redis и может содержать конфиденциальную информацию.",
+      "storageTip": "Подробные данные не найдены. Возможные причины: Redis отключен/недоступен (REDIS_URL + ENABLE_RATE_LIMIT=true), данные истекли (SESSION_TTL, по умолчанию 300с), или сохранение тела ответа отключено (STORE_SESSION_RESPONSE_BODY=false, влияет только на тело ответа). Чтобы сохранять сообщения без маскировки, установите STORE_SESSION_MESSAGES=true.",
       "clientInfo": "Информация о клиенте",
       "requestHeaders": "Заголовки запроса",
       "requestBody": "Тело запроса",
@@ -571,7 +572,7 @@
       "fetchFailed": "Не удалось получить",
       "unknownError": "Неизвестная ошибка",
       "storageNotEnabled": "Не сохранено",
-      "storageNotEnabledHint": "Подсказка: установите переменную окружения STORE_SESSION_MESSAGES=true, чтобы включить сохранение сообщений"
+      "storageNotEnabledHint": "Подсказка: проверьте REDIS_URL и ENABLE_RATE_LIMIT=true (кэш деталей сессии). Чтобы сохранять сообщения без маскировки, установите STORE_SESSION_MESSAGES=true."
     },
     "errors": {
       "copyFailed": "Не удалось скопировать"

+ 4 - 1
messages/ru/settings/providers/filter.json

@@ -9,5 +9,8 @@
     "active": "Активные",
     "all": "Все статусы",
     "inactive": "Неактивные"
-  }
+  },
+  "mobileFilter": "Фильтр",
+  "mobileFilterCount": "Фильтр ({count})",
+  "resetFilters": "Сбросить фильтры"
 }

+ 6 - 0
messages/ru/settings/providers/form/sections.json

@@ -297,6 +297,12 @@
         "label": "Группа провайдера",
         "placeholder": "напр. premium, economy"
       },
+      "groupPriorities": {
+        "label": "Приоритет по группам",
+        "desc": "Переопределение глобального приоритета для определённых групп. Оставьте пустым для использования глобального приоритета выше.",
+        "placeholder": "Использовать глобальный приоритет",
+        "noGroups": "Сначала задайте тег группы для настройки приоритетов по группам"
+      },
       "priority": {
         "desc": "Меньше — выше приоритет (0 — наивысший). Система выбирает только из провайдеров с максимальным приоритетом. Рекомендации: основной=0, резерв=1, аварийный=2",
         "label": "Приоритет",

+ 15 - 0
messages/ru/settings/providers/inlineEdit.json

@@ -1,12 +1,27 @@
 {
+  "addGroup": "Добавить группу",
   "cancel": "Отмена",
   "costMultiplierInvalid": "Введите число не меньше 0",
   "costMultiplierLabel": "Коэф цены",
+  "createGroup": "Создать \"{name}\"",
+  "editGroups": "Редактировать группы",
+  "globalPriority": "Глобальный приоритет",
+  "groupPriorityLabel": "Приоритет по группам",
+  "groupPriorityPlaceholder": "Глобальное значение",
+  "groupSaveError": "Не удалось сохранить изменения группы",
+  "groupValidation": {
+    "empty": "Название группы не может быть пустым",
+    "noComma": "Название группы не может содержать запятую",
+    "tooLong": "Название группы не может превышать 50 символов"
+  },
+  "noGroupsAvailable": "Нет доступных групп",
   "priorityInvalid": "Введите целое число >= 0",
   "priorityLabel": "Приоритет",
   "save": "Сохранить",
+  "saving": "Сохранение...",
   "saveFailed": "Не удалось сохранить",
   "saveSuccess": "Успешно сохранено",
+  "searchGroups": "Поиск групп...",
   "weightInvalid": "Введите целое число от 1 до 100",
   "weightLabel": "Вес"
 }

+ 7 - 1
messages/ru/settings/providers/list.json

@@ -33,5 +33,11 @@
   "unknownError": "Неизвестная ошибка",
   "viewFullKey": "Просмотр полного API-ключа",
   "viewFullKeyDesc": "Пожалуйста, храните его в безопасности и не делитесь с другими",
-  "weight": "Вес"
+  "weight": "Вес",
+  "actions": "Действия",
+  "actionClone": "Клонировать",
+  "actionResetCircuit": "Сбросить автоматический выключатель",
+  "actionResetUsage": "Сбросить использование",
+  "actionDelete": "Удалить",
+  "selectProvider": "Выбрать {name}"
 }

+ 3 - 2
messages/zh-CN/dashboard.json

@@ -243,6 +243,7 @@
         "billingRedirected": "计费: 实际"
       },
       "errorMessage": "错误信息",
+      "fake200ForwardedNotice": "提示:对于流式请求,该失败可能在流结束后才被识别;响应内容可能已原样透传给客户端。",
       "filteredProviders": "被过滤的供应商",
       "providerChain": {
         "title": "供应商决策链时间线",
@@ -493,7 +494,7 @@
       "providers": "供应商",
       "models": "模型",
       "noDetailedData": "暂无详细数据",
-      "storageTip": "未找到详细数据。如需查看请求详情,请检查环境变量 STORE_SESSION_MESSAGES 是否已设置为 true。注意:启用后会增加 Redis 内存使用,且可能包含敏感信息。",
+      "storageTip": "未找到详细数据。可能原因:Redis 未配置/不可用(REDIS_URL + ENABLE_RATE_LIMIT=true)、数据已过期(SESSION_TTL,默认 300 秒),或已禁用响应体存储(STORE_SESSION_RESPONSE_BODY=false,仅影响响应体)。如需保存未脱敏 messages,请设置 STORE_SESSION_MESSAGES=true。",
       "clientInfo": "客户端信息",
       "requestHeaders": "请求头",
       "requestBody": "请求体",
@@ -571,7 +572,7 @@
       "fetchFailed": "获取失败",
       "unknownError": "未知错误",
       "storageNotEnabled": "未存储",
-      "storageNotEnabledHint": "提示:请设置环境变量 STORE_SESSION_MESSAGES=true 以启用 messages 存储"
+      "storageNotEnabledHint": "提示:请检查 REDIS_URL 与 ENABLE_RATE_LIMIT=true(用于会话详情缓存);如需保存未脱敏 messages,请设置 STORE_SESSION_MESSAGES=true。"
     },
     "errors": {
       "copyFailed": "复制失败"

+ 4 - 1
messages/zh-CN/settings/providers/filter.json

@@ -9,5 +9,8 @@
     "all": "全部",
     "default": "default"
   },
-  "circuitBroken": "熔断"
+  "circuitBroken": "熔断",
+  "mobileFilter": "筛选",
+  "mobileFilterCount": "筛选 ({count})",
+  "resetFilters": "重置筛选"
 }

+ 6 - 0
messages/zh-CN/settings/providers/form/sections.json

@@ -70,6 +70,12 @@
         "label": "供应商分组",
         "placeholder": "例如 premium, economy",
         "desc": "分组标签。从列表选择或输入新名称后按 Enter 创建(最多50字符)。只有 providerGroup 匹配的用户才能使用此供应商。"
+      },
+      "groupPriorities": {
+        "label": "分组优先级覆盖",
+        "desc": "为特定分组设置独立的优先级。留空则使用上方的全局优先级。",
+        "placeholder": "使用全局优先级",
+        "noGroups": "请先设置分组标签,才能配置分组优先级"
       }
     },
     "cacheTtl": {

+ 21 - 6
messages/zh-CN/settings/providers/inlineEdit.json

@@ -1,12 +1,27 @@
 {
-  "save": "保存",
+  "addGroup": "添加分组",
   "cancel": "取消",
-  "saveSuccess": "保存成功",
-  "saveFailed": "保存失败",
-  "priorityLabel": "优先级",
-  "weightLabel": "权重",
+  "costMultiplierInvalid": "请输入大于等于 0 的数字",
   "costMultiplierLabel": "成本倍数",
+  "createGroup": "创建 \"{name}\"",
+  "editGroups": "编辑分组",
+  "globalPriority": "全局优先级",
+  "groupPriorityLabel": "分组优先级",
+  "groupPriorityPlaceholder": "使用全局值",
+  "groupSaveError": "保存分组失败",
+  "groupValidation": {
+    "empty": "分组名不能为空",
+    "noComma": "分组名不能包含逗号",
+    "tooLong": "分组名不能超过50字符"
+  },
+  "noGroupsAvailable": "无可用分组",
   "priorityInvalid": "请输入大于等于 0 的整数",
+  "priorityLabel": "优先级",
+  "save": "保存",
+  "saving": "保存中...",
+  "saveFailed": "保存失败",
+  "saveSuccess": "保存成功",
+  "searchGroups": "搜索分组...",
   "weightInvalid": "请输入 1-100 之间的整数",
-  "costMultiplierInvalid": "请输入大于等于 0 的数字"
+  "weightLabel": "权重"
 }

+ 7 - 1
messages/zh-CN/settings/providers/list.json

@@ -33,5 +33,11 @@
   "toggleSuccessDesc": "供应商 \"{name}\" 状态已更新",
   "toggleFailed": "状态切换失败",
   "statusEnabled": "启用",
-  "statusDisabled": "禁用"
+  "statusDisabled": "禁用",
+  "actions": "操作",
+  "actionClone": "克隆",
+  "actionResetCircuit": "重置熔断",
+  "actionResetUsage": "重置用量",
+  "actionDelete": "删除",
+  "selectProvider": "选择 {name}"
 }

+ 3 - 2
messages/zh-TW/dashboard.json

@@ -243,6 +243,7 @@
         "billingRedirected": "計費: 實際"
       },
       "errorMessage": "錯誤訊息",
+      "fake200ForwardedNotice": "提示:對於串流請求,此失敗可能在串流結束後才被識別;回應內容可能已原樣透傳給用戶端。",
       "filteredProviders": "被過濾的供應商",
       "providerChain": {
         "title": "供應商決策鏈時間軸",
@@ -493,7 +494,7 @@
       "providers": "供應商",
       "models": "Model",
       "noDetailedData": "暫無詳細資料",
-      "storageTip": "未找到詳細資料。如需查看請求詳情,請檢查環境變數 STORE_SESSION_MESSAGES 是否已設定為 true。注意:啟用後會增加 Redis 記憶體使用,且可能包含敏感資訊。",
+      "storageTip": "未找到詳細資料。可能原因:Redis 未設定/不可用(REDIS_URL + ENABLE_RATE_LIMIT=true)、資料已過期(SESSION_TTL,預設 300 秒),或已停用回應本文儲存(STORE_SESSION_RESPONSE_BODY=false,僅影響回應本文)。如需儲存未脫敏的 messages,請設定 STORE_SESSION_MESSAGES=true。",
       "clientInfo": "用戶端資訊",
       "requestHeaders": "請求頭",
       "requestBody": "請求體",
@@ -571,7 +572,7 @@
       "fetchFailed": "取得失敗",
       "unknownError": "未知錯誤",
       "storageNotEnabled": "未儲存",
-      "storageNotEnabledHint": "提示:請設定環境變數 STORE_SESSION_MESSAGES=true 以啟用訊息儲存"
+      "storageNotEnabledHint": "提示:請檢查 REDIS_URL 與 ENABLE_RATE_LIMIT=true(用於 Session 詳情快取);如需儲存未脫敏的 messages,請設定 STORE_SESSION_MESSAGES=true。"
     },
     "errors": {
       "copyFailed": "複製失敗"

+ 4 - 1
messages/zh-TW/settings/providers/filter.json

@@ -9,5 +9,8 @@
     "active": "已啟用",
     "all": "所有狀態",
     "inactive": "已停用"
-  }
+  },
+  "mobileFilter": "篩選",
+  "mobileFilterCount": "篩選 ({count})",
+  "resetFilters": "重置篩選"
 }

+ 6 - 0
messages/zh-TW/settings/providers/form/sections.json

@@ -297,6 +297,12 @@
         "label": "供應商分組",
         "placeholder": "例如 premium, economy"
       },
+      "groupPriorities": {
+        "label": "分組優先級覆蓋",
+        "desc": "為特定分組設定獨立的優先級。留空則使用上方的全域優先級。",
+        "placeholder": "使用全域優先級",
+        "noGroups": "請先設定分組標籤,才能設定分組優先級"
+      },
       "priority": {
         "desc": "數值越小,優先級越高(0 最高)。系統只會從最高優先級的供應商中選擇。建議:主力=0,備用=1,緊急備援=2",
         "label": "優先級",

+ 15 - 0
messages/zh-TW/settings/providers/inlineEdit.json

@@ -1,12 +1,27 @@
 {
+  "addGroup": "新增分組",
   "cancel": "放棄",
   "costMultiplierInvalid": "請輸入大於等於 0 的數字",
   "costMultiplierLabel": "成本倍數",
+  "createGroup": "建立 \"{name}\"",
+  "editGroups": "編輯分組",
+  "globalPriority": "全域優先級",
+  "groupPriorityLabel": "分組優先級",
+  "groupPriorityPlaceholder": "使用全域值",
+  "groupSaveError": "儲存分組失敗",
+  "groupValidation": {
+    "empty": "分組名稱不能為空",
+    "noComma": "分組名稱不能包含逗號",
+    "tooLong": "分組名稱不能超過50字元"
+  },
+  "noGroupsAvailable": "無可用分組",
   "priorityInvalid": "請輸入大於等於 0 的整數",
   "priorityLabel": "優先級",
   "save": "儲存",
+  "saving": "儲存中...",
   "saveFailed": "儲存失敗",
   "saveSuccess": "儲存成功",
+  "searchGroups": "搜尋分組...",
   "weightInvalid": "請輸入 1-100 之間的整數",
   "weightLabel": "權重"
 }

+ 7 - 1
messages/zh-TW/settings/providers/list.json

@@ -33,5 +33,11 @@
   "unknownError": "未知錯誤",
   "viewFullKey": "查看完整 API 金鑰",
   "viewFullKeyDesc": "請妥善保管,不要洩露給他人",
-  "weight": "權重"
+  "weight": "權重",
+  "actions": "操作",
+  "actionClone": "複製",
+  "actionResetCircuit": "重置熔斷",
+  "actionResetUsage": "重置用量",
+  "actionDelete": "刪除",
+  "selectProvider": "選擇 {name}"
 }

+ 2 - 1
package.json

@@ -4,7 +4,7 @@
   "private": true,
   "scripts": {
     "dev": "next dev --port 13500",
-    "build": "next build && cp VERSION .next/standalone/VERSION",
+    "build": "next build && (node scripts/copy-version-to-standalone.cjs || bun scripts/copy-version-to-standalone.cjs)",
     "start": "next start",
     "lint": "biome check .",
     "lint:fix": "biome check --write .",
@@ -102,6 +102,7 @@
     "timeago.js": "^4",
     "tw-animate-css": "^1",
     "undici": "^7",
+    "vaul": "^1.1.2",
     "zod": "^4"
   },
   "devDependencies": {

+ 15 - 0
scripts/copy-version-to-standalone.cjs

@@ -0,0 +1,15 @@
+const fs = require("node:fs");
+const path = require("node:path");
+
+const src = path.resolve(process.cwd(), "VERSION"); // VERSION file, resolved against the current working directory
+const dstDir = path.resolve(process.cwd(), ".next", "standalone"); // target directory: <cwd>/.next/standalone
+const dst = path.join(dstDir, "VERSION");
+
+if (!fs.existsSync(src)) { // nothing to copy: report the missing file and exit with a non-zero code
+  console.error(`[copy-version] VERSION not found at ${src}`);
+  process.exit(1);
+}
+
+fs.mkdirSync(dstDir, { recursive: true }); // create the target directory (and any missing parents) before copying
+fs.copyFileSync(src, dst);
+console.log(`[copy-version] Copied VERSION -> ${dst}`);

+ 1 - 0
scripts/deploy.ps1

@@ -496,6 +496,7 @@ ENABLE_RATE_LIMIT=true
 # Session Configuration
 SESSION_TTL=300
 STORE_SESSION_MESSAGES=false
+STORE_SESSION_RESPONSE_BODY=true
 
 # Cookie Security
 ENABLE_SECURE_COOKIES=$secureCookies

+ 1 - 0
scripts/deploy.sh

@@ -578,6 +578,7 @@ ENABLE_RATE_LIMIT=true
 # Session Configuration
 SESSION_TTL=300
 STORE_SESSION_MESSAGES=false
+STORE_SESSION_RESPONSE_BODY=true
 
 # Cookie Security
 ENABLE_SECURE_COOKIES=${secure_cookies}

+ 2 - 0
src/actions/providers.ts

@@ -244,6 +244,7 @@ export async function getProviders(): Promise<ProviderDisplay[]> {
         isEnabled: provider.isEnabled,
         weight: provider.weight,
         priority: provider.priority,
+        groupPriorities: provider.groupPriorities,
         costMultiplier: provider.costMultiplier,
         groupTag: provider.groupTag,
         providerType: provider.providerType,
@@ -616,6 +617,7 @@ export async function editProvider(
     priority?: number;
     cost_multiplier?: number;
     group_tag?: string | null;
+    group_priorities?: Record<string, number> | null;
     provider_type?: ProviderType;
     preserve_client_ip?: boolean;
     model_redirects?: Record<string, string> | null;

+ 16 - 0
src/app/[locale]/dashboard/logs/_components/error-details-dialog.test.tsx

@@ -252,6 +252,7 @@ const messages = {
           default: "No error",
         },
         errorMessage: "Error message",
+        fake200ForwardedNotice: "Note: detected after stream end; payload may have been forwarded",
         viewDetails: "View details",
         filteredProviders: "Filtered providers",
         providerChain: {
@@ -325,6 +326,21 @@ function parseHtml(html: string) {
 }
 
 describe("error-details-dialog layout", () => {
+  test("renders fake-200 forwarded notice when errorMessage is a FAKE_200_* code", () => {
+    const html = renderWithIntl(
+      <ErrorDetailsDialog
+        externalOpen
+        statusCode={502}
+        errorMessage={"FAKE_200_EMPTY_BODY"}
+        providerChain={null}
+        sessionId={null}
+      />
+    );
+
+    expect(html).toContain("FAKE_200_EMPTY_BODY");
+    expect(html).toContain("Note: detected after stream end; payload may have been forwarded");
+  });
+
   test("renders special settings section when specialSettings exists", () => {
     const html = renderWithIntl(
       <ErrorDetailsDialog

+ 10 - 0
src/app/[locale]/dashboard/logs/_components/error-details-dialog/components/SummaryTab.tsx

@@ -8,6 +8,7 @@ import {
   DollarSign,
   ExternalLink,
   Globe,
+  InfoIcon,
   Loader2,
   Monitor,
   Settings2,
@@ -64,6 +65,8 @@ export function SummaryTab({
   const hasRedirect = originalModel && currentModel && originalModel !== currentModel;
   const specialSettingsContent =
     specialSettings && specialSettings.length > 0 ? JSON.stringify(specialSettings, null, 2) : null;
+  const isFake200PostStreamFailure =
+    typeof errorMessage === "string" && errorMessage.startsWith("FAKE_200_");
 
   return (
     <div className="space-y-6">
@@ -423,6 +426,13 @@ export function SummaryTab({
             <p className="text-xs text-rose-800 dark:text-rose-200 line-clamp-3 font-mono">
               {errorMessage.length > 200 ? `${errorMessage.slice(0, 200)}...` : errorMessage}
             </p>
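+            {/* Note: fake-200 detection happens after the SSE stream ends; by then the content may already have been forwarded to the client, so show a notice to avoid misleading the user. */}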
+            {isFake200PostStreamFailure && (
+              <div className="mt-2 flex items-start gap-2 rounded-md border border-amber-200 bg-amber-50 p-2 text-[11px] text-amber-800 dark:border-amber-800 dark:bg-amber-950/20 dark:text-amber-200">
+                <InfoIcon className="h-3.5 w-3.5 shrink-0 mt-0.5" aria-hidden="true" />
+                <span>{t("fake200ForwardedNotice")}</span>
+              </div>
+            )}
             {errorMessage.length > 200 && onViewLogicTrace && (
               <Button
                 variant="link"

+ 20 - 0
src/app/[locale]/dashboard/logs/_components/provider-chain-popover.test.tsx

@@ -85,6 +85,7 @@ const messages = {
       },
       details: {
         clickStatusCode: "Click status code",
+        fake200ForwardedNotice: "Note: payload may have been forwarded",
       },
     },
   },
@@ -256,6 +257,25 @@ describe("provider-chain-popover group badges", () => {
 });
 
 describe("provider-chain-popover layout", () => {
+  test("renders fake-200 forwarded notice when chain has FAKE_200_* errorMessage", () => {
+    const html = renderWithIntl(
+      <ProviderChainPopover
+        chain={[
+          {
+            id: 1,
+            name: "p1",
+            reason: "retry_failed",
+            statusCode: 502,
+            errorMessage: "FAKE_200_EMPTY_BODY",
+          },
+        ]}
+        finalProvider="p1"
+      />
+    );
+
+    expect(html).toContain("Note: payload may have been forwarded");
+  });
+
   test("requestCount<=1 branch keeps truncation container shrinkable", () => {
     const html = renderWithIntl(
       <ProviderChainPopover

+ 19 - 0
src/app/[locale]/dashboard/logs/_components/provider-chain-popover.tsx

@@ -105,6 +105,11 @@ export function ProviderChainPopover({
   const t = useTranslations("dashboard");
   const tChain = useTranslations("provider-chain");
 
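+  // "Fake 200" detection happens after the SSE stream ends: the response content may already have been forwarded to the client, but internally the request is counted as a failure for stats/circuit breaking.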
+  const hasFake200PostStreamFailure = chain.some(
+    (item) => typeof item.errorMessage === "string" && item.errorMessage.startsWith("FAKE_200_")
+  );
+
   // Calculate actual request count (excluding intermediate states)
   const requestCount = chain.filter(isActualRequest).length;
 
@@ -152,6 +157,14 @@ export function ProviderChainPopover({
                 {/* Provider name */}
                 <div className="font-medium text-xs">{displayName}</div>
 
+                {/* Note: fake-200 detection happens after the SSE stream ends; by then the content may already have been forwarded to the client. */}
+                {hasFake200PostStreamFailure && (
+                  <div className="flex items-start gap-1.5 text-[10px] text-amber-500 dark:text-amber-400">
+                    <InfoIcon className="h-3 w-3 shrink-0 mt-0.5" aria-hidden="true" />
+                    <span>{t("logs.details.fake200ForwardedNotice")}</span>
+                  </div>
+                )}
+
                 {/* Session reuse detailed info */}
                 {isSessionReuse && (
                   <div className="space-y-1.5 pt-1 border-t border-zinc-600 dark:border-zinc-300">
@@ -453,6 +466,12 @@ export function ProviderChainPopover({
         </div>
 
         <div className="p-2 border-t bg-muted/30">
+          {hasFake200PostStreamFailure && (
+            <div className="flex items-start justify-center gap-1.5 text-[10px] text-amber-700 dark:text-amber-300 px-2 pb-1">
+              <InfoIcon className="h-3 w-3 shrink-0 mt-0.5" aria-hidden="true" />
+              <span className="text-center">{t("logs.details.fake200ForwardedNotice")}</span>
+            </div>
+          )}
           <p className="text-[10px] text-muted-foreground text-center">
             {onChainItemClick
               ? t("logs.providerChain.clickItemForDetails")

+ 0 - 1
src/app/[locale]/dashboard/logs/_hooks/use-lazy-filter-options.ts

@@ -47,7 +47,6 @@ function createLazyFilterHook<T>(
       };
     }, []);
 
-    // biome-ignore lint/correctness/useExhaustiveDependencies: fetcher 是工厂函数的闭包参数,在 hook 生命周期内永不改变
     const load = useCallback(async () => {
       // 如果已加载或有进行中的请求,跳过
       if (isLoaded || inFlightRef.current) return;

+ 4 - 0
src/app/[locale]/settings/providers/_components/forms/provider-form/index.tsx

@@ -308,6 +308,10 @@ function ProviderFormContent({
           allowed_models:
             state.routing.allowedModels.length > 0 ? state.routing.allowedModels : null,
           priority: state.routing.priority,
+          group_priorities:
+            Object.keys(state.routing.groupPriorities).length > 0
+              ? state.routing.groupPriorities
+              : null,
           weight: state.routing.weight,
           cost_multiplier: state.routing.costMultiplier,
           group_tag: state.routing.groupTag.length > 0 ? state.routing.groupTag.join(",") : null,

+ 3 - 0
src/app/[locale]/settings/providers/_components/forms/provider-form/provider-form-context.tsx

@@ -47,6 +47,7 @@ export function createInitialState(
       modelRedirects: sourceProvider?.modelRedirects ?? {},
       allowedModels: sourceProvider?.allowedModels ?? [],
       priority: sourceProvider?.priority ?? 0,
+      groupPriorities: sourceProvider?.groupPriorities ?? {},
       weight: sourceProvider?.weight ?? 1,
       costMultiplier: sourceProvider?.costMultiplier ?? 1.0,
       cacheTtlPreference: sourceProvider?.cacheTtlPreference ?? "inherit",
@@ -141,6 +142,8 @@ export function providerFormReducer(
       return { ...state, routing: { ...state.routing, allowedModels: action.payload } };
     case "SET_PRIORITY":
       return { ...state, routing: { ...state.routing, priority: action.payload } };
+    case "SET_GROUP_PRIORITIES":
+      return { ...state, routing: { ...state.routing, groupPriorities: action.payload } };
     case "SET_WEIGHT":
       return { ...state, routing: { ...state.routing, weight: action.payload } };
     case "SET_COST_MULTIPLIER":

+ 2 - 0
src/app/[locale]/settings/providers/_components/forms/provider-form/provider-form-types.ts

@@ -40,6 +40,7 @@ export interface RoutingState {
   modelRedirects: Record<string, string>;
   allowedModels: string[];
   priority: number;
+  groupPriorities: Record<string, number>;
   weight: number;
   costMultiplier: number;
   cacheTtlPreference: "inherit" | "5m" | "1h";
@@ -118,6 +119,7 @@ export type ProviderFormAction =
   | { type: "SET_MODEL_REDIRECTS"; payload: Record<string, string> }
   | { type: "SET_ALLOWED_MODELS"; payload: string[] }
   | { type: "SET_PRIORITY"; payload: number }
+  | { type: "SET_GROUP_PRIORITIES"; payload: Record<string, number> }
   | { type: "SET_WEIGHT"; payload: number }
   | { type: "SET_COST_MULTIPLIER"; payload: number }
   | { type: "SET_CACHE_TTL_PREFERENCE"; payload: "inherit" | "5m" | "1h" }

+ 40 - 0
src/app/[locale]/settings/providers/_components/forms/provider-form/sections/routing-section.tsx

@@ -281,6 +281,46 @@ export function RoutingSection() {
               />
             </SmartInputWrapper>
           </div>
+
+          {/* Per-Group Priority Override */}
+          {state.routing.groupTag.length > 0 && (
+            <div className="mt-4 space-y-3">
+              <div className="text-sm font-medium">
+                {t("sections.routing.scheduleParams.groupPriorities.label")}
+              </div>
+              <p className="text-xs text-muted-foreground">
+                {t("sections.routing.scheduleParams.groupPriorities.desc")}
+              </p>
+              <div className="grid grid-cols-1 sm:grid-cols-2 gap-3">
+                {state.routing.groupTag.map((group) => (
+                  <div key={group} className="flex items-center gap-2">
+                    <Badge variant="outline" className="font-mono text-xs shrink-0">
+                      {group}
+                    </Badge>
+                    <Input
+                      type="number"
+                      value={state.routing.groupPriorities[group] ?? ""}
+                      onChange={(e) => {
+                        const val = e.target.value;
+                        const next = { ...state.routing.groupPriorities };
+                        if (val === "") {
+                          delete next[group];
+                        } else {
+                          next[group] = parseInt(val, 10) || 0;
+                        }
+                        dispatch({ type: "SET_GROUP_PRIORITIES", payload: next });
+                      }}
+                      placeholder={t("sections.routing.scheduleParams.groupPriorities.placeholder")}
+                      disabled={state.ui.isPending}
+                      min="0"
+                      step="1"
+                      className="h-8 text-sm"
+                    />
+                  </div>
+                ))}
+              </div>
+            </div>
+          )}
         </SectionCard>
 
         {/* Advanced Settings */}

+ 315 - 0
src/app/[locale]/settings/providers/_components/group-edit-combobox.tsx

@@ -0,0 +1,315 @@
+"use client";
+
+import { Loader2, Plus } from "lucide-react";
+import { useTranslations } from "next-intl";
+import type * as React from "react";
+import { useCallback, useEffect, useMemo, useRef, useState } from "react";
+import { Badge } from "@/components/ui/badge";
+import { Checkbox } from "@/components/ui/checkbox";
+import {
+  Command,
+  CommandEmpty,
+  CommandGroup,
+  CommandInput,
+  CommandItem,
+  CommandList,
+} from "@/components/ui/command";
+import { Drawer, DrawerContent, DrawerHeader, DrawerTitle } from "@/components/ui/drawer";
+import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
+import { useMediaQuery } from "@/lib/hooks/use-media-query";
+import { cn } from "@/lib/utils";
+import { getContrastTextColor, getGroupColor } from "@/lib/utils/color";
+
+const MAX_GROUP_NAME_LENGTH = 50;
+
+export interface GroupEditComboboxProps { // Props for the inline provider-group editor (GroupEditCombobox).
+  currentGroups: string[]; // Groups rendered as badges on the trigger; copied into the local selection whenever the editor opens.
+  allGroups: string[]; // Options offered to admins; the "default" entry is filtered out.
+  userGroups: string[]; // Options offered to non-admins; the "default" entry is filtered out.
+  isAdmin: boolean; // Chooses allGroups over userGroups and gates creating new groups from the search text.
+  onSave: (groups: string[]) => Promise<boolean>; // Called with the full new selection after each toggle/create; resolving false or rejecting rolls the selection back.
+  disabled?: boolean; // Disables the trigger button and blocks opening; the component defaults it to false.
+}
+
+export function GroupEditCombobox({
+  currentGroups,
+  allGroups,
+  userGroups,
+  isAdmin,
+  onSave,
+  disabled = false,
+}: GroupEditComboboxProps) {
+  const t = useTranslations("settings.providers.inlineEdit");
+  const isDesktop = useMediaQuery("(min-width: 768px)");
+  const [open, setOpen] = useState(false);
+  const [searchValue, setSearchValue] = useState("");
+  const [selectedGroups, setSelectedGroups] = useState<string[]>([]);
+  const [saving, setSaving] = useState(false);
+
+  const inputRef = useRef<HTMLInputElement>(null);
+
+  // Sync selectedGroups with currentGroups when opening
+  useEffect(() => {
+    if (open) {
+      setSelectedGroups([...currentGroups]);
+      setSearchValue("");
+    }
+  }, [open, currentGroups]);
+
+  // Auto-focus search input when opening
+  useEffect(() => {
+    if (!open) return;
+    const raf = requestAnimationFrame(() => {
+      inputRef.current?.focus();
+    });
+    return () => cancelAnimationFrame(raf);
+  }, [open]);
+
+  // Available groups: admin sees all groups, non-admin sees only their assigned groups
+  const availableGroups = useMemo(() => {
+    if (isAdmin) {
+      return allGroups.filter((g) => g !== "default");
+    }
+    return userGroups.filter((g) => g !== "default");
+  }, [isAdmin, allGroups, userGroups]);
+
+  // Validation for new group name
+  // Returns the translated validation message for a candidate group name, or
+  // null when the trimmed name is acceptable. Checks run in a fixed order:
+  // empty, contains a comma, longer than MAX_GROUP_NAME_LENGTH.
+  const validateGroupName = useCallback(
+    (name: string): string | null => {
+      const candidate = name.trim();
+
+      if (candidate === "") return t("groupValidation.empty");
+      if (candidate.indexOf(",") !== -1) return t("groupValidation.noComma");
+      if (candidate.length > MAX_GROUP_NAME_LENGTH) return t("groupValidation.tooLong");
+
+      return null;
+    },
+    [t]
+  );
+
+  // Check if the search value matches an existing group (case-insensitive)
+  const searchMatchesExisting = useMemo(() => {
+    const trimmed = searchValue.trim().toLowerCase();
+    return availableGroups.some((g) => g.toLowerCase() === trimmed);
+  }, [searchValue, availableGroups]);
+
+  // Can create a new group?
+  const canCreateGroup = useMemo(() => {
+    const trimmed = searchValue.trim();
+    if (!isAdmin) return false;
+    if (trimmed.length === 0) return false;
+    if (searchMatchesExisting) return false;
+    return validateGroupName(trimmed) === null;
+  }, [isAdmin, searchValue, searchMatchesExisting, validateGroupName]);
+
+  const stopPropagation = (e: React.SyntheticEvent) => {
+    e.stopPropagation();
+  };
+
+  const handleOpenChange = (nextOpen: boolean) => {
+    if (disabled && nextOpen) return;
+    setOpen(nextOpen);
+  };
+
+  // Flips membership of `group` in the selection and persists the result
+  // straight away. The UI is updated optimistically and restored to the prior
+  // selection if onSave resolves to a falsy value or rejects.
+  const toggleGroup = async (group: string) => {
+    const before = [...selectedGroups];
+    const wasSelected = before.includes(group);
+    const after = wasSelected ? before.filter((g) => g !== group) : [...before, group];
+
+    setSelectedGroups(after);
+    setSaving(true);
+
+    let persisted = false;
+    try {
+      persisted = await onSave(after);
+    } catch {
+      // A rejected save is treated exactly like a reported failure.
+      persisted = false;
+    }
+
+    if (!persisted) {
+      setSelectedGroups(before);
+    }
+    setSaving(false);
+  };
+
+  // Creates the group typed into the search box: adds the trimmed name to the
+  // selection and persists it immediately through onSave. The selection is
+  // rolled back when the save reports failure or throws.
+  const handleCreateGroup = async () => {
+    const trimmed = searchValue.trim();
+    // The Enter-key shortcut on the search input is not covered by the disabled
+    // CommandItem, so an in-flight save has to be rejected here. Otherwise two
+    // saves could overlap and the first rollback would clobber the second
+    // selection.
+    if (!canCreateGroup || saving) return;
+
+    const previousSelection = [...selectedGroups];
+    setSearchValue("");
+
+    // A name can be missing from the option list yet already be selected (for
+    // example "default", which is filtered out of the options). Appending it
+    // again would persist a duplicate entry, so there is nothing to save.
+    if (previousSelection.includes(trimmed)) return;
+
+    const newSelection = [...previousSelection, trimmed];
+    setSelectedGroups(newSelection);
+
+    // Save immediately
+    setSaving(true);
+    try {
+      const ok = await onSave(newSelection);
+      if (!ok) {
+        // Rollback on failure
+        setSelectedGroups(previousSelection);
+      }
+    } catch {
+      // Rollback on exception
+      setSelectedGroups(previousSelection);
+    } finally {
+      setSaving(false);
+    }
+  };
+
+  // Trigger button: show badges if groups exist, otherwise show + button
+  const triggerButton = (
+    <button
+      type="button"
+      disabled={disabled}
+      className={cn(
+        "inline-flex flex-wrap items-center gap-1 rounded-sm",
+        "focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-1",
+        disabled ? "cursor-default" : "cursor-pointer"
+      )}
+      onPointerDown={stopPropagation}
+      onClick={(e) => {
+        e.stopPropagation();
+        if (!isDesktop) handleOpenChange(true);
+      }}
+    >
+      {currentGroups.length > 0 ? (
+        currentGroups.map((tag, index) => {
+          const bgColor = getGroupColor(tag);
+          return (
+            <Badge
+              key={`${tag}-${index}`}
+              className="text-xs"
+              style={{ backgroundColor: bgColor, color: getContrastTextColor(bgColor) }}
+            >
+              {tag}
+            </Badge>
+          );
+        })
+      ) : (
+        <Badge variant="outline" className="text-xs gap-1">
+          <Plus className="h-3 w-3" />
+          {t("addGroup")}
+        </Badge>
+      )}
+    </button>
+  );
+
+  // Filter groups based on search
+  const filteredGroups = useMemo(() => {
+    const trimmed = searchValue.trim().toLowerCase();
+    if (!trimmed) return availableGroups;
+    return availableGroups.filter((g) => g.toLowerCase().includes(trimmed));
+  }, [availableGroups, searchValue]);
+
+  const commandContent = (
+    <Command shouldFilter={false}>
+      <CommandInput
+        ref={inputRef}
+        placeholder={t("searchGroups")}
+        value={searchValue}
+        onValueChange={setSearchValue}
+        onKeyDown={(e) => {
+          if (e.key === "Escape") {
+            e.preventDefault();
+            setOpen(false);
+          }
+          if (e.key === "Enter" && canCreateGroup) {
+            e.preventDefault();
+            void handleCreateGroup();
+          }
+        }}
+      />
+      <CommandList>
+        <CommandEmpty>{canCreateGroup ? null : t("noGroupsAvailable")}</CommandEmpty>
+
+        {/* Existing groups */}
+        {filteredGroups.length > 0 && (
+          <CommandGroup>
+            <div className="grid grid-cols-3 gap-1 p-1">
+              {filteredGroups.map((group) => {
+                const isSelected = selectedGroups.includes(group);
+                const bgColor = getGroupColor(group);
+                return (
+                  <CommandItem
+                    key={group}
+                    value={group}
+                    onSelect={() => toggleGroup(group)}
+                    className="cursor-pointer data-[selected=true]:bg-accent data-[selected=true]:text-accent-foreground"
+                    disabled={saving}
+                  >
+                    <Checkbox checked={isSelected} className="mr-1.5" disabled={saving} />
+                    <span className="text-xs font-medium truncate" style={{ color: bgColor }}>
+                      {group}
+                    </span>
+                  </CommandItem>
+                );
+              })}
+            </div>
+          </CommandGroup>
+        )}
+
+        {/* Create new group option (admin only) */}
+        {canCreateGroup && (
+          <CommandGroup>
+            <CommandItem
+              value={`create-${searchValue.trim()}`}
+              onSelect={handleCreateGroup}
+              className="cursor-pointer"
+              disabled={saving}
+            >
+              <Plus className="h-4 w-4 mr-2" />
+              <span>{t("createGroup", { name: searchValue.trim() })}</span>
+            </CommandItem>
+          </CommandGroup>
+        )}
+      </CommandList>
+
+      {saving && (
+        <div className="flex items-center justify-center py-2 text-xs text-muted-foreground">
+          <Loader2 className="h-3 w-3 animate-spin mr-1" />
+          {t("saving")}
+        </div>
+      )}
+    </Command>
+  );
+
+  if (!isDesktop) {
+    return (
+      <>
+        {triggerButton}
+        <Drawer open={open} onOpenChange={handleOpenChange}>
+          <DrawerContent>
+            <DrawerHeader>
+              <DrawerTitle>{t("editGroups")}</DrawerTitle>
+            </DrawerHeader>
+            <div className="px-4 pb-6">{commandContent}</div>
+          </DrawerContent>
+        </Drawer>
+      </>
+    );
+  }
+
+  return (
+    <Popover open={open} onOpenChange={handleOpenChange}>
+      <PopoverTrigger asChild>{triggerButton}</PopoverTrigger>
+      <PopoverContent
+        align="start"
+        side="bottom"
+        sideOffset={6}
+        className="w-80 p-0"
+        onPointerDown={stopPropagation}
+        onClick={stopPropagation}
+      >
+        {commandContent}
+      </PopoverContent>
+    </Popover>
+  );
+}

+ 129 - 66
src/app/[locale]/settings/providers/_components/inline-edit-popover.tsx

@@ -5,8 +5,10 @@ import { useTranslations } from "next-intl";
 import type * as React from "react";
 import { useEffect, useMemo, useRef, useState } from "react";
 import { Button } from "@/components/ui/button";
+import { Drawer, DrawerContent, DrawerHeader, DrawerTitle } from "@/components/ui/drawer";
 import { Input } from "@/components/ui/input";
 import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
+import { useMediaQuery } from "@/lib/hooks/use-media-query";
 import { cn } from "@/lib/utils";
 
 export interface InlineEditPopoverProps {
@@ -29,6 +31,7 @@ export function InlineEditPopover({
   type = "number",
 }: InlineEditPopoverProps) {
   const t = useTranslations("settings.providers.inlineEdit");
+  const isDesktop = useMediaQuery("(min-width: 768px)");
   const [open, setOpen] = useState(false);
   const [draft, setDraft] = useState(() => value.toString());
   const [saving, setSaving] = useState(false);
@@ -102,24 +105,133 @@ export function InlineEditPopover({
     }
   };
 
-  return (
-    <Popover open={open} onOpenChange={handleOpenChange}>
-      <PopoverTrigger asChild>
-        <button
-          type="button"
-          disabled={disabled}
-          className={cn(
-            "tabular-nums font-medium underline-offset-4 rounded-sm",
-            "focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-1",
-            disabled ? "cursor-default text-muted-foreground" : "cursor-pointer hover:underline"
-          )}
+  const triggerButton = (
+    <button
+      type="button"
+      disabled={disabled}
+      className={cn(
+        "tabular-nums font-medium underline-offset-4 rounded-sm",
+        "focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-1",
+        disabled ? "cursor-default text-muted-foreground" : "cursor-pointer hover:underline"
+      )}
+      onPointerDown={stopPropagation}
+      onClick={(e) => {
+        e.stopPropagation();
+        if (!isDesktop) handleOpenChange(true);
+      }}
+    >
+      {value}
+      {suffix}
+    </button>
+  );
+
+  const formContent = (
+    <div className="grid gap-2">
+      <div className="text-xs font-medium md:block hidden">{label}</div>
+      <div className="flex items-center gap-2">
+        <Input
+          ref={inputRef}
+          value={draft}
+          onChange={(e) => setDraft(e.target.value)}
+          disabled={disabled || saving}
+          className="w-full md:w-24 tabular-nums"
+          aria-label={label}
+          aria-invalid={validationError != null}
+          type="number"
+          inputMode="decimal"
+          step={type === "integer" ? "1" : "any"}
           onPointerDown={stopPropagation}
           onClick={stopPropagation}
-        >
-          {value}
-          {suffix}
-        </button>
-      </PopoverTrigger>
+          onKeyDown={(e) => {
+            e.stopPropagation();
+            if (e.key === "Escape") {
+              e.preventDefault();
+              handleCancel();
+            }
+            if (e.key === "Enter") {
+              e.preventDefault();
+              void handleSave();
+            }
+          }}
+        />
+        {suffix && <span className="text-sm text-muted-foreground">{suffix}</span>}
+      </div>
+      {validationError && <div className="text-xs text-destructive">{validationError}</div>}
+      <div className="flex items-center justify-end gap-2 pt-1">
+        <Button type="button" size="sm" variant="outline" onClick={handleCancel} disabled={saving}>
+          {t("cancel")}
+        </Button>
+        <Button type="button" size="sm" onClick={handleSave} disabled={!canSave}>
+          {saving && <Loader2 className="mr-2 h-4 w-4 animate-spin" />}
+          {t("save")}
+        </Button>
+      </div>
+    </div>
+  );
+
+  if (!isDesktop) {
+    return (
+      <>
+        {triggerButton}
+        <Drawer open={open} onOpenChange={handleOpenChange}>
+          <DrawerContent>
+            <DrawerHeader>
+              <DrawerTitle>{label}</DrawerTitle>
+            </DrawerHeader>
+            <div className="px-4 pb-6">
+              <div className="grid gap-3">
+                <Input
+                  ref={inputRef}
+                  value={draft}
+                  onChange={(e) => setDraft(e.target.value)}
+                  disabled={disabled || saving}
+                  className="tabular-nums text-lg"
+                  aria-label={label}
+                  aria-invalid={validationError != null}
+                  type="number"
+                  inputMode="decimal"
+                  step={type === "integer" ? "1" : "any"}
+                  onKeyDown={(e) => {
+                    if (e.key === "Escape") {
+                      e.preventDefault();
+                      handleCancel();
+                    }
+                    if (e.key === "Enter") {
+                      e.preventDefault();
+                      void handleSave();
+                    }
+                  }}
+                />
+                {suffix && <span className="text-sm text-muted-foreground">{suffix}</span>}
+                {validationError && (
+                  <div className="text-sm text-destructive">{validationError}</div>
+                )}
+                <div className="flex gap-2 pt-2">
+                  <Button
+                    variant="outline"
+                    onClick={handleCancel}
+                    disabled={saving}
+                    className="flex-1"
+                    size="lg"
+                  >
+                    {t("cancel")}
+                  </Button>
+                  <Button onClick={handleSave} disabled={!canSave} className="flex-1" size="lg">
+                    {saving && <Loader2 className="mr-2 h-4 w-4 animate-spin" />}
+                    {t("save")}
+                  </Button>
+                </div>
+              </div>
+            </div>
+          </DrawerContent>
+        </Drawer>
+      </>
+    );
+  }
+
+  return (
+    <Popover open={open} onOpenChange={handleOpenChange}>
+      <PopoverTrigger asChild>{triggerButton}</PopoverTrigger>
 
       <PopoverContent
         align="center"
@@ -129,56 +241,7 @@ export function InlineEditPopover({
         onPointerDown={stopPropagation}
         onClick={stopPropagation}
       >
-        <div className="grid gap-2">
-          <div className="text-xs font-medium">{label}</div>
-
-          <div className="flex items-center gap-2">
-            <Input
-              ref={inputRef}
-              value={draft}
-              onChange={(e) => setDraft(e.target.value)}
-              disabled={disabled || saving}
-              className="w-24 tabular-nums"
-              aria-label={label}
-              aria-invalid={validationError != null}
-              type="number"
-              inputMode="decimal"
-              step={type === "integer" ? "1" : "any"}
-              onPointerDown={stopPropagation}
-              onClick={stopPropagation}
-              onKeyDown={(e) => {
-                e.stopPropagation();
-                if (e.key === "Escape") {
-                  e.preventDefault();
-                  handleCancel();
-                }
-                if (e.key === "Enter") {
-                  e.preventDefault();
-                  void handleSave();
-                }
-              }}
-            />
-            {suffix && <span className="text-sm text-muted-foreground">{suffix}</span>}
-          </div>
-
-          {validationError && <div className="text-xs text-destructive">{validationError}</div>}
-
-          <div className="flex items-center justify-end gap-2 pt-1">
-            <Button
-              type="button"
-              size="sm"
-              variant="outline"
-              onClick={handleCancel}
-              disabled={saving}
-            >
-              {t("cancel")}
-            </Button>
-            <Button type="button" size="sm" onClick={handleSave} disabled={!canSave}>
-              {saving && <Loader2 className="mr-2 h-4 w-4 animate-spin" />}
-              {t("save")}
-            </Button>
-          </div>
-        </div>
+        {formContent}
       </PopoverContent>
     </Popover>
   );

+ 311 - 0
src/app/[locale]/settings/providers/_components/priority-edit-popover.tsx

@@ -0,0 +1,311 @@
+"use client";
+
+import { Loader2 } from "lucide-react";
+import { useTranslations } from "next-intl";
+import type * as React from "react";
+import { useEffect, useRef, useState } from "react";
+import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
+import { Drawer, DrawerContent, DrawerHeader, DrawerTitle } from "@/components/ui/drawer";
+import { Input } from "@/components/ui/input";
+import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
+import { useMediaQuery } from "@/lib/hooks/use-media-query";
+import { cn } from "@/lib/utils";
+
+interface PriorityEditPopoverProps { // Props for the global + per-group priority editor (PriorityEditPopover).
+  globalPriority: number; // Seeds the global draft; shown on the trigger when the active filter group has no override.
+  groupPriorities: Record<string, number> | null; // Stored per-group overrides keyed by group name; null when there are none.
+  groups: string[]; // Groups that get their own override input in the form.
+  activeGroupFilter: string | null; // When set and that group has an override, the trigger shows the override value plus a badge with the group name.
+  disabled?: boolean; // Blocks opening and disables the inputs; the component defaults it to false.
+  onSave: ( // Persists the edit; the editor closes only when the promise resolves to true.
+    globalPriority: number, // Parsed global draft (a non-negative integer).
+    groupPriorities: Record<string, number> | null // Merged overrides, or null when no override remains.
+  ) => Promise<boolean>;
+  validator: (value: string) => string | null; // Given a trimmed draft, returns an error message to display, or null when valid.
+}
+
+export function PriorityEditPopover({
+  globalPriority,
+  groupPriorities,
+  groups,
+  activeGroupFilter,
+  disabled = false,
+  onSave,
+  validator,
+}: PriorityEditPopoverProps) {
+  const t = useTranslations("settings.providers.inlineEdit");
+  const isDesktop = useMediaQuery("(min-width: 768px)");
+  const [open, setOpen] = useState(false);
+  const [saving, setSaving] = useState(false);
+  const [globalDraft, setGlobalDraft] = useState(() => globalPriority.toString());
+  const [groupDrafts, setGroupDrafts] = useState<Record<string, string>>({});
+
+  const globalInputRef = useRef<HTMLInputElement>(null);
+
+  // Compute display value and whether it's a group override
+  const effectivePriority =
+    activeGroupFilter && groupPriorities?.[activeGroupFilter] != null
+      ? groupPriorities[activeGroupFilter]
+      : globalPriority;
+  const isGroupOverride = activeGroupFilter != null && groupPriorities?.[activeGroupFilter] != null;
+
+  // Validation for global draft
+  const globalError = validator(globalDraft.trim());
+
+  // Validation for group drafts
+  const groupErrors: Record<string, string | null> = {};
+  for (const g of groups) {
+    const draft = groupDrafts[g] ?? "";
+    if (draft.trim() === "") {
+      groupErrors[g] = null; // empty means use global
+    } else {
+      groupErrors[g] = validator(draft.trim());
+    }
+  }
+
+  const hasAnyError = globalError != null || Object.values(groupErrors).some((e) => e != null);
+
+  const canSave = !disabled && !saving && !hasAnyError && globalDraft.trim() !== "";
+
+  useEffect(() => {
+    if (!open) return;
+    const raf = requestAnimationFrame(() => {
+      globalInputRef.current?.focus();
+      globalInputRef.current?.select();
+    });
+    return () => cancelAnimationFrame(raf);
+  }, [open]);
+
+  const stopPropagation = (e: React.SyntheticEvent) => {
+    e.stopPropagation();
+  };
+
+  // Re-seeds both drafts from props: the global draft from globalPriority, and
+  // one entry per listed group holding its stored override as text (blank when
+  // the group has no override).
+  const resetDrafts = () => {
+    const seeded: Record<string, string> = {};
+    groups.forEach((name) => {
+      const stored = groupPriorities?.[name];
+      seeded[name] = stored != null ? stored.toString() : "";
+    });
+
+    setGlobalDraft(globalPriority.toString());
+    setGroupDrafts(seeded);
+  };
+
+  const handleOpenChange = (nextOpen: boolean) => {
+    if (disabled && nextOpen) return;
+    if (nextOpen) {
+      resetDrafts();
+    } else {
+      setSaving(false);
+    }
+    setOpen(nextOpen);
+  };
+
+  const handleCancel = () => {
+    resetDrafts();
+    setOpen(false);
+  };
+
+  // Parses the drafts, merges the per-group overrides and persists them through
+  // onSave. The editor closes only when onSave resolves to true; on a false
+  // result or a rejection it stays open with the drafts intact so the user can
+  // retry.
+  const handleSave = async () => {
+    if (!canSave) return;
+
+    // Number.isInteger is false for NaN and +/-Infinity, so it also covers the
+    // finiteness check.
+    const parsedGlobal = Number(globalDraft.trim());
+    if (!Number.isInteger(parsedGlobal) || parsedGlobal < 0) return;
+
+    // Start from the stored overrides so entries for groups that are not listed
+    // in `groups` are passed through unchanged.
+    const mergedGroupPriorities: Record<string, number> = { ...(groupPriorities ?? {}) };
+    for (const g of groups) {
+      const draft = (groupDrafts[g] ?? "").trim();
+      if (draft === "") {
+        // A blank field removes the override for this group.
+        delete mergedGroupPriorities[g];
+        continue;
+      }
+      const val = Number(draft);
+      if (Number.isInteger(val) && val >= 0) {
+        mergedGroupPriorities[g] = val;
+      }
+    }
+    const hasGroupOverrides = Object.keys(mergedGroupPriorities).length > 0;
+
+    setSaving(true);
+    try {
+      const ok = await onSave(parsedGlobal, hasGroupOverrides ? mergedGroupPriorities : null);
+      if (ok) {
+        setOpen(false);
+      }
+    } catch (error: unknown) {
+      // Every call site is an event handler that drops the returned promise, so
+      // a rejection from onSave would otherwise surface as an unhandled promise
+      // rejection. Log it and leave the editor open.
+      console.error("Failed to save provider priority", error);
+    } finally {
+      setSaving(false);
+    }
+  };
+
+  const handleGroupDraftChange = (group: string, value: string) => {
+    setGroupDrafts((prev) => ({ ...prev, [group]: value }));
+  };
+
+  const triggerButton = (
+    <button
+      type="button"
+      disabled={disabled}
+      className={cn(
+        "inline-flex items-center gap-1 tabular-nums font-medium underline-offset-4 rounded-sm",
+        "focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-1",
+        disabled ? "cursor-default text-muted-foreground" : "cursor-pointer hover:underline"
+      )}
+      onPointerDown={stopPropagation}
+      onClick={(e) => {
+        e.stopPropagation();
+        if (!isDesktop) handleOpenChange(true);
+      }}
+    >
+      {effectivePriority}
+      {isGroupOverride && activeGroupFilter && (
+        <Badge variant="outline" className="text-[10px] px-1 py-0 leading-tight font-normal">
+          {activeGroupFilter}
+        </Badge>
+      )}
+    </button>
+  );
+
+  const priorityFormFields = (
+    <>
+      {/* Global priority */}
+      <div className="grid gap-1.5">
+        <div className="text-xs font-medium">{t("globalPriority")}</div>
+        <Input
+          ref={globalInputRef}
+          value={globalDraft}
+          onChange={(e) => setGlobalDraft(e.target.value)}
+          disabled={disabled || saving}
+          className="tabular-nums"
+          aria-label={t("globalPriority")}
+          aria-invalid={globalError != null}
+          type="number"
+          inputMode="decimal"
+          step="1"
+          onPointerDown={stopPropagation}
+          onClick={stopPropagation}
+          onKeyDown={(e) => {
+            e.stopPropagation();
+            if (e.key === "Escape") {
+              e.preventDefault();
+              handleCancel();
+            }
+            if (e.key === "Enter") {
+              e.preventDefault();
+              void handleSave();
+            }
+          }}
+        />
+        {globalError && <div className="text-xs text-destructive">{globalError}</div>}
+      </div>
+
+      {/* Per-group priorities */}
+      {groups.length > 0 && (
+        <div className="grid gap-1.5">
+          <div className="text-xs font-medium">{t("groupPriorityLabel")}</div>
+          {groups.map((group) => (
+            <div key={group} className="flex items-center gap-2">
+              <span className="text-xs text-muted-foreground min-w-[60px] truncate" title={group}>
+                {group}
+              </span>
+              <Input
+                value={groupDrafts[group] ?? ""}
+                onChange={(e) => handleGroupDraftChange(group, e.target.value)}
+                disabled={disabled || saving}
+                placeholder={t("groupPriorityPlaceholder")}
+                className="tabular-nums"
+                aria-label={`${t("groupPriorityLabel")} - ${group}`}
+                aria-invalid={groupErrors[group] != null}
+                type="number"
+                inputMode="decimal"
+                step="1"
+                onPointerDown={stopPropagation}
+                onClick={stopPropagation}
+                onKeyDown={(e) => {
+                  e.stopPropagation();
+                  if (e.key === "Escape") {
+                    e.preventDefault();
+                    handleCancel();
+                  }
+                  if (e.key === "Enter") {
+                    e.preventDefault();
+                    void handleSave();
+                  }
+                }}
+              />
+              {groupErrors[group] && (
+                <div className="text-xs text-destructive">{groupErrors[group]}</div>
+              )}
+            </div>
+          ))}
+        </div>
+      )}
+    </>
+  );
+
+  const actionButtons = (
+    <div className="flex items-center justify-end gap-2 pt-1">
+      <Button type="button" size="sm" variant="outline" onClick={handleCancel} disabled={saving}>
+        {t("cancel")}
+      </Button>
+      <Button type="button" size="sm" onClick={handleSave} disabled={!canSave}>
+        {saving && <Loader2 className="mr-2 h-4 w-4 animate-spin" />}
+        {t("save")}
+      </Button>
+    </div>
+  );
+
+  if (!isDesktop) {
+    return (
+      <>
+        {triggerButton}
+        <Drawer open={open} onOpenChange={handleOpenChange}>
+          <DrawerContent>
+            <DrawerHeader>
+              <DrawerTitle>{t("globalPriority")}</DrawerTitle>
+            </DrawerHeader>
+            <div className="px-4 pb-6">
+              <div className="grid gap-3">
+                {priorityFormFields}
+                <div className="flex gap-2 pt-2">
+                  <Button
+                    variant="outline"
+                    onClick={handleCancel}
+                    disabled={saving}
+                    className="flex-1"
+                    size="lg"
+                  >
+                    {t("cancel")}
+                  </Button>
+                  <Button onClick={handleSave} disabled={!canSave} className="flex-1" size="lg">
+                    {saving && <Loader2 className="mr-2 h-4 w-4 animate-spin" />}
+                    {t("save")}
+                  </Button>
+                </div>
+              </div>
+            </div>
+          </DrawerContent>
+        </Drawer>
+      </>
+    );
+  }
+
+  return (
+    <Popover open={open} onOpenChange={handleOpenChange}>
+      <PopoverTrigger asChild>{triggerButton}</PopoverTrigger>
+
+      <PopoverContent
+        align="center"
+        side="bottom"
+        sideOffset={6}
+        className="w-auto p-3"
+        onPointerDown={stopPropagation}
+        onClick={stopPropagation}
+      >
+        <div className="grid gap-3">
+          {priorityFormFields}
+          {actionButtons}
+        </div>
+      </PopoverContent>
+    </Popover>
+  );
+}

+ 13 - 1
src/app/[locale]/settings/providers/_components/provider-list.tsx

@@ -23,9 +23,13 @@ interface ProviderListProps {
   statisticsLoading?: boolean;
   currencyCode?: CurrencyCode;
   enableMultiProviderTypes: boolean;
+  activeGroupFilter?: string | null;
   isMultiSelectMode?: boolean;
   selectedProviderIds?: Set<number>;
   onSelectProvider?: (providerId: number, checked: boolean) => void;
+  allGroups?: string[];
+  userGroups?: string[];
+  isAdmin?: boolean;
 }
 
 export function ProviderList({
@@ -36,9 +40,13 @@ export function ProviderList({
   statisticsLoading = false,
   currencyCode = "USD",
   enableMultiProviderTypes,
+  activeGroupFilter = null,
   isMultiSelectMode = false,
   selectedProviderIds = new Set(),
   onSelectProvider,
+  allGroups = [],
+  userGroups = [],
+  isAdmin = false,
 }: ProviderListProps) {
   const t = useTranslations("settings.providers");
 
@@ -55,7 +63,7 @@ export function ProviderList({
   }
 
   return (
-    <div className="border rounded-lg overflow-hidden">
+    <div className="grid gap-3 md:block md:border md:rounded-lg md:overflow-hidden md:gap-0">
       {providers.map((provider) => (
         <ProviderRichListItem
           key={provider.id}
@@ -66,11 +74,15 @@ export function ProviderList({
           statisticsLoading={statisticsLoading}
           currencyCode={currencyCode}
           enableMultiProviderTypes={enableMultiProviderTypes}
+          activeGroupFilter={activeGroupFilter}
           isMultiSelectMode={isMultiSelectMode}
           isSelected={selectedProviderIds.has(provider.id)}
           onSelectChange={
             onSelectProvider ? (checked) => onSelectProvider(provider.id, checked) : undefined
           }
+          allGroups={allGroups}
+          userGroups={userGroups}
+          isAdmin={isAdmin}
         />
       ))}
     </div>

+ 215 - 71
src/app/[locale]/settings/providers/_components/provider-manager.tsx

@@ -1,8 +1,9 @@
 "use client";
-import { AlertTriangle, LayoutGrid, LayoutList, Loader2, Search } from "lucide-react";
+import { AlertTriangle, Filter, LayoutGrid, LayoutList, Loader2, Search } from "lucide-react";
 import { useTranslations } from "next-intl";
 import { type ReactNode, useCallback, useEffect, useMemo, useState } from "react";
 import { Button } from "@/components/ui/button";
+import { Collapsible, CollapsibleContent } from "@/components/ui/collapsible";
 import { Input } from "@/components/ui/input";
 import { Label } from "@/components/ui/label";
 import {
@@ -77,6 +78,7 @@ export function ProviderManager({
   const [statusFilter, setStatusFilter] = useState<"all" | "active" | "inactive">("all");
   const [groupFilter, setGroupFilter] = useState<string[]>([]);
   const [circuitBrokenFilter, setCircuitBrokenFilter] = useState(false);
+  const [mobileFilterOpen, setMobileFilterOpen] = useState(false);
 
   // Batch edit state
   const [isMultiSelectMode, setIsMultiSelectMode] = useState(false);
@@ -89,6 +91,16 @@ export function ProviderManager({
     return providers.filter((p) => healthStatus[p.id]?.circuitState === "open").length;
   }, [providers, healthStatus]);
 
+  const activeFilterCount = useMemo(() => {
+    let count = 0;
+    if (typeFilter !== "all") count++;
+    if (statusFilter !== "all") count++;
+    if (groupFilter.length > 0) count++;
+    if (circuitBrokenFilter) count++;
+    if (sortBy !== "priority") count++;
+    return count;
+  }, [typeFilter, statusFilter, groupFilter, circuitBrokenFilter, sortBy]);
+
   // Auto-reset circuit broken filter when no providers are broken
   useEffect(() => {
     if (circuitBrokenCount === 0 && circuitBrokenFilter) {
@@ -120,6 +132,18 @@ export function ProviderManager({
     return sortedGroups;
   }, [providers]);
 
+  // Group names of the current user, parsed from the comma-separated providerGroup field.
+  const userGroups = useMemo(() => {
+    const rawGroups = currentUser?.providerGroup;
+    if (!rawGroups) return [];
+    const names = rawGroups.split(",").map((name) => name.trim());
+    // Drop empty entries left by stray commas or whitespace-only segments.
+    return names.filter(Boolean);
+  }, [currentUser?.providerGroup]);
+
+  // Check if current user is admin
+  const isAdmin = currentUser?.role === "admin";
+
   // 统一过滤逻辑:搜索 + 类型筛选 + 排序
   const filteredProviders = useMemo(() => {
     let result = providers;
@@ -284,52 +308,10 @@ export function ProviderManager({
         />
         {addDialogSlot ? <div className="ml-auto">{addDialogSlot}</div> : null}
       </div>
-      {/* 筛选条件 */}
+      {/* Filter section */}
       <div className="flex flex-col gap-3">
-        <div className="flex flex-col sm:flex-row items-stretch sm:items-center gap-2">
-          {/* View Mode Toggle */}
-          <div className="flex items-center border rounded-md bg-muted/50 p-1">
-            <Button
-              variant={viewMode === "list" ? "secondary" : "ghost"}
-              size="sm"
-              className="h-7 px-2 gap-1.5 text-xs"
-              onClick={() => setViewMode("list")}
-              title={tStrings("viewModeList")}
-            >
-              <LayoutList className="h-3.5 w-3.5" />
-              <span className="hidden sm:inline">{tStrings("viewModeList")}</span>
-            </Button>
-            <Button
-              variant={viewMode === "vendor" ? "secondary" : "ghost"}
-              size="sm"
-              className="h-7 px-2 gap-1.5 text-xs"
-              onClick={() => setViewMode("vendor")}
-              title={tStrings("viewModeVendor")}
-            >
-              <LayoutGrid className="h-3.5 w-3.5" />
-              <span className="hidden sm:inline">{tStrings("viewModeVendor")}</span>
-            </Button>
-          </div>
-
-          <ProviderTypeFilter value={typeFilter} onChange={setTypeFilter} disabled={loading} />
-
-          {/* Status filter */}
-          <Select
-            value={statusFilter}
-            onValueChange={(value) => setStatusFilter(value as "all" | "active" | "inactive")}
-            disabled={loading}
-          >
-            <SelectTrigger className="w-full sm:w-[140px]">
-              <SelectValue />
-            </SelectTrigger>
-            <SelectContent>
-              <SelectItem value="all">{tFilter("status.all")}</SelectItem>
-              <SelectItem value="active">{tFilter("status.active")}</SelectItem>
-              <SelectItem value="inactive">{tFilter("status.inactive")}</SelectItem>
-            </SelectContent>
-          </Select>
-
-          <ProviderSortDropdown value={sortBy} onChange={setSortBy} disabled={loading} />
+        {/* Mobile: search + filter toggle button */}
+        <div className="flex items-center gap-2 md:hidden">
           <div className="relative flex-1">
             <Search className="absolute left-3 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground" />
             <Input
@@ -341,40 +323,198 @@ export function ProviderManager({
               disabled={loading}
             />
           </div>
+          <Button
+            variant="outline"
+            size="default"
+            className="flex-shrink-0"
+            onClick={() => setMobileFilterOpen((prev) => !prev)}
+          >
+            <Filter className="mr-1.5 h-4 w-4" />
+            {activeFilterCount > 0
+              ? tFilter("mobileFilterCount", { count: activeFilterCount })
+              : tFilter("mobileFilter")}
+          </Button>
         </div>
 
-        {/* Group filter */}
-        {allGroups.length > 0 && (
-          <div className="flex flex-wrap gap-2 items-center">
-            <span className="text-sm text-muted-foreground">{tFilter("groups.label")}</span>
-            <Button
-              variant={groupFilter.length === 0 ? "default" : "outline"}
-              size="sm"
-              onClick={() => setGroupFilter([])}
-              disabled={loading}
-              className="h-7"
-            >
-              {tFilter("groups.all")}
-            </Button>
-            {allGroups.map((group) => (
+        {/* Mobile: collapsible filter panel */}
+        <Collapsible open={mobileFilterOpen} onOpenChange={setMobileFilterOpen}>
+          <CollapsibleContent className="md:hidden">
+            <div className="flex flex-col gap-3 p-3 border rounded-lg bg-muted/30">
+              <ProviderTypeFilter value={typeFilter} onChange={setTypeFilter} disabled={loading} />
+              <Select
+                value={statusFilter}
+                onValueChange={(value) => setStatusFilter(value as "all" | "active" | "inactive")}
+                disabled={loading}
+              >
+                <SelectTrigger>
+                  <SelectValue />
+                </SelectTrigger>
+                <SelectContent>
+                  <SelectItem value="all">{tFilter("status.all")}</SelectItem>
+                  <SelectItem value="active">{tFilter("status.active")}</SelectItem>
+                  <SelectItem value="inactive">{tFilter("status.inactive")}</SelectItem>
+                </SelectContent>
+              </Select>
+              <ProviderSortDropdown value={sortBy} onChange={setSortBy} disabled={loading} />
+              {allGroups.length > 0 && (
+                <div className="flex flex-wrap gap-2 items-center">
+                  <span className="text-sm text-muted-foreground">{tFilter("groups.label")}</span>
+                  <Button
+                    variant={groupFilter.length === 0 ? "default" : "outline"}
+                    size="sm"
+                    onClick={() => setGroupFilter([])}
+                    disabled={loading}
+                    className="h-7"
+                  >
+                    {tFilter("groups.all")}
+                  </Button>
+                  {allGroups.map((group) => (
+                    <Button
+                      key={group}
+                      variant={groupFilter.includes(group) ? "default" : "outline"}
+                      size="sm"
+                      onClick={() =>
+                        setGroupFilter((prev) =>
+                          prev.includes(group) ? prev.filter((g) => g !== group) : [...prev, group]
+                        )
+                      }
+                      disabled={loading}
+                      className="h-7"
+                    >
+                      {group}
+                    </Button>
+                  ))}
+                </div>
+              )}
+              {circuitBrokenCount > 0 && (
+                <div className="flex items-center gap-2">
+                  <AlertTriangle
+                    className={`h-4 w-4 ${circuitBrokenFilter ? "text-destructive" : "text-muted-foreground"}`}
+                  />
+                  <Label
+                    htmlFor="circuit-broken-filter-mobile"
+                    className={`text-sm cursor-pointer select-none ${circuitBrokenFilter ? "text-destructive font-medium" : "text-muted-foreground"}`}
+                  >
+                    {tFilter("circuitBroken")} ({circuitBrokenCount})
+                  </Label>
+                  <Switch
+                    id="circuit-broken-filter-mobile"
+                    checked={circuitBrokenFilter}
+                    onCheckedChange={setCircuitBrokenFilter}
+                    disabled={loading}
+                  />
+                </div>
+              )}
               <Button
-                key={group}
-                variant={groupFilter.includes(group) ? "default" : "outline"}
+                variant="ghost"
                 size="sm"
                 onClick={() => {
-                  setGroupFilter((prev) =>
-                    prev.includes(group) ? prev.filter((g) => g !== group) : [...prev, group]
-                  );
+                  setTypeFilter("all");
+                  setStatusFilter("all");
+                  setGroupFilter([]);
+                  setCircuitBrokenFilter(false);
+                  setSortBy("priority");
                 }}
+                className="self-end"
+              >
+                {tFilter("resetFilters")}
+              </Button>
+            </div>
+          </CollapsibleContent>
+        </Collapsible>
+
+        {/* Desktop: original filter layout */}
+        <div className="hidden md:flex flex-col gap-3">
+          <div className="flex flex-col sm:flex-row items-stretch sm:items-center gap-2">
+            {/* View Mode Toggle */}
+            <div className="flex items-center border rounded-md bg-muted/50 p-1">
+              <Button
+                variant={viewMode === "list" ? "secondary" : "ghost"}
+                size="sm"
+                className="h-7 px-2 gap-1.5 text-xs"
+                onClick={() => setViewMode("list")}
+                title={tStrings("viewModeList")}
+              >
+                <LayoutList className="h-3.5 w-3.5" />
+                <span className="hidden sm:inline">{tStrings("viewModeList")}</span>
+              </Button>
+              <Button
+                variant={viewMode === "vendor" ? "secondary" : "ghost"}
+                size="sm"
+                className="h-7 px-2 gap-1.5 text-xs"
+                onClick={() => setViewMode("vendor")}
+                title={tStrings("viewModeVendor")}
+              >
+                <LayoutGrid className="h-3.5 w-3.5" />
+                <span className="hidden sm:inline">{tStrings("viewModeVendor")}</span>
+              </Button>
+            </div>
+
+            <ProviderTypeFilter value={typeFilter} onChange={setTypeFilter} disabled={loading} />
+
+            <Select
+              value={statusFilter}
+              onValueChange={(value) => setStatusFilter(value as "all" | "active" | "inactive")}
+              disabled={loading}
+            >
+              <SelectTrigger className="w-full sm:w-[140px]">
+                <SelectValue />
+              </SelectTrigger>
+              <SelectContent>
+                <SelectItem value="all">{tFilter("status.all")}</SelectItem>
+                <SelectItem value="active">{tFilter("status.active")}</SelectItem>
+                <SelectItem value="inactive">{tFilter("status.inactive")}</SelectItem>
+              </SelectContent>
+            </Select>
+
+            <ProviderSortDropdown value={sortBy} onChange={setSortBy} disabled={loading} />
+            <div className="relative flex-1">
+              <Search className="absolute left-3 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground" />
+              <Input
+                type="search"
+                placeholder={t("placeholder")}
+                value={searchTerm}
+                onChange={(e) => setSearchTerm(e.target.value)}
+                className="pl-9"
+                disabled={loading}
+              />
+            </div>
+          </div>
+
+          {/* Group filter */}
+          {allGroups.length > 0 && (
+            <div className="flex flex-wrap gap-2 items-center">
+              <span className="text-sm text-muted-foreground">{tFilter("groups.label")}</span>
+              <Button
+                variant={groupFilter.length === 0 ? "default" : "outline"}
+                size="sm"
+                onClick={() => setGroupFilter([])}
                 disabled={loading}
                 className="h-7"
               >
-                {group}
+                {tFilter("groups.all")}
               </Button>
-            ))}
-          </div>
-        )}
-        {/* 搜索结果提示 + Circuit Breaker filter */}
+              {allGroups.map((group) => (
+                <Button
+                  key={group}
+                  variant={groupFilter.includes(group) ? "default" : "outline"}
+                  size="sm"
+                  onClick={() => {
+                    setGroupFilter((prev) =>
+                      prev.includes(group) ? prev.filter((g) => g !== group) : [...prev, group]
+                    );
+                  }}
+                  disabled={loading}
+                  className="h-7"
+                >
+                  {group}
+                </Button>
+              ))}
+            </div>
+          )}
+        </div>
+
+        {/* Search result count + Circuit Breaker filter (both mobile and desktop) */}
         <div className="flex items-center justify-between">
           {debouncedSearchTerm ? (
             <p className="text-sm text-muted-foreground">
@@ -394,7 +534,7 @@ export function ProviderManager({
 
           {/* Circuit Breaker toggle - only show if there are broken providers */}
           {circuitBrokenCount > 0 && (
-            <div className="flex items-center gap-2">
+            <div className="hidden md:flex items-center gap-2">
               <AlertTriangle
                 className={`h-4 w-4 ${circuitBrokenFilter ? "text-destructive" : "text-muted-foreground"}`}
               />
@@ -436,9 +576,13 @@ export function ProviderManager({
               statisticsLoading={statisticsLoading}
               currencyCode={currencyCode}
               enableMultiProviderTypes={enableMultiProviderTypes}
+              activeGroupFilter={groupFilter.length === 1 ? groupFilter[0] : null}
               isMultiSelectMode={isMultiSelectMode}
               selectedProviderIds={selectedProviderIds}
               onSelectProvider={handleSelectProvider}
+              allGroups={allGroups}
+              userGroups={userGroups}
+              isAdmin={isAdmin}
             />
           ) : (
             <ProviderVendorView

+ 298 - 73
src/app/[locale]/settings/providers/_components/provider-rich-list-item.tsx

@@ -7,6 +7,7 @@ import {
   Edit,
   Globe,
   Key,
+  MoreHorizontal,
   RotateCcw,
   Trash,
   XCircle,
@@ -44,6 +45,13 @@ import {
   DialogHeader,
   DialogTitle,
 } from "@/components/ui/dialog";
+import {
+  DropdownMenu,
+  DropdownMenuContent,
+  DropdownMenuItem,
+  DropdownMenuSeparator,
+  DropdownMenuTrigger,
+} from "@/components/ui/dropdown-menu";
 import { Skeleton } from "@/components/ui/skeleton";
 import { Switch } from "@/components/ui/switch";
 import { PROVIDER_GROUP, PROVIDER_LIMITS } from "@/lib/constants/provider.constants";
@@ -55,7 +63,9 @@ import { formatCurrency } from "@/lib/utils/currency";
 import type { ProviderDisplay, ProviderStatistics } from "@/types/provider";
 import type { User } from "@/types/user";
 import { ProviderForm } from "./forms/provider-form";
+import { GroupEditCombobox } from "./group-edit-combobox";
 import { InlineEditPopover } from "./inline-edit-popover";
+import { PriorityEditPopover } from "./priority-edit-popover";
 import { ProviderEndpointHover } from "./provider-endpoint-hover";
 
 interface ProviderRichListItemProps {
@@ -74,10 +84,14 @@ interface ProviderRichListItemProps {
   enableMultiProviderTypes: boolean;
   isMultiSelectMode?: boolean;
   isSelected?: boolean;
+  activeGroupFilter?: string | null;
   onSelectChange?: (checked: boolean) => void;
   onEdit?: () => void;
   onClone?: () => void;
   onDelete?: () => void;
+  allGroups?: string[];
+  userGroups?: string[];
+  isAdmin?: boolean;
 }
 
 export function ProviderRichListItem({
@@ -90,10 +104,14 @@ export function ProviderRichListItem({
   enableMultiProviderTypes,
   isMultiSelectMode = false,
   isSelected = false,
+  activeGroupFilter = null,
   onSelectChange,
   onEdit: onEditProp,
   onClone: onCloneProp,
   onDelete: onDeleteProp,
+  allGroups = [],
+  userGroups = [],
+  isAdmin = false,
 }: ProviderRichListItemProps) {
   const router = useRouter();
   const queryClient = useQueryClient();
@@ -106,6 +124,7 @@ export function ProviderRichListItem({
   const [openEdit, setOpenEdit] = useState(false);
   const [openClone, setOpenClone] = useState(false);
   const [showKeyDialog, setShowKeyDialog] = useState(false);
+  const [mobileDeleteDialogOpen, setMobileDeleteDialogOpen] = useState(false);
   const [unmaskedKey, setUnmaskedKey] = useState<string | null>(null);
   const [copied, setCopied] = useState(false);
   const [clipboardAvailable, setClipboardAvailable] = useState(false);
@@ -359,48 +378,280 @@ export function ProviderRichListItem({
     };
   };
 
-  const handleSavePriority = createSaveHandler("priority");
   const handleSaveWeight = createSaveHandler("weight");
   const handleSaveCostMultiplier = createSaveHandler("cost_multiplier");
 
+  // Group tags of this provider, parsed from the comma-separated groupTag field.
+  // A missing or empty groupTag yields an empty list.
+  const providerGroups = (provider.groupTag ?? "")
+    .split(",")
+    .map((tag) => tag.trim())
+    .filter(Boolean);
+
+  // Saves the group list as a comma-separated group_tag (null when empty); resolves to success.
+  const handleSaveGroups = async (groups: string[]): Promise<boolean> => {
+    try {
+      const joinedGroups = groups.length === 0 ? null : groups.join(",");
+      const result = await editProvider(provider.id, { group_tag: joinedGroups });
+      if (!result.ok) {
+        const description = result.error || tList("unknownError");
+        toast.error(tInline("groupSaveError"), { description });
+        return false;
+      }
+      toast.success(tInline("saveSuccess"));
+      queryClient.invalidateQueries({ queryKey: ["providers"] });
+      router.refresh();
+      return true;
+    } catch (err) {
+      console.error("Failed to save groups:", err);
+      toast.error(tInline("groupSaveError"), { description: tList("unknownError") });
+      return false;
+    }
+  };
+
+  // Sends priority and group_priorities in one editProvider call; resolves to true on success.
+  const handleSavePriorityWithGroups = async (
+    newGlobal: number,
+    newGroupPriorities: Record<string, number> | null
+  ): Promise<boolean> => {
+    try {
+      const payload = { priority: newGlobal, group_priorities: newGroupPriorities };
+      const result = await editProvider(provider.id, payload);
+      if (!result.ok) {
+        const description = result.error || tList("unknownError");
+        toast.error(tInline("saveFailed"), { description });
+        return false;
+      }
+      toast.success(tInline("saveSuccess"));
+      queryClient.invalidateQueries({ queryKey: ["providers"] });
+      router.refresh();
+      return true;
+    } catch (err) {
+      console.error("Failed to update priority:", err);
+      toast.error(tInline("saveFailed"), { description: tList("unknownError") });
+      return false;
+    }
+  };
+
   return (
     <>
-      <div className="flex items-center gap-4 py-3 px-4 border-b hover:bg-muted/50 transition-colors">
-        {/* 多选模式下显示 checkbox */}
+      <div className="rounded-lg border bg-card p-4 md:rounded-none md:border-0 md:border-b md:bg-transparent md:p-0 md:py-3 md:px-4 flex flex-col gap-3 md:flex-row md:items-center md:gap-4 hover:bg-muted/50 transition-colors">
+        {/* Checkbox: shared between mobile and desktop */}
         {isMultiSelectMode && (
           <Checkbox
             checked={isSelected}
             onCheckedChange={(checked) => onSelectChange?.(Boolean(checked))}
             onClick={(e) => e.stopPropagation()}
-            aria-label={`Select ${provider.name}`}
+            aria-label={tList("selectProvider", { name: provider.name })}
+            className="flex-shrink-0"
           />
         )}
 
-        {/* 左侧:状态和类型图标 */}
-        <div className="flex items-center gap-2">
-          {/* 启用状态指示器 */}
+        {/* Mobile: top row with name and switch */}
+        <div className="flex items-center justify-between md:hidden">
+          <div className="flex items-center gap-2 min-w-0 flex-1">
+            {provider.isEnabled ? (
+              <CheckCircle className="h-4 w-4 text-green-500 flex-shrink-0" />
+            ) : (
+              <XCircle className="h-4 w-4 text-gray-400 flex-shrink-0" />
+            )}
+            <div
+              className={`flex items-center justify-center w-6 h-6 rounded ${typeConfig.bgColor} flex-shrink-0`}
+              title={`${typeLabel} - ${typeDescription}`}
+              aria-label={typeLabel}
+            >
+              <TypeIcon className="h-3.5 w-3.5" aria-hidden />
+            </div>
+            <span className="font-semibold truncate">{provider.name}</span>
+          </div>
+          {canEdit && (
+            <Switch
+              checked={provider.isEnabled}
+              onCheckedChange={handleToggle}
+              disabled={togglePending}
+              className="data-[state=checked]:bg-green-500"
+            />
+          )}
+        </div>
+
+        {/* Mobile: status badges */}
+        <div className="flex flex-wrap items-center gap-1.5 md:hidden">
+          {canEdit ? (
+            <GroupEditCombobox
+              currentGroups={providerGroups}
+              allGroups={allGroups}
+              userGroups={userGroups}
+              isAdmin={isAdmin}
+              onSave={handleSaveGroups}
+            />
+          ) : providerGroups.length > 0 ? (
+            providerGroups.map((tag, index) => {
+              const bgColor = getGroupColor(tag);
+              return (
+                <Badge
+                  key={`${tag}-${index}`}
+                  className="text-xs"
+                  style={{ backgroundColor: bgColor, color: getContrastTextColor(bgColor) }}
+                >
+                  {tag}
+                </Badge>
+              );
+            })
+          ) : (
+            <Badge variant="outline">{PROVIDER_GROUP.DEFAULT}</Badge>
+          )}
+          {healthStatus?.circuitState === "open" && (
+            <Badge variant="destructive" className="flex items-center gap-1">
+              <AlertTriangle className="h-3 w-3" />
+              {tList("circuitBroken")}
+            </Badge>
+          )}
+        </div>
+
+        {/* Mobile: metrics row */}
+        <div className="flex items-center gap-3 text-sm md:hidden">
+          <div className="flex items-center gap-1">
+            <span className="text-xs text-muted-foreground">{tList("priority")}:</span>
+            <span className="font-medium tabular-nums">
+              {canEdit ? (
+                <PriorityEditPopover
+                  globalPriority={provider.priority}
+                  groupPriorities={provider.groupPriorities}
+                  groups={providerGroups}
+                  activeGroupFilter={activeGroupFilter ?? null}
+                  validator={validatePriority}
+                  onSave={handleSavePriorityWithGroups}
+                />
+              ) : (
+                provider.priority
+              )}
+            </span>
+          </div>
+          <div className="flex items-center gap-1">
+            <span className="text-xs text-muted-foreground">{tList("weight")}:</span>
+            <span className="font-medium tabular-nums">
+              {canEdit ? (
+                <InlineEditPopover
+                  value={provider.weight}
+                  label={tInline("weightLabel")}
+                  type="integer"
+                  validator={validateWeight}
+                  onSave={handleSaveWeight}
+                />
+              ) : (
+                provider.weight
+              )}
+            </span>
+          </div>
+          <div className="flex items-center gap-1">
+            <span className="text-xs text-muted-foreground">{tList("costMultiplier")}:</span>
+            <span className="font-medium tabular-nums">
+              {canEdit ? (
+                <InlineEditPopover
+                  value={provider.costMultiplier}
+                  label={tInline("costMultiplierLabel")}
+                  validator={validateCostMultiplier}
+                  onSave={handleSaveCostMultiplier}
+                  suffix="x"
+                  type="number"
+                />
+              ) : (
+                <>{provider.costMultiplier}x</>
+              )}
+            </span>
+          </div>
+        </div>
+
+        {/* Mobile: actions */}
+        <div className="flex items-center justify-end gap-2 md:hidden">
+          {canEdit && (
+            <Button variant="outline" className="min-h-[44px] min-w-[44px]" onClick={handleEdit}>
+              <Edit className="h-4 w-4" />
+            </Button>
+          )}
+          {canEdit && (
+            <DropdownMenu>
+              <DropdownMenuTrigger asChild>
+                <Button
+                  variant="outline"
+                  className="min-h-[44px] min-w-[44px]"
+                  aria-label={tList("actions")}
+                >
+                  <MoreHorizontal className="h-4 w-4" />
+                </Button>
+              </DropdownMenuTrigger>
+              <DropdownMenuContent align="end">
+                <DropdownMenuItem onClick={handleClone}>
+                  <Copy className="mr-2 h-4 w-4" />
+                  {tList("actionClone")}
+                </DropdownMenuItem>
+                {healthStatus?.circuitState === "open" && (
+                  <DropdownMenuItem onClick={handleResetCircuit} disabled={resetPending}>
+                    <RotateCcw className="mr-2 h-4 w-4 text-orange-600" />
+                    {tList("actionResetCircuit")}
+                  </DropdownMenuItem>
+                )}
+                {provider.limitTotalUsd !== null && provider.limitTotalUsd > 0 && (
+                  <DropdownMenuItem onClick={handleResetTotalUsage} disabled={resetUsagePending}>
+                    <RotateCcw className="mr-2 h-4 w-4 text-blue-600" />
+                    {tList("actionResetUsage")}
+                  </DropdownMenuItem>
+                )}
+                <DropdownMenuSeparator />
+                <DropdownMenuItem
+                  className="text-destructive"
+                  onSelect={() => setMobileDeleteDialogOpen(true)}
+                >
+                  <Trash className="mr-2 h-4 w-4" />
+                  {tList("actionDelete")}
+                </DropdownMenuItem>
+              </DropdownMenuContent>
+            </DropdownMenu>
+          )}
+        </div>
+
+        {canEdit && (
+          <AlertDialog open={mobileDeleteDialogOpen} onOpenChange={setMobileDeleteDialogOpen}>
+            <AlertDialogContent>
+              <AlertDialogHeader>
+                <AlertDialogTitle>{tList("confirmDeleteTitle")}</AlertDialogTitle>
+                <AlertDialogDescription>
+                  {tList("confirmDeleteMessage", { name: provider.name })}
+                </AlertDialogDescription>
+              </AlertDialogHeader>
+              <div className="flex justify-end gap-2">
+                <AlertDialogCancel>{tList("cancelButton")}</AlertDialogCancel>
+                <AlertDialogAction
+                  onClick={handleDelete}
+                  className="bg-red-600 hover:bg-red-700"
+                  disabled={deletePending}
+                >
+                  {tList("deleteButton")}
+                </AlertDialogAction>
+              </div>
+            </AlertDialogContent>
+          </AlertDialog>
+        )}
+
+        {/* Desktop: original info section (hidden on mobile) */}
+        <div className="hidden md:flex items-center gap-2 flex-shrink-0">
           {provider.isEnabled ? (
             <CheckCircle className="h-4 w-4 text-green-500 flex-shrink-0" />
           ) : (
             <XCircle className="h-4 w-4 text-gray-400 flex-shrink-0" />
           )}
-
-          {/* 类型图标 */}
           <div
             className={`flex items-center justify-center w-6 h-6 rounded ${typeConfig.bgColor} flex-shrink-0`}
-            title={`${typeLabel} · ${typeDescription}`}
+            title={`${typeLabel} - ${typeDescription}`}
             aria-label={typeLabel}
           >
             <TypeIcon className="h-3.5 w-3.5" aria-hidden />
           </div>
         </div>
 
-        {/* 中间:名称、URL、官网、tag、熔断状态 */}
-        <div className="flex-1 min-w-0">
+        <div className="hidden md:block flex-1 min-w-0">
           <div className="flex items-center gap-2 flex-wrap">
-            {/* Favicon */}
             {provider.faviconUrl && (
-              // eslint-disable-next-line @next/next/no-img-element
               <img
                 src={provider.faviconUrl}
                 alt=""
@@ -410,52 +661,40 @@ export function ProviderRichListItem({
                 }}
               />
             )}
-
-            {/* 名称 */}
             <span className="font-semibold truncate">{provider.name}</span>
-
-            {/* Group Tags (supports comma-separated values) */}
-            {(provider.groupTag
-              ? provider.groupTag
-                  .split(",")
-                  .map((t) => t.trim())
-                  .filter(Boolean)
-              : []
-            ).length > 0 ? (
-              provider.groupTag
-                ?.split(",")
-                .map((t) => t.trim())
-                .filter(Boolean)
-                .map((tag, index) => {
-                  const bgColor = getGroupColor(tag);
-                  return (
-                    <Badge
-                      key={`${tag}-${index}`}
-                      className="flex-shrink-0 text-xs"
-                      style={{
-                        backgroundColor: bgColor,
-                        color: getContrastTextColor(bgColor),
-                      }}
-                    >
-                      {tag}
-                    </Badge>
-                  );
-                })
+            {canEdit ? (
+              <GroupEditCombobox
+                currentGroups={providerGroups}
+                allGroups={allGroups}
+                userGroups={userGroups}
+                isAdmin={isAdmin}
+                onSave={handleSaveGroups}
+              />
+            ) : providerGroups.length > 0 ? (
+              providerGroups.map((tag, index) => {
+                const bgColor = getGroupColor(tag);
+                return (
+                  <Badge
+                    key={`${tag}-${index}`}
+                    className="flex-shrink-0 text-xs"
+                    style={{ backgroundColor: bgColor, color: getContrastTextColor(bgColor) }}
+                  >
+                    {tag}
+                  </Badge>
+                );
+              })
             ) : (
               <Badge variant="outline" className="flex-shrink-0">
                 {PROVIDER_GROUP.DEFAULT}
               </Badge>
             )}
-
-            {/* 熔断器警告 */}
-            {healthStatus && healthStatus.circuitState === "open" && (
+            {healthStatus?.circuitState === "open" && (
               <Badge variant="destructive" className="flex items-center gap-1 flex-shrink-0">
                 <AlertTriangle className="h-3 w-3" />
                 {tList("circuitBroken")}
               </Badge>
             )}
           </div>
-
           <div className="flex items-center gap-3 mt-1 text-sm text-muted-foreground flex-wrap">
             {/* Vendor & Endpoints OR Legacy URL */}
             {vendor ? (
@@ -482,8 +721,6 @@ export function ProviderRichListItem({
                 {tList("officialWebsite")}
               </a>
             )}
-
-            {/* API Key 展示(仅管理员) */}
             {canEdit && (
               <button
                 onClick={(e) => {
@@ -496,8 +733,6 @@ export function ProviderRichListItem({
                 {provider.maskedKey}
               </button>
             )}
-
-            {/* 超时配置可视化(紧凑格式) */}
             <span className="text-xs text-muted-foreground flex-shrink-0">
               {tTimeout("summary", {
                 streaming:
@@ -517,18 +752,19 @@ export function ProviderRichListItem({
           </div>
         </div>
 
-        {/* 右侧:指标(仅桌面端) */}
+        {/* Desktop: metrics */}
         <div className="hidden md:grid grid-cols-3 gap-4 text-center flex-shrink-0">
           <div>
             <div className="text-xs text-muted-foreground">{tList("priority")}</div>
             <div className="font-medium">
               {canEdit ? (
-                <InlineEditPopover
-                  value={provider.priority}
-                  label={tInline("priorityLabel")}
-                  type="integer"
+                <PriorityEditPopover
+                  globalPriority={provider.priority}
+                  groupPriorities={provider.groupPriorities}
+                  groups={providerGroups}
+                  activeGroupFilter={activeGroupFilter ?? null}
                   validator={validatePriority}
-                  onSave={handleSavePriority}
+                  onSave={handleSavePriorityWithGroups}
                 />
               ) : (
                 <span>{provider.priority}</span>
@@ -570,7 +806,7 @@ export function ProviderRichListItem({
           </div>
         </div>
 
-        {/* 今日用量(仅大屏) */}
+        {/* Desktop: today usage */}
         <div className="hidden lg:block text-center flex-shrink-0 min-w-[100px]">
           <div className="text-xs text-muted-foreground">{tList("todayUsageLabel")}</div>
           {statisticsLoading ? (
@@ -595,9 +831,8 @@ export function ProviderRichListItem({
           )}
         </div>
 
-        {/* 操作按钮 */}
-        <div className="flex items-center gap-1 flex-shrink-0">
-          {/* 启用/禁用切换 */}
+        {/* Desktop: action buttons */}
+        <div className="hidden md:flex items-center gap-1 flex-shrink-0">
           {canEdit && (
             <Switch
               checked={provider.isEnabled}
@@ -606,8 +841,6 @@ export function ProviderRichListItem({
               className="data-[state=checked]:bg-green-500"
             />
           )}
-
-          {/* 编辑按钮 */}
           {canEdit && (
             <Button
               size="icon"
@@ -621,8 +854,6 @@ export function ProviderRichListItem({
               <Edit className="h-4 w-4" />
             </Button>
           )}
-
-          {/* 克隆按钮 */}
           {canEdit && (
             <Button
               size="icon"
@@ -636,9 +867,7 @@ export function ProviderRichListItem({
               <Copy className="h-4 w-4" />
             </Button>
           )}
-
-          {/* 熔断重置按钮(仅熔断时显示) */}
-          {canEdit && healthStatus && healthStatus.circuitState === "open" && (
+          {canEdit && healthStatus?.circuitState === "open" && (
             <Button
               size="icon"
               variant="ghost"
@@ -651,8 +880,6 @@ export function ProviderRichListItem({
               <RotateCcw className="h-4 w-4 text-orange-600" />
             </Button>
           )}
-
-          {/* 总用量重置按钮(仅配置了总限额时显示) */}
           {canEdit && provider.limitTotalUsd !== null && provider.limitTotalUsd > 0 && (
             <Button
               size="icon"
@@ -667,8 +894,6 @@ export function ProviderRichListItem({
               <RotateCcw className="h-4 w-4 text-blue-600" />
             </Button>
           )}
-
-          {/* 删除按钮 */}
           {canEdit && (
             <AlertDialog>
               <AlertDialogTrigger asChild>

+ 35 - 0
src/app/v1/_lib/proxy/forwarder.ts

@@ -51,6 +51,7 @@ import {
 import { ModelRedirector } from "./model-redirector";
 import { ProxyProviderResolver } from "./provider-selector";
 import type { ProxySession } from "./session";
+import { setDeferredStreamingFinalization } from "./stream-finalization";
 import {
   detectThinkingBudgetRectifierTrigger,
   rectifyThinkingBudget,
@@ -582,6 +583,40 @@ export class ProxyForwarder {
           const contentType = response.headers.get("content-type") || "";
           const isSSE = contentType.includes("text/event-stream");
 
+          // ========== 流式响应:延迟成功判定(避免“假 200”)==========
+          // 背景:上游可能返回 HTTP 200,但 SSE 内容为错误 JSON(如 {"error": "..."})。
+          // 如果在“收到响应头”时就立刻记录 success / 更新 session 绑定:
+          // - 会把会话粘到一个实际不可用的 provider;
+          // - 熔断/故障转移统计被误记为成功;
+          // - 客户端下一次自动重试可能仍复用到同一 provider,导致“假 200”让重试失效。
+          //
+          // 解决:Forwarder 只负责尽快把 Response 返回给下游开始透传,
+          // 把最终成功/失败结算延迟到 ResponseHandler:等 SSE 正常结束后再基于最终 body 补充检查并更新内部状态。
+          if (isSSE) {
+            setDeferredStreamingFinalization(session, {
+              providerId: currentProvider.id,
+              providerName: currentProvider.name,
+              providerPriority: currentProvider.priority || 0,
+              attemptNumber: attemptCount,
+              totalProvidersAttempted,
+              isFirstAttempt: totalProvidersAttempted === 1 && attemptCount === 1,
+              isFailoverSuccess: totalProvidersAttempted > 1,
+              endpointId: activeEndpoint.endpointId,
+              endpointUrl: endpointAudit.endpointUrl,
+              upstreamStatusCode: response.status,
+            });
+
+            logger.info("ProxyForwarder: Streaming response received, deferring finalization", {
+              providerId: currentProvider.id,
+              providerName: currentProvider.name,
+              attemptNumber: attemptCount,
+              totalProvidersAttempted,
+              statusCode: response.status,
+            });
+
+            return response;
+          }
+
           if (!isSSE) {
             // 非流式响应:检测空响应
             const contentLength = response.headers.get("content-length");

+ 55 - 14
src/app/v1/_lib/proxy/provider-selector.ts

@@ -903,12 +903,23 @@ export class ProxyProviderResolver {
     }
 
     // Step 5: 优先级分层(只选择最高优先级的供应商)
-    const topPriorityProviders = ProxyProviderResolver.selectTopPriority(healthyProviders);
-    const priorities = [...new Set(healthyProviders.map((p) => p.priority || 0))].sort(
-      (a, b) => a - b
+    const topPriorityProviders = ProxyProviderResolver.selectTopPriority(
+      healthyProviders,
+      effectiveGroupPick
     );
+    const priorities = [
+      ...new Set(
+        healthyProviders.map((p) =>
+          ProxyProviderResolver.resolveEffectivePriority(p, effectiveGroupPick ?? null)
+        )
+      ),
+    ].sort((a, b) => a - b);
     context.priorityLevels = priorities;
-    context.selectedPriority = Math.min(...healthyProviders.map((p) => p.priority || 0));
+    context.selectedPriority = Math.min(
+      ...healthyProviders.map((p) =>
+        ProxyProviderResolver.resolveEffectivePriority(p, effectiveGroupPick ?? null)
+      )
+    );
 
     // Step 6: 成本排序 + 加权选择 + 计算概率
     const totalWeight = topPriorityProviders.reduce((sum, p) => sum + p.weight, 0);
@@ -1024,18 +1035,38 @@ export class ProxyProviderResolver {
   }
 
   /**
-   * 优先级分层:只选择最高优先级的供应商
+   * Resolve a provider's effective priority: prefer a per-group override, falling back to the global default.
+   * Supports comma-separated multi-group strings (e.g. "cli,admin"); the smallest matching priority wins.
+   */
+  static resolveEffectivePriority(provider: Provider, userGroup: string | null): number {
+    if (userGroup && provider.groupPriorities) {
+      const groups = parseGroupString(userGroup);
+      const overrides = groups
+        .map((g) => provider.groupPriorities?.[g])
+        .filter((v): v is number => v !== undefined);
+      if (overrides.length > 0) {
+        return Math.min(...overrides);
+      }
+    }
+    return provider.priority ?? 0;
+  }
+
+  /**
+   * Priority tiering: keep only the providers whose effective priority equals the smallest value present (group overrides honored).
    */
-  private static selectTopPriority(providers: Provider[]): Provider[] {
+  private static selectTopPriority(providers: Provider[], userGroup?: string | null): Provider[] {
     if (providers.length === 0) {
       return [];
     }
 
-    // 找到最小的优先级值(最高优先级)
-    const minPriority = Math.min(...providers.map((p) => p.priority || 0));
+    const group = userGroup ?? null;
+    const minPriority = Math.min(
+      ...providers.map((p) => ProxyProviderResolver.resolveEffectivePriority(p, group))
+    );
 
-    // 只返回该优先级的供应商
-    return providers.filter((p) => (p.priority || 0) === minPriority);
+    return providers.filter(
+      (p) => ProxyProviderResolver.resolveEffectivePriority(p, group) === minPriority
+    );
   }
 
   /**
@@ -1174,7 +1205,10 @@ export class ProxyProviderResolver {
     }
 
     // 优先级分层
-    const topPriorityProviders = ProxyProviderResolver.selectTopPriority(healthyProviders);
+    const topPriorityProviders = ProxyProviderResolver.selectTopPriority(
+      healthyProviders,
+      effectiveGroupPick
+    );
 
     // 成本排序 + 加权随机选择
     const selected = ProxyProviderResolver.selectOptimal(topPriorityProviders);
@@ -1201,10 +1235,17 @@ export class ProxyProviderResolver {
         beforeHealthCheck: typeFiltered.length,
         afterHealthCheck: healthyProviders.length,
         filteredProviders: [],
-        priorityLevels: [...new Set(healthyProviders.map((p) => p.priority || 0))].sort(
-          (a, b) => a - b
+        priorityLevels: [
+          ...new Set(
+            healthyProviders.map((p) =>
+              ProxyProviderResolver.resolveEffectivePriority(p, effectiveGroupPick ?? null)
+            )
+          ),
+        ].sort((a, b) => a - b),
+        selectedPriority: ProxyProviderResolver.resolveEffectivePriority(
+          selected,
+          effectiveGroupPick ?? null
         ),
-        selectedPriority: selected.priority || 0,
         candidatesAtPriority: candidates,
       },
     };

+ 503 - 90
src/app/v1/_lib/proxy/response-handler.ts

@@ -11,6 +11,7 @@ import { SessionTracker } from "@/lib/session-tracker";
 import { calculateRequestCost } from "@/lib/utils/cost-calculation";
 import { hasValidPriceData } from "@/lib/utils/price-data";
 import { parseSSEData } from "@/lib/utils/sse";
+import { detectUpstreamErrorFromSseOrJsonText } from "@/lib/utils/upstream-error-detection";
 import {
   updateMessageRequestCost,
   updateMessageRequestDetails,
@@ -23,6 +24,7 @@ import { GeminiAdapter } from "../gemini/adapter";
 import type { GeminiResponse } from "../gemini/types";
 import { isClientAbortError } from "./errors";
 import type { ProxySession } from "./session";
+import { consumeDeferredStreamingFinalization } from "./stream-finalization";
 
 export type UsageMetrics = {
   input_tokens?: number;
@@ -59,6 +61,310 @@ function cleanResponseHeaders(headers: Headers): Headers {
   return cleaned;
 }
 
+type FinalizeDeferredStreamingResult = {
+  /**
+   * Status code used for internal settlement only.
+   *
+   * Note: this does not change the HTTP status the client actually received (it cannot be changed once SSE passthrough has started).
+   * Its only purpose is to make internal stats / circuit breaking / session binding treat a "fake 200" as a failure.
+   */
+  effectiveStatusCode: number;
+  /**
+   * Internally recorded error reason (written to DB/monitoring to help diagnose "fake 200" issues).
+   */
+  errorMessage: string | null;
+  /**
+   * providerId used for attribution when persisting to the DB (the deferred meta's providerId takes precedence).
+   *
+   * Rationale: for SSE, session.provider may be updated/overwritten by later logic, whereas the deferred meta identifies the provider this stream really belongs to.
+   * This field keeps the DB providerId consistent with providerChain / circuit-breaker attribution.
+   */
+  providerIdForPersistence: number | null;
+};
+
+/**
+ * If this SSE response was marked for deferred finalization, settle its final success/failure verdict once the stream has ended.
+ *
+ * When it applies
+ * - When the Forwarder receives a Response and recognizes it as SSE, it attaches DeferredStreamingFinalization metadata to the session.
+ * - After ResponseHandler has read the full SSE content in the background, it calls this function:
+ *   - If the content looks like an upstream error JSON (fake 200):
+ *     - record a circuit-breaker failure;
+ *     - do not update the session smart binding (avoids pinning the session to a bad provider);
+ *     - map the internal status code to 502 (affects only stats and later retry selection, not this client response).
+ *   - If the stream ended normally and no error was detected, settle as success and update binding / circuit breaker / endpoint success.
+ *
+ * @param streamEndedNormally - Must mean the reader reached done=true (a natural end); pass false for timeouts/aborts, which are settled as failures.
+ * @param clientAborted - Whether the client aborted the request (selects the internal status code so an abort is not recorded as 200 completed)
+ * @param abortReason - Reason code for a non-natural end (internal record / breaker attribution only; does not affect the client response)
+ */
+async function finalizeDeferredStreamingFinalizationIfNeeded(
+  session: ProxySession,
+  allContent: string,
+  upstreamStatusCode: number,
+  streamEndedNormally: boolean,
+  clientAborted: boolean,
+  abortReason?: string
+): Promise<FinalizeDeferredStreamingResult> {
+  const meta = consumeDeferredStreamingFinalization(session);
+  const provider = session.provider;
+
+  const providerIdForPersistence = meta?.providerId ?? provider?.id ?? null;
+
+  // Run "fake 200" detection only when the upstream HTTP status is 200 and the stream ended naturally:
+  // - non-200: HTTP already signals failure (no extra heuristic needed)
+  // - non-natural end: content may be partial/truncated, and the heuristic would be much more prone to false positives
+  //
+  // Returning `{isError:false}` here only means "detection skipped"; an abort/timeout is still settled as a failure below.
+  const shouldDetectFake200 = streamEndedNormally && upstreamStatusCode === 200;
+  const detected = shouldDetectFake200
+    ? detectUpstreamErrorFromSseOrJsonText(allContent)
+    : ({ isError: false } as const);
+
+  // Status code used for internal settlement (does not change the HTTP status the client actually received).
+  // - fake 200: mapped to 502 so internal stats / circuit breaking / session binding treat it as a failure.
+  // - non-natural end: also mapped to a failure (avoids recording an abort/partial stream as 200 completed).
+  let effectiveStatusCode: number;
+  let errorMessage: string | null;
+  if (detected.isError) {
+    effectiveStatusCode = 502;
+    errorMessage = detected.code;
+  } else if (!streamEndedNormally) {
+    effectiveStatusCode = clientAborted ? 499 : 502;
+    errorMessage = clientAborted ? "CLIENT_ABORTED" : (abortReason ?? "STREAM_ABORTED");
+  } else {
+    effectiveStatusCode = upstreamStatusCode;
+    errorMessage = null;
+  }
+
+  // Deferred finalization not enabled / provider missing:
+  // - only return the internal status code + error reason; the caller writes them to stats;
+  // - do not touch circuit breaker/binding here (no meta means the Forwarder did not enable deferred finalization; no provider means nothing to attribute to).
+  if (!meta || !provider) {
+    return { effectiveStatusCode, errorMessage, providerIdForPersistence };
+  }
+
+  // meta is recorded by the Forwarder at the moment it obtains the upstream Response and identifies the provider that actually produced this stream.
+  // Even if session.provider is later changed unexpectedly by other logic (edge case), the updates below are keyed on meta:
+  // - provider/endpoint circuit breaking and stats
+  // - session smart binding
+  // This avoids attributing the success/failure to the wrong provider.
+  let providerForChain = provider;
+  if (provider.id !== meta.providerId) {
+    logger.warn("[ResponseHandler] Deferred streaming meta provider mismatch", {
+      sessionId: session.sessionId ?? null,
+      metaProviderId: meta.providerId,
+      currentProviderId: provider.id,
+      canonicalProviderId: meta.providerId,
+    });
+
+    // Try to recover the matching Provider object via meta.providerId so the providerChain audit data stays consistent
+    try {
+      const providers = await session.getProvidersSnapshot();
+      const resolved = providers.find((p) => p.id === meta.providerId);
+      if (resolved) {
+        providerForChain = resolved;
+      } else {
+        logger.warn("[ResponseHandler] Deferred streaming meta provider not found in snapshot", {
+          sessionId: session.sessionId ?? null,
+          metaProviderId: meta.providerId,
+          currentProviderId: provider.id,
+        });
+      }
+    } catch (resolveError) {
+      logger.warn("[ResponseHandler] Failed to resolve meta provider from snapshot", {
+        sessionId: session.sessionId ?? null,
+        metaProviderId: meta.providerId,
+        currentProviderId: provider.id,
+        error: resolveError,
+      });
+    }
+  }
+
+  // Non-natural end: do not update the session binding (avoids pinning the session to an unstable provider), and never record it as 200 completed.
+  //
+  // Also, so that failover / circuit breaking work correctly:
+  // - client-initiated abort: not counted against the circuit breaker (usually not the provider's fault)
+  // - any other abort: counted as a provider/endpoint breaker failure (consistent with the timeout path)
+  if (!streamEndedNormally) {
+    if (!clientAborted) {
+      try {
+        // Dynamic import: avoids a potential circular dependency between the proxy module and the circuit-breaker module.
+        const { recordFailure } = await import("@/lib/circuit-breaker");
+        await recordFailure(meta.providerId, new Error(errorMessage ?? "STREAM_ABORTED"));
+      } catch (cbError) {
+        logger.warn("[ResponseHandler] Failed to record streaming failure in circuit breaker", {
+          providerId: meta.providerId,
+          sessionId: session.sessionId ?? null,
+          error: cbError,
+        });
+      }
+
+      if (meta.endpointId != null) {
+        try {
+          const { recordEndpointFailure } = await import("@/lib/endpoint-circuit-breaker");
+          await recordEndpointFailure(meta.endpointId, new Error(errorMessage ?? "STREAM_ABORTED"));
+        } catch (endpointError) {
+          logger.warn("[ResponseHandler] Failed to record endpoint failure (stream aborted)", {
+            endpointId: meta.endpointId,
+            providerId: meta.providerId,
+            sessionId: session.sessionId ?? null,
+            error: endpointError,
+          });
+        }
+      }
+    }
+
+    session.addProviderToChain(providerForChain, {
+      endpointId: meta.endpointId,
+      endpointUrl: meta.endpointUrl,
+      reason: "system_error",
+      attemptNumber: meta.attemptNumber,
+      statusCode: effectiveStatusCode,
+      errorMessage: errorMessage ?? undefined,
+    });
+
+    return { effectiveStatusCode, errorMessage, providerIdForPersistence };
+  }
+
+  if (detected.isError) {
+    logger.warn("[ResponseHandler] SSE completed but body indicates error (fake 200)", {
+      providerId: meta.providerId,
+      providerName: meta.providerName,
+      upstreamStatusCode: meta.upstreamStatusCode,
+      effectiveStatusCode,
+      code: detected.code,
+      detail: detected.detail ?? null,
+    });
+
+    // Record in the circuit breaker so later requests can trigger failover / circuit breaking correctly
+    try {
+      // Dynamic import: avoids a potential circular dependency between the proxy module and the circuit-breaker module.
+      const { recordFailure } = await import("@/lib/circuit-breaker");
+      await recordFailure(meta.providerId, new Error(detected.code));
+    } catch (cbError) {
+      logger.warn("[ResponseHandler] Failed to record fake-200 error in circuit breaker", {
+        providerId: meta.providerId,
+        sessionId: session.sessionId ?? null,
+        error: cbError,
+      });
+    }
+
+    // Endpoint-level breaker: symmetric with the success path, so a "fake 200" affects endpoint health as well as the provider
+    if (meta.endpointId != null) {
+      try {
+        const { recordEndpointFailure } = await import("@/lib/endpoint-circuit-breaker");
+        await recordEndpointFailure(meta.endpointId, new Error(detected.code));
+      } catch (endpointError) {
+        logger.warn("[ResponseHandler] Failed to record endpoint failure (fake 200)", {
+          endpointId: meta.endpointId,
+          providerId: meta.providerId,
+          error: endpointError,
+        });
+      }
+    }
+
+    // Record in the decision chain (for log display and DB persistence).
+    // Note: use effectiveStatusCode (502) rather than upstreamStatusCode (200) here,
+    // so the internal chain clearly shows a failure (otherwise it would be misread as a success).
+    session.addProviderToChain(providerForChain, {
+      endpointId: meta.endpointId,
+      endpointUrl: meta.endpointUrl,
+      reason: "retry_failed",
+      attemptNumber: meta.attemptNumber,
+      statusCode: effectiveStatusCode,
+      errorMessage: detected.code,
+    });
+
+    return { effectiveStatusCode, errorMessage, providerIdForPersistence };
+  }
+
+  // ========== Genuine success (SSE ended completely and no error was detected) ==========
+  if (meta.endpointId != null) {
+    try {
+      const { recordEndpointSuccess } = await import("@/lib/endpoint-circuit-breaker");
+      await recordEndpointSuccess(meta.endpointId);
+    } catch (endpointError) {
+      logger.warn("[ResponseHandler] Failed to record endpoint success (stream)", {
+        endpointId: meta.endpointId,
+        providerId: meta.providerId,
+        error: endpointError,
+      });
+    }
+  }
+
+  try {
+    const { recordSuccess } = await import("@/lib/circuit-breaker");
+    await recordSuccess(meta.providerId);
+  } catch (cbError) {
+    logger.warn("[ResponseHandler] Failed to record streaming success in circuit breaker", {
+      providerId: meta.providerId,
+      error: cbError,
+    });
+  }
+
+  // On success, bind the session to the provider (smart binding strategy). NOTE(review): unlike the breaker calls above, this await has no try/catch - confirm a throw here is meant to propagate to the caller.
+  if (session.sessionId) {
+    const result = await SessionManager.updateSessionBindingSmart(
+      session.sessionId,
+      meta.providerId,
+      meta.providerPriority,
+      meta.isFirstAttempt,
+      meta.isFailoverSuccess
+    );
+
+    if (result.updated) {
+      logger.info("[ResponseHandler] Session binding updated (stream finalized)", {
+        sessionId: session.sessionId,
+        providerId: meta.providerId,
+        providerName: meta.providerName,
+        priority: meta.providerPriority,
+        reason: result.reason,
+        details: result.details,
+        attemptNumber: meta.attemptNumber,
+        totalProvidersAttempted: meta.totalProvidersAttempted,
+      });
+    } else {
+      logger.debug("[ResponseHandler] Session binding not updated (stream finalized)", {
+        sessionId: session.sessionId,
+        providerId: meta.providerId,
+        providerName: meta.providerName,
+        priority: meta.providerPriority,
+        reason: result.reason,
+        details: result.details,
+      });
+    }
+
+    // Update both data sources together (keeps monitoring data consistent)
+    void SessionManager.updateSessionProvider(session.sessionId, {
+      providerId: meta.providerId,
+      providerName: meta.providerName,
+    }).catch((err) => {
+      logger.error("[ResponseHandler] Failed to update session provider info (stream)", {
+        error: err,
+      });
+    });
+  }
+
+  session.addProviderToChain(providerForChain, {
+    endpointId: meta.endpointId,
+    endpointUrl: meta.endpointUrl,
+    reason: meta.isFirstAttempt ? "request_success" : "retry_success",
+    attemptNumber: meta.attemptNumber,
+    statusCode: meta.upstreamStatusCode,
+  });
+
+  logger.info("[ResponseHandler] Streaming request finalized as success", {
+    providerId: meta.providerId,
+    providerName: meta.providerName,
+    attemptNumber: meta.attemptNumber,
+    totalProvidersAttempted: meta.totalProvidersAttempted,
+    statusCode: meta.upstreamStatusCode,
+  });
+
+  return { effectiveStatusCode, errorMessage, providerIdForPersistence };
+}
+
 export class ProxyResponseHandler {
   static async dispatch(session: ProxySession, response: Response): Promise<Response> {
     let fixedResponse = response;
@@ -215,8 +521,8 @@ export class ProxyResponseHandler {
             statusCode: statusCode,
             ttfbMs: session.ttfbMs ?? duration,
             providerChain: session.getProviderChain(),
-            model: session.getCurrentModel() ?? undefined, // 更新重定向后的模型
-            providerId: session.provider?.id, // 更新最终供应商ID(重试切换后)
+            model: session.getCurrentModel() ?? undefined, // 更新重定向后的模型
+            providerId: session.provider?.id, // 更新最终供应商ID(重试切换后)
             context1mApplied: session.getContext1mApplied(),
           });
           const tracker = ProxyStatusTracker.getInstance();
@@ -371,8 +677,8 @@ export class ProxyResponseHandler {
             cacheCreation1hInputTokens: usageMetrics?.cache_creation_1h_input_tokens,
             cacheTtlApplied: usageMetrics?.cache_ttl ?? null,
             providerChain: session.getProviderChain(),
-            model: session.getCurrentModel() ?? undefined, // 更新重定向后的模型
-            providerId: session.provider?.id, // 更新最终供应商ID(重试切换后)
+            model: session.getCurrentModel() ?? undefined, // 更新重定向后的模型
+            providerId: session.provider?.id, // 更新最终供应商ID(重试切换后)
             context1mApplied: session.getContext1mApplied(),
           });
 
@@ -573,15 +879,22 @@ export class ProxyResponseHandler {
             const reader = responseForStats.body?.getReader();
             if (!reader) return;
 
+            // 注意:即使 STORE_SESSION_RESPONSE_BODY=false(不写入 Redis),这里也会在内存中累积完整流内容:
+            // - 用于解析 usage/cost 与内部结算(例如“假 200”检测)
+            // 因此该开关仅影响“是否持久化”,不用于控制流式内存占用。
             const chunks: string[] = [];
             const decoder = new TextDecoder();
             let isFirstChunk = true;
+            let streamEndedNormally = false;
 
             while (true) {
               if (session.clientAbortSignal?.aborted) break;
 
               const { done, value } = await reader.read();
-              if (done) break;
+              if (done) {
+                streamEndedNormally = true;
+                break;
+              }
               if (value) {
                 if (isFirstChunk) {
                   isFirstChunk = false;
@@ -594,6 +907,7 @@ export class ProxyResponseHandler {
             const flushed = decoder.decode();
             if (flushed) chunks.push(flushed);
             const allContent = chunks.join("");
+            const clientAborted = session.clientAbortSignal?.aborted ?? false;
 
             // 存储响应体到 Redis(5分钟过期)
             if (session.sessionId) {
@@ -608,7 +922,21 @@ export class ProxyResponseHandler {
 
             // 使用共享的统计处理方法
             const duration = Date.now() - session.startTime;
-            await finalizeRequestStats(session, allContent, statusCode, duration);
+            const finalized = await finalizeDeferredStreamingFinalizationIfNeeded(
+              session,
+              allContent,
+              statusCode,
+              streamEndedNormally,
+              clientAborted
+            );
+            await finalizeRequestStats(
+              session,
+              allContent,
+              finalized.effectiveStatusCode,
+              duration,
+              finalized.errorMessage ?? undefined,
+              finalized.providerIdForPersistence ?? undefined
+            );
           } catch (error) {
             if (!isClientAbortError(error as Error)) {
               logger.error("[ResponseHandler] Gemini passthrough stats task failed:", error);
@@ -702,6 +1030,9 @@ export class ProxyResponseHandler {
     const processingPromise = (async () => {
       const reader = internalStream.getReader();
       const decoder = new TextDecoder();
+      // 注意:即使 STORE_SESSION_RESPONSE_BODY=false(不写入 Redis),这里也会在内存中累积完整流内容:
+      // - 用于解析 usage/cost 与内部结算(例如“假 200”检测)
+      // 因此该开关仅影响“是否持久化”,不用于控制流式内存占用。
       const chunks: string[] = [];
       let usageForCost: UsageMetrics | null = null;
       let isFirstChunk = true; // ⭐ 标记是否为第一块数据
@@ -779,7 +1110,24 @@ export class ProxyResponseHandler {
         return chunks.join("");
       };
 
-      const finalizeStream = async (allContent: string): Promise<void> => {
+      const finalizeStream = async (
+        allContent: string,
+        streamEndedNormally: boolean,
+        clientAborted: boolean,
+        abortReason?: string
+      ): Promise<void> => {
+        const finalized = await finalizeDeferredStreamingFinalizationIfNeeded(
+          session,
+          allContent,
+          statusCode,
+          streamEndedNormally,
+          clientAborted,
+          abortReason
+        );
+        const effectiveStatusCode = finalized.effectiveStatusCode;
+        const streamErrorMessage = finalized.errorMessage;
+        const providerIdForPersistence = finalized.providerIdForPersistence;
+
         // 存储响应体到 Redis(5分钟过期)
         if (session.sessionId) {
           void SessionManager.storeSessionResponse(
@@ -839,10 +1187,10 @@ export class ProxyResponseHandler {
         await trackCostToRedis(session, usageForCost);
 
         // 更新 session 使用量到 Redis(用于实时监控)
-        if (session.sessionId && usageForCost) {
+        if (session.sessionId) {
           let costUsdStr: string | undefined;
           try {
-            if (session.request.model) {
+            if (usageForCost && session.request.model) {
               const priceData = await session.getCachedPriceDataByBillingSource();
               if (priceData) {
                 const cost = calculateRequestCost(
@@ -862,22 +1210,30 @@ export class ProxyResponseHandler {
             });
           }
 
-          void SessionManager.updateSessionUsage(session.sessionId, {
-            inputTokens: usageForCost.input_tokens,
-            outputTokens: usageForCost.output_tokens,
-            cacheCreationInputTokens: usageForCost.cache_creation_input_tokens,
-            cacheReadInputTokens: usageForCost.cache_read_input_tokens,
-            costUsd: costUsdStr,
-            status: statusCode >= 200 && statusCode < 300 ? "completed" : "error",
-            statusCode: statusCode,
-          }).catch((error: unknown) => {
-            logger.error("[ResponseHandler] Failed to update session usage:", error);
-          });
+          const payload: SessionUsageUpdate = {
+            status: effectiveStatusCode >= 200 && effectiveStatusCode < 300 ? "completed" : "error",
+            statusCode: effectiveStatusCode,
+            ...(streamErrorMessage ? { errorMessage: streamErrorMessage } : {}),
+          };
+
+          if (usageForCost) {
+            payload.inputTokens = usageForCost.input_tokens;
+            payload.outputTokens = usageForCost.output_tokens;
+            payload.cacheCreationInputTokens = usageForCost.cache_creation_input_tokens;
+            payload.cacheReadInputTokens = usageForCost.cache_read_input_tokens;
+            payload.costUsd = costUsdStr;
+          }
+
+          void SessionManager.updateSessionUsage(session.sessionId, payload).catch(
+            (error: unknown) => {
+              logger.error("[ResponseHandler] Failed to update session usage:", error);
+            }
+          );
         }
 
         // 保存扩展信息(status code, tokens, provider chain)
         await updateMessageRequestDetails(messageContext.id, {
-          statusCode: statusCode,
+          statusCode: effectiveStatusCode,
           inputTokens: usageForCost?.input_tokens,
           outputTokens: usageForCost?.output_tokens,
           ttfbMs: session.ttfbMs,
@@ -887,13 +1243,15 @@ export class ProxyResponseHandler {
           cacheCreation1hInputTokens: usageForCost?.cache_creation_1h_input_tokens,
           cacheTtlApplied: usageForCost?.cache_ttl ?? null,
           providerChain: session.getProviderChain(),
-          model: session.getCurrentModel() ?? undefined, // ⭐ 更新重定向后的模型
-          providerId: session.provider?.id, // ⭐ 更新最终供应商ID(重试切换后)
+          ...(streamErrorMessage ? { errorMessage: streamErrorMessage } : {}),
+          model: session.getCurrentModel() ?? undefined, // 更新重定向后的模型
+          providerId: providerIdForPersistence ?? session.provider?.id, // 更新最终供应商ID(重试切换后)
           context1mApplied: session.getContext1mApplied(),
         });
       };
 
       try {
+        let streamEndedNormally = false;
         while (true) {
           // 检查取消信号
           if (session.clientAbortSignal?.aborted || abortController.signal.aborted) {
@@ -907,6 +1265,7 @@ export class ProxyResponseHandler {
 
           const { value, done } = await reader.read();
           if (done) {
+            streamEndedNormally = true;
             break;
           }
           if (value) {
@@ -945,7 +1304,30 @@ export class ProxyResponseHandler {
         // ⭐ 流式读取完成:清除静默期计时器
         clearIdleTimer();
         const allContent = flushAndJoin();
-        await finalizeStream(allContent);
+        const clientAborted = session.clientAbortSignal?.aborted ?? false;
+        try {
+          await finalizeStream(allContent, streamEndedNormally, clientAborted);
+        } catch (finalizeError) {
+          logger.error("ResponseHandler: Failed to finalize stream", {
+            taskId,
+            providerId: provider.id,
+            providerName: provider.name,
+            messageId: messageContext.id,
+            streamEndedNormally,
+            clientAborted,
+            finalizeError,
+          });
+
+          // 回退:避免 finalizeStream 失败导致 request record 未被更新
+          await persistRequestFailure({
+            session,
+            messageContext,
+            statusCode: statusCode && statusCode >= 400 ? statusCode : 500,
+            error: finalizeError,
+            taskId,
+            phase: "stream",
+          });
+        }
       } catch (error) {
         // 检测 AbortError 的来源:响应超时 vs 静默期超时 vs 客户端/上游中断
         const err = error as Error;
@@ -972,32 +1354,30 @@ export class ProxyResponseHandler {
               errorName: err.name,
             });
 
-            // ⚠️ 计入熔断器(动态导入避免循环依赖)
+            // 注意:无法重试,因为客户端已收到 HTTP 200
+            // 错误已记录,不抛出异常(避免影响后台任务)
+
+            // 结算并消费 deferred meta,确保 provider chain/熔断归因完整
             try {
-              const { recordFailure } = await import("@/lib/circuit-breaker");
-              await recordFailure(provider.id, err);
-              logger.debug("ResponseHandler: Response timeout recorded in circuit breaker", {
-                providerId: provider.id,
+              const allContent = flushAndJoin();
+              await finalizeStream(allContent, false, false, "STREAM_RESPONSE_TIMEOUT");
+            } catch (finalizeError) {
+              logger.error("ResponseHandler: Failed to finalize response-timeout stream", {
+                taskId,
+                messageId: messageContext.id,
+                finalizeError,
               });
-            } catch (cbError) {
-              logger.warn("ResponseHandler: Failed to record timeout in circuit breaker", {
-                providerId: provider.id,
-                error: cbError,
+
+              // 回退:至少保证 DB 记录能落下,避免 orphan record
+              await persistRequestFailure({
+                session,
+                messageContext,
+                statusCode: statusCode && statusCode >= 400 ? statusCode : 502,
+                error: err,
+                taskId,
+                phase: "stream",
               });
             }
-
-            // 注意:无法重试,因为客户端已收到 HTTP 200
-            // 错误已记录,熔断器已更新,不抛出异常(避免影响后台任务)
-
-            // 更新数据库记录(避免 orphan record)
-            await persistRequestFailure({
-              session,
-              messageContext,
-              statusCode: statusCode && statusCode >= 400 ? statusCode : 502,
-              error: err,
-              taskId,
-              phase: "stream",
-            });
           } else if (isIdleTimeout) {
             // ⚠️ 静默期超时:计入熔断器并记录错误日志
             logger.error("ResponseHandler: Streaming idle timeout", {
@@ -1008,32 +1388,30 @@ export class ProxyResponseHandler {
               chunksCollected: chunks.length,
             });
 
-            // ⚠️ 计入熔断器(动态导入避免循环依赖)
+            // 注意:无法重试,因为客户端已收到 HTTP 200
+            // 错误已记录,不抛出异常(避免影响后台任务)
+
+            // 结算并消费 deferred meta,确保 provider chain/熔断归因完整
             try {
-              const { recordFailure } = await import("@/lib/circuit-breaker");
-              await recordFailure(provider.id, err);
-              logger.debug("ResponseHandler: Streaming idle timeout recorded in circuit breaker", {
-                providerId: provider.id,
+              const allContent = flushAndJoin();
+              await finalizeStream(allContent, false, false, "STREAM_IDLE_TIMEOUT");
+            } catch (finalizeError) {
+              logger.error("ResponseHandler: Failed to finalize idle-timeout stream", {
+                taskId,
+                messageId: messageContext.id,
+                finalizeError,
               });
-            } catch (cbError) {
-              logger.warn("ResponseHandler: Failed to record timeout in circuit breaker", {
-                providerId: provider.id,
-                error: cbError,
+
+              // 回退:至少保证 DB 记录能落下,避免 orphan record
+              await persistRequestFailure({
+                session,
+                messageContext,
+                statusCode: statusCode && statusCode >= 400 ? statusCode : 502,
+                error: err,
+                taskId,
+                phase: "stream",
               });
             }
-
-            // 注意:无法重试,因为客户端已收到 HTTP 200
-            // 错误已记录,熔断器已更新,不抛出异常(避免影响后台任务)
-
-            // 更新数据库记录(避免 orphan record - 这是导致 185 个孤儿记录的根本原因!)
-            await persistRequestFailure({
-              session,
-              messageContext,
-              statusCode: statusCode && statusCode >= 400 ? statusCode : 502,
-              error: err,
-              taskId,
-              phase: "stream",
-            });
           } else if (!clientAborted) {
             // 上游在流式过程中意外中断:视为供应商/网络错误
             logger.error("ResponseHandler: Upstream stream aborted unexpectedly", {
@@ -1046,14 +1424,27 @@ export class ProxyResponseHandler {
               errorMessage: err.message || "(empty message)",
             });
 
-            await persistRequestFailure({
-              session,
-              messageContext,
-              statusCode: 502,
-              error: err,
-              taskId,
-              phase: "stream",
-            });
+            // 结算并消费 deferred meta,确保 provider chain/熔断归因完整
+            try {
+              const allContent = flushAndJoin();
+              await finalizeStream(allContent, false, false, "STREAM_UPSTREAM_ABORTED");
+            } catch (finalizeError) {
+              logger.error("ResponseHandler: Failed to finalize upstream-aborted stream", {
+                taskId,
+                messageId: messageContext.id,
+                finalizeError,
+              });
+
+              // 回退:至少保证 DB 记录能落下,避免 orphan record
+              await persistRequestFailure({
+                session,
+                messageContext,
+                statusCode: 502,
+                error: err,
+                taskId,
+                phase: "stream",
+              });
+            }
           } else {
             // 客户端主动中断:正常日志,不抛出错误
             logger.warn("ResponseHandler: Stream reading aborted by client", {
@@ -1070,7 +1461,7 @@ export class ProxyResponseHandler {
             });
             try {
               const allContent = flushAndJoin();
-              await finalizeStream(allContent);
+              await finalizeStream(allContent, false, true);
             } catch (finalizeError) {
               logger.error("ResponseHandler: Failed to finalize aborted stream response", {
                 taskId,
@@ -1082,15 +1473,27 @@ export class ProxyResponseHandler {
         } else {
           logger.error("Failed to save SSE content:", error);
 
-          // 更新数据库记录(避免 orphan record)
-          await persistRequestFailure({
-            session,
-            messageContext,
-            statusCode: statusCode && statusCode >= 400 ? statusCode : 500,
-            error,
-            taskId,
-            phase: "stream",
-          });
+          // 结算并消费 deferred meta,确保 provider chain/熔断归因完整
+          try {
+            const allContent = flushAndJoin();
+            await finalizeStream(allContent, false, clientAborted, "STREAM_PROCESSING_ERROR");
+          } catch (finalizeError) {
+            logger.error("ResponseHandler: Failed to finalize stream after processing error", {
+              taskId,
+              messageId: messageContext.id,
+              finalizeError,
+            });
+
+            // 回退:至少保证 DB 记录能落下,避免 orphan record
+            await persistRequestFailure({
+              session,
+              messageContext,
+              statusCode: statusCode && statusCode >= 400 ? statusCode : 500,
+              error,
+              taskId,
+              phase: "stream",
+            });
+          }
         }
       } finally {
         // 确保资源释放
@@ -1795,18 +2198,25 @@ async function updateRequestCostFromUsage(
 /**
  * 统一的请求统计处理方法
  * 用于消除 Gemini 透传、普通非流式、普通流式之间的重复统计逻辑
+ *
+ * @param statusCode - 内部结算状态码(可能与客户端实际收到的 HTTP 状态不同,例如“假 200”会被映射为 502)
+ * @param errorMessage - 可选的内部错误原因(用于把假 200/解析失败等信息写入 DB 与监控)
  */
 export async function finalizeRequestStats(
   session: ProxySession,
   responseText: string,
   statusCode: number,
-  duration: number
+  duration: number,
+  errorMessage?: string,
+  providerIdOverride?: number
 ): Promise<void> {
   const { messageContext, provider } = session;
   if (!provider || !messageContext) {
     return;
   }
 
+  const providerIdForPersistence = providerIdOverride ?? session.provider?.id;
+
   // 1. 结束请求状态追踪
   ProxyStatusTracker.getInstance().endRequest(messageContext.user.id, messageContext.id);
 
@@ -1820,10 +2230,11 @@ export async function finalizeRequestStats(
     // 即使没有 usageMetrics,也需要更新状态码和 provider chain
     await updateMessageRequestDetails(messageContext.id, {
       statusCode: statusCode,
+      ...(errorMessage ? { errorMessage } : {}),
       ttfbMs: session.ttfbMs ?? duration,
       providerChain: session.getProviderChain(),
       model: session.getCurrentModel() ?? undefined,
-      providerId: session.provider?.id, // ⭐ 更新最终供应商ID(重试切换后)
+      providerId: providerIdForPersistence, // 更新最终供应商ID(重试切换后)
       context1mApplied: session.getContext1mApplied(),
     });
     return;
@@ -1892,6 +2303,7 @@ export async function finalizeRequestStats(
       costUsd: costUsdStr,
       status: statusCode >= 200 && statusCode < 300 ? "completed" : "error",
       statusCode: statusCode,
+      ...(errorMessage ? { errorMessage } : {}),
     }).catch((error: unknown) => {
       logger.error("[ResponseHandler] Failed to update session usage:", error);
     });
@@ -1909,8 +2321,9 @@ export async function finalizeRequestStats(
     cacheCreation1hInputTokens: normalizedUsage.cache_creation_1h_input_tokens,
     cacheTtlApplied: normalizedUsage.cache_ttl ?? null,
     providerChain: session.getProviderChain(),
+    ...(errorMessage ? { errorMessage } : {}),
     model: session.getCurrentModel() ?? undefined,
-    providerId: session.provider?.id, // ⭐ 更新最终供应商ID(重试切换后)
+    providerId: providerIdForPersistence, // 更新最终供应商ID(重试切换后)
     context1mApplied: session.getContext1mApplied(),
   });
 }
@@ -2066,7 +2479,7 @@ async function persistRequestFailure(options: {
       ttfbMs: phase === "non-stream" ? (session.ttfbMs ?? duration) : session.ttfbMs,
       providerChain: session.getProviderChain(),
       model: session.getCurrentModel() ?? undefined,
-      providerId: session.provider?.id, // 更新最终供应商ID(重试切换后)
+      providerId: session.provider?.id, // 更新最终供应商ID(重试切换后)
       context1mApplied: session.getContext1mApplied(),
     });
 

+ 55 - 0
src/app/v1/_lib/proxy/stream-finalization.ts

@@ -0,0 +1,55 @@
+import type { ProxySession } from "./session";
+
+/**
+ * For a streaming (SSE) response, success cannot be decided at the moment the response headers arrive:
+ * - The upstream may return HTTP 200 while the body is actually an error JSON (a "fake 200").
+ * - A final verdict is only possible after the SSE stream has ended.
+ *
+ * This structure carries the information needed for "deferred finalization" from Forwarder → ResponseHandler:
+ * - Forwarder: starts relaying to the client as soon as it has the Response (lower latency), but must not yet record success or bind the session.
+ * - ResponseHandler: once the stream ends normally, runs an additional check on the final body and only then updates the circuit breaker / endpoint / session binding.
+ *
+ * Notes:
+ * - A WeakMap is used here instead of attaching fields to the session:
+ *   - it avoids polluting the ProxySession object;
+ *   - it is more type-safe;
+ *   - the metadata lifetime follows the session instance, and it can be cleared right after consumption.
+ * - The metadata is one-shot: it is cleared once consumed, to avoid cross-request contamination.
+ */
+export type DeferredStreamingFinalization = {
+  providerId: number;
+  providerName: string;
+  providerPriority: number;
+  attemptNumber: number;
+  totalProvidersAttempted: number;
+  isFirstAttempt: boolean;
+  isFailoverSuccess: boolean;
+  endpointId: number | null;
+  endpointUrl: string;
+  upstreamStatusCode: number;
+};
+
+const deferredMeta = new WeakMap<ProxySession, DeferredStreamingFinalization>();
+
+export function setDeferredStreamingFinalization(
+  session: ProxySession,
+  meta: DeferredStreamingFinalization
+): void {
+  // Called by the Forwarder when it detects SSE: marks this request as needing a second-pass settlement after the stream ends.
+  deferredMeta.set(session, meta);
+}
+
+export function consumeDeferredStreamingFinalization(
+  session: ProxySession
+): DeferredStreamingFinalization | null {
+  // Notes:
+  // - This function contains no await, so a single call runs to completion without being interleaved on the JS event loop.
+  // - ProxySession is described as a per-request instance; even if several background tasks call this one after another,
+  //   only the first call receives the meta and every later call gets null.
+  const meta = deferredMeta.get(session) ?? null;
+  if (meta) {
+    // Consume-once: delete the entry to avoid duplicate settlement (e.g. when several background stats tasks run in parallel).
+    deferredMeta.delete(session);
+  }
+  return meta;
+}

+ 124 - 0
src/components/ui/drawer.tsx

@@ -0,0 +1,124 @@
+"use client";
+
+import type * as React from "react";
+import { Drawer as DrawerPrimitive } from "vaul";
+
+import { cn } from "@/lib/utils/index";
+
+function Drawer({ ...props }: React.ComponentProps<typeof DrawerPrimitive.Root>) {
+  return <DrawerPrimitive.Root data-slot="drawer" {...props} />;
+}
+
+function DrawerTrigger({ ...props }: React.ComponentProps<typeof DrawerPrimitive.Trigger>) {
+  return <DrawerPrimitive.Trigger data-slot="drawer-trigger" {...props} />;
+}
+
+function DrawerPortal({ ...props }: React.ComponentProps<typeof DrawerPrimitive.Portal>) {
+  return <DrawerPrimitive.Portal data-slot="drawer-portal" {...props} />;
+}
+
+function DrawerClose({ ...props }: React.ComponentProps<typeof DrawerPrimitive.Close>) {
+  return <DrawerPrimitive.Close data-slot="drawer-close" {...props} />;
+}
+
+function DrawerOverlay({
+  className,
+  ...props
+}: React.ComponentProps<typeof DrawerPrimitive.Overlay>) {
+  return (
+    <DrawerPrimitive.Overlay
+      data-slot="drawer-overlay"
+      className={cn(
+        "data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 fixed inset-0 z-50 bg-black/50",
+        className
+      )}
+      {...props}
+    />
+  );
+}
+
+function DrawerContent({
+  className,
+  children,
+  ...props
+}: React.ComponentProps<typeof DrawerPrimitive.Content>) {
+  return (
+    <DrawerPortal data-slot="drawer-portal">
+      <DrawerOverlay />
+      <DrawerPrimitive.Content
+        data-slot="drawer-content"
+        className={cn(
+          "group/drawer-content bg-background fixed z-50 flex h-auto flex-col",
+          "data-[vaul-drawer-direction=top]:inset-x-0 data-[vaul-drawer-direction=top]:top-0 data-[vaul-drawer-direction=top]:mb-24 data-[vaul-drawer-direction=top]:max-h-[80vh] data-[vaul-drawer-direction=top]:rounded-b-lg data-[vaul-drawer-direction=top]:border-b",
+          "data-[vaul-drawer-direction=bottom]:inset-x-0 data-[vaul-drawer-direction=bottom]:bottom-0 data-[vaul-drawer-direction=bottom]:mt-24 data-[vaul-drawer-direction=bottom]:max-h-[95vh] data-[vaul-drawer-direction=bottom]:rounded-t-lg data-[vaul-drawer-direction=bottom]:border-t",
+          "data-[vaul-drawer-direction=right]:inset-y-0 data-[vaul-drawer-direction=right]:right-0 data-[vaul-drawer-direction=right]:w-3/4 data-[vaul-drawer-direction=right]:border-l data-[vaul-drawer-direction=right]:sm:max-w-sm",
+          "data-[vaul-drawer-direction=left]:inset-y-0 data-[vaul-drawer-direction=left]:left-0 data-[vaul-drawer-direction=left]:w-3/4 data-[vaul-drawer-direction=left]:border-r data-[vaul-drawer-direction=left]:sm:max-w-sm",
+          className
+        )}
+        {...props}
+      >
+        <div className="bg-muted mx-auto mt-4 hidden h-2 w-[100px] shrink-0 rounded-full group-data-[vaul-drawer-direction=bottom]/drawer-content:block" />
+        {children}
+      </DrawerPrimitive.Content>
+    </DrawerPortal>
+  );
+}
+
+function DrawerHeader({ className, ...props }: React.ComponentProps<"div">) {
+  return (
+    <div
+      data-slot="drawer-header"
+      className={cn(
+        "flex flex-col gap-0.5 p-4 group-data-[vaul-drawer-direction=bottom]/drawer-content:text-center group-data-[vaul-drawer-direction=top]/drawer-content:text-center md:gap-1.5 md:text-left",
+        className
+      )}
+      {...props}
+    />
+  );
+}
+
+function DrawerFooter({ className, ...props }: React.ComponentProps<"div">) {
+  return (
+    <div
+      data-slot="drawer-footer"
+      className={cn("mt-auto flex flex-col gap-2 p-4", className)}
+      {...props}
+    />
+  );
+}
+
+function DrawerTitle({ className, ...props }: React.ComponentProps<typeof DrawerPrimitive.Title>) {
+  return (
+    <DrawerPrimitive.Title
+      data-slot="drawer-title"
+      className={cn("text-foreground font-semibold", className)}
+      {...props}
+    />
+  );
+}
+
+function DrawerDescription({
+  className,
+  ...props
+}: React.ComponentProps<typeof DrawerPrimitive.Description>) {
+  return (
+    <DrawerPrimitive.Description
+      data-slot="drawer-description"
+      className={cn("text-muted-foreground text-sm", className)}
+      {...props}
+    />
+  );
+}
+
+export {
+  Drawer,
+  DrawerPortal,
+  DrawerOverlay,
+  DrawerTrigger,
+  DrawerClose,
+  DrawerContent,
+  DrawerHeader,
+  DrawerFooter,
+  DrawerTitle,
+  DrawerDescription,
+};

+ 1 - 0
src/drizzle/schema.ts

@@ -162,6 +162,7 @@ export const providers = pgTable('providers', {
 
   // 优先级和分组配置
   priority: integer('priority').notNull().default(0),
+  groupPriorities: jsonb('group_priorities').$type<Record<string, number> | null>().default(null),
   costMultiplier: numeric('cost_multiplier', { precision: 10, scale: 4 }).default('1.0'),
   groupTag: varchar('group_tag', { length: 50 }),
 

+ 71 - 3
src/instrumentation.ts

@@ -3,11 +3,13 @@
  * 在服务器启动时自动执行数据库迁移
  */
 
-// instrumentation 需要 Node.js runtime(依赖数据库与 Redis 等 Node 能力)
-export const runtime = "nodejs";
-
 import { startCacheCleanup, stopCacheCleanup } from "@/lib/cache/session-cache";
 import { logger } from "@/lib/logger";
+import { CHANNEL_API_KEYS_UPDATED, subscribeCacheInvalidation } from "@/lib/redis/pubsub";
+import { apiKeyVacuumFilter } from "@/lib/security/api-key-vacuum-filter";
+
+// instrumentation 需要 Node.js runtime(依赖数据库与 Redis 等 Node 能力)
+export const runtime = "nodejs";
 
 const instrumentationState = globalThis as unknown as {
   __CCH_CACHE_CLEANUP_STARTED__?: boolean;
@@ -15,6 +17,8 @@ const instrumentationState = globalThis as unknown as {
   __CCH_SHUTDOWN_IN_PROGRESS__?: boolean;
   __CCH_CLOUD_PRICE_SYNC_STARTED__?: boolean;
   __CCH_CLOUD_PRICE_SYNC_INTERVAL_ID__?: ReturnType<typeof setInterval>;
+  __CCH_API_KEY_VF_SYNC_STARTED__?: boolean;
+  __CCH_API_KEY_VF_SYNC_CLEANUP__?: (() => void) | null;
 };
 
 /**
@@ -82,6 +86,57 @@ async function startCloudPriceSyncScheduler(): Promise<void> {
   }
 }
 
+/**
+ * Multi-instance: subscribe to the API key change broadcast and invalidate + rebuild this instance's Vacuum Filter.
+ *
+ * Goals:
+ * - Avoid false rejections caused by the local filter not yet containing a newly created key
+ * - Degrade automatically when the rebuild fails or Redis is not configured (never block startup)
+ */
+async function startApiKeyVacuumFilterSync(): Promise<void> {
+  if (instrumentationState.__CCH_API_KEY_VF_SYNC_STARTED__) {
+    return;
+  }
+
+  // Mirror the Redis client's enablement conditions: skip subscribing when rate limiting is disabled or REDIS_URL is unset, avoiding extra warn logs
+  const rateLimitRaw = process.env.ENABLE_RATE_LIMIT?.trim();
+  if (rateLimitRaw === "false" || rateLimitRaw === "0" || !process.env.REDIS_URL) {
+    return;
+  }
+
+  try {
+    const cleanup = await subscribeCacheInvalidation(CHANNEL_API_KEYS_UPDATED, () => {
+      apiKeyVacuumFilter.invalidateAndReload({ reason: "api_keys_updated" });
+    });
+
+    if (!cleanup) {
+      return;
+    }
+
+    instrumentationState.__CCH_API_KEY_VF_SYNC_STARTED__ = true;
+    instrumentationState.__CCH_API_KEY_VF_SYNC_CLEANUP__ = cleanup;
+    logger.info("[Instrumentation] API Key Vacuum Filter sync enabled");
+  } catch (error) {
+    logger.warn("[Instrumentation] API Key Vacuum Filter sync init failed", {
+      error: error instanceof Error ? error.message : String(error),
+    });
+  }
+}
+
+function warmupApiKeyVacuumFilter(): void {
+  // Warm up the API Key Vacuum Filter (reduces DB pressure caused by invalid keys)
+  try {
+    apiKeyVacuumFilter.startBackgroundReload({ reason: "startup" });
+  } catch (error) {
+    logger.warn("[Instrumentation] Failed to start API key vacuum filter preload", {
+      error: error instanceof Error ? error.message : String(error),
+    });
+  }
+
+  // Multi-instance: subscribe to key-change broadcasts so the local filter is rebuilt (fire-and-forget; errors are handled inside the callee)
+  void startApiKeyVacuumFilterSync();
+}
+
 export async function register() {
   // 仅在服务器端执行
   if (process.env.NEXT_RUNTIME === "nodejs") {
@@ -121,6 +176,15 @@ export async function register() {
           });
         }
 
+        try {
+          instrumentationState.__CCH_API_KEY_VF_SYNC_CLEANUP__?.();
+          instrumentationState.__CCH_API_KEY_VF_SYNC_STARTED__ = false;
+        } catch (error) {
+          logger.warn("[Instrumentation] Failed to cleanup API key vacuum filter sync", {
+            error: error instanceof Error ? error.message : String(error),
+          });
+        }
+
         try {
           const { stopEndpointProbeScheduler } = await import(
             "@/lib/provider-endpoints/probe-scheduler"
@@ -206,6 +270,8 @@ export async function register() {
         logger.info("[Instrumentation] AUTO_MIGRATE=false: skipping migrations");
       }
 
+      warmupApiKeyVacuumFilter();
+
       // 回填 provider_vendors(按域名自动聚合旧 providers)
       try {
         const { backfillProviderVendorsFromProviders } = await import(
@@ -306,6 +372,8 @@ export async function register() {
       if (isConnected) {
         await runMigrations();
 
+        warmupApiKeyVacuumFilter();
+
         // 回填 provider_vendors(按域名自动聚合旧 providers)
         try {
           const { backfillProviderVendorsFromProviders } = await import(

+ 13 - 8
src/lib/auth.ts

@@ -1,8 +1,7 @@
 import { cookies, headers } from "next/headers";
 import { config } from "@/lib/config/config";
 import { getEnvConfig } from "@/lib/config/env.schema";
-import { findActiveKeyByKeyString } from "@/repository/key";
-import { findUserById } from "@/repository/user";
+import { validateApiKeyAndGetUser } from "@/repository/key";
 import type { Key } from "@/types/key";
 import type { User } from "@/types/user";
 
@@ -107,18 +106,24 @@ export async function validateKey(
     return { user: adminUser, key: adminKey };
   }
 
-  const key = await findActiveKeyByKeyString(keyString);
-  if (!key) {
+  // 默认鉴权链路:Vacuum Filter(仅负向短路) → Redis(key/user 缓存) → DB(权威校验)
+  const authResult = await validateApiKeyAndGetUser(keyString);
+  if (!authResult) {
     return null;
   }
 
-  // 检查 Web UI 登录权限
-  if (!allowReadOnlyAccess && !key.canLoginWebUi) {
+  const { user, key } = authResult;
+
+  // 用户状态校验:与 v1 proxy 侧保持一致,避免禁用/过期用户继续登录或持有会话
+  if (!user.isEnabled) {
+    return null;
+  }
+  if (user.expiresAt && user.expiresAt.getTime() <= Date.now()) {
     return null;
   }
 
-  const user = await findUserById(key.userId);
-  if (!user) {
+  // 检查 Web UI 登录权限
+  if (!allowReadOnlyAccess && !key.canLoginWebUi) {
     return null;
   }
 

+ 8 - 0
src/lib/config/env.schema.ts

@@ -98,6 +98,14 @@ export const EnvSchema = z.object({
   // - false (默认):存储请求/响应体但对 message 内容脱敏 [REDACTED]
   // - true:原样存储 message 内容(注意隐私和存储空间影响)
   STORE_SESSION_MESSAGES: z.string().default("false").transform(booleanTransform),
+  // 会话响应体存储开关
+  // - true (默认):存储响应体(SSE/JSON),用于调试/回放/问题定位(Redis 临时缓存,默认 5 分钟)
+  // - false:不存储响应体(注意:不影响本次请求处理;仅影响后续在 UI/诊断中查看 response body)
+  //
+  // 说明:
+  // - 该开关只影响“写入 Redis 的响应体内容”,不影响内部统计逻辑读取响应体(例如 tokens/费用统计、SSE 结束后的假 200 检测)。
+  // - message 内容是否脱敏仍由 STORE_SESSION_MESSAGES 控制。
+  STORE_SESSION_RESPONSE_BODY: z.string().default("true").transform(booleanTransform),
   DEBUG_MODE: z.string().default("false").transform(booleanTransform),
   LOG_LEVEL: z.enum(["fatal", "error", "warn", "info", "debug", "trace"]).default("info"),
   TZ: z.string().default("Asia/Shanghai"),

+ 20 - 0
src/lib/hooks/use-media-query.ts

@@ -0,0 +1,20 @@
+"use client";
+
+import { useEffect, useState } from "react";
+
+export function useMediaQuery(query: string): boolean {
+  const [matches, setMatches] = useState(false);
+
+  useEffect(() => {
+    if (typeof window === "undefined" || !("matchMedia" in window)) {
+      return;
+    }
+    const mql = window.matchMedia(query);
+    setMatches(mql.matches);
+    const handler = (e: MediaQueryListEvent) => setMatches(e.matches);
+    mql.addEventListener("change", handler);
+    return () => mql.removeEventListener("change", handler);
+  }, [query]);
+
+  return matches;
+}

+ 6 - 3
src/lib/redis/client.ts

@@ -21,7 +21,8 @@ function maskRedisUrl(redisUrl: string) {
  * Includes servername for SNI (Server Name Indication) support.
  */
 function buildTlsConfig(redisUrl: string): Record<string, unknown> {
-  const rejectUnauthorized = process.env.REDIS_TLS_REJECT_UNAUTHORIZED !== "false";
+  const raw = process.env.REDIS_TLS_REJECT_UNAUTHORIZED?.trim();
+  const rejectUnauthorized = raw !== "false" && raw !== "0";
 
   try {
     const url = new URL(redisUrl);
@@ -79,7 +80,8 @@ export function getRedisClient(): Redis | null {
   }
 
   const redisUrl = process.env.REDIS_URL;
-  const isEnabled = process.env.ENABLE_RATE_LIMIT === "true";
+  const rateLimitRaw = process.env.ENABLE_RATE_LIMIT?.trim();
+  const isEnabled = rateLimitRaw !== "false" && rateLimitRaw !== "0";
 
   if (!isEnabled || !redisUrl) {
     logger.warn("[Redis] Rate limiting disabled or REDIS_URL not configured");
@@ -112,7 +114,8 @@ export function getRedisClient(): Redis | null {
 
     // 2. 如果使用 rediss://,则添加显式的 TLS 配置(支持跳过证书验证)
     if (useTls) {
-      const rejectUnauthorized = process.env.REDIS_TLS_REJECT_UNAUTHORIZED !== "false";
+      const raw = process.env.REDIS_TLS_REJECT_UNAUTHORIZED?.trim();
+      const rejectUnauthorized = raw !== "false" && raw !== "0";
       logger.info("[Redis] Using TLS connection (rediss://)", {
         redisUrl: safeRedisUrl,
         rejectUnauthorized,

+ 2 - 0
src/lib/redis/pubsub.ts

@@ -7,6 +7,8 @@ import { getRedisClient } from "./client";
 export const CHANNEL_ERROR_RULES_UPDATED = "cch:cache:error_rules:updated";
 export const CHANNEL_REQUEST_FILTERS_UPDATED = "cch:cache:request_filters:updated";
 export const CHANNEL_SENSITIVE_WORDS_UPDATED = "cch:cache:sensitive_words:updated";
+// API Key 集合发生变化(典型:创建新 key)时,通知各实例重建 Vacuum Filter,避免误拒绝
+export const CHANNEL_API_KEYS_UPDATED = "cch:cache:api_keys:updated";
 
 type CacheInvalidationCallback = () => void;
 

+ 408 - 0
src/lib/security/api-key-auth-cache.ts

@@ -0,0 +1,408 @@
+import { logger } from "@/lib/logger";
+import type { Key } from "@/types/key";
+import type { User } from "@/types/user";
+
+type RedisPipelineLike = {
+  setex(key: string, ttlSeconds: number, value: string): RedisPipelineLike;
+  del(key: string): RedisPipelineLike;
+  exec(): Promise<unknown>;
+};
+
+type RedisLike = {
+  get(key: string): Promise<string | null>;
+  setex(key: string, ttlSeconds: number, value: string): Promise<unknown>;
+  del(key: string): Promise<number>;
+  pipeline(): RedisPipelineLike;
+};
+
+const CACHE_VERSION = 1 as const;
+
+const REDIS_KEYS = {
+  keyByHash: (sha256Hex: string) => `api_key_auth:v${CACHE_VERSION}:key:${sha256Hex}`,
+  userById: (userId: number) => `api_key_auth:v${CACHE_VERSION}:user:${userId}`,
+};
+
+function isEdgeRuntime(): boolean {
+  if (typeof process === "undefined") return true;
+  return process.env.NEXT_RUNTIME === "edge";
+}
+
+function isApiKeyRedisCacheEnabled(): boolean {
+  if (isEdgeRuntime()) return false;
+  const raw = process.env.ENABLE_API_KEY_REDIS_CACHE?.trim();
+  return raw !== "false" && raw !== "0";
+}
+
+function getCacheTtlSeconds(): number {
+  const raw = process.env.API_KEY_AUTH_CACHE_TTL_SECONDS;
+  const parsed = raw ? Number.parseInt(raw, 10) : 60;
+  if (!Number.isFinite(parsed) || parsed <= 0) return 60;
+  // Cap at 1 hour so a misconfigured value cannot cause long-lived stale reads
+  return Math.min(parsed, 3600);
+}
+
+const textEncoder = new TextEncoder();
+const byteToHex = Array.from({ length: 256 }, (_, index) => index.toString(16).padStart(2, "0"));
+
+function bufferToHex(buffer: ArrayBuffer): string {
+  const bytes = new Uint8Array(buffer);
+  let out = "";
+  for (let i = 0; i < bytes.length; i++) {
+    out += byteToHex[bytes[i]];
+  }
+  return out;
+}
+
+async function sha256Hex(value: string): Promise<string | null> {
+  const subtle = (globalThis as unknown as { crypto?: Crypto }).crypto?.subtle;
+  if (!subtle) return null;
+
+  try {
+    const digest = await subtle.digest("SHA-256", textEncoder.encode(value));
+    return bufferToHex(digest);
+  } catch (error) {
+    logger.debug(
+      { error: error instanceof Error ? error.message : String(error) },
+      "[ApiKeyAuthCache] sha256 digest failed"
+    );
+    return null;
+  }
+}
+
+function shouldUseRedisClient(): boolean {
+  // Environments without `process` (Edge runtime, browser, etc.): disable outright
+  if (typeof process === "undefined") return false;
+
+  // Mirror getRedisClient's enablement conditions so an unconfigured Redis does not trigger warn logs on the hot path
+  if (process.env.CI === "true" || process.env.NEXT_PHASE === "phase-production-build")
+    return false;
+  if (!process.env.REDIS_URL) return false;
+  const rateLimitRaw = process.env.ENABLE_RATE_LIMIT?.trim();
+  if (rateLimitRaw === "false" || rateLimitRaw === "0") return false;
+  return true;
+}
+
+let getRedisClientFn: (() => unknown) | null | undefined;
+
+async function getRedisForApiKeyAuthCache(): Promise<RedisLike | null> {
+  if (!isApiKeyRedisCacheEnabled()) return null;
+  if (!shouldUseRedisClient()) return null;
+
+  if (getRedisClientFn === undefined) {
+    try {
+      const mod = await import("@/lib/redis/client");
+      getRedisClientFn = mod.getRedisClient;
+    } catch (error) {
+      logger.debug(
+        { error: error instanceof Error ? error.message : String(error) },
+        "[ApiKeyAuthCache] Load redis client failed"
+      );
+      getRedisClientFn = null;
+    }
+  }
+
+  if (!getRedisClientFn) return null;
+  return getRedisClientFn() as RedisLike | null;
+}
+
+function parseRequiredDate(value: unknown): Date | null {
+  const date = value instanceof Date ? value : new Date(String(value));
+  return Number.isNaN(date.getTime()) ? null : date;
+}
+
+function parseOptionalDate(value: unknown): Date | null | undefined {
+  if (value === undefined) return undefined;
+  if (value === null) return null;
+  return parseRequiredDate(value);
+}
+
+type CachedKeyPayloadV1 = {
+  v: 1;
+  key: Omit<Key, "key">;
+};
+
+type CachedUserPayloadV1 = {
+  v: 1;
+  user: User;
+};
+
+function hydrateKeyFromCache(keyString: string, payload: CachedKeyPayloadV1): Key | null {
+  const key = payload.key as unknown as Record<string, unknown>;
+  if (!key || typeof key !== "object") return null;
+  if (typeof key.id !== "number" || typeof key.userId !== "number") return null;
+  if (typeof key.name !== "string" || typeof key.isEnabled !== "boolean") return null;
+  if (typeof key.canLoginWebUi !== "boolean") return null;
+  if (typeof key.dailyResetMode !== "string" || typeof key.dailyResetTime !== "string") return null;
+  if (typeof key.limitConcurrentSessions !== "number") return null;
+
+  const createdAt = parseRequiredDate(key.createdAt);
+  const updatedAt = parseRequiredDate(key.updatedAt);
+  if (!createdAt || !updatedAt) return null;
+
+  const expiresAt = parseOptionalDate(key.expiresAt);
+  const deletedAt = parseOptionalDate(key.deletedAt);
+  if (key.expiresAt != null && !expiresAt) return null;
+  if (key.deletedAt != null && !deletedAt) return null;
+
+  return {
+    ...(payload.key as Omit<Key, "key">),
+    key: keyString,
+    createdAt,
+    updatedAt,
+    expiresAt: expiresAt === undefined ? undefined : expiresAt,
+    deletedAt: deletedAt === undefined ? undefined : deletedAt,
+  } as Key;
+}
+
+function hydrateUserFromCache(payload: CachedUserPayloadV1): User | null {
+  const user = payload.user as unknown as Record<string, unknown>;
+  if (!user || typeof user !== "object") return null;
+  if (typeof user.id !== "number" || typeof user.name !== "string") return null;
+  if (typeof user.role !== "string") return null;
+  if (typeof user.isEnabled !== "boolean") return null;
+  if (typeof user.dailyResetMode !== "string" || typeof user.dailyResetTime !== "string")
+    return null;
+
+  const createdAt = parseRequiredDate(user.createdAt);
+  const updatedAt = parseRequiredDate(user.updatedAt);
+  if (!createdAt || !updatedAt) return null;
+
+  const expiresAt = parseOptionalDate(user.expiresAt);
+  const deletedAt = parseOptionalDate(user.deletedAt);
+  if (user.expiresAt != null && !expiresAt) return null;
+  if (user.deletedAt != null && !deletedAt) return null;
+
+  return {
+    ...(payload.user as User),
+    createdAt,
+    updatedAt,
+    expiresAt: expiresAt === undefined ? undefined : expiresAt,
+    deletedAt: deletedAt === undefined ? undefined : deletedAt,
+  } as User;
+}
+
+function stripKeySecret(key: Key): Omit<Key, "key"> {
+  // eslint-disable-next-line @typescript-eslint/no-unused-vars
+  const { key: _secretKey, ...rest } = key;
+  return rest;
+}
+
+function resolveKeyCacheTtlSeconds(key: Key): number {
+  const base = getCacheTtlSeconds();
+  const expiresAt = parseOptionalDate(key.expiresAt);
+  // expiresAt is present but cannot be parsed: return 0 (do not cache) to stay on the safe side
+  if (key.expiresAt != null && !expiresAt) return 0;
+  if (!(expiresAt instanceof Date)) return base;
+
+  const remainingMs = expiresAt.getTime() - Date.now();
+  if (remainingMs <= 0) return 0;
+  const remainingSeconds = Math.max(1, Math.floor(remainingMs / 1000));
+  return Math.min(base, remainingSeconds);
+}
+
+export async function getCachedActiveKey(keyString: string): Promise<Key | null> {
+  const redis = await getRedisForApiKeyAuthCache();
+  if (!redis) return null;
+
+  const keyHash = await sha256Hex(keyString);
+  if (!keyHash) return null;
+  const redisKey = REDIS_KEYS.keyByHash(keyHash);
+
+  try {
+    const raw = await redis.get(redisKey);
+    if (!raw) return null;
+
+    const parsed = JSON.parse(raw) as CachedKeyPayloadV1;
+    if (parsed?.v !== 1 || !parsed.key) {
+      redis.del(redisKey).catch(() => {});
+      return null;
+    }
+
+    const hydrated = hydrateKeyFromCache(keyString, parsed);
+    if (!hydrated) {
+      redis.del(redisKey).catch(() => {});
+      return null;
+    }
+
+    // This cache holds only "active" keys: an entry that fails any check below is treated as stale and evicted
+    if (hydrated.isEnabled !== true) {
+      redis.del(redisKey).catch(() => {});
+      return null;
+    }
+    if (hydrated.deletedAt) {
+      redis.del(redisKey).catch(() => {});
+      return null;
+    }
+    if (hydrated.expiresAt && hydrated.expiresAt.getTime() <= Date.now()) {
+      redis.del(redisKey).catch(() => {});
+      return null;
+    }
+
+    return hydrated;
+  } catch (error) {
+    // Fail open: a cache error must not affect auth correctness (returning null lets the caller fall back to the DB)
+    logger.debug(
+      { error: error instanceof Error ? error.message : String(error) },
+      "[ApiKeyAuthCache] Read key cache failed"
+    );
+    return null;
+  }
+}
+
+export async function cacheActiveKey(key: Key): Promise<void> {
+  const redis = await getRedisForApiKeyAuthCache();
+  if (!redis) return;
+
+  const ttlSeconds = resolveKeyCacheTtlSeconds(key);
+  const expiresAt = parseOptionalDate(key.expiresAt);
+  const expiresAtInvalid = key.expiresAt != null && !expiresAt;
+  const isExpired = expiresAt instanceof Date && expiresAt.getTime() <= Date.now();
+
+  const keyHash = await sha256Hex(key.key);
+  if (!keyHash) return;
+  const redisKey = REDIS_KEYS.keyByHash(keyHash);
+
+  // Inactive key: evict any cached entry instead of writing, so a stale read can never wrongly admit it
+  if (key.isEnabled !== true || key.deletedAt || isExpired || expiresAtInvalid || ttlSeconds <= 0) {
+    try {
+      await redis.del(redisKey);
+    } catch {
+      // best-effort eviction: a failed delete is deliberately ignored
+    }
+    return;
+  }
+
+  const payload: CachedKeyPayloadV1 = { v: 1, key: stripKeySecret(key) };
+  try {
+    await redis.setex(redisKey, ttlSeconds, JSON.stringify(payload));
+  } catch (error) {
+    logger.debug(
+      { error: error instanceof Error ? error.message : String(error) },
+      "[ApiKeyAuthCache] Write key cache failed"
+    );
+  }
+}
+
+/**
+ * Removes the cached entry for an API key, if any.
+ *
+ * @param keyString - Raw key string; only its SHA-256 hex digest is used to address Redis.
+ * @returns Resolves after the delete was attempted; failures are ignored (best effort).
+ */
+export async function invalidateCachedKey(keyString: string): Promise<void> {
+  const client = await getRedisForApiKeyAuthCache();
+  if (!client) return;
+
+  const digest = await sha256Hex(keyString);
+  if (!digest) return;
+
+  const cacheKey = REDIS_KEYS.keyByHash(digest);
+  try {
+    await client.del(cacheKey);
+  } catch {
+    // ignore
+  }
+}
+
+/**
+ * Reads a user from the Redis auth cache.
+ *
+ * @param userId - Numeric user id used to build the Redis key.
+ * @returns The hydrated user, or `null` when Redis is unavailable, the entry is missing,
+ *   malformed, marked deleted, or any cache error occurs.
+ */
+export async function getCachedUser(userId: number): Promise<User | null> {
+  const redis = await getRedisForApiKeyAuthCache();
+  if (!redis) return null;
+
+  const redisKey = REDIS_KEYS.userById(userId);
+
+  // Fire-and-forget eviction of an unusable entry; a failed delete is deliberately ignored.
+  const evict = (): null => {
+    redis.del(redisKey).catch(() => {});
+    return null;
+  };
+
+  try {
+    const raw = await redis.get(redisKey);
+    if (!raw) return null;
+
+    const payload = JSON.parse(raw) as CachedUserPayloadV1;
+    if (payload?.v !== 1 || !payload.user) return evict();
+
+    const cachedUser = hydrateUserFromCache(payload);
+    // Semantics of validateApiKeyAndGetUser: the user only has to be "not deleted";
+    // isEnabled/expiresAt and similar state are validated by upper layers as needed (e.g. auth.ts).
+    if (!cachedUser || cachedUser.deletedAt) return evict();
+
+    return cachedUser;
+  } catch (error) {
+    logger.debug(
+      { error: error instanceof Error ? error.message : String(error) },
+      "[ApiKeyAuthCache] Read user cache failed"
+    );
+    return null;
+  }
+}
+
+/**
+ * Stores a non-deleted user in the Redis auth cache with the default cache TTL.
+ *
+ * @param user - User record to cache; users with `deletedAt` set are not written.
+ * @returns Resolves after the write was attempted; Redis errors are only logged at debug level.
+ */
+export async function cacheUser(user: User): Promise<void> {
+  const client = await getRedisForApiKeyAuthCache();
+  if (!client || user.deletedAt) return;
+
+  const ttlSeconds = getCacheTtlSeconds();
+  const cacheKey = REDIS_KEYS.userById(user.id);
+  const envelope: CachedUserPayloadV1 = { v: 1, user };
+
+  try {
+    await client.setex(cacheKey, ttlSeconds, JSON.stringify(envelope));
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    logger.debug({ error: message }, "[ApiKeyAuthCache] Write user cache failed");
+  }
+}
+
+/**
+ * Removes the cached entry for a user, if any.
+ *
+ * @param userId - Numeric user id used to build the Redis key.
+ * @returns Resolves after the delete was attempted; failures are ignored (best effort).
+ */
+export async function invalidateCachedUser(userId: number): Promise<void> {
+  const client = await getRedisForApiKeyAuthCache();
+  if (!client) return;
+
+  const cacheKey = REDIS_KEYS.userById(userId);
+  try {
+    await client.del(cacheKey);
+  } catch {
+    // ignore
+  }
+}
+
+/**
+ * Caches the key and user of a successful authentication in one Redis pipeline.
+ *
+ * Each half is either written (`setex`) or deleted: the key is written only when it is
+ * enabled, not deleted and has a positive TTL; the user is written only when not deleted.
+ *
+ * @param keyString - Raw key string; its SHA-256 hex digest addresses the key entry.
+ * @param value - The authenticated key/user pair.
+ * @returns Resolves after the pipeline was attempted; Redis errors are only logged at debug level.
+ */
+export async function cacheAuthResult(
+  keyString: string,
+  value: { key: Key; user: User }
+): Promise<void> {
+  const redis = await getRedisForApiKeyAuthCache();
+  if (!redis) return;
+
+  const { key, user } = value;
+  const keyHash = await sha256Hex(keyString);
+  if (!keyHash) return;
+
+  const keyRedisKey = REDIS_KEYS.keyByHash(keyHash);
+  const userRedisKey = REDIS_KEYS.userById(user.id);
+  const keyTtlSeconds = resolveKeyCacheTtlSeconds(key);
+  const userTtlSeconds = getCacheTtlSeconds();
+
+  try {
+    const batch = redis.pipeline();
+
+    const keyCacheable = keyTtlSeconds > 0 && key.isEnabled === true && !key.deletedAt;
+    if (!keyCacheable) {
+      batch.del(keyRedisKey);
+    } else {
+      const keyPayload: CachedKeyPayloadV1 = { v: 1, key: stripKeySecret(key) };
+      batch.setex(keyRedisKey, keyTtlSeconds, JSON.stringify(keyPayload));
+    }
+
+    if (user.deletedAt) {
+      batch.del(userRedisKey);
+    } else {
+      const userPayload: CachedUserPayloadV1 = { v: 1, user };
+      batch.setex(userRedisKey, userTtlSeconds, JSON.stringify(userPayload));
+    }
+
+    await batch.exec();
+  } catch (error) {
+    logger.debug(
+      { error: error instanceof Error ? error.message : String(error) },
+      "[ApiKeyAuthCache] Write auth cache failed"
+    );
+  }
+}

+ 384 - 0
src/lib/security/api-key-vacuum-filter.ts

@@ -0,0 +1,384 @@
+import { logger } from "@/lib/logger";
+import { VacuumFilter } from "@/lib/vacuum-filter/vacuum-filter";
+import { randomBytes } from "@/lib/vacuum-filter/random";
+
+/** Snapshot of the filter's state as returned by `ApiKeyVacuumFilter.getStats()`. */
+type ApiKeyVacuumFilterStats = {
+  // Whether the filter is enabled at all (decided once in the constructor).
+  enabled: boolean;
+  // True when a built filter instance is currently installed.
+  ready: boolean;
+  // True while a background reload promise is in flight.
+  loading: boolean;
+  // `Date.now()` timestamp of the last successful reload, or null if none happened yet.
+  lastReloadAt: number | null;
+  // Number of distinct key strings read from the database during the last successful reload.
+  sourceKeyCount: number;
+  // `size()` of the installed filter, 0 when no filter is installed.
+  filterSize: number;
+  // `loadFactor()` of the installed filter, 0 when no filter is installed.
+  filterLoadFactor: number;
+  // Fingerprint width passed to the filter on build.
+  fingerprintBits: number;
+  // Kick-step limit passed to the filter on build.
+  maxKickSteps: number;
+};
+
+/** Options accepted by the reload entry points of `ApiKeyVacuumFilter`. */
+type ReloadOptions = {
+  // Free-form label describing why the reload was requested (appears in logs).
+  reason: string;
+  /**
+   * Whether to force the reload (ignoring the cooldown).
+   *
+   * Use case:
+   * - In multi-instance deployments, after receiving a "key was created" broadcast the filter
+   *   must be rebuilt as soon as possible to avoid wrongful rejections.
+   */
+  force?: boolean;
+};
+
+/**
+ * Pure builder: creates a VacuumFilter containing every distinct, non-empty key string.
+ *
+ * Exported because:
+ * - it is easy to test (no DB dependency)
+ * - it leaves room for future sources (e.g. loading a snapshot from Redis or a file)
+ *
+ * The filter is sized slightly above the key count; if any insertion fails, the build is
+ * retried with a 1.6x larger capacity, up to 6 attempts.
+ *
+ * @param options - Key strings plus the filter parameters (`fingerprintBits`, `maxKickSteps`, `seed`).
+ * @returns A filter in which every distinct non-empty key was inserted successfully.
+ * @throws Error when all attempts fail to insert every key.
+ */
+export function buildVacuumFilterFromKeyStrings(options: {
+  keyStrings: string[];
+  fingerprintBits: number;
+  maxKickSteps: number;
+  seed: Uint8Array;
+}): VacuumFilter {
+  const { keyStrings, fingerprintBits, maxKickSteps, seed } = options;
+  const distinctKeys = Array.from(new Set(keyStrings)).filter((k) => k.length > 0);
+
+  // Goal: stay close to the Vacuum Filter's high-load design point while leaving a little
+  // headroom for incrementally added keys, so a fresh build is not immediately near its
+  // limit (which would cause frequent insert_failed rebuilds).
+  const targetLoadFactor = 0.96;
+  const desiredLoadFactor = 0.9;
+  const maxAttempts = 6;
+  const growthFactor = 1.6;
+
+  let capacity = Math.max(
+    128,
+    Math.ceil((distinctKeys.length * targetLoadFactor) / desiredLoadFactor)
+  );
+  let failure: Error | null = null;
+
+  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
+    const candidate = new VacuumFilter({
+      maxItems: capacity,
+      fingerprintBits,
+      maxKickSteps,
+      seed,
+      targetLoadFactor,
+    });
+
+    // `every` stops at the first failed insertion, just like an early `break`.
+    if (distinctKeys.every((k) => candidate.add(k))) {
+      return candidate;
+    }
+
+    failure = new Error(`build failed at attempt=${attempt}, maxItems=${capacity}`);
+    capacity = Math.ceil(capacity * growthFactor);
+  }
+
+  throw failure ?? new Error("Vacuum filter build failed");
+}
+
+/**
+ * API Key Vacuum Filter (process-wide singleton).
+ *
+ * Purpose:
+ * - Before hitting the database, use the vacuum filter to quickly identify keys that
+ *   definitely do not exist and reject them outright (less DB load, brute-force resistance).
+ *
+ * Key security semantics:
+ * - Used only for negative short-circuiting: only filter.has(key)===false means "definitely absent".
+ * - filter.has(key)===true only means "possibly present"; the DB check is still mandatory
+ *   (so a false positive can never grant access).
+ *
+ * Correctness constraints:
+ * - Over-inclusion is allowed (e.g. disabled/expired keys, even fingerprints of deleted keys);
+ *   it only lowers the short-circuit hit rate and does not affect security.
+ * - Missing a valid key is strictly forbidden, because it would cause a wrongful rejection. Therefore:
+ *   - load the full key set from the DB at startup where possible (see instrumentation)
+ *   - insert incrementally when a key is created (createKey -> noteExistingKey)
+ */
+class ApiKeyVacuumFilter {
+  private readonly enabled: boolean;
+  private readonly seed: Uint8Array;
+  private readonly fingerprintBits = 32;
+  private readonly maxKickSteps = 500;
+
+  private vf: VacuumFilter | null = null;
+  private loadingPromise: Promise<void> | null = null;
+
+  // Important: while vf is not ready (or is being rebuilt), new keys may be created in that window.
+  // If they are not recorded and included in the next rebuild, a valid key would be missing,
+  // which leads to a wrongful rejection (false negative).
+  private pendingKeys = new Set<string>();
+  private readonly pendingKeysLimit = 10_000;
+
+  // If another reload request arrives during a rebuild (e.g. a key-created broadcast in a
+  // multi-instance setup), one more rebuild has to run serially afterwards.
+  private pendingReloadReason: string | null = null;
+  private pendingReloadForce = false;
+
+  private lastReloadAttemptAt: number | null = null;
+  private readonly reloadCooldownMs = 10_000;
+
+  private lastReloadAt: number | null = null;
+  private sourceKeyCount = 0;
+
+  constructor() {
+    // Enabled by default: works after an upgrade without extra configuration (negative
+    // short-circuit only; does not affect auth correctness).
+    // To debug or save resources, disable explicitly via ENABLE_API_KEY_VACUUM_FILTER=false/0
+    if (typeof process === "undefined") {
+      // Edge/browser environments without `process`: force-disable (avoids throwing on process.env)
+      this.enabled = false;
+    } else {
+      const isEdgeRuntime = process.env.NEXT_RUNTIME === "edge";
+      const raw = process.env.ENABLE_API_KEY_VACUUM_FILTER?.trim();
+      const explicitlyDisabled = raw === "false" || raw === "0";
+      this.enabled = !isEdgeRuntime && !explicitlyDisabled;
+    }
+    this.seed = randomBytes(16);
+  }
+
+  /**
+   * Returns:
+   * - true: the filter is certain the key does not exist (safe to reject directly)
+   * - false: the filter thinks the key may exist (must continue to the DB)
+   * - null: the filter is not ready or not enabled (do not short-circuit)
+   */
+  isDefinitelyNotPresent(keyString: string): boolean | null {
+    if (!this.enabled) return null;
+
+    // During a rebuild: safety first, no short-circuit (avoids wrongful rejections from a possibly stale vf)
+    if (this.loadingPromise) {
+      return null;
+    }
+
+    const vf = this.vf;
+    if (!vf) {
+      // Lazy load: warm up in the background on first use (still "safety first": no short-circuit until ready)
+      this.startBackgroundReload({ reason: "lazy_warmup" });
+      return null;
+    }
+
+    return !vf.has(keyString);
+  }
+
+  /**
+   * Adds a key that is known to exist to the filter (keeps newly created keys usable right away).
+   *
+   * Note: per the original author, a failed insert does not affect correctness (requests still
+   * go to the DB), it only lowers the short-circuit hit rate; a background rebuild repairs it.
+   */
+  noteExistingKey(keyString: string): void {
+    if (!this.enabled) return;
+    const trimmed = keyString.trim();
+    if (!trimmed) return;
+
+    try {
+      const vf = this.vf;
+      if (!vf) {
+        // vf not ready: record the key as pending so the next rebuild covers it (avoids wrongful rejection)
+        if (this.pendingKeys.size < this.pendingKeysLimit) {
+          this.pendingKeys.add(trimmed);
+        } else {
+          logger.warn("[ApiKeyVacuumFilter] Pending keys overflow; scheduling rebuild", {
+            limit: this.pendingKeysLimit,
+          });
+        }
+        this.startBackgroundReload({ reason: "pending_key", force: true });
+        return;
+      }
+
+      // Rebuild in progress: also record as pending so the new filter cannot miss this key
+      if (this.loadingPromise) {
+        if (this.pendingKeys.size < this.pendingKeysLimit) {
+          this.pendingKeys.add(trimmed);
+        } else {
+          logger.warn("[ApiKeyVacuumFilter] Pending keys overflow; scheduling rebuild", {
+            limit: this.pendingKeysLimit,
+          });
+        }
+
+        // Coalesce the reload request: run once more after the current rebuild so pendingKeys are included
+        this.startBackgroundReload({ reason: "pending_key_during_reload", force: true });
+      }
+
+      // Note: do not use vf.has(key) to "deduplicate" - has may be a transient false positive that
+      // disappears after later inserts/relocations, leaving a real key unwritten and eventually
+      // risking a wrongful rejection. For newly created (supposedly unique) keys, adding directly is safer.
+      const ok = vf.add(trimmed);
+      if (!ok) {
+        logger.warn("[ApiKeyVacuumFilter] Insert failed; scheduling rebuild", {
+          keyLength: trimmed.length,
+        });
+        // Safety first: a failed insert means the new key may not be covered.
+        // To avoid wrongful rejection (false negative), disable short-circuiting until the
+        // background rebuild has finished.
+        if (this.pendingKeys.size < this.pendingKeysLimit) {
+          this.pendingKeys.add(trimmed);
+        } else {
+          logger.warn("[ApiKeyVacuumFilter] Pending keys overflow; scheduling rebuild", {
+            limit: this.pendingKeysLimit,
+          });
+        }
+        this.vf = null;
+        this.startBackgroundReload({ reason: "insert_failed", force: true });
+      }
+    } catch (error) {
+      logger.warn("[ApiKeyVacuumFilter] noteExistingKey failed; scheduling rebuild", {
+        error: error instanceof Error ? error.message : String(error),
+      });
+      if (this.pendingKeys.size < this.pendingKeysLimit) {
+        this.pendingKeys.add(trimmed);
+      } else {
+        logger.warn("[ApiKeyVacuumFilter] Pending keys overflow; scheduling rebuild", {
+          limit: this.pendingKeysLimit,
+        });
+      }
+      this.vf = null;
+      try {
+        this.startBackgroundReload({ reason: "note_existing_key_failed", force: true });
+      } catch {
+        // ignore
+      }
+    }
+  }
+
+  /**
+   * External trigger: marks the filter as possibly stale and forces a background rebuild.
+   *
+   * Typical scenario: in a multi-instance deployment one instance created a new key; the
+   * other instances must rebuild as soon as possible to avoid wrongful rejections.
+   */
+  invalidateAndReload(options: ReloadOptions): void {
+    if (!this.enabled) return;
+    this.vf = null;
+    this.startBackgroundReload({ ...options, force: true });
+  }
+
+  /**
+   * Starts a background reload from the database unless one is already running.
+   *
+   * - While a reload is in flight, the request is remembered and replayed once afterwards.
+   * - Non-forced requests are dropped when the previous attempt started less than
+   *   `reloadCooldownMs` ago.
+   * - Reload failures are logged and never thrown to the caller.
+   */
+  startBackgroundReload(options: ReloadOptions): void {
+    if (!this.enabled) return;
+    if (this.loadingPromise) {
+      // Rebuild in progress: coalesce the request and run once more after it finishes
+      // (avoids missing new keys because an old snapshot was read)
+      this.pendingReloadReason = options.reason;
+      this.pendingReloadForce = this.pendingReloadForce || options.force === true;
+      return;
+    }
+
+    const now = Date.now();
+    if (
+      options.force !== true &&
+      this.lastReloadAttemptAt &&
+      now - this.lastReloadAttemptAt < this.reloadCooldownMs
+    ) {
+      return;
+    }
+    this.lastReloadAttemptAt = now;
+
+    this.loadingPromise = this.reloadFromDatabase(options)
+      .catch((error) => {
+        logger.warn("[ApiKeyVacuumFilter] Reload failed", {
+          reason: options.reason,
+          error: error instanceof Error ? error.message : String(error),
+        });
+      })
+      .finally(() => {
+        this.loadingPromise = null;
+
+        // If another reload was requested during this rebuild, run one more serially (avoids missing keys)
+        if (this.pendingReloadReason) {
+          const reason = this.pendingReloadReason;
+          const force = this.pendingReloadForce;
+          this.pendingReloadReason = null;
+          this.pendingReloadForce = false;
+          this.startBackgroundReload({ reason, force });
+        }
+      });
+  }
+
+  /** Returns a snapshot of the filter state; size and load factor are 0 when no filter is installed. */
+  getStats(): ApiKeyVacuumFilterStats {
+    const vf = this.vf;
+    return {
+      enabled: this.enabled,
+      ready: !!vf,
+      loading: !!this.loadingPromise,
+      lastReloadAt: this.lastReloadAt,
+      sourceKeyCount: this.sourceKeyCount,
+      filterSize: vf?.size() ?? 0,
+      filterLoadFactor: vf?.loadFactor() ?? 0,
+      fingerprintBits: this.fingerprintBits,
+      maxKickSteps: this.maxKickSteps,
+    };
+  }
+
+  // ==================== Warm-up / rebuild ====================
+
+  /**
+   * Rebuilds the filter from all non-deleted keys in the database plus the pending keys.
+   *
+   * Returns without touching the filter in CI/test environments or when no usable DSN is
+   * configured. On a build failure the pending snapshot is merged back and the error rethrown.
+   */
+  private async reloadFromDatabase(options: ReloadOptions): Promise<void> {
+    // CI / test environments usually have no DB; avoid a flood of warning logs
+    const dsn = process.env.DSN || "";
+    if (
+      process.env.CI === "true" ||
+      process.env.NODE_ENV === "test" ||
+      process.env.VITEST === "true" ||
+      !dsn ||
+      dsn.includes("user:password@host:port")
+    ) {
+      logger.debug("[ApiKeyVacuumFilter] Skip reload (test env or DB not configured)");
+      return;
+    }
+
+    // Lazy import so build/test phases do not trigger DB initialization
+    const [{ db }, { keys }, { isNull }] = await Promise.all([
+      import("@/drizzle/db"),
+      import("@/drizzle/schema"),
+      import("drizzle-orm"),
+    ]);
+
+    const rows = await db
+      .select({ key: keys.key })
+      .from(keys)
+      // Only soft-deleted rows are excluded; disabled/expired keys stay in the filter (safe: no wrongful rejection)
+      .where(isNull(keys.deletedAt));
+
+    const keyStrings = rows
+      .map((r) => r.key)
+      .filter((v): v is string => typeof v === "string" && v.length > 0);
+
+    // Merge pendingKeys in: covers keys created during the rebuild window.
+    // The snapshot is taken by swapping the Set, avoiding the race window of snapshot-merge-clear:
+    // - keys added during the reload go into the new pendingKeys
+    // - keys in this snapshot are included in the built filter
+    // - if the build fails, the snapshot keys are merged back into pendingKeys so none are lost
+    const pendingSnapshotSet = this.pendingKeys;
+    this.pendingKeys = new Set<string>();
+    const pendingSnapshot =
+      pendingSnapshotSet.size > 0 ? Array.from(pendingSnapshotSet.values()) : [];
+
+    let built: VacuumFilter;
+    try {
+      built = buildVacuumFilterFromKeyStrings({
+        keyStrings: pendingSnapshot.length > 0 ? keyStrings.concat(pendingSnapshot) : keyStrings,
+        fingerprintBits: this.fingerprintBits,
+        maxKickSteps: this.maxKickSteps,
+        seed: this.seed,
+      });
+    } catch (error) {
+      // Build failed: roll the snapshot back so no key is lost (keys added during the reload are kept)
+      for (const k of pendingSnapshotSet.values()) {
+        if (this.pendingKeys.size >= this.pendingKeysLimit) break;
+        this.pendingKeys.add(k);
+      }
+      throw error;
+    }
+
+    this.vf = built;
+    this.sourceKeyCount = new Set(keyStrings).size;
+    this.lastReloadAt = Date.now();
+
+    logger.info("[ApiKeyVacuumFilter] Reloaded", {
+      reason: options.reason,
+      keyCount: this.sourceKeyCount,
+      loadFactor: Number(built.loadFactor().toFixed(4)),
+    });
+  }
+}
+
+// Keep a single instance on globalThis so dev-mode hot reloads reuse it instead of
+// instantiating the filter again.
+const globalHolder = globalThis as unknown as {
+  __CCH_API_KEY_VACUUM_FILTER__?: ApiKeyVacuumFilter;
+};
+
+export const apiKeyVacuumFilter =
+  globalHolder.__CCH_API_KEY_VACUUM_FILTER__ ??
+  (globalHolder.__CCH_API_KEY_VACUUM_FILTER__ = new ApiKeyVacuumFilter());

+ 9 - 1
src/lib/session-manager.ts

@@ -1331,7 +1331,11 @@ export class SessionManager {
   /**
    * 存储 session 响应体(临时存储,5分钟过期)
    *
-   * 存储策略受 STORE_SESSION_MESSAGES 控制:
+   * 存储行为受 STORE_SESSION_RESPONSE_BODY 控制:
+   * - true (默认):存储响应体到 Redis 临时缓存
+   * - false:不存储(注意:不影响本次请求处理与统计,仅影响后续查看 response body)
+   *
+   * 存储策略(脱敏/原样)受 STORE_SESSION_MESSAGES 控制:
    * - true:原样存储响应内容
    * - false(默认):对 JSON 响应体中的 message 内容脱敏 [REDACTED]
    *
@@ -1344,6 +1348,10 @@ export class SessionManager {
     response: string | object,
     requestSequence?: number
   ): Promise<void> {
+    // 允许通过环境变量显式关闭响应体存储(例如隐私/节省 Redis 内存)。
+    // 注意:这里仅关闭“写入 Redis”这一步;调用方仍然可能在内存中读取响应体用于统计或错误检测。
+    if (!getEnvConfig().STORE_SESSION_RESPONSE_BODY) return;
+
     const redis = getRedisClient();
     if (!redis || redis.status !== "ready") return;
 

+ 213 - 0
src/lib/utils/upstream-error-detection.test.ts

@@ -0,0 +1,213 @@
+import { describe, expect, test } from "vitest";
+import { detectUpstreamErrorFromSseOrJsonText } from "@/lib/utils/upstream-error-detection";
+
+// Behavioural tests for the fake-200 detector: empty bodies, plain JSON, detail
+// sanitizing/truncation, option overrides, and SSE payloads.
+describe("detectUpstreamErrorFromSseOrJsonText", () => {
+  test("空响应体视为错误", () => {
+    expect(detectUpstreamErrorFromSseOrJsonText("")).toEqual({
+      isError: true,
+      code: "FAKE_200_EMPTY_BODY",
+    });
+  });
+
+  test("纯空白响应体视为错误", () => {
+    expect(detectUpstreamErrorFromSseOrJsonText("   \n\t  ")).toEqual({
+      isError: true,
+      code: "FAKE_200_EMPTY_BODY",
+    });
+  });
+
+  test("纯 JSON:error 字段非空视为错误", () => {
+    const res = detectUpstreamErrorFromSseOrJsonText('{"error":"当前无可用凭证"}');
+    expect(res.isError).toBe(true);
+  });
+
+  test("纯 JSON:error 为对象且 error.message 非空视为错误", () => {
+    const res = detectUpstreamErrorFromSseOrJsonText(
+      JSON.stringify({ error: { message: "error: no credentials" } })
+    );
+    expect(res.isError).toBe(true);
+  });
+
+  test.each([
+    '{"error":true}',
+    '{"error":42}',
+  ])("纯 JSON:error 为非字符串类型也应视为错误(%s)", (body) => {
+    const res = detectUpstreamErrorFromSseOrJsonText(body);
+    expect(res.isError).toBe(true);
+  });
+
+  test("JSON 数组输入不视为错误(目前不做解析)", () => {
+    const res = detectUpstreamErrorFromSseOrJsonText('[{"error":"something"}]');
+    expect(res.isError).toBe(false);
+  });
+
+  test("detail 应对 Bearer token 做脱敏(避免泄露到日志/Redis/DB)", () => {
+    const res = detectUpstreamErrorFromSseOrJsonText('{"error":"Bearer abc.def_ghi"}');
+    expect(res.isError).toBe(true);
+    if (res.isError) {
+      const detail = res.detail ?? "";
+      expect(detail).toContain("Bearer [REDACTED]");
+      expect(detail).not.toContain("abc.def_ghi");
+    }
+  });
+
+  test("detail 应对常见 API key 前缀做脱敏(避免泄露到日志/Redis/DB)", () => {
+    const res = detectUpstreamErrorFromSseOrJsonText('{"error":"sk-1234567890abcdef123456"}');
+    expect(res.isError).toBe(true);
+    if (res.isError) {
+      const detail = res.detail ?? "";
+      expect(detail).toContain("[REDACTED_KEY]");
+      expect(detail).not.toContain("sk-1234567890abcdef123456");
+    }
+  });
+
+  test("detail 应对 JWT 做脱敏(避免泄露到日志/Redis/DB)", () => {
+    const jwt = "eyJaaaaaaaaaaaaaaa.bbbbbbbbbbbbbbbbbbbb.cccccccccccccccccccc";
+    const res = detectUpstreamErrorFromSseOrJsonText(JSON.stringify({ error: jwt }));
+    expect(res.isError).toBe(true);
+    if (res.isError) {
+      const detail = res.detail ?? "";
+      expect(detail).toContain("[JWT]");
+      expect(detail).not.toContain("eyJaaaaaaaaaaaaaaa");
+    }
+  });
+
+  test("detail 应对 email 做脱敏(避免泄露到日志/Redis/DB)", () => {
+    // The input must contain a real address (with "@") for the email redaction to apply.
+    const res = detectUpstreamErrorFromSseOrJsonText(
+      JSON.stringify({ error: "user@example.com is not allowed" })
+    );
+    expect(res.isError).toBe(true);
+    if (res.isError) {
+      const detail = res.detail ?? "";
+      expect(detail).toContain("[EMAIL]");
+      expect(detail).not.toContain("user@example.com");
+    }
+  });
+
+  test("detail 应对通用敏感键值做脱敏(避免泄露到日志/Redis/DB)", () => {
+    const res = detectUpstreamErrorFromSseOrJsonText(
+      JSON.stringify({ error: 'token=abc123 secret:xyz password:"p@ss" api_key=key123' })
+    );
+    expect(res.isError).toBe(true);
+    if (res.isError) {
+      const detail = res.detail ?? "";
+      expect(detail).toContain("token:***");
+      expect(detail).toContain("secret:***");
+      expect(detail).toContain("password:***");
+      expect(detail).toContain("api_key:***");
+      expect(detail).not.toContain("abc123");
+      expect(detail).not.toContain("xyz");
+      expect(detail).not.toContain("p@ss");
+      expect(detail).not.toContain("key123");
+    }
+  });
+
+  test("detail 应对常见配置/凭证路径做脱敏(避免泄露到日志/Redis/DB)", () => {
+    const res = detectUpstreamErrorFromSseOrJsonText(
+      JSON.stringify({ error: "failed to read /etc/app/config.yaml" })
+    );
+    expect(res.isError).toBe(true);
+    if (res.isError) {
+      const detail = res.detail ?? "";
+      expect(detail).toContain("[PATH]");
+      expect(detail).not.toContain("config.yaml");
+    }
+  });
+
+  test("detail 过长时应截断(避免把大段响应写入日志/DB)", () => {
+    const longText = "a".repeat(250);
+    const res = detectUpstreamErrorFromSseOrJsonText(JSON.stringify({ error: longText }));
+    expect(res.isError).toBe(true);
+    if (res.isError) {
+      const detail = res.detail ?? "";
+      expect(detail.endsWith("…")).toBe(true);
+      expect(detail.length).toBeLessThanOrEqual(201);
+    }
+  });
+
+  test("纯 JSON:error 为空字符串不视为错误", () => {
+    const res = detectUpstreamErrorFromSseOrJsonText('{"error":""}');
+    expect(res.isError).toBe(false);
+  });
+
+  test("纯 JSON:message 不包含关键字不视为错误", () => {
+    const res = detectUpstreamErrorFromSseOrJsonText('{"message":"all good"}');
+    expect(res.isError).toBe(false);
+  });
+
+  test("纯 JSON:小于 1000 字符且 message 包含 error 字样视为错误", () => {
+    const res = detectUpstreamErrorFromSseOrJsonText('{"message":"some error happened"}');
+    expect(res.isError).toBe(true);
+  });
+
+  test("纯 JSON:options.messageKeyword 可覆盖默认关键字判定", () => {
+    const res = detectUpstreamErrorFromSseOrJsonText('{"message":"boom happened"}', {
+      messageKeyword: /boom/i,
+    });
+    expect(res).toEqual({
+      isError: true,
+      code: "FAKE_200_JSON_MESSAGE_KEYWORD_MATCH",
+      detail: "boom happened",
+    });
+  });
+
+  test("纯 JSON:options.maxJsonCharsForMessageCheck 可关闭 message 关键字检测", () => {
+    const res = detectUpstreamErrorFromSseOrJsonText('{"message":"some error happened"}', {
+      maxJsonCharsForMessageCheck: 5,
+    });
+    expect(res.isError).toBe(false);
+  });
+
+  test("纯 JSON:大于等于 1000 字符时不做 message 关键字判定", () => {
+    const longMessage = "a".repeat(1000);
+    const res = detectUpstreamErrorFromSseOrJsonText(
+      JSON.stringify({ message: `${longMessage} error ${longMessage}` })
+    );
+    expect(res.isError).toBe(false);
+  });
+
+  test("纯 JSON:非法 JSON 不抛错且不视为错误", () => {
+    const res = detectUpstreamErrorFromSseOrJsonText("{not-json}");
+    expect(res.isError).toBe(false);
+  });
+
+  test("SSE:data JSON 包含非空 error 字段视为错误", () => {
+    const sse = ["event: message", 'data: {"error":"当前无可用凭证"}', ""].join("\n");
+    const res = detectUpstreamErrorFromSseOrJsonText(sse);
+    expect(res.isError).toBe(true);
+  });
+
+  test("SSE:data JSON error 为对象且 error.message 非空视为错误", () => {
+    const sse = ['data: {"error":{"message":"ERROR: no credentials"}}', ""].join("\n");
+    const res = detectUpstreamErrorFromSseOrJsonText(sse);
+    expect(res.isError).toBe(true);
+  });
+
+  test("SSE:data JSON 小于 1000 字符且 message 包含 error 字样视为错误", () => {
+    const sse = ['data: {"message":"ERROR: no credentials"}', ""].join("\n");
+    const res = detectUpstreamErrorFromSseOrJsonText(sse);
+    expect(res.isError).toBe(true);
+  });
+
+  test("SSE:message 为对象时不应误判为错误", () => {
+    // Anthropic-style SSE: the message field is usually an object (not an error string)
+    const sse = [
+      'data: {"type":"message_start","message":{"id":"msg_1","type":"message","role":"assistant"}}',
+      "",
+    ].join("\n");
+    const res = detectUpstreamErrorFromSseOrJsonText(sse);
+    expect(res.isError).toBe(false);
+  });
+
+  test("SSE:不包含 error/message key 时不解析且不视为错误", () => {
+    const sse = ['data: {"foo":"bar"}', ""].join("\n");
+    const res = detectUpstreamErrorFromSseOrJsonText(sse);
+    expect(res.isError).toBe(false);
+  });
+
+  test("SSE:仅有 [DONE] 不视为错误", () => {
+    const sse = ["data: [DONE]", ""].join("\n");
+    const res = detectUpstreamErrorFromSseOrJsonText(sse);
+    expect(res.isError).toBe(false);
+  });
+});

+ 249 - 0
src/lib/utils/upstream-error-detection.ts

@@ -0,0 +1,249 @@
+import { parseSSEData } from "@/lib/utils/sse";
+
+/**
+ * Upstream "fake 200" error detection (used only for internal statistics, circuit breaking
+ * and failover decisions).
+ *
+ * Background
+ * - Some upstream providers answer auth/quota/risk-control failures with HTTP 200 but put an
+ *   error JSON in the body (for example: `{"error":"当前无可用凭证"}`, i.e. "no credentials available").
+ * - In streaming SSE responses such an error may be wrapped in one of the `data: {...}` events.
+ * - Once CCH has started relaying SSE to the client it can no longer change the HTTP status
+ *   to 4xx/5xx, nor stop the error content from reaching the client.
+ *
+ * Why detect it anyway
+ * - CCH itself must at least realize that the request actually failed, so that it can:
+ *   1) count the failure for failover / provider circuit breaking;
+ *   2) avoid updating the session's sticky binding to a provider that is actually unusable;
+ *   3) give the client's next automatic retry a chance to switch provider (otherwise a
+ *      "fake 200" keeps retries on the same broken provider).
+ *
+ * Design goals (conservative)
+ * - Heuristics use structured fields only: `error` and `message`;
+ * - Model-generated body content (e.g. content/choices) is not scanned, so "error" in user or
+ *   model natural language is not mistaken for an upstream error;
+ * - The message keyword check is enabled only for small JSON, limiting false positives and cost.
+ * - The returned `code` is a language-neutral error code (convenient for DB/monitoring/alerting);
+ * - The returned `detail` (if any) is sanitized and truncated: meant for log investigation,
+ *   not recommended as user-facing text.
+ */
+export type UpstreamErrorDetectionResult =
+  | { isError: false }
+  | {
+      isError: true;
+      code: string;
+      detail?: string;
+    };
+
+/** Tuning options for the fake-200 detection heuristics. */
+type DetectionOptions = {
+  /**
+   * Enables the message keyword check only for small JSON, avoiding false positives and
+   * needless work.
+   *
+   * Note: "size" here is the character count of the raw JSON text (or of a single SSE data
+   * payload after serialization), not the HTTP Content-Length.
+   */
+  maxJsonCharsForMessageCheck?: number;
+  /**
+   * Pattern used for the message keyword match (default /error/i).
+   *
+   * Note: this rule only applies to the `message` field (string).
+   * `error.message` is a stronger signal: whenever `error` is non-empty (including the object
+   * form) the response is classified as an error directly.
+   */
+  messageKeyword?: RegExp;
+};
+
+// Defaults applied when the caller does not override the corresponding DetectionOptions field.
+const DEFAULT_MAX_JSON_CHARS_FOR_MESSAGE_CHECK = 1000;
+const DEFAULT_MESSAGE_KEYWORD = /error/i;
+
+// Language-neutral result codes returned in `UpstreamErrorDetectionResult.code`.
+const FAKE_200_CODES = {
+  EMPTY_BODY: "FAKE_200_EMPTY_BODY",
+  JSON_ERROR_NON_EMPTY: "FAKE_200_JSON_ERROR_NON_EMPTY",
+  JSON_ERROR_MESSAGE_NON_EMPTY: "FAKE_200_JSON_ERROR_MESSAGE_NON_EMPTY",
+  JSON_MESSAGE_KEYWORD_MATCH: "FAKE_200_JSON_MESSAGE_KEYWORD_MATCH",
+} as const;
+
+// SSE fast filter: only call parseSSEData when the text looks like it contains the JSON key
+// (avoids needless parsing).
+// Note: the pattern must have the `"key"\s*:` form so it does not match an escaped `\"key\"`
+// inside JSON string content.
+const MAY_HAVE_JSON_ERROR_KEY = /"error"\s*:/;
+const MAY_HAVE_JSON_MESSAGE_KEY = /"message"\s*:/;
+
+/** Type guard: true for non-null, non-array objects. */
+function isPlainRecord(value: unknown): value is Record<string, unknown> {
+  if (typeof value !== "object" || value === null) return false;
+  return !Array.isArray(value);
+}
+
+/**
+ * Decides whether a value counts as "non-empty", i.e. whether an `error` field carries content.
+ *
+ * - null/undefined: empty
+ * - string: non-empty after trimming
+ * - number: anything except 0 and NaN (a default 0 is not treated as an error)
+ * - boolean: only `true`
+ * - array/object: must have at least one element/key
+ * - any other type: non-empty
+ */
+function hasNonEmptyValue(value: unknown): boolean {
+  if (value === null || value === undefined) return false;
+
+  switch (typeof value) {
+    case "string":
+      return value.trim().length > 0;
+    case "number":
+      return value !== 0 && !Number.isNaN(value);
+    case "boolean":
+      return value;
+    case "object":
+      return Array.isArray(value)
+        ? value.length > 0
+        : Object.keys(value as Record<string, unknown>).length > 0;
+    default:
+      return true;
+  }
+}
+
+/**
+ * Masks common secret-looking substrings in upstream error text before it is used as `detail`.
+ *
+ * The goal is not perfect redaction but lowering the risk that sensitive content carried in
+ * an upstream error message leaks into logs or storage. More patterns can be added later as
+ * long as the detection semantics stay unchanged.
+ *
+ * @param text - Raw error text taken from the upstream response.
+ * @returns The text with bearer tokens, API keys, JWTs, emails, sensitive key/value pairs
+ *   and config/credential file paths replaced by placeholders.
+ */
+function sanitizeErrorTextForDetail(text: string): string {
+  let sanitized = text;
+
+  // Bearer token
+  sanitized = sanitized.replace(/Bearer\s+[A-Za-z0-9._-]+/gi, "Bearer [REDACTED]");
+
+  // Common API key prefixes (OpenAI/Claude/Codex, etc.)
+  sanitized = sanitized.replace(/\b(?:sk|rk|pk)-[A-Za-z0-9_-]{16,}\b/giu, "[REDACTED_KEY]");
+  sanitized = sanitized.replace(/\bAIza[0-9A-Za-z_-]{16,}\b/g, "[REDACTED_KEY]");
+
+  // JWT (three base64url segments)
+  sanitized = sanitized.replace(
+    /\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b/g,
+    "[JWT]"
+  );
+
+  // Email
+  sanitized = sanitized.replace(/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g, "[EMAIL]");
+
+  // Generic sensitive key/value pairs (covers the common spellings)
+  sanitized = sanitized.replace(
+    /\b(password|token|secret|api[_-]?key)\b\s*[:=]\s*['"]?[^'"\s]+['"]?/gi,
+    "$1:***"
+  );
+
+  // Common config/credential paths (keeps file names out of audit fields).
+  // The base name may be empty so that dotfiles such as "/app/.env" are masked as well.
+  sanitized = sanitized.replace(/\/[\w.-]*\.(?:env|ya?ml|json|conf|ini)/gi, "[PATH]");
+
+  return sanitized;
+}
+
+/**
+ * Sanitizes and trims `text`, then caps it at `maxLen` characters.
+ *
+ * @param text - Raw error text.
+ * @param maxLen - Maximum number of characters kept before the ellipsis (default 200).
+ * @returns The cleaned text; when it was longer than `maxLen`, the first `maxLen` characters
+ *   followed by "…".
+ */
+function truncateForDetail(text: string, maxLen: number = 200): string {
+  const cleaned = sanitizeErrorTextForDetail(text).trim();
+  return cleaned.length > maxLen ? `${cleaned.slice(0, maxLen)}…` : cleaned;
+}
+
+/**
+ * Applies the fake-200 heuristics to one parsed JSON object.
+ *
+ * Priority:
+ * 1) `error` is non-empty: classified as an error directly (strong signal).
+ * 2) For small JSON, `message` matches the keyword: classified as an error (weak signal, but
+ *    covers cases where the error is only written in `message`).
+ *
+ * @param obj - Parsed JSON object to inspect.
+ * @param rawJsonChars - Character count of the JSON text; the message check runs only when it
+ *   is below `options.maxJsonCharsForMessageCheck`.
+ * @param options - Fully resolved detection options.
+ * @returns The detection result; `detail` is sanitized and truncated when present.
+ */
+function detectFromJsonObject(
+  obj: Record<string, unknown>,
+  rawJsonChars: number,
+  options: Required<Pick<DetectionOptions, "maxJsonCharsForMessageCheck" | "messageKeyword">>
+): UpstreamErrorDetectionResult {
+  const errorValue = obj.error;
+  if (hasNonEmptyValue(errorValue)) {
+    // Prefer the string or error.message for detail instead of dumping the whole object
+    if (typeof errorValue === "string") {
+      return {
+        isError: true,
+        code: FAKE_200_CODES.JSON_ERROR_NON_EMPTY,
+        detail: truncateForDetail(errorValue),
+      };
+    }
+
+    if (isPlainRecord(errorValue) && typeof errorValue.message === "string") {
+      return {
+        isError: true,
+        code: FAKE_200_CODES.JSON_ERROR_MESSAGE_NON_EMPTY,
+        detail: truncateForDetail(errorValue.message),
+      };
+    }
+
+    return { isError: true, code: FAKE_200_CODES.JSON_ERROR_NON_EMPTY };
+  }
+
+  if (rawJsonChars < options.maxJsonCharsForMessageCheck) {
+    const message = typeof obj.message === "string" ? obj.message : null;
+
+    // Note: only the message field itself is checked; no other field is scanned.
+    // `search` always matches from index 0 and leaves the pattern's `lastIndex` untouched, so
+    // a caller-supplied global/sticky RegExp gives the same answer on every call (unlike
+    // `RegExp.prototype.test`, which resumes from the previous match position).
+    if (message && message.search(options.messageKeyword) !== -1) {
+      return {
+        isError: true,
+        code: FAKE_200_CODES.JSON_MESSAGE_KEYWORD_MATCH,
+        detail: truncateForDetail(message),
+      };
+    }
+  }
+
+  return { isError: false };
+}
+
+/**
+ * Supplementary check meant to run after a streaming SSE response has finished:
+ * - empty body: treated as an error
+ * - JSON containing a non-empty `error` field: treated as an error
+ * - JSON shorter than 1000 characters whose `message` contains "error": treated as an error
+ *
+ * Notes and limitations:
+ * - This function does not look at the HTTP status code; callers typically use it only when
+ *   the upstream returned 200 and the SSE stream ended normally.
+ * - For SSE text, only JSON inside `data:` events is inspected (via parseSSEData).
+ * - If the text is neither valid JSON nor SSE, the function returns `{isError:false}` (no
+ *   over-guessing).
+ *
+ * @param text - Full response body text (plain JSON or SSE).
+ * @param options - Optional overrides for the message keyword heuristics.
+ * @returns The detection result for the first matching error signal, or `{ isError: false }`.
+ */
+export function detectUpstreamErrorFromSseOrJsonText(
+  text: string,
+  options: DetectionOptions = {}
+): UpstreamErrorDetectionResult {
+  const merged: Required<Pick<DetectionOptions, "maxJsonCharsForMessageCheck" | "messageKeyword">> =
+    {
+      maxJsonCharsForMessageCheck:
+        options.maxJsonCharsForMessageCheck ?? DEFAULT_MAX_JSON_CHARS_FOR_MESSAGE_CHECK,
+      messageKeyword: options.messageKeyword ?? DEFAULT_MESSAGE_KEYWORD,
+    };
+
+  const trimmed = text.trim();
+  if (!trimmed) {
+    return { isError: true, code: FAKE_200_CODES.EMPTY_BODY };
+  }
+
+  // Case 1: plain JSON (object)
+  // The upstream may declare an SSE Content-Type but actually return JSON; only the object
+  // form ({...}) is handled here. Arrays ([...]) are not handled to avoid false positives
+  // (their semantics vary a lot; extend later if it proves necessary).
+  if (trimmed.startsWith("{")) {
+    try {
+      const parsed = JSON.parse(trimmed) as unknown;
+      if (isPlainRecord(parsed)) {
+        return detectFromJsonObject(parsed, trimmed.length, merged);
+      }
+    } catch {
+      // JSON parse failure: not treated as an error, left to the upper layers
+    }
+    return { isError: false };
+  }
+
+  if (trimmed.startsWith("[")) {
+    return { isError: false };
+  }
+
+  // Case 2: SSE text. Fast filter: skip parsing when neither an "error" nor a "message" key is present
+  // Note: the key must match `"key"\s*:` to avoid matching an escaped `\"error\"` inside JSON string content.
+  if (!MAY_HAVE_JSON_ERROR_KEY.test(text) && !MAY_HAVE_JSON_MESSAGE_KEY.test(text)) {
+    return { isError: false };
+  }
+
+  // parseSSEData tries to parse each event's data into an object; only object data gets the structured check.
+  const events = parseSSEData(text);
+  for (const evt of events) {
+    if (!isPlainRecord(evt.data)) continue;
+    // Performance: the exact JSON character count is only needed when message is a string and
+    // looks small enough. Most SSE events (message is an object, or absent) need no JSON.stringify.
+    let chars = 0;
+    const errorValue = evt.data.error;
+    const messageValue = evt.data.message;
+    if (!hasNonEmptyValue(errorValue) && typeof messageValue === "string") {
+      if (messageValue.length >= merged.maxJsonCharsForMessageCheck) {
+        chars = merged.maxJsonCharsForMessageCheck; // at or above the threshold: the message keyword check is skipped
+      } else {
+        try {
+          chars = JSON.stringify(evt.data).length;
+        } catch {
+          // If stringify fails, fall back to an approximation (still honours "message check only for small JSON")
+          chars = messageValue.length;
+        }
+      }
+    }
+
+    const res = detectFromJsonObject(evt.data, chars, merged);
+    if (res.isError) return res;
+  }
+
+  return { isError: false };
+}

+ 23 - 0
src/lib/vacuum-filter/random.ts

@@ -0,0 +1,23 @@
+/**
+ * Minimal structural view of a Web Crypto object: only the one method this
+ * module calls. The caller passes a byte array and afterwards returns that
+ * same array, so the method is expected to fill `bytes` in place.
+ */
+type WebCryptoLike = {
+  getRandomValues(bytes: Uint8Array): Uint8Array;
+};
+
+/**
+ * Looks up `globalThis.crypto` and returns it only when it exposes a callable
+ * `getRandomValues`.
+ *
+ * @returns The global crypto object, or `null` when it is missing or unusable.
+ */
+function getWebCrypto(): WebCryptoLike | null {
+  const globalScope = globalThis as unknown as { crypto?: WebCryptoLike };
+  const candidate = globalScope.crypto;
+  if (!candidate) {
+    return null;
+  }
+  if (typeof candidate.getRandomValues !== "function") {
+    return null;
+  }
+  return candidate;
+}
+
+/**
+ * Returns a new array of `size` random bytes.
+ *
+ * Uses Web Crypto's `getRandomValues` when it is available. That API rejects
+ * views larger than 65,536 bytes (QuotaExceededError), so the buffer is filled
+ * in chunks of at most that size instead of in a single call.
+ *
+ * When Web Crypto is unavailable, falls back to `Math.random`. The fallback is
+ * not cryptographically secure; per the original note it is meant only for
+ * seeding and does not affect correctness.
+ *
+ * @param size Number of bytes to generate.
+ * @returns A `Uint8Array` of length `size`.
+ */
+export function randomBytes(size: number): Uint8Array {
+  const out = new Uint8Array(size);
+  const webCrypto = getWebCrypto();
+  if (webCrypto) {
+    // getRandomValues throws for views over 65,536 bytes, so fill chunk by chunk.
+    // subarray() shares the underlying buffer, so each call writes directly into `out`.
+    const maxChunkBytes = 65536;
+    for (let offset = 0; offset < out.length; offset += maxChunkBytes) {
+      webCrypto.getRandomValues(out.subarray(offset, offset + maxChunkBytes));
+    }
+    return out;
+  }
+
+  // Fallback for environments without Web Crypto: Math.random, one byte at a time.
+  for (let i = 0; i < out.length; i++) {
+    out[i] = Math.floor(Math.random() * 256);
+  }
+  return out;
+}

+ 606 - 0
src/lib/vacuum-filter/vacuum-filter.ts

@@ -0,0 +1,606 @@
+import { randomBytes } from "@/lib/vacuum-filter/random";
+
+// Shared UTF-8 encoder, used for string seeds and for keys that contain non-ASCII characters.
+const textEncoder = new TextEncoder();
+// Slots per bucket; the table holds numBuckets * BUCKET_SIZE entries.
+const BUCKET_SIZE = 4 as const;
+// Initial size (bytes) of the per-filter scratch buffer that keys are copied into before hashing.
+const DEFAULT_SCRATCH_BYTES = 256;
+
+/**
+ * Vacuum Filter(真空过滤器)
+ *
+ * 目标:
+ * - 近似集合成员查询(AMQ):支持插入 / 查询 / 删除
+ * - 无假阴性(在不发生“误删”的前提下):插入成功的元素,查询必定返回 true
+ * - 有假阳性:查询可能返回 true,但元素实际不存在(由 fingerprint 位数决定)
+ *
+ * 实现要点(对照论文与作者参考实现):
+ * - 结构与 Cuckoo Filter 类似:每个元素映射到两个 bucket(i1 与 i2),每个 bucket 4 个 slot
+ * - Alternate Range(AR):i2 在 i1 的局部范围内(提升局部性并提高高负载下成功率)
+ * - Vacuuming:插入遇到满桶时,优先做“局部换位路径搜索”(一跳前瞻),把空位“吸”过来,降低反复踢出重试
+ *
+ * 注意:
+ * - 本实现为工程可用版本,核心算法与 vacuuming 逻辑对齐论文/作者代码,但未做 semi-sorting 的 bit packing;
+ *   为 API Key 防护场景选择 32-bit fingerprint 时仍然具备非常好的空间与性能表现。
+ * - 删除是“近似删除”:理论上仍可能因 fingerprint 碰撞导致误删(概率与 FPR 同数量级)。
+ *   对安全敏感场景建议使用 32-bit fingerprint,降低碰撞与误删风险。
+ */
+
+/** Construction options for `VacuumFilter`. */
+export type VacuumFilterInitOptions = {
+  /**
+   * Expected maximum number of inserted items (used to derive the bucket count and load).
+   * The closer it is to the real peak, the better the space utilization; a value
+   * that is too small may cause insertions to fail.
+   * Must be a finite positive number, otherwise the constructor throws.
+   */
+  maxItems: number;
+  /**
+   * Slots per bucket. The paper and common implementations use 4, and this
+   * implementation is fixed at 4.
+   */
+  bucketSize?: 4;
+  /**
+   * Fingerprint width in bits (floored and clamped to 1..32; default 32).
+   * - More bits mean fewer false positives. Note that this implementation stores
+   *   every slot in a 32-bit word, so the width does not change memory use here.
+   * - Recommended: 32 (security-sensitive use; minimizes collision / false-delete risk).
+   */
+  fingerprintBits?: number;
+  /**
+   * Maximum number of kick-out steps (default 500, minimum 1). When exhausted,
+   * `add` returns false and the caller may rebuild with a larger capacity.
+   */
+  maxKickSteps?: number;
+  /**
+   * Hash seed (defends against degradation / collision attacks from attacker-controlled input).
+   * - When omitted (or an empty string), 16 random bytes are generated each time
+   *   a filter is constructed, so the seed differs per instance and per restart.
+   */
+  seed?: Uint8Array | string;
+  /**
+   * Target load factor (higher saves memory but makes insertion harder).
+   * Default 0.96 (about what the paper / reference implementation uses); clamped to [0.5, 0.99].
+   */
+  targetLoadFactor?: number;
+};
+
+// Undo journal for table writes: `pos[i]` is a table index and `prev[i]` the value
+// that index held before it was overwritten. Entries are replayed in reverse on rollback.
+type UndoLog = { pos: number[]; prev: number[] };
+
+/**
+ * Small deterministic xorshift32 pseudo-random generator.
+ * Not cryptographically secure.
+ */
+class XorShift32 {
+  private state: number;
+
+  /**
+   * @param seed Initial state, truncated to an unsigned 32-bit value. A zero
+   *   state would make xorshift emit zeros forever, so it is replaced by a
+   *   fixed non-zero constant.
+   */
+  constructor(seed: number) {
+    const unsignedSeed = seed >>> 0;
+    if (unsignedSeed !== 0) {
+      this.state = unsignedSeed;
+    } else {
+      this.state = 0x9e3779b9;
+    }
+  }
+
+  /** Advances the generator and returns the new state as an unsigned 32-bit integer. */
+  nextU32(): number {
+    let word = this.state >>> 0;
+    word = (word ^ (word << 13)) >>> 0;
+    word = (word ^ (word >>> 17)) >>> 0;
+    word = (word ^ (word << 5)) >>> 0;
+    this.state = word;
+    return word;
+  }
+
+  /**
+   * Returns `nextU32() % maxExclusive`; returns 0 without advancing the
+   * generator when `maxExclusive <= 1`.
+   */
+  nextInt(maxExclusive: number): number {
+    if (maxExclusive <= 1) {
+      return 0;
+    }
+    return this.nextU32() % maxExclusive;
+  }
+
+  /** Returns the lowest bit of the next output as a boolean. */
+  nextBool(): boolean {
+    const lowBit = this.nextU32() & 1;
+    return lowBit === 1;
+  }
+}
+
+/**
+ * Returns the smallest power of two that is >= `x` (1 when `x <= 1`).
+ *
+ * Doubling is done arithmetically rather than with `<<`, because JS bitwise
+ * operators work on 32-bit integers and would overflow or turn negative for large values.
+ */
+function upperPower2(x: number): number {
+  let power = 1;
+  while (power < x) {
+    power += power;
+  }
+  return power;
+}
+
+/**
+ * Rounds `x` up to the next multiple of `base`.
+ *
+ * @returns `x` unchanged when `base <= 0` or when `x` is already a multiple of `base`.
+ */
+function roundUpToMultiple(x: number, base: number): number {
+  if (base <= 0) {
+    return x;
+  }
+  const remainder = x % base;
+  if (remainder !== 0) {
+    return x + (base - remainder);
+  }
+  return x;
+}
+
+/**
+ * Solves 1 + x(log c - log x + 1) - c = 0 for x with Newton's method, starting
+ * at x = c + 0.1 (per the original comment, this mirrors the same-named
+ * function in the reference implementation).
+ *
+ * Stops once the residual magnitude is at most 0.001 or after 10,000 updates.
+ * If an update yields a non-finite or non-positive x, returns the conservative
+ * value `c + 1` instead of iterating further.
+ */
+function solveEquation(c: number): number {
+  const logC = Math.log(c);
+  let root = c + 0.1;
+  for (let updates = 0; updates < 10_000; updates++) {
+    // logC - log(root) is both the derivative and a term of the residual.
+    const slope = logC - Math.log(root);
+    const residual = 1 + root * (slope + 1) - c;
+    if (!(Math.abs(residual) > 0.001)) {
+      break;
+    }
+    root -= residual / slope;
+    if (!Number.isFinite(root) || root <= 0) {
+      // Numeric blow-up: bail out with a conservative value.
+      return c + 1;
+    }
+  }
+  return root;
+}
+
+/**
+ * Upper bound on the maximum bin load when `balls` balls are thrown into
+ * `bins` bins (per the original comment, ported from the same-named function
+ * in the reference implementation).
+ *
+ * @returns `balls` when there is at most one bin; otherwise one of two
+ *   estimates, chosen by the ratio c = balls / (bins * ln bins).
+ */
+function ballsInBinsMaxLoad(balls: number, bins: number): number {
+  if (bins <= 1) {
+    return balls;
+  }
+
+  const logBins = Math.log(bins);
+  const ratio = balls / (bins * logBins);
+
+  // The original notes this as the more accurate bound for the c < 5 range.
+  const useEquationBound = ratio < 5;
+  if (useEquationBound) {
+    const dc = solveEquation(ratio);
+    return (dc - 1 + 2) * logBins;
+  }
+
+  const deviation = Math.sqrt((2 * balls * logBins) / bins);
+  return balls / bins + 1.5 * deviation;
+}
+
+/**
+ * Picks a power-of-two Alternate Range (AR) for one tag group; per the
+ * original comment, ported from `proper_alt_range` in the authors' reference
+ * implementation.
+ *
+ * Intuition (from the original notes):
+ * - Smaller AR: better locality, but local congestion at high load makes inserts fail more often.
+ * - Larger AR: easier to find a free slot, but worse locality.
+ * - The Vacuum Filter uses several AR sizes (grouped by the low bits of the tag) to balance both.
+ *
+ * @param bucketCount Number of buckets in the table.
+ * @param groupIndex Group number; the group's share is taken as (4 - groupIndex) / 4.
+ * @returns The first power of two (starting at 8) whose estimated maximum load
+ *   fits under 97% of the range's slots, or the first power of two >= `bucketCount`
+ *   if none qualifies.
+ */
+function properAltRange(bucketCount: number, groupIndex: number): number {
+  const slotsPerBucket = 4;
+  const estimatedLoadFactor = 0.95; // load factor used for the estimate
+  const groupShare = (4 - groupIndex) * 0.25; // fraction attributed to this group (reference implementation)
+  const balls = groupShare * slotsPerBucket * estimatedLoadFactor * bucketCount;
+
+  let candidate = 8;
+  // Doubling by multiplication avoids 32-bit shift overflow.
+  for (; candidate < bucketCount; candidate *= 2) {
+    const bins = bucketCount / candidate;
+    const slotBudget = 0.97 * slotsPerBucket * candidate;
+    if (ballsInBinsMaxLoad(balls, bins) < slotBudget) {
+      break;
+    }
+  }
+  return candidate;
+}
+
+/**
+ * Converts the optional seed into bytes.
+ *
+ * @returns 16 random bytes when `seed` is missing or an empty string, the UTF-8
+ *   encoding of a non-empty string seed, or a copy of a `Uint8Array` seed.
+ */
+function normalizeSeed(seed?: VacuumFilterInitOptions["seed"]): Uint8Array {
+  if (seed) {
+    return typeof seed === "string" ? textEncoder.encode(seed) : new Uint8Array(seed);
+  }
+  return randomBytes(16);
+}
+
+/**
+ * Reads four bytes starting at `offset` as an unsigned little-endian 32-bit
+ * integer. Positions beyond the end of `bytes` are read as 0.
+ */
+function readU32LE(bytes: Uint8Array, offset: number): number {
+  const b0 = bytes[offset] ?? 0;
+  const b1 = bytes[offset + 1] ?? 0;
+  const b2 = bytes[offset + 2] ?? 0;
+  const b3 = bytes[offset + 3] ?? 0;
+  const combined = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
+  // `>>> 0` reinterprets the signed 32-bit result as unsigned.
+  return combined >>> 0;
+}
+
+// MurmurHash3 (x86, 32-bit) key scramble: multiply, rotate left by 15, multiply.
+function murmurScrambleKey(k: number): number {
+  let v = Math.imul(k, 0xcc9e2d51) >>> 0;
+  v = ((v << 15) | (v >>> 17)) >>> 0;
+  return Math.imul(v, 0x1b873593) >>> 0;
+}
+
+// Folds one scrambled 4-byte block into a hash lane: xor, rotate left by 13, then h * 5 + constant.
+function murmurMixLane(h: number, k: number): number {
+  let v = h ^ k;
+  v = ((v << 13) | (v >>> 19)) >>> 0;
+  return (Math.imul(v, 5) + 0xe6546b64) >>> 0;
+}
+
+// MurmurHash3 finalizer (fmix32) applied after xoring in the input length.
+function murmurFinalizeLane(h: number, length: number): number {
+  let v = h ^ length;
+  v ^= v >>> 16;
+  v = Math.imul(v, 0x85ebca6b) >>> 0;
+  v ^= v >>> 13;
+  v = Math.imul(v, 0xc2b2ae35) >>> 0;
+  v ^= v >>> 16;
+  return v >>> 0;
+}
+
+/**
+ * Computes two MurmurHash3 x86 32-bit hashes of the same bytes in one pass,
+ * one per seed (used to derive the bucket index and the tag).
+ *
+ * @param bytes Input buffer.
+ * @param len Number of leading bytes of `bytes` to hash.
+ * @param seedA Seed of the first hash (result in `out[0]`).
+ * @param seedB Seed of the second hash (result in `out[1]`).
+ * @param out Receives the two unsigned 32-bit results.
+ */
+function murmur3X86_32x2(
+  bytes: Uint8Array,
+  len: number,
+  seedA: number,
+  seedB: number,
+  out: Uint32Array
+): void {
+  const totalLen = len >>> 0;
+  const tailLen = totalLen & 3;
+  const bodyEnd = totalLen - tailLen;
+
+  let laneA = seedA >>> 0;
+  let laneB = seedB >>> 0;
+
+  // Body: 4-byte little-endian blocks; the scrambled block is shared by both lanes.
+  for (let i = 0; i < bodyEnd; i += 4) {
+    const word =
+      (bytes[i] | (bytes[i + 1] << 8) | (bytes[i + 2] << 16) | (bytes[i + 3] << 24)) >>> 0;
+    const k = murmurScrambleKey(word);
+    laneA = murmurMixLane(laneA, k);
+    laneB = murmurMixLane(laneB, k);
+  }
+
+  // Tail: the remaining 1..3 bytes are scrambled and xored in without the lane rotation step.
+  if (tailLen > 0) {
+    let tailWord = 0;
+    for (let i = tailLen - 1; i >= 0; i--) {
+      tailWord ^= bytes[bodyEnd + i] << (8 * i);
+    }
+    tailWord = murmurScrambleKey(tailWord);
+    laneA ^= tailWord;
+    laneB ^= tailWord;
+  }
+
+  out[0] = murmurFinalizeLane(laneA, totalLen);
+  out[1] = murmurFinalizeLane(laneB, totalLen);
+}
+
+/**
+ * Approximate set-membership filter with insert / query / delete.
+ *
+ * Each key is hashed to a primary bucket index and a non-zero fingerprint (tag).
+ * The alternate bucket is derived from the primary index and the tag, and stays
+ * inside a power-of-two segment around the primary index. Buckets hold 4 slots;
+ * a slot value of 0 means "empty". When both candidate buckets are full, `add`
+ * relocates existing tags and rolls every change back if it ultimately fails.
+ */
+export class VacuumFilter {
+  private readonly fingerprintBits: number;
+  private readonly tagMask: number;
+  private readonly maxKickSteps: number;
+  private readonly seed: Uint8Array;
+  private readonly hashSeedA: number;
+  private readonly hashSeedB: number;
+  private readonly rng: XorShift32;
+
+  // The number of AR (alternate range) groups is fixed at 4 (per the original note,
+  // consistent with the paper / reference implementation). Indexed by `tag & 3`.
+  private readonly lenMasks: [number, number, number, number];
+
+  private readonly numBuckets: number;
+  private readonly table: Uint32Array;
+  private numItems = 0;
+
+  // Hot-path optimization: avoid TextEncoder.encode allocations; every has/add/delete
+  // reuses the same scratch buffer. indexTag() leaves its results in tmpIndex / tmpTag.
+  private scratch: Uint8Array = new Uint8Array(DEFAULT_SCRATCH_BYTES);
+  private readonly hashOut: Uint32Array = new Uint32Array(2);
+  private tmpIndex = 0;
+  private tmpTag = 0;
+
+  /**
+   * Builds an empty filter sized for `options.maxItems`.
+   *
+   * @throws Error when `maxItems` is not a finite positive number.
+   */
+  constructor(options: VacuumFilterInitOptions) {
+    if (!Number.isFinite(options.maxItems) || options.maxItems <= 0) {
+      throw new Error("VacuumFilter: maxItems 必须为正数");
+    }
+
+    // fingerprintBits: floor a finite number, default 32, then clamp to 1..32.
+    const rawFingerprintBits = options.fingerprintBits;
+    const fingerprintBits =
+      typeof rawFingerprintBits === "number" && Number.isFinite(rawFingerprintBits)
+        ? Math.floor(rawFingerprintBits)
+        : 32;
+    this.fingerprintBits = Math.max(1, Math.min(32, fingerprintBits));
+
+    // maxKickSteps: floor a finite number, default 500, at least 1.
+    const rawMaxKickSteps = options.maxKickSteps;
+    const maxKickSteps =
+      typeof rawMaxKickSteps === "number" && Number.isFinite(rawMaxKickSteps)
+        ? Math.floor(rawMaxKickSteps)
+        : 500;
+    this.maxKickSteps = Math.max(1, maxKickSteps);
+    // Three 32-bit words of the seed (offsets 0, 4, 8) drive the two hash seeds and the RNG.
+    this.seed = normalizeSeed(options.seed);
+    this.hashSeedA = (readU32LE(this.seed, 0) ^ 0x6a09e667) >>> 0;
+    this.hashSeedB = (readU32LE(this.seed, 4) ^ 0xbb67ae85) >>> 0;
+    this.rng = new XorShift32(readU32LE(this.seed, 8) ^ 0x3c6ef372);
+
+    // tagMask: extracts the fingerprint from the hash (32-bit is special-cased to
+    // avoid the signed-overflow pitfall of 1<<31).
+    this.tagMask =
+      this.fingerprintBits === 32 ? 0xffffffff : (0xffffffff >>> (32 - this.fingerprintBits)) >>> 0;
+
+    // targetLoadFactor: default 0.96, clamped to [0.5, 0.99].
+    const rawTargetLoadFactor = options.targetLoadFactor;
+    const rawTargetLoadFactorValue =
+      typeof rawTargetLoadFactor === "number" && Number.isFinite(rawTargetLoadFactor)
+        ? rawTargetLoadFactor
+        : 0.96;
+    const targetLoadFactor = Math.max(0.5, Math.min(0.99, rawTargetLoadFactorValue));
+
+    // Following the authors' implementation: numBuckets ≈ maxItems / (0.96 * 4)
+    const maxItems = Math.ceil(options.maxItems);
+    // More conservative in practice: ceil so that maxItems fits at the target load factor
+    let bucketCount = Math.ceil(maxItems / targetLoadFactor / BUCKET_SIZE);
+    bucketCount = Math.max(bucketCount, 128); // avoid tables so small that the AR setup misbehaves
+
+    // Small tables: use a shorter segment length to avoid the space wasted by forcing alignment to 1024.
+    // Per the original note, this follows the initialization strategy of the authors' other implementation (vacuum.h).
+    if (bucketCount < 10_000) {
+      const bigSeg =
+        bucketCount < 256 ? upperPower2(bucketCount) : upperPower2(Math.floor(bucketCount / 4));
+      bucketCount = roundUpToMultiple(bucketCount, bigSeg);
+
+      // All four groups share the same segment mask on this path.
+      const mask = bigSeg - 1;
+      this.lenMasks = [mask, mask, mask, mask];
+      this.numBuckets = bucketCount;
+      this.table = new Uint32Array(this.numBuckets * BUCKET_SIZE);
+      return;
+    }
+
+    // Alternate Range setup (the aligned=false path)
+    const bigSeg = Math.max(1024, properAltRange(bucketCount, 0));
+    bucketCount = roundUpToMultiple(bucketCount, bigSeg);
+
+    const l0 = bigSeg - 1;
+    const l1 = properAltRange(bucketCount, 1) - 1;
+    const l2 = properAltRange(bucketCount, 2) - 1;
+    // The last group is doubled (reference implementation)
+    const l3 = properAltRange(bucketCount, 3) * 2 - 1;
+
+    this.lenMasks = [l0, l1, l2, l3];
+
+    // Important: make bucketCount a multiple of every segment length so that an
+    // alternate index in the last segment cannot fall out of bounds.
+    // The lengths are all powers of two, so the largest one covers the others
+    // (a larger power is always a multiple of a smaller one).
+    const segLens = [l0 + 1, l1 + 1, l2 + 1, l3 + 1];
+    const maxSegLen = Math.max(...segLens);
+    this.numBuckets = roundUpToMultiple(bucketCount, upperPower2(maxSegLen));
+    this.table = new Uint32Array(this.numBuckets * BUCKET_SIZE);
+  }
+
+  /**
+   * Number of items currently stored (only successful inserts are counted).
+   */
+  size(): number {
+    return this.numItems;
+  }
+
+  /**
+   * Table capacity (total number of slots).
+   */
+  capacitySlots(): number {
+    return this.numBuckets * BUCKET_SIZE;
+  }
+
+  /**
+   * Load factor (occupied slots / total slots).
+   */
+  loadFactor(): number {
+    return this.capacitySlots() === 0 ? 0 : this.numItems / this.capacitySlots();
+  }
+
+  /**
+   * Membership test (true = possibly present; false = definitely absent).
+   */
+  has(key: string): boolean {
+    this.indexTag(key);
+    const i1 = this.tmpIndex;
+    const tag = this.tmpTag;
+
+    // Check the primary bucket first; the alternate index is only computed on a miss.
+    const table = this.table;
+    let start = i1 * BUCKET_SIZE;
+    if (
+      table[start] === tag ||
+      table[start + 1] === tag ||
+      table[start + 2] === tag ||
+      table[start + 3] === tag
+    ) {
+      return true;
+    }
+
+    const i2 = this.altIndex(i1, tag);
+    start = i2 * BUCKET_SIZE;
+    return (
+      table[start] === tag ||
+      table[start + 1] === tag ||
+      table[start + 2] === tag ||
+      table[start + 3] === tag
+    );
+  }
+
+  /**
+   * Insert (returns true on success, false on failure).
+   */
+  add(key: string): boolean {
+    this.indexTag(key);
+    return this.addIndexTag(this.tmpIndex, this.tmpTag);
+  }
+
+  /**
+   * Delete (returns true on success, false when no matching tag is found).
+   *
+   * Note: this is an "approximate delete"; there is a very small chance of removing
+   * another key's entry, because colliding fingerprints are indistinguishable.
+   */
+  delete(key: string): boolean {
+    this.indexTag(key);
+    const i1 = this.tmpIndex;
+    const tag = this.tmpTag;
+    const i2 = this.altIndex(i1, tag);
+
+    // Remove at most one matching slot: primary bucket first, then the alternate.
+    const ok1 = this.deleteFromBucket(i1, tag);
+    if (ok1) {
+      this.numItems--;
+      return true;
+    }
+
+    const ok2 = this.deleteFromBucket(i2, tag);
+    if (ok2) {
+      this.numItems--;
+      return true;
+    }
+
+    return false;
+  }
+
+  // ==================== Internals ====================
+
+  /** Hashes `key` and stores the primary bucket index in tmpIndex and the tag in tmpTag. */
+  private indexTag(key: string): void {
+    // Seeded MurmurHash3 (32-bit) gives a deterministic hash and lowers the risk of
+    // degradation from attacker-controlled input.
+    // Key optimization: an ASCII fast path (API keys / IDs are usually ASCII) that
+    // avoids the allocation made by TextEncoder.encode.
+    const strLen = key.length;
+    if (this.scratch.length < strLen) {
+      this.scratch = new Uint8Array(Math.max(this.scratch.length * 2, strLen));
+    }
+
+    // Copy char codes into scratch until the first non-ASCII code unit.
+    let asciiLen = 0;
+    for (; asciiLen < strLen; asciiLen++) {
+      const c = key.charCodeAt(asciiLen);
+      if (c > 0x7f) break;
+      this.scratch[asciiLen] = c;
+    }
+
+    if (asciiLen === strLen) {
+      murmur3X86_32x2(this.scratch, strLen, this.hashSeedA, this.hashSeedB, this.hashOut);
+    } else {
+      // Non-ASCII: hand the whole key to TextEncoder (rare path)
+      const keyBytes = textEncoder.encode(key);
+      murmur3X86_32x2(keyBytes, keyBytes.length, this.hashSeedA, this.hashSeedB, this.hashOut);
+    }
+
+    const hvIndex = this.hashOut[0] >>> 0;
+    const hvTag = this.hashOut[1] >>> 0;
+
+    // The reference implementation uses `hash % numBuckets`. Kept simple and fast here
+    // (it also works when numBuckets is not a power of two).
+    const index = hvIndex % this.numBuckets;
+
+    // 0 marks an empty slot, so a zero tag is remapped to 1.
+    let tag = (hvTag & this.tagMask) >>> 0;
+    if (tag === 0) tag = 1;
+
+    this.tmpIndex = index;
+    this.tmpTag = tag;
+  }
+
+  /** Returns the other candidate bucket for `tag`, within the same segment as `index`. */
+  private altIndex(index: number, tag: number): number {
+    const segMask = this.lenMasks[tag & 3];
+
+    // delta = (tag * C) & segMask; forced to 1 when it is 0 so that alt != index
+    let delta = (Math.imul(tag, 0x5bd1e995) >>> 0) & segMask;
+    if (delta === 0) delta = 1;
+
+    // segLen is a power of two, so index % segLen equals index & segMask
+    // (index comes from a 32-bit hash, so bitwise operators are safe here)
+    const offset = (index & segMask) >>> 0;
+    const altOffset = (offset ^ delta) >>> 0;
+    return index - offset + altOffset;
+  }
+
+  /** Table position of the first slot of bucket `index`. */
+  private bucketStart(index: number): number {
+    return index * BUCKET_SIZE;
+  }
+
+  /** Writes `value` at `pos`, first recording the previous value when an undo log is given. */
+  private writeSlot(pos: number, value: number, undo?: UndoLog): void {
+    if (undo) {
+      undo.pos.push(pos);
+      undo.prev.push(this.table[pos]);
+    }
+    this.table[pos] = value;
+  }
+
+  /** Restores every logged write, newest first, so the oldest recorded value wins. */
+  private rollback(undo: UndoLog): void {
+    for (let i = undo.pos.length - 1; i >= 0; i--) {
+      this.table[undo.pos[i]] = undo.prev[i];
+    }
+  }
+
+  /** Puts `tag` into the first empty slot of bucket `index`; returns false when the bucket is full. */
+  private insertTagToBucket(index: number, tag: number, undo?: UndoLog): boolean {
+    const start = this.bucketStart(index);
+    if (this.table[start] === 0) {
+      this.writeSlot(start, tag, undo);
+      return true;
+    }
+    if (this.table[start + 1] === 0) {
+      this.writeSlot(start + 1, tag, undo);
+      return true;
+    }
+    if (this.table[start + 2] === 0) {
+      this.writeSlot(start + 2, tag, undo);
+      return true;
+    }
+    if (this.table[start + 3] === 0) {
+      this.writeSlot(start + 3, tag, undo);
+      return true;
+    }
+    return false;
+  }
+
+  /** Clears the first slot of bucket `index` that equals `tag`; returns false when none matches. */
+  private deleteFromBucket(index: number, tag: number): boolean {
+    const start = this.bucketStart(index);
+    if (this.table[start] === tag) {
+      this.table[start] = 0;
+      return true;
+    }
+    if (this.table[start + 1] === tag) {
+      this.table[start + 1] = 0;
+      return true;
+    }
+    if (this.table[start + 2] === tag) {
+      this.table[start + 2] = 0;
+      return true;
+    }
+    if (this.table[start + 3] === tag) {
+      this.table[start + 3] = 0;
+      return true;
+    }
+    return false;
+  }
+
+  /** Number of non-empty slots (0..4) in bucket `index`. */
+  private bucketOccupancy(index: number): number {
+    const start = this.bucketStart(index);
+    return (
+      (this.table[start] !== 0 ? 1 : 0) +
+      (this.table[start + 1] !== 0 ? 1 : 0) +
+      (this.table[start + 2] !== 0 ? 1 : 0) +
+      (this.table[start + 3] !== 0 ? 1 : 0)
+    );
+  }
+
+  /**
+   * Inserts `tag` starting from primary bucket `index`. Returns true and increments
+   * the item count on success; returns false with the table unchanged on failure.
+   */
+  private addIndexTag(index: number, tag: number): boolean {
+    const i1 = index;
+    const i2 = this.altIndex(i1, tag);
+
+    const occ1 = this.bucketOccupancy(i1);
+    const occ2 = this.bucketOccupancy(i2);
+
+    // Try the emptier bucket first (reference implementation: prefer the bucket with fewer entries)
+    const first = occ1 <= occ2 ? i1 : i2;
+    const second = first === i1 ? i2 : i1;
+
+    if (this.insertTagToBucket(first, tag) || this.insertTagToBucket(second, tag)) {
+      this.numItems++;
+      return true;
+    }
+
+    // Both buckets are full: start kicking out + vacuuming.
+    // Key invariant: if the insert ultimately fails, every modification must be rolled
+    // back; otherwise a displaced entry would be lost and cause false negatives.
+    const undo: UndoLog = { pos: [], prev: [] };
+    let curIndex = this.rng.nextBool() ? i1 : i2;
+    let curTag = tag;
+
+    for (let count = 0; count < this.maxKickSteps; count++) {
+      // 1) An earlier relocation may have freed a slot in the current bucket (conservative retry)
+      if (this.insertTagToBucket(curIndex, curTag, undo)) {
+        this.numItems++;
+        return true;
+      }
+
+      // 2) Vacuuming (one-hop lookahead): try to move one tag of the current bucket
+      //    into a free slot of that tag's alternate bucket
+      const start = this.bucketStart(curIndex);
+      for (let slot = 0; slot < BUCKET_SIZE; slot++) {
+        const existing = this.table[start + slot];
+        if (existing === 0) continue;
+        const alt = this.altIndex(curIndex, existing);
+        if (this.insertTagToBucket(alt, existing, undo)) {
+          // "Pull" the free slot here: existing has moved away, curTag takes its place
+          this.writeSlot(start + slot, curTag, undo);
+          this.numItems++;
+          return true;
+        }
+      }
+
+      // 3) Kick out a random tag and continue the chain with it in its alternate bucket
+      const r = this.rng.nextInt(BUCKET_SIZE);
+      const oldTag = this.table[start + r];
+      this.writeSlot(start + r, curTag, undo);
+      curTag = oldTag;
+      curIndex = this.altIndex(curIndex, curTag);
+    }
+
+    // Kick budget exhausted: restore the table exactly as it was before this call.
+    this.rollback(undo);
+    return false;
+  }
+}

+ 13 - 4
src/lib/validation/schemas.ts

@@ -31,13 +31,13 @@ const ANTHROPIC_MAX_TOKENS_PREFERENCE = z.union([
   z.literal("inherit"),
   z
     .string()
-    .regex(/^\d+$/, "max_tokens must be 'inherit' or a numeric string")
+    .regex(/^\d+$/, 'max_tokens 必须为 "inherit" 或数字字符串')
     .refine(
       (val) => {
         const num = Number.parseInt(val, 10);
         return num >= 1 && num <= 64000;
       },
-      { message: "max_tokens must be between 1 and 64000" }
+      { message: "max_tokens 必须在 1 到 64000 之间" }
     ),
 ]);
 
@@ -45,13 +45,13 @@ const ANTHROPIC_THINKING_BUDGET_PREFERENCE = z.union([
   z.literal("inherit"),
   z
     .string()
-    .regex(/^\d+$/, "thinking.budget_tokens must be 'inherit' or a numeric string")
+    .regex(/^\d+$/, 'thinking.budget_tokens 必须为 "inherit" 或数字字符串')
     .refine(
       (val) => {
         const num = Number.parseInt(val, 10);
         return num >= 1024 && num <= 32000;
       },
-      { message: "thinking.budget_tokens must be between 1024 and 32000" }
+      { message: "thinking.budget_tokens 必须在 1024 到 32000 之间" }
     ),
 ]);
 
@@ -409,6 +409,11 @@ export const CreateProviderSchema = z
       .max(2147483647, "优先级超出整数范围")
       .optional()
       .default(0),
+    group_priorities: z
+      .record(z.string(), z.number().int().min(0).max(2147483647))
+      .nullable()
+      .optional()
+      .default(null),
     cost_multiplier: z.coerce.number().min(0, "成本倍率不能为负数").optional().default(1.0),
     group_tag: z.string().max(50, "分组标签不能超过50个字符").nullable().optional(),
     // Codex 支持:供应商类型和模型重定向
@@ -610,6 +615,10 @@ export const UpdateProviderSchema = z
       .min(0, "优先级不能为负数")
       .max(2147483647, "优先级超出整数范围")
       .optional(),
+    group_priorities: z
+      .record(z.string(), z.number().int().min(0).max(2147483647))
+      .nullable()
+      .optional(),
     cost_multiplier: z.coerce.number().min(0, "成本倍率不能为负数").optional(),
     group_tag: z.string().max(50, "分组标签不能超过50个字符").nullable().optional(),
     // Codex 支持:供应商类型和模型重定向

+ 1 - 0
src/repository/_shared/transformers.ts

@@ -85,6 +85,7 @@ export function toProvider(dbProvider: any): Provider {
     isEnabled: dbProvider?.isEnabled ?? true,
     weight: dbProvider?.weight ?? 1,
     priority: dbProvider?.priority ?? 0,
+    groupPriorities: dbProvider?.groupPriorities ?? null,
     costMultiplier: dbProvider?.costMultiplier ? parseFloat(dbProvider.costMultiplier) : 1.0,
     groupTag: dbProvider?.groupTag ?? null,
     providerType: dbProvider?.providerType ?? "claude",

+ 142 - 4
src/repository/key.ts

@@ -3,6 +3,16 @@
 import { and, count, desc, eq, gt, gte, inArray, isNull, lt, or, sql, sum } from "drizzle-orm";
 import { db } from "@/drizzle/db";
 import { keys, messageRequest, providers, users } from "@/drizzle/schema";
+import { CHANNEL_API_KEYS_UPDATED, publishCacheInvalidation } from "@/lib/redis/pubsub";
+import {
+  cacheActiveKey,
+  cacheAuthResult,
+  cacheUser,
+  getCachedActiveKey,
+  getCachedUser,
+  invalidateCachedKey,
+} from "@/lib/security/api-key-auth-cache";
+import { apiKeyVacuumFilter } from "@/lib/security/api-key-vacuum-filter";
 import { Decimal, toCostDecimal } from "@/lib/utils/currency";
 import type { CreateKeyData, Key, UpdateKeyData } from "@/types/key";
 import type { User } from "@/types/user";
@@ -161,12 +171,41 @@ export async function createKey(keyData: CreateKeyData): Promise<Key> {
     limitTotalUsd: keys.limitTotalUsd,
     limitConcurrentSessions: keys.limitConcurrentSessions,
     providerGroup: keys.providerGroup,
+    cacheTtlPreference: keys.cacheTtlPreference,
     createdAt: keys.createdAt,
     updatedAt: keys.updatedAt,
     deletedAt: keys.deletedAt,
   });
 
-  return toKey(key);
+  const created = toKey(key);
+  // 将新建 key 写入 Vacuum Filter(提升新 key 的即时可用性;失败不影响正确性)
+  try {
+    apiKeyVacuumFilter.noteExistingKey(created.key);
+  } catch {
+    // ignore
+  }
+  // Redis 缓存(最佳努力,不影响正确性)
+  // 注意:多实例环境下其它实例可能在 Vacuum Filter 尚未重建时收到新 key 的请求。
+  // 为减少“新 key 立刻使用偶发 401”的窗口,这里会等待 Redis 写入/广播;
+  // 但必须设置超时上限,避免 Redis 慢/不可用时拖慢 key 创建。
+  const redisBestEffortTimeoutMs = 200;
+  const redisTasks: Array<Promise<unknown>> = [];
+
+  redisTasks.push(cacheActiveKey(created).catch(() => {}));
+
+  // 多实例:广播 key 集合变更,触发其它实例重建 Vacuum Filter,避免误拒绝
+  const rateLimitRaw = process.env.ENABLE_RATE_LIMIT?.trim();
+  if (process.env.REDIS_URL && rateLimitRaw !== "false" && rateLimitRaw !== "0") {
+    redisTasks.push(publishCacheInvalidation(CHANNEL_API_KEYS_UPDATED).catch(() => {}));
+  }
+
+  if (redisTasks.length > 0) {
+    await Promise.race([
+      Promise.all(redisTasks),
+      new Promise<void>((resolve) => setTimeout(resolve, redisBestEffortTimeoutMs)),
+    ]);
+  }
+  return created;
 }
 
 export async function updateKey(id: number, keyData: UpdateKeyData): Promise<Key | null> {
@@ -232,7 +271,17 @@ export async function updateKey(id: number, keyData: UpdateKeyData): Promise<Key
     });
 
   if (!key) return null;
-  return toKey(key);
+  const updated = toKey(key);
+  // 变更 key 后,根据活跃状态更新/失效 Redis 缓存(最佳努力,不影响正确性)
+  const expiresAtMs = updated.expiresAt instanceof Date ? updated.expiresAt.getTime() : null;
+  const isExpired = typeof expiresAtMs === "number" && expiresAtMs <= Date.now();
+  const isActive = updated.isEnabled === true && !updated.deletedAt && !isExpired;
+  if (isActive) {
+    await cacheActiveKey(updated).catch(() => {});
+  } else {
+    await invalidateCachedKey(updated.key).catch(() => {});
+  }
+  return updated;
 }
 
 export async function findActiveKeyByUserIdAndName(
@@ -394,12 +443,34 @@ export async function deleteKey(id: number): Promise<boolean> {
     .update(keys)
     .set({ deletedAt: new Date() })
     .where(and(eq(keys.id, id), isNull(keys.deletedAt)))
-    .returning({ id: keys.id });
+    .returning({ id: keys.id, key: keys.key });
 
+  if (result.length > 0) {
+    await invalidateCachedKey(result[0].key).catch(() => {});
+  }
   return result.length > 0;
 }
 
 export async function findActiveKeyByKeyString(keyString: string): Promise<Key | null> {
+  const vfSaysMissing = apiKeyVacuumFilter.isDefinitelyNotPresent(keyString) === true;
+
+  // Redis 缓存命中:避免打 DB
+  const cached = await getCachedActiveKey(keyString);
+  if (cached) {
+    // 多实例一致性:若 Vacuum Filter 判定缺失但 Redis 命中,说明本机 filter 可能滞后。
+    // 最佳努力将 key 写入本机 filter(不影响正确性,仅提升后续性能)。
+    if (vfSaysMissing) {
+      apiKeyVacuumFilter.noteExistingKey(keyString);
+    }
+    return cached;
+  }
+
+  // Vacuum Filter 负向短路:肯定不存在则直接返回 null,避免打 DB
+  // 注意:此处必须放在 Redis 读取之后,避免多实例环境中新建 key 的短暂误拒绝窗口。
+  if (vfSaysMissing) {
+    return null;
+  }
+
   const [key] = await db
     .select({
       id: keys.id,
@@ -418,6 +489,7 @@ export async function findActiveKeyByKeyString(keyString: string): Promise<Key |
       limitTotalUsd: keys.limitTotalUsd,
       limitConcurrentSessions: keys.limitConcurrentSessions,
       providerGroup: keys.providerGroup,
+      cacheTtlPreference: keys.cacheTtlPreference,
       createdAt: keys.createdAt,
       updatedAt: keys.updatedAt,
       deletedAt: keys.deletedAt,
@@ -433,13 +505,77 @@ export async function findActiveKeyByKeyString(keyString: string): Promise<Key |
     );
 
   if (!key) return null;
-  return toKey(key);
+  const active = toKey(key);
+  cacheActiveKey(active).catch(() => {});
+  return active;
 }
 
 // 验证 API Key 并返回用户信息
 export async function validateApiKeyAndGetUser(
   keyString: string
 ): Promise<{ user: User; key: Key } | null> {
+  const vfSaysMissing = apiKeyVacuumFilter.isDefinitelyNotPresent(keyString) === true;
+
+  // 默认鉴权链路:Vacuum Filter -> Redis -> DB
+  const cachedKey = await getCachedActiveKey(keyString);
+  if (cachedKey) {
+    // 多实例一致性:若 Vacuum Filter 判定缺失但 Redis 命中,说明本机 filter 可能滞后。
+    // 最佳努力将 key 写入本机 filter(不影响正确性,仅提升后续性能)。
+    if (vfSaysMissing) {
+      apiKeyVacuumFilter.noteExistingKey(keyString);
+    }
+
+    const cachedUser = await getCachedUser(cachedKey.userId);
+    if (cachedUser) {
+      return { user: cachedUser, key: cachedKey };
+    }
+
+    // user 缓存 miss:仅补齐 user(相较 join 更轻量)
+    const [userRow] = await db
+      .select({
+        id: users.id,
+        name: users.name,
+        description: users.description,
+        role: users.role,
+        rpm: users.rpmLimit,
+        dailyQuota: users.dailyLimitUsd,
+        providerGroup: users.providerGroup,
+        tags: users.tags,
+        createdAt: users.createdAt,
+        updatedAt: users.updatedAt,
+        deletedAt: users.deletedAt,
+        limit5hUsd: users.limit5hUsd,
+        limitWeeklyUsd: users.limitWeeklyUsd,
+        limitMonthlyUsd: users.limitMonthlyUsd,
+        limitTotalUsd: users.limitTotalUsd,
+        limitConcurrentSessions: users.limitConcurrentSessions,
+        dailyResetMode: users.dailyResetMode,
+        dailyResetTime: users.dailyResetTime,
+        isEnabled: users.isEnabled,
+        expiresAt: users.expiresAt,
+        allowedClients: users.allowedClients,
+        allowedModels: users.allowedModels,
+      })
+      .from(users)
+      .where(and(eq(users.id, cachedKey.userId), isNull(users.deletedAt)));
+
+    if (!userRow) {
+      // join 语义:用户被删除则 key 无效;顺带清理 key 缓存避免重复 miss
+      invalidateCachedKey(keyString).catch(() => {});
+      return null;
+    }
+
+    const user = toUser(userRow);
+    cacheUser(user).catch(() => {});
+    return { user, key: cachedKey };
+  }
+
+  // Vacuum Filter 负向短路:肯定不存在则直接返回 null,避免打 DB
+  // 注意:此处必须放在 Redis 读取之后,避免多实例环境中新建 key 的短暂误拒绝窗口。
+  if (vfSaysMissing) {
+    return null;
+  }
+
   const result = await db
     .select({
       // Key fields
@@ -551,6 +687,8 @@ export async function validateApiKeyAndGetUser(
     deletedAt: row.keyDeletedAt,
   });
 
+  // 最佳努力:写入 Redis 缓存(不影响正确性)
+  cacheAuthResult(keyString, { user, key }).catch(() => {});
   return { user, key };
 }
 

+ 7 - 0
src/repository/provider.ts

@@ -24,6 +24,7 @@ export async function createProvider(providerData: CreateProviderData): Promise<
     isEnabled: providerData.is_enabled,
     weight: providerData.weight,
     priority: providerData.priority,
+    groupPriorities: providerData.group_priorities ?? null,
     costMultiplier:
       providerData.cost_multiplier != null ? providerData.cost_multiplier.toString() : "1.0",
     groupTag: providerData.group_tag,
@@ -175,6 +176,7 @@ export async function findProviderList(
       isEnabled: providers.isEnabled,
       weight: providers.weight,
       priority: providers.priority,
+      groupPriorities: providers.groupPriorities,
       costMultiplier: providers.costMultiplier,
       groupTag: providers.groupTag,
       providerType: providers.providerType,
@@ -252,6 +254,7 @@ export async function findAllProvidersFresh(): Promise<Provider[]> {
       isEnabled: providers.isEnabled,
       weight: providers.weight,
       priority: providers.priority,
+      groupPriorities: providers.groupPriorities,
       costMultiplier: providers.costMultiplier,
       groupTag: providers.groupTag,
       providerType: providers.providerType,
@@ -333,6 +336,7 @@ export async function findProviderById(id: number): Promise<Provider | null> {
       isEnabled: providers.isEnabled,
       weight: providers.weight,
       priority: providers.priority,
+      groupPriorities: providers.groupPriorities,
       costMultiplier: providers.costMultiplier,
       groupTag: providers.groupTag,
       providerType: providers.providerType,
@@ -403,6 +407,8 @@ export async function updateProvider(
   if (providerData.is_enabled !== undefined) dbData.isEnabled = providerData.is_enabled;
   if (providerData.weight !== undefined) dbData.weight = providerData.weight;
   if (providerData.priority !== undefined) dbData.priority = providerData.priority;
+  if (providerData.group_priorities !== undefined)
+    dbData.groupPriorities = providerData.group_priorities ?? null;
   if (providerData.cost_multiplier !== undefined)
     dbData.costMultiplier =
       providerData.cost_multiplier != null ? providerData.cost_multiplier.toString() : "1.0";
@@ -541,6 +547,7 @@ export async function updateProvider(
         isEnabled: providers.isEnabled,
         weight: providers.weight,
         priority: providers.priority,
+        groupPriorities: providers.groupPriorities,
         costMultiplier: providers.costMultiplier,
         groupTag: providers.groupTag,
         providerType: providers.providerType,

+ 11 - 2
src/repository/user.ts

@@ -3,6 +3,7 @@
 import { and, asc, eq, isNull, type SQL, sql } from "drizzle-orm";
 import { db } from "@/drizzle/db";
 import { keys as keysTable, users } from "@/drizzle/schema";
+import { cacheUser, invalidateCachedUser } from "@/lib/security/api-key-auth-cache";
 import type { CreateUserData, UpdateUserData, User } from "@/types/user";
 import { toUser } from "./_shared/transformers";
 
@@ -86,7 +87,9 @@ export async function createUser(userData: CreateUserData): Promise<User> {
     allowedModels: users.allowedModels,
   });
 
-  return toUser(user);
+  const created = toUser(user);
+  await cacheUser(created).catch(() => {});
+  return created;
 }
 
 export async function findUserList(limit: number = 50, offset: number = 0): Promise<User[]> {
@@ -432,7 +435,9 @@ export async function updateUser(id: number, userData: UpdateUserData): Promise<
 
   if (!user) return null;
 
-  return toUser(user);
+  const updated = toUser(user);
+  await cacheUser(updated).catch(() => {});
+  return updated;
 }
 
 export async function deleteUser(id: number): Promise<boolean> {
@@ -442,6 +447,9 @@ export async function deleteUser(id: number): Promise<boolean> {
     .where(and(eq(users.id, id), isNull(users.deletedAt)))
     .returning({ id: users.id });
 
+  if (result.length > 0) {
+    await invalidateCachedUser(id).catch(() => {});
+  }
   return result.length > 0;
 }
 
@@ -456,6 +464,7 @@ export async function markUserExpired(userId: number): Promise<boolean> {
     .where(and(eq(users.id, userId), eq(users.isEnabled, true), isNull(users.deletedAt)))
     .returning({ id: users.id });
 
+  await invalidateCachedUser(userId).catch(() => {});
   return result.length > 0;
 }
 

+ 4 - 0
src/types/provider.ts

@@ -59,6 +59,7 @@ export interface Provider {
 
   // 优先级和分组配置
   priority: number;
+  groupPriorities: Record<string, number> | null;
   costMultiplier: number;
   groupTag: string | null;
 
@@ -162,6 +163,7 @@ export interface ProviderDisplay {
   weight: number;
   // 优先级和分组配置
   priority: number;
+  groupPriorities: Record<string, number> | null;
   costMultiplier: number;
   groupTag: string | null;
   // 供应商类型
@@ -251,6 +253,7 @@ export interface CreateProviderData {
 
   // 优先级和分组配置
   priority?: number;
+  group_priorities?: Record<string, number> | null;
   cost_multiplier?: number;
   group_tag?: string | null;
 
@@ -322,6 +325,7 @@ export interface UpdateProviderData {
 
   // 优先级和分组配置
   priority?: number;
+  group_priorities?: Record<string, number> | null;
   cost_multiplier?: number;
   group_tag?: string | null;
 

+ 44 - 0
tests/unit/lib/env-store-session-response-body.test.ts

@@ -0,0 +1,44 @@
+import { afterEach, describe, expect, it } from "vitest";
+import { EnvSchema } from "@/lib/config/env.schema";
+
+describe("EnvSchema - STORE_SESSION_RESPONSE_BODY", () => {
+  const originalEnv = process.env.STORE_SESSION_RESPONSE_BODY;
+
+  afterEach(() => {
+    if (originalEnv === undefined) {
+      delete process.env.STORE_SESSION_RESPONSE_BODY;
+    } else {
+      process.env.STORE_SESSION_RESPONSE_BODY = originalEnv;
+    }
+  });
+
+  it("should default to true when not set", () => {
+    delete process.env.STORE_SESSION_RESPONSE_BODY;
+    const result = EnvSchema.parse(process.env);
+    expect(result.STORE_SESSION_RESPONSE_BODY).toBe(true);
+  });
+
+  it("should parse 'true' as true", () => {
+    process.env.STORE_SESSION_RESPONSE_BODY = "true";
+    const result = EnvSchema.parse(process.env);
+    expect(result.STORE_SESSION_RESPONSE_BODY).toBe(true);
+  });
+
+  it("should parse 'false' as false", () => {
+    process.env.STORE_SESSION_RESPONSE_BODY = "false";
+    const result = EnvSchema.parse(process.env);
+    expect(result.STORE_SESSION_RESPONSE_BODY).toBe(false);
+  });
+
+  it("should parse '0' as false", () => {
+    process.env.STORE_SESSION_RESPONSE_BODY = "0";
+    const result = EnvSchema.parse(process.env);
+    expect(result.STORE_SESSION_RESPONSE_BODY).toBe(false);
+  });
+
+  it("should parse '1' as true", () => {
+    process.env.STORE_SESSION_RESPONSE_BODY = "1";
+    const result = EnvSchema.parse(process.env);
+    expect(result.STORE_SESSION_RESPONSE_BODY).toBe(true);
+  });
+});

+ 12 - 0
tests/unit/lib/session-manager-redaction.test.ts

@@ -48,9 +48,11 @@ vi.mock("@/lib/redis", () => ({
 
 // Mock config - we'll control STORE_SESSION_MESSAGES dynamically
 let mockStoreMessages = false;
+let mockStoreSessionResponseBody = true;
 vi.mock("@/lib/config/env.schema", () => ({
   getEnvConfig: () => ({
     STORE_SESSION_MESSAGES: mockStoreMessages,
+    STORE_SESSION_RESPONSE_BODY: mockStoreSessionResponseBody,
     SESSION_TTL: 300,
   }),
 }));
@@ -62,10 +64,12 @@ describe("SessionManager - Redaction based on STORE_SESSION_MESSAGES", () => {
   beforeEach(() => {
     vi.clearAllMocks();
     mockStoreMessages = false; // default: redact
+    mockStoreSessionResponseBody = true; // default: store response body
   });
 
   afterEach(() => {
     mockStoreMessages = false;
+    mockStoreSessionResponseBody = true;
   });
 
   describe("storeSessionMessages", () => {
@@ -160,6 +164,14 @@ describe("SessionManager - Redaction based on STORE_SESSION_MESSAGES", () => {
   });
 
   describe("storeSessionResponse", () => {
+    it("should skip storing response body when STORE_SESSION_RESPONSE_BODY=false", async () => {
+      mockStoreSessionResponseBody = false;
+
+      await SessionManager.storeSessionResponse("sess_disabled", '{"message":"hello"}', 1);
+
+      expect(redisMock.setex).not.toHaveBeenCalled();
+    });
+
     it("should store redacted JSON response when STORE_SESSION_MESSAGES=false", async () => {
       mockStoreMessages = false;
       const responseBody = {

+ 201 - 0
tests/unit/proxy/provider-selector-group-priority.test.ts

@@ -0,0 +1,201 @@
+import { describe, expect, it } from "vitest";
+import type { Provider } from "@/types/provider";
+import { ProxyProviderResolver } from "@/app/v1/_lib/proxy/provider-selector";
+
+function makeProvider(overrides: Partial<Provider>): Provider {
+  return {
+    id: 1,
+    name: "test",
+    url: "https://api.example.com",
+    key: "sk-test",
+    providerVendorId: null,
+    isEnabled: true,
+    weight: 1,
+    priority: 0,
+    groupPriorities: null,
+    costMultiplier: 1,
+    groupTag: null,
+    providerType: "claude",
+    preserveClientIp: false,
+    modelRedirects: null,
+    allowedModels: null,
+    mcpPassthroughType: "none",
+    mcpPassthroughUrl: null,
+    limit5hUsd: null,
+    limitDailyUsd: null,
+    dailyResetMode: "fixed",
+    dailyResetTime: "00:00",
+    limitWeeklyUsd: null,
+    limitMonthlyUsd: null,
+    limitTotalUsd: null,
+    totalCostResetAt: null,
+    limitConcurrentSessions: 0,
+    maxRetryAttempts: null,
+    circuitBreakerFailureThreshold: 5,
+    circuitBreakerOpenDuration: 1800000,
+    circuitBreakerHalfOpenSuccessThreshold: 2,
+    proxyUrl: null,
+    proxyFallbackToDirect: false,
+    firstByteTimeoutStreamingMs: 30000,
+    streamingIdleTimeoutMs: 10000,
+    requestTimeoutNonStreamingMs: 600000,
+    websiteUrl: null,
+    faviconUrl: null,
+    cacheTtlPreference: null,
+    context1mPreference: null,
+    codexReasoningEffortPreference: null,
+    codexReasoningSummaryPreference: null,
+    codexTextVerbosityPreference: null,
+    codexParallelToolCallsPreference: null,
+    anthropicMaxTokensPreference: null,
+    anthropicThinkingBudgetPreference: null,
+    geminiGoogleSearchPreference: null,
+    tpm: null,
+    rpm: null,
+    rpd: null,
+    cc: null,
+    createdAt: new Date(),
+    updatedAt: new Date(),
+    ...overrides,
+  };
+}
+
+describe("resolveEffectivePriority", () => {
+  it("returns global priority when no groupPriorities", () => {
+    const provider = makeProvider({ priority: 5, groupPriorities: null });
+    expect(ProxyProviderResolver.resolveEffectivePriority(provider, "cli")).toBe(5);
+  });
+
+  it("returns group-specific priority when override exists", () => {
+    const provider = makeProvider({
+      priority: 5,
+      groupPriorities: { cli: 0, chat: 2 },
+    });
+    expect(ProxyProviderResolver.resolveEffectivePriority(provider, "cli")).toBe(0);
+    expect(ProxyProviderResolver.resolveEffectivePriority(provider, "chat")).toBe(2);
+  });
+
+  it("falls back to global when group not in overrides", () => {
+    const provider = makeProvider({
+      priority: 5,
+      groupPriorities: { cli: 0 },
+    });
+    expect(ProxyProviderResolver.resolveEffectivePriority(provider, "chat")).toBe(5);
+  });
+
+  it("returns global priority when userGroup is null", () => {
+    const provider = makeProvider({
+      priority: 5,
+      groupPriorities: { cli: 0 },
+    });
+    expect(ProxyProviderResolver.resolveEffectivePriority(provider, null)).toBe(5);
+  });
+
+  it("handles group priority of 0 correctly (not falsy)", () => {
+    const provider = makeProvider({
+      priority: 5,
+      groupPriorities: { cli: 0 },
+    });
+    expect(ProxyProviderResolver.resolveEffectivePriority(provider, "cli")).toBe(0);
+  });
+
+  it("handles comma-separated user groups (multi-group)", () => {
+    const provider = makeProvider({
+      priority: 10,
+      groupPriorities: { cli: 2, admin: 5, chat: 8 },
+    });
+    // Multi-group "cli,admin" should match both and take minimum (2)
+    expect(ProxyProviderResolver.resolveEffectivePriority(provider, "cli,admin")).toBe(2);
+    // Multi-group "admin,chat" should take minimum (5)
+    expect(ProxyProviderResolver.resolveEffectivePriority(provider, "admin,chat")).toBe(5);
+  });
+
+  it("falls back to global when no group in multi-group matches", () => {
+    const provider = makeProvider({
+      priority: 10,
+      groupPriorities: { cli: 2 },
+    });
+    // "admin,chat" has no matching overrides, should fall back to global (10)
+    expect(ProxyProviderResolver.resolveEffectivePriority(provider, "admin,chat")).toBe(10);
+  });
+
+  it("handles partial match in multi-group", () => {
+    const provider = makeProvider({
+      priority: 10,
+      groupPriorities: { cli: 3 },
+    });
+    // "cli,admin" - only "cli" matches, should return 3
+    expect(ProxyProviderResolver.resolveEffectivePriority(provider, "cli,admin")).toBe(3);
+  });
+});
+
+describe("selectTopPriority with group context", () => {
+  // Reach the private selectTopPriority method through an `any` cast for testing
+  const selectTopPriority = (providers: Provider[], userGroup?: string | null) =>
+    (ProxyProviderResolver as any).selectTopPriority(providers, userGroup);
+
+  it("selects providers by group-aware priority", () => {
+    const providerA = makeProvider({
+      id: 1,
+      name: "A",
+      priority: 5,
+      groupPriorities: { cli: 0 },
+    });
+    const providerB = makeProvider({
+      id: 2,
+      name: "B",
+      priority: 0,
+      groupPriorities: null,
+    });
+
+    // cli group: A has effective priority 0, B has effective priority 0
+    const result = selectTopPriority([providerA, providerB], "cli");
+    expect(result).toHaveLength(2);
+    expect(result.map((p: Provider) => p.id).sort()).toEqual([1, 2]);
+  });
+
+  it("without group context, uses global priority", () => {
+    const providerA = makeProvider({
+      id: 1,
+      name: "A",
+      priority: 5,
+      groupPriorities: { cli: 0 },
+    });
+    const providerB = makeProvider({
+      id: 2,
+      name: "B",
+      priority: 0,
+      groupPriorities: null,
+    });
+
+    // no group: A has priority 5, B has priority 0 -> only B selected
+    const result = selectTopPriority([providerA, providerB], null);
+    expect(result).toHaveLength(1);
+    expect(result[0].id).toBe(2);
+  });
+
+  it("group override changes which providers are top priority", () => {
+    const providerA = makeProvider({
+      id: 1,
+      name: "A",
+      priority: 5,
+      groupPriorities: { chat: 1 },
+    });
+    const providerB = makeProvider({
+      id: 2,
+      name: "B",
+      priority: 3,
+      groupPriorities: null,
+    });
+
+    // chat group: A=1, B=3 -> only A
+    const chatResult = selectTopPriority([providerA, providerB], "chat");
+    expect(chatResult).toHaveLength(1);
+    expect(chatResult[0].id).toBe(1);
+
+    // no group: A=5, B=3 -> only B
+    const noGroupResult = selectTopPriority([providerA, providerB], null);
+    expect(noGroupResult).toHaveLength(1);
+    expect(noGroupResult[0].id).toBe(2);
+  });
+});

+ 465 - 0
tests/unit/security/api-key-auth-cache-redis-key.test.ts

@@ -0,0 +1,465 @@
+import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
+import { createHash, webcrypto } from "node:crypto";
+import type { Key } from "@/types/key";
+import type { User } from "@/types/user";
+
+type RedisPipelineLike = {
+  setex(key: string, ttlSeconds: number, value: string): RedisPipelineLike;
+  del(key: string): RedisPipelineLike;
+  exec(): Promise<unknown>;
+};
+
+type RedisLike = {
+  get(key: string): Promise<string | null>;
+  setex(key: string, ttlSeconds: number, value: string): Promise<unknown>;
+  del(key: string): Promise<number>;
+  pipeline(): RedisPipelineLike;
+};
+
+type PipelineOp =
+  | { kind: "setex"; key: string; ttlSeconds: number; value: string }
+  | { kind: "del"; key: string };
+
+class FakeRedisPipeline implements RedisPipelineLike {
+  readonly ops: PipelineOp[] = [];
+  readonly exec = vi.fn(async () => {
+    for (const op of this.ops) {
+      if (op.kind === "setex") {
+        this.parent.store.set(op.key, op.value);
+      } else {
+        this.parent.store.delete(op.key);
+      }
+    }
+    return [];
+  });
+
+  constructor(private readonly parent: FakeRedis) {}
+
+  setex(key: string, ttlSeconds: number, value: string): RedisPipelineLike {
+    this.ops.push({ kind: "setex", key, ttlSeconds, value });
+    return this;
+  }
+
+  del(key: string): RedisPipelineLike {
+    this.ops.push({ kind: "del", key });
+    return this;
+  }
+}
+
+class FakeRedis implements RedisLike {
+  readonly store = new Map<string, string>();
+  readonly get = vi.fn(async (key: string) => this.store.get(key) ?? null);
+  readonly setex = vi.fn(async (key: string, _ttlSeconds: number, value: string) => {
+    this.store.set(key, value);
+    return "OK";
+  });
+  readonly del = vi.fn(async (key: string) => (this.store.delete(key) ? 1 : 0));
+  readonly pipeline = vi.fn(() => {
+    const pipeline = new FakeRedisPipeline(this);
+    this.pipelines.push(pipeline);
+    return pipeline;
+  });
+
+  readonly pipelines: FakeRedisPipeline[] = [];
+}
+
+let currentRedis: FakeRedis | null = null;
+const getRedisClient = vi.fn(() => currentRedis);
+
+vi.mock("@/lib/redis/client", () => ({
+  getRedisClient,
+}));
+
+function sha256HexNode(value: string): string {
+  return createHash("sha256").update(value).digest("hex");
+}
+
+function buildKey(overrides?: Partial<Key>): Key {
+  return {
+    id: 1,
+    userId: 10,
+    name: "k1",
+    key: "sk-secret",
+    isEnabled: true,
+    expiresAt: undefined,
+    canLoginWebUi: true,
+    limit5hUsd: null,
+    limitDailyUsd: null,
+    dailyResetMode: "fixed",
+    dailyResetTime: "00:00",
+    limitWeeklyUsd: null,
+    limitMonthlyUsd: null,
+    limitTotalUsd: null,
+    limitConcurrentSessions: 0,
+    providerGroup: null,
+    cacheTtlPreference: null,
+    createdAt: new Date("2026-01-01T00:00:00.000Z"),
+    updatedAt: new Date("2026-01-02T00:00:00.000Z"),
+    deletedAt: undefined,
+    ...overrides,
+  };
+}
+
+function buildUser(overrides?: Partial<User>): User {
+  return {
+    id: 10,
+    name: "u1",
+    description: "",
+    role: "user",
+    rpm: null,
+    dailyQuota: null,
+    providerGroup: null,
+    tags: [],
+    createdAt: new Date("2026-01-01T00:00:00.000Z"),
+    updatedAt: new Date("2026-01-02T00:00:00.000Z"),
+    deletedAt: undefined,
+    dailyResetMode: "fixed",
+    dailyResetTime: "00:00",
+    isEnabled: true,
+    expiresAt: null,
+    allowedClients: [],
+    allowedModels: [],
+    ...overrides,
+  };
+}
+
+function setEnv(values: Record<string, string | undefined>): void {
+  for (const [key, value] of Object.entries(values)) {
+    if (value === undefined) {
+      // eslint-disable-next-line @typescript-eslint/no-dynamic-delete
+      delete process.env[key];
+    } else {
+      process.env[key] = value;
+    }
+  }
+}
+
+describe("ApiKeyAuthCache:Redis key(哈希/命名/TTL/失效)", () => {
+  const originalEnv: Record<string, string | undefined> = {};
+
+  beforeEach(() => {
+    vi.resetModules();
+    vi.clearAllMocks();
+    currentRedis = new FakeRedis();
+
+    // Record, then override, the environment variables this file modifies (so they do not leak into other test cases)
+    for (const k of [
+      "CI",
+      "NEXT_PHASE",
+      "NEXT_RUNTIME",
+      "ENABLE_RATE_LIMIT",
+      "REDIS_URL",
+      "ENABLE_API_KEY_REDIS_CACHE",
+      "API_KEY_AUTH_CACHE_TTL_SECONDS",
+    ]) {
+      originalEnv[k] = process.env[k];
+    }
+
+    setEnv({
+      CI: "false",
+      NEXT_PHASE: "",
+      NEXT_RUNTIME: "nodejs",
+      ENABLE_RATE_LIMIT: "true",
+      REDIS_URL: "redis://localhost:6379",
+      ENABLE_API_KEY_REDIS_CACHE: "true",
+      API_KEY_AUTH_CACHE_TTL_SECONDS: "60",
+    });
+
+    // Make sure the test environment always has WebCrypto subtle (independent of Node version / run mode)
+    vi.stubGlobal("crypto", webcrypto as unknown as Crypto);
+  });
+
+  afterEach(() => {
+    vi.useRealTimers();
+    vi.unstubAllGlobals();
+    setEnv(originalEnv);
+    currentRedis = null;
+  });
+
+  test("cacheActiveKey:应使用 SHA-256(keyString) 作为 Redis key,且不泄漏明文 key", async () => {
+    const { cacheActiveKey } = await import("@/lib/security/api-key-auth-cache");
+    const key = buildKey({ key: "sk-secret" });
+
+    await cacheActiveKey(key);
+
+    const expectedRedisKey = `api_key_auth:v1:key:${sha256HexNode("sk-secret")}`;
+    expect(getRedisClient).toHaveBeenCalled();
+    expect(currentRedis?.setex).toHaveBeenCalledTimes(1);
+
+    const [redisKey, ttlSeconds, payload] = currentRedis!.setex.mock.calls[0];
+    expect(redisKey).toBe(expectedRedisKey);
+    expect(redisKey).not.toContain("sk-secret");
+    expect(ttlSeconds).toBe(60);
+    expect(typeof payload).toBe("string");
+    expect(payload).not.toContain("sk-secret");
+
+    const parsed = JSON.parse(payload) as { v: number; key: Record<string, unknown> };
+    expect(parsed.v).toBe(1);
+    // payload.key must not contain the plaintext key field
+    expect(Object.hasOwn(parsed.key, "key")).toBe(false);
+  });
+
+  test("cacheActiveKey + getCachedActiveKey:应可回读并水合 Date 字段", async () => {
+    const { cacheActiveKey, getCachedActiveKey } = await import(
+      "@/lib/security/api-key-auth-cache"
+    );
+    const key = buildKey({ key: "sk-roundtrip" });
+
+    await cacheActiveKey(key);
+    const cached = await getCachedActiveKey("sk-roundtrip");
+
+    expect(cached?.key).toBe("sk-roundtrip");
+    expect(cached?.id).toBe(1);
+    expect(cached?.userId).toBe(10);
+    expect(cached?.createdAt).toBeInstanceOf(Date);
+    expect(cached?.updatedAt).toBeInstanceOf(Date);
+    expect(cached?.createdAt.toISOString()).toBe(key.createdAt.toISOString());
+    expect(cached?.updatedAt.toISOString()).toBe(key.updatedAt.toISOString());
+  });
+
+  test("getCachedActiveKey:payload 版本不匹配时应删除缓存并返回 null", async () => {
+    const { getCachedActiveKey } = await import("@/lib/security/api-key-auth-cache");
+    const keyString = "sk-version-mismatch";
+    const redisKey = `api_key_auth:v1:key:${sha256HexNode(keyString)}`;
+
+    currentRedis!.store.set(
+      redisKey,
+      JSON.stringify({
+        v: 999,
+        key: {
+          id: 1,
+          userId: 10,
+          name: "k1",
+          isEnabled: true,
+          canLoginWebUi: true,
+          dailyResetMode: "fixed",
+          dailyResetTime: "00:00",
+          limitConcurrentSessions: 0,
+          createdAt: "2026-01-01T00:00:00.000Z",
+          updatedAt: "2026-01-02T00:00:00.000Z",
+        },
+      })
+    );
+
+    await expect(getCachedActiveKey(keyString)).resolves.toBeNull();
+    expect(currentRedis!.del).toHaveBeenCalledWith(redisKey);
+  });
+
+  describe("getCachedActiveKey:disabled/deleted/expired 应视为失效并清理", () => {
+    const cases = [
+      { name: "disabled", payload: { isEnabled: false } },
+      { name: "deleted", payload: { deletedAt: "2026-01-01T00:00:00.000Z" } },
+      { name: "expired", payload: { expiresAt: "2026-01-01T00:00:00.000Z" } },
+    ] as const;
+
+    test.each(cases)("$name", async ({ name, payload }) => {
+      vi.useFakeTimers();
+      vi.setSystemTime(new Date("2026-01-10T00:00:00.000Z"));
+
+      const { getCachedActiveKey } = await import("@/lib/security/api-key-auth-cache");
+
+      const keyString = `sk-${name}`;
+      const redisKey = `api_key_auth:v1:key:${sha256HexNode(keyString)}`;
+      currentRedis!.store.set(
+        redisKey,
+        JSON.stringify({
+          v: 1,
+          key: {
+            id: 1,
+            userId: 10,
+            name: "k1",
+            isEnabled: true,
+            canLoginWebUi: true,
+            dailyResetMode: "fixed",
+            dailyResetTime: "00:00",
+            limitConcurrentSessions: 0,
+            createdAt: "2026-01-01T00:00:00.000Z",
+            updatedAt: "2026-01-02T00:00:00.000Z",
+            ...payload,
+          },
+        })
+      );
+
+      await expect(getCachedActiveKey(keyString)).resolves.toBeNull();
+      expect(currentRedis!.del).toHaveBeenCalledWith(redisKey);
+    });
+  });
+
+  describe("cacheActiveKey:非活跃 key(禁用/已删/已过期/无效 expiresAt)应删除缓存,不应 setex", () => {
+    const cases: Array<{ name: string; key: Key }> = [
+      { name: "disabled", key: buildKey({ key: "sk-disabled", isEnabled: false }) },
+      {
+        name: "deleted",
+        key: buildKey({ key: "sk-deleted", deletedAt: new Date("2026-01-01T00:00:00.000Z") }),
+      },
+      {
+        name: "expired",
+        key: buildKey({ key: "sk-expired", expiresAt: new Date("2026-01-01T00:00:00.000Z") }),
+      },
+      {
+        name: "invalid_expiresAt",
+        // @ts-expect-error: deliberately exercise a runtime edge case
+        key: buildKey({ key: "sk-invalid", expiresAt: "not-a-date" }),
+      },
+    ];
+
+    test.each(cases)("$name", async ({ key }) => {
+      vi.useFakeTimers();
+      vi.setSystemTime(new Date("2026-01-10T00:00:00.000Z"));
+
+      const { cacheActiveKey } = await import("@/lib/security/api-key-auth-cache");
+
+      await cacheActiveKey(key);
+
+      const expectedRedisKey = `api_key_auth:v1:key:${sha256HexNode(key.key)}`;
+      expect(currentRedis!.setex).not.toHaveBeenCalled();
+      expect(currentRedis!.del).toHaveBeenCalledWith(expectedRedisKey);
+    });
+  });
+
+  test("cacheActiveKey:应按 key.expiresAt 剩余时间收敛 TTL(秒)", async () => {
+    vi.useFakeTimers();
+    vi.setSystemTime(new Date("2026-01-01T00:00:00.000Z"));
+
+    const { cacheActiveKey } = await import("@/lib/security/api-key-auth-cache");
+    const expiresAt = new Date(Date.now() + 30_000);
+    const key = buildKey({ key: "sk-ttl-cap", expiresAt });
+
+    await cacheActiveKey(key);
+
+    expect(currentRedis!.setex).toHaveBeenCalledTimes(1);
+    const [_redisKey, ttlSeconds] = currentRedis!.setex.mock.calls[0];
+    expect(ttlSeconds).toBe(30);
+  });
+
+  test("API_KEY_AUTH_CACHE_TTL_SECONDS:应 clamp 到最大 3600s", async () => {
+    setEnv({ API_KEY_AUTH_CACHE_TTL_SECONDS: "999999" });
+
+    const { cacheActiveKey } = await import("@/lib/security/api-key-auth-cache");
+    const key = buildKey({ key: "sk-ttl-max" });
+
+    await cacheActiveKey(key);
+
+    expect(currentRedis!.setex).toHaveBeenCalledTimes(1);
+    const [_redisKey, ttlSeconds] = currentRedis!.setex.mock.calls[0];
+    expect(ttlSeconds).toBe(3600);
+  });
+
+  test("invalidateCachedKey:应删除对应的 hashed Redis key", async () => {
+    const { invalidateCachedKey } = await import("@/lib/security/api-key-auth-cache");
+    const keyString = "sk-invalidate";
+
+    await invalidateCachedKey(keyString);
+
+    const expectedRedisKey = `api_key_auth:v1:key:${sha256HexNode(keyString)}`;
+    expect(currentRedis!.del).toHaveBeenCalledWith(expectedRedisKey);
+  });
+
+  test("cacheAuthResult:应使用 pipeline 写入 key cache(并遵守活跃条件)", async () => {
+    vi.useFakeTimers();
+    vi.setSystemTime(new Date("2026-01-01T00:00:00.000Z"));
+
+    const { cacheAuthResult } = await import("@/lib/security/api-key-auth-cache");
+
+    await cacheAuthResult("sk-auth", {
+      key: buildKey({ key: "sk-auth" }),
+      user: buildUser({ id: 10 }),
+    });
+
+    expect(currentRedis!.pipeline).toHaveBeenCalledTimes(1);
+    const pipeline = currentRedis!.pipelines[0];
+    expect(pipeline.exec).toHaveBeenCalledTimes(1);
+    const keyRedisKey = `api_key_auth:v1:key:${sha256HexNode("sk-auth")}`;
+    expect(pipeline.ops.some((op) => op.kind === "setex" && op.key === keyRedisKey)).toBe(true);
+  });
+
+  test("cacheAuthResult:key 非活跃时应 del key cache(避免脏读误放行)", async () => {
+    const { cacheAuthResult } = await import("@/lib/security/api-key-auth-cache");
+
+    await cacheAuthResult("sk-inactive", {
+      key: buildKey({ key: "sk-inactive", isEnabled: false }),
+      user: buildUser({ id: 10 }),
+    });
+
+    const keyRedisKey = `api_key_auth:v1:key:${sha256HexNode("sk-inactive")}`;
+    const pipeline = currentRedis!.pipelines[0];
+    expect(pipeline.ops.some((op) => op.kind === "del" && op.key === keyRedisKey)).toBe(true);
+  });
+
+  test("ENABLE_API_KEY_REDIS_CACHE=false:应完全禁用缓存(不触发 Redis 调用)", async () => {
+    setEnv({ ENABLE_API_KEY_REDIS_CACHE: "false" });
+    const { cacheActiveKey } = await import("@/lib/security/api-key-auth-cache");
+
+    await cacheActiveKey(buildKey({ key: "sk-disabled-by-env" }));
+
+    expect(getRedisClient).not.toHaveBeenCalled();
+    expect(currentRedis!.setex).not.toHaveBeenCalled();
+    expect(currentRedis!.del).not.toHaveBeenCalled();
+  });
+
+  test("ENABLE_API_KEY_REDIS_CACHE=0:应完全禁用缓存(不触发 Redis 调用)", async () => {
+    setEnv({ ENABLE_API_KEY_REDIS_CACHE: "0" });
+    const { cacheActiveKey } = await import("@/lib/security/api-key-auth-cache");
+
+    await cacheActiveKey(buildKey({ key: "sk-disabled-by-env-0" }));
+
+    expect(getRedisClient).not.toHaveBeenCalled();
+    expect(currentRedis!.setex).not.toHaveBeenCalled();
+    expect(currentRedis!.del).not.toHaveBeenCalled();
+  });
+
+  test("NEXT_RUNTIME=edge:应禁用缓存(避免在 Edge runtime 引入 Node Redis 依赖)", async () => {
+    setEnv({ NEXT_RUNTIME: "edge" });
+    const { getCachedActiveKey } = await import("@/lib/security/api-key-auth-cache");
+
+    await expect(getCachedActiveKey("sk-edge")).resolves.toBeNull();
+    expect(getRedisClient).not.toHaveBeenCalled();
+  });
+
+  test("ENABLE_RATE_LIMIT!=true 或缺少 REDIS_URL:应自动回落(不触发 Redis 调用)", async () => {
+    setEnv({ ENABLE_RATE_LIMIT: "false" });
+    const { cacheActiveKey } = await import("@/lib/security/api-key-auth-cache");
+    await cacheActiveKey(buildKey({ key: "sk-fallback-1" }));
+    expect(getRedisClient).not.toHaveBeenCalled();
+
+    vi.resetModules();
+    vi.clearAllMocks();
+    currentRedis = new FakeRedis();
+    setEnv({ ENABLE_RATE_LIMIT: "true", REDIS_URL: undefined });
+    const { cacheActiveKey: cacheActiveKey2 } = await import("@/lib/security/api-key-auth-cache");
+    await cacheActiveKey2(buildKey({ key: "sk-fallback-2" }));
+    expect(getRedisClient).not.toHaveBeenCalled();
+  });
+
+  test("ENABLE_RATE_LIMIT=1:应允许使用 Redis 缓存(兼容 1/0 写法)", async () => {
+    setEnv({ ENABLE_RATE_LIMIT: "1" });
+    const { cacheActiveKey } = await import("@/lib/security/api-key-auth-cache");
+
+    await cacheActiveKey(buildKey({ key: "sk-rate-limit-1" }));
+
+    expect(getRedisClient).toHaveBeenCalled();
+    expect(currentRedis!.setex).toHaveBeenCalledTimes(1);
+  });
+
+  test("crypto.subtle 缺失:sha256Hex 返回 null,应自动回落(不触发 Redis 调用)", async () => {
+    vi.unstubAllGlobals();
+    vi.stubGlobal("crypto", {} as unknown as Crypto);
+
+    const { cacheActiveKey } = await import("@/lib/security/api-key-auth-cache");
+    await cacheActiveKey(buildKey({ key: "sk-no-crypto" }));
+
+    expect(currentRedis!.setex).not.toHaveBeenCalled();
+    expect(currentRedis!.del).not.toHaveBeenCalled();
+  });
+
+  test("Redis 异常:get/setex 抛错时应 fail-open(不影响鉴权正确性)", async () => {
+    const { cacheActiveKey, getCachedActiveKey } = await import(
+      "@/lib/security/api-key-auth-cache"
+    );
+    currentRedis!.setex.mockRejectedValueOnce(new Error("REDIS_DOWN"));
+    await expect(cacheActiveKey(buildKey({ key: "sk-redis-down" }))).resolves.toBeUndefined();
+
+    currentRedis!.get.mockRejectedValueOnce(new Error("REDIS_DOWN"));
+    await expect(getCachedActiveKey("sk-redis-down")).resolves.toBeNull();
+  });
+});

+ 400 - 0
tests/unit/security/api-key-auth-cache.test.ts

@@ -0,0 +1,400 @@
+import { beforeEach, describe, expect, test, vi } from "vitest";
+import type { Key } from "@/types/key";
+import type { User } from "@/types/user";
+
+// Spies exposed through the mocked vacuum-filter module below.
+const isDefinitelyNotPresent = vi.fn(() => false);
+const noteExistingKey = vi.fn();
+
+// Spies exposed through the mocked auth-cache and pub/sub modules below.
+// The two getters are typed so each test can queue a Key/User (or null) result.
+const cacheActiveKey = vi.fn(async () => {});
+const cacheAuthResult = vi.fn(async () => {});
+const cacheUser = vi.fn(async () => {});
+const getCachedActiveKey = vi.fn<(keyString: string) => Promise<Key | null>>();
+const getCachedUser = vi.fn<(userId: number) => Promise<User | null>>();
+const invalidateCachedKey = vi.fn(async () => {});
+const publishCacheInvalidation = vi.fn(async () => {});
+
+// Spies backing the mocked `db` object; beforeEach makes them throw by default.
+const dbSelect = vi.fn();
+const dbInsert = vi.fn();
+const dbUpdate = vi.fn();
+
+// Replace the vacuum-filter singleton with an object built from the spies above.
+vi.mock("@/lib/security/api-key-vacuum-filter", () => ({
+  apiKeyVacuumFilter: {
+    isDefinitelyNotPresent,
+    noteExistingKey,
+    startBackgroundReload: vi.fn(),
+    getStats: vi.fn(),
+  },
+}));
+
+// Replace the auth-cache module so tests decide what counts as a cache hit.
+vi.mock("@/lib/security/api-key-auth-cache", () => ({
+  cacheActiveKey,
+  cacheAuthResult,
+  cacheUser,
+  getCachedActiveKey,
+  getCachedUser,
+  invalidateCachedKey,
+}));
+
+// Replace the pub/sub module: channel names are fixed strings and publishing is a spy.
+vi.mock("@/lib/redis/pubsub", () => ({
+  CHANNEL_ERROR_RULES_UPDATED: "cch:cache:error_rules:updated",
+  CHANNEL_REQUEST_FILTERS_UPDATED: "cch:cache:request_filters:updated",
+  CHANNEL_SENSITIVE_WORDS_UPDATED: "cch:cache:sensitive_words:updated",
+  CHANNEL_API_KEYS_UPDATED: "cch:cache:api_keys:updated",
+  publishCacheInvalidation,
+  subscribeCacheInvalidation: vi.fn(async () => null),
+}));
+
+// Replace the database handle with the three spies declared above.
+vi.mock("@/drizzle/db", () => ({
+  db: {
+    select: dbSelect,
+    insert: dbInsert,
+    update: dbUpdate,
+  },
+}));
+
+// Reset every spy to its baseline: filter says "maybe present", both caches miss,
+// and any database access throws so unexpected DB use fails the test.
+beforeEach(() => {
+  vi.clearAllMocks();
+
+  isDefinitelyNotPresent.mockReturnValue(false);
+  getCachedActiveKey.mockResolvedValue(null);
+  getCachedUser.mockResolvedValue(null);
+
+  for (const dbEntryPoint of [dbSelect, dbInsert, dbUpdate]) {
+    dbEntryPoint.mockImplementation(() => {
+      throw new Error("DB_ACCESS");
+    });
+  }
+});
+
+/**
+ * Creates a Key fixture with fixed default values.
+ *
+ * @param overrides - Fields that replace the defaults; may be omitted.
+ * @returns A new Key object on every call.
+ */
+function buildKey(overrides?: Partial<Key>): Key {
+  const defaults: Key = {
+    id: 1,
+    userId: 10,
+    name: "k1",
+    key: "sk-test",
+    isEnabled: true,
+    expiresAt: undefined,
+    canLoginWebUi: true,
+    limit5hUsd: null,
+    limitDailyUsd: null,
+    dailyResetMode: "fixed",
+    dailyResetTime: "00:00",
+    limitWeeklyUsd: null,
+    limitMonthlyUsd: null,
+    limitTotalUsd: null,
+    limitConcurrentSessions: 0,
+    providerGroup: null,
+    cacheTtlPreference: null,
+    createdAt: new Date("2026-01-01T00:00:00.000Z"),
+    updatedAt: new Date("2026-01-02T00:00:00.000Z"),
+    deletedAt: undefined,
+  };
+
+  return { ...defaults, ...overrides };
+}
+
+/**
+ * Creates a User fixture with fixed default values.
+ *
+ * @param overrides - Fields that replace the defaults; may be omitted.
+ * @returns A new User object on every call.
+ */
+function buildUser(overrides?: Partial<User>): User {
+  const defaults: User = {
+    id: 10,
+    name: "u1",
+    description: "",
+    role: "user",
+    rpm: null,
+    dailyQuota: null,
+    providerGroup: null,
+    tags: [],
+    createdAt: new Date("2026-01-01T00:00:00.000Z"),
+    updatedAt: new Date("2026-01-02T00:00:00.000Z"),
+    deletedAt: undefined,
+    limit5hUsd: undefined,
+    limitWeeklyUsd: undefined,
+    limitMonthlyUsd: undefined,
+    limitTotalUsd: null,
+    limitConcurrentSessions: undefined,
+    dailyResetMode: "fixed",
+    dailyResetTime: "00:00",
+    isEnabled: true,
+    expiresAt: null,
+    allowedClients: [],
+    allowedModels: [],
+  };
+
+  return { ...defaults, ...overrides };
+}
+
+describe("API Key 鉴权缓存:VacuumFilter -> Redis -> DB", () => {
+  // A cache hit overrides the filter's "not present" verdict and is reported back via noteExistingKey.
+  test("findActiveKeyByKeyString:Vacuum Filter 误判缺失时,Redis 命中应纠正(避免误拒绝)", async () => {
+    const keyString = "sk-cached-missing";
+    const keyFromCache = buildKey({ key: keyString });
+    getCachedActiveKey.mockResolvedValueOnce(keyFromCache);
+    isDefinitelyNotPresent.mockReturnValueOnce(true);
+
+    const { findActiveKeyByKeyString: lookUpKey } = await import("@/repository/key");
+    const found = lookUpKey(keyString);
+
+    await expect(found).resolves.toEqual(keyFromCache);
+    expect(noteExistingKey).toHaveBeenCalledWith(keyString);
+    expect(dbSelect).not.toHaveBeenCalled();
+  });
+
+  // Same correction as above, but for the key+user path: both come from the cache, no DB access.
+  test("validateApiKeyAndGetUser:Vacuum Filter 误判缺失时,Redis key+user 命中应纠正(避免误拒绝)", async () => {
+    const keyString = "sk-cached-missing";
+    const keyFromCache = buildKey({ key: keyString, userId: 10 });
+    const userFromCache = buildUser({ id: 10 });
+    isDefinitelyNotPresent.mockReturnValueOnce(true);
+    getCachedActiveKey.mockResolvedValueOnce(keyFromCache);
+    getCachedUser.mockResolvedValueOnce(userFromCache);
+
+    const { validateApiKeyAndGetUser: validate } = await import("@/repository/key");
+    const expectedAuth = { user: userFromCache, key: keyFromCache };
+
+    await expect(validate(keyString)).resolves.toEqual(expectedAuth);
+    expect(noteExistingKey).toHaveBeenCalledWith(keyString);
+    expect(dbSelect).not.toHaveBeenCalled();
+  });
+
+  // A cached key is returned as-is and the database is never queried.
+  test("findActiveKeyByKeyString:Redis 命中时应避免打 DB", async () => {
+    const keyString = "sk-cached";
+    const keyFromCache = buildKey({ key: keyString });
+    getCachedActiveKey.mockResolvedValueOnce(keyFromCache);
+    const failOnDbAccess = () => {
+      throw new Error("DB_ACCESS");
+    };
+    dbSelect.mockImplementation(failOnDbAccess);
+
+    const { findActiveKeyByKeyString: lookUpKey } = await import("@/repository/key");
+
+    await expect(lookUpKey(keyString)).resolves.toEqual(keyFromCache);
+    expect(getCachedActiveKey).toHaveBeenCalledWith(keyString);
+    expect(dbSelect).not.toHaveBeenCalled();
+  });
+
+  // Filter says "not present" and the cache misses: the lookup ends with null without touching the DB.
+  test("findActiveKeyByKeyString:VF 判定不存在且 Redis 未命中时应短路返回 null", async () => {
+    getCachedActiveKey.mockResolvedValueOnce(null);
+    isDefinitelyNotPresent.mockReturnValueOnce(true);
+
+    const { findActiveKeyByKeyString: lookUpKey } = await import("@/repository/key");
+    const found = lookUpKey("sk-nonexistent");
+
+    await expect(found).resolves.toBeNull();
+    expect(dbSelect).not.toHaveBeenCalled();
+  });
+
+  // Key and user both come from the cache; the user is looked up by the key's userId.
+  test("validateApiKeyAndGetUser:key+user Redis 命中时应避免打 DB", async () => {
+    const keyString = "sk-cached";
+    const keyFromCache = buildKey({ key: keyString, userId: 10 });
+    const userFromCache = buildUser({ id: 10 });
+    getCachedActiveKey.mockResolvedValueOnce(keyFromCache);
+    getCachedUser.mockResolvedValueOnce(userFromCache);
+    const failOnDbAccess = () => {
+      throw new Error("DB_ACCESS");
+    };
+    dbSelect.mockImplementation(failOnDbAccess);
+
+    const { validateApiKeyAndGetUser: validate } = await import("@/repository/key");
+    const expectedAuth = { user: userFromCache, key: keyFromCache };
+
+    await expect(validate(keyString)).resolves.toEqual(expectedAuth);
+    expect(getCachedActiveKey).toHaveBeenCalledWith(keyString);
+    expect(getCachedUser).toHaveBeenCalledWith(10);
+    expect(dbSelect).not.toHaveBeenCalled();
+  });
+
+  // Key is cached but the user is not: one user query runs and its result is cached via cacheUser.
+  test("validateApiKeyAndGetUser:key Redis 命中 + user miss 时应只查 user 并写回缓存", async () => {
+    const keyFromCache = buildKey({ key: "sk-cached", userId: 10 });
+    getCachedActiveKey.mockResolvedValueOnce(keyFromCache);
+    getCachedUser.mockResolvedValueOnce(null);
+
+    // Row returned by the stubbed `select().from().where()` chain.
+    const createdAt = new Date("2026-01-01T00:00:00.000Z");
+    const updatedAt = new Date("2026-01-02T00:00:00.000Z");
+    const dbUserRow = {
+      id: 10,
+      name: "u1",
+      description: "",
+      role: "user",
+      rpm: null,
+      dailyQuota: null,
+      providerGroup: null,
+      tags: [],
+      createdAt,
+      updatedAt,
+      deletedAt: null,
+      limit5hUsd: null,
+      limitWeeklyUsd: null,
+      limitMonthlyUsd: null,
+      limitTotalUsd: null,
+      limitConcurrentSessions: null,
+      dailyResetMode: "fixed",
+      dailyResetTime: "00:00",
+      isEnabled: true,
+      expiresAt: null,
+      allowedClients: [],
+      allowedModels: [],
+    };
+
+    const whereStep = { where: async () => [dbUserRow] };
+    const fromStep = { from: () => whereStep };
+    dbSelect.mockReturnValueOnce(fromStep);
+
+    const { validateApiKeyAndGetUser: validate } = await import("@/repository/key");
+    const auth = await validate("sk-cached");
+
+    expect(auth?.key).toEqual(keyFromCache);
+    expect(auth?.user.id).toBe(10);
+    expect(cacheUser).toHaveBeenCalledTimes(1);
+    expect(cacheAuthResult).not.toHaveBeenCalled();
+  });
+
+  // Cache miss: the joined key+user row from the DB is used and cacheAuthResult is called once.
+  test("validateApiKeyAndGetUser:缓存未命中时应走 DB join 并写入 auth 缓存", async () => {
+    getCachedActiveKey.mockResolvedValueOnce(null);
+
+    // Row returned by the stubbed `select().from().innerJoin().where()` chain.
+    const createdAt = new Date("2026-01-01T00:00:00.000Z");
+    const updatedAt = new Date("2026-01-02T00:00:00.000Z");
+    const joinedRow = {
+      keyId: 1,
+      keyUserId: 10,
+      keyString: "sk-db",
+      keyName: "k1",
+      keyIsEnabled: true,
+      keyExpiresAt: null,
+      keyCanLoginWebUi: true,
+      keyLimit5hUsd: null,
+      keyLimitDailyUsd: null,
+      keyDailyResetMode: "fixed",
+      keyDailyResetTime: "00:00",
+      keyLimitWeeklyUsd: null,
+      keyLimitMonthlyUsd: null,
+      keyLimitTotalUsd: null,
+      keyLimitConcurrentSessions: 0,
+      keyProviderGroup: null,
+      keyCacheTtlPreference: null,
+      keyCreatedAt: createdAt,
+      keyUpdatedAt: updatedAt,
+      keyDeletedAt: null,
+      userId: 10,
+      userName: "u1",
+      userDescription: "",
+      userRole: "user",
+      userRpm: null,
+      userDailyQuota: null,
+      userProviderGroup: null,
+      userLimit5hUsd: null,
+      userLimitWeeklyUsd: null,
+      userLimitMonthlyUsd: null,
+      userLimitTotalUsd: null,
+      userLimitConcurrentSessions: null,
+      userDailyResetMode: "fixed",
+      userDailyResetTime: "00:00",
+      userIsEnabled: true,
+      userExpiresAt: null,
+      userAllowedClients: [],
+      userAllowedModels: [],
+      userCreatedAt: new Date("2026-01-01T00:00:00.000Z"),
+      userUpdatedAt: new Date("2026-01-02T00:00:00.000Z"),
+      userDeletedAt: null,
+    };
+
+    const whereStep = { where: async () => [joinedRow] };
+    const joinStep = { innerJoin: () => whereStep };
+    dbSelect.mockReturnValueOnce({ from: () => joinStep });
+
+    const { validateApiKeyAndGetUser: validate } = await import("@/repository/key");
+    const auth = await validate("sk-db");
+
+    expect(auth?.key.key).toBe("sk-db");
+    expect(auth?.user.id).toBe(10);
+    expect(cacheAuthResult).toHaveBeenCalledTimes(1);
+  });
+});
+
+describe("API Key 鉴权缓存:写入/失效点覆盖", () => {
+  // Creating a key must publish on the API-keys channel. The test forces
+  // ENABLE_RATE_LIMIT/REDIS_URL for its duration and restores them afterwards.
+  test("createKey:应广播 API key 集合变更(多实例触发 Vacuum Filter 重建)", async () => {
+    const prevEnableRateLimit = process.env.ENABLE_RATE_LIMIT;
+    const prevRedisUrl = process.env.REDIS_URL;
+    process.env.ENABLE_RATE_LIMIT = "true";
+    process.env.REDIS_URL = "redis://localhost:6379";
+
+    const now = new Date("2026-01-02T00:00:00.000Z");
+    // Row returned by the stubbed `insert().values().returning()` chain.
+    const keyRow = {
+      id: 1,
+      userId: 10,
+      key: "sk-created",
+      name: "k1",
+      isEnabled: true,
+      expiresAt: null,
+      canLoginWebUi: true,
+      limit5hUsd: null,
+      limitDailyUsd: null,
+      dailyResetMode: "fixed",
+      dailyResetTime: "00:00",
+      limitWeeklyUsd: null,
+      limitMonthlyUsd: null,
+      limitTotalUsd: null,
+      limitConcurrentSessions: 0,
+      providerGroup: null,
+      cacheTtlPreference: null,
+      createdAt: now,
+      updatedAt: now,
+      deletedAt: null,
+    };
+
+    dbInsert.mockReturnValueOnce({
+      values: () => ({
+        returning: async () => [keyRow],
+      }),
+    });
+
+    try {
+      const { createKey } = await import("@/repository/key");
+      const created = await createKey({ user_id: 10, name: "k1", key: "sk-created" });
+      expect(created.key).toBe("sk-created");
+      expect(publishCacheInvalidation).toHaveBeenCalledWith("cch:cache:api_keys:updated");
+    } finally {
+      // process.env coerces assigned values to strings, so writing back `undefined`
+      // would leave the literal "undefined" behind; delete the variable instead
+      // when it was not set before the test.
+      if (prevEnableRateLimit === undefined) {
+        delete process.env.ENABLE_RATE_LIMIT;
+      } else {
+        process.env.ENABLE_RATE_LIMIT = prevEnableRateLimit;
+      }
+      if (prevRedisUrl === undefined) {
+        delete process.env.REDIS_URL;
+      } else {
+        process.env.REDIS_URL = prevRedisUrl;
+      }
+    }
+  });
+
+  // A successful update returns the stored row's key and calls cacheActiveKey once.
+  test("updateKey:应触发 cacheActiveKey", async () => {
+    // Row returned by the stubbed `update().set().where().returning()` chain.
+    const updatedRow = {
+      id: 1,
+      userId: 10,
+      key: "sk-update",
+      name: "k1",
+      isEnabled: true,
+      expiresAt: null,
+      canLoginWebUi: true,
+      limit5hUsd: null,
+      limitDailyUsd: null,
+      dailyResetMode: "fixed",
+      dailyResetTime: "00:00",
+      limitWeeklyUsd: null,
+      limitMonthlyUsd: null,
+      limitTotalUsd: null,
+      limitConcurrentSessions: 0,
+      providerGroup: null,
+      cacheTtlPreference: null,
+      createdAt: new Date("2026-01-01T00:00:00.000Z"),
+      updatedAt: new Date("2026-01-02T00:00:00.000Z"),
+      deletedAt: null,
+    };
+
+    const returningStep = { returning: async () => [updatedRow] };
+    const whereStep = { where: () => returningStep };
+    dbUpdate.mockReturnValueOnce({ set: () => whereStep });
+
+    const { updateKey } = await import("@/repository/key");
+    const result = await updateKey(1, { name: "k2" });
+
+    expect(result?.key).toBe("sk-update");
+    expect(cacheActiveKey).toHaveBeenCalledTimes(1);
+  });
+
+  // A successful delete resolves to true and invalidates the cache entry for the deleted key string.
+  test("deleteKey:删除成功时应触发 invalidateCachedKey", async () => {
+    const deletedRow = { id: 1, key: "sk-deleted" };
+    const returningStep = { returning: async () => [deletedRow] };
+    const whereStep = { where: () => returningStep };
+    dbUpdate.mockReturnValueOnce({ set: () => whereStep });
+
+    const { deleteKey } = await import("@/repository/key");
+    const outcome = deleteKey(1);
+
+    await expect(outcome).resolves.toBe(true);
+    expect(invalidateCachedKey).toHaveBeenCalledWith("sk-deleted");
+  });
+});

+ 85 - 0
tests/unit/security/api-key-vacuum-filter-build.test.ts

@@ -0,0 +1,85 @@
+import { describe, expect, test, vi } from "vitest";
+
+describe("buildVacuumFilterFromKeyStrings", () => {
+  // Duplicates and empty strings are dropped: two distinct keys remain and both are found.
+  test("应去重并忽略空字符串,且覆盖所有 key", async () => {
+    const filterModule = await import("@/lib/security/api-key-vacuum-filter");
+    const seed = Buffer.from("unit-test-seed");
+
+    const filter = filterModule.buildVacuumFilterFromKeyStrings({
+      keyStrings: ["k1", "k2", "k1", ""],
+      fingerprintBits: 32,
+      maxKickSteps: 500,
+      seed,
+    });
+
+    expect(filter.size()).toBe(2);
+    expect(filter.has("k1")).toBe(true);
+    expect(filter.has("k2")).toBe(true);
+  });
+
+  // No input keys yields a filter of size 0.
+  test("空数组输入:应返回空 filter", async () => {
+    const filterModule = await import("@/lib/security/api-key-vacuum-filter");
+    const seed = Buffer.from("unit-test-seed");
+
+    const filter = filterModule.buildVacuumFilterFromKeyStrings({
+      keyStrings: [],
+      fingerprintBits: 32,
+      maxKickSteps: 500,
+      seed,
+    });
+
+    expect(filter.size()).toBe(0);
+  });
+
+  // Input consisting only of empty strings also yields a filter of size 0.
+  test("全空字符串:应返回空 filter", async () => {
+    const filterModule = await import("@/lib/security/api-key-vacuum-filter");
+    const seed = Buffer.from("unit-test-seed");
+
+    const filter = filterModule.buildVacuumFilterFromKeyStrings({
+      keyStrings: ["", "", ""],
+      fingerprintBits: 32,
+      maxKickSteps: 500,
+      seed,
+    });
+
+    expect(filter.size()).toBe(0);
+  });
+
+  // Replaces the VacuumFilter class with a stub whose add() fails below a capacity
+  // threshold, then checks the sequence of capacities the builder tried.
+  test("构建失败时应扩容重试", async () => {
+    vi.resetModules();
+    const maxItemsSeen: number[] = [];
+
+    vi.doMock("@/lib/vacuum-filter/vacuum-filter", () => {
+      class VacuumFilter {
+        private readonly maxItems: number;
+
+        constructor(options: { maxItems: number }) {
+          this.maxItems = options.maxItems;
+          maxItemsSeen.push(options.maxItems);
+        }
+
+        add(_keyString: string): boolean {
+          // Force the first attempt to fail (maxItems=128) and the second to
+          // succeed (maxItems=ceil(128*1.6)=205).
+          return this.maxItems >= 205;
+        }
+      }
+
+      return { VacuumFilter };
+    });
+
+    try {
+      const { buildVacuumFilterFromKeyStrings } = await import(
+        "@/lib/security/api-key-vacuum-filter"
+      );
+      buildVacuumFilterFromKeyStrings({
+        keyStrings: ["k1"],
+        fingerprintBits: 32,
+        maxKickSteps: 500,
+        seed: Buffer.from("unit-test-seed"),
+      });
+
+      expect(maxItemsSeen).toEqual([128, 205]);
+    } finally {
+      // Always remove the stub, even when the import or the assertion throws,
+      // so the mocked module cannot leak into later imports.
+      vi.doUnmock("@/lib/vacuum-filter/vacuum-filter");
+      vi.resetModules();
+    }
+  });
+});

+ 83 - 0
tests/unit/security/api-key-vacuum-filter-reloading.test.ts

@@ -0,0 +1,83 @@
+import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
+
+/**
+ * Applies a set of environment variable changes to `process.env`.
+ *
+ * @param values - Map of variable name to new value; `undefined` removes the variable.
+ */
+function setEnv(values: Record<string, string | undefined>): void {
+  Object.keys(values).forEach((name) => {
+    const next = values[name];
+    if (next !== undefined) {
+      process.env[name] = next;
+      return;
+    }
+    // eslint-disable-next-line @typescript-eslint/no-dynamic-delete
+    delete process.env[name];
+  });
+}
+
+describe("ApiKeyVacuumFilter:重建窗口安全性", () => {
+  // Snapshot of the env vars this suite overrides; written in beforeEach, restored in afterEach.
+  const originalEnv: Record<string, string | undefined> = {};
+
+  // Start every test from a fresh module registry with no filter stored on
+  // globalThis, the Node runtime selected and the filter enabled.
+  beforeEach(() => {
+    vi.resetModules();
+
+    const globalWithFilter = globalThis as unknown as { __CCH_API_KEY_VACUUM_FILTER__?: unknown };
+    delete globalWithFilter.__CCH_API_KEY_VACUUM_FILTER__;
+
+    originalEnv.NEXT_RUNTIME = process.env.NEXT_RUNTIME;
+    originalEnv.ENABLE_API_KEY_VACUUM_FILTER = process.env.ENABLE_API_KEY_VACUUM_FILTER;
+
+    setEnv({
+      NEXT_RUNTIME: "nodejs",
+      ENABLE_API_KEY_VACUUM_FILTER: "true",
+    });
+  });
+
+  // Undo everything a test may have changed: env vars, stubbed globals, fake
+  // timers and the filter stored on globalThis.
+  afterEach(() => {
+    setEnv(originalEnv);
+    vi.unstubAllGlobals();
+    vi.useRealTimers();
+
+    const globalWithFilter = globalThis as unknown as { __CCH_API_KEY_VACUUM_FILTER__?: unknown };
+    delete globalWithFilter.__CCH_API_KEY_VACUUM_FILTER__;
+  });
+
+  // While a reload is pending (loadingPromise set), the filter must answer null
+  // for both stored and unknown keys instead of a definite verdict.
+  test("loadingPromise 存在时应返回 null(不短路)", async () => {
+    const [{ apiKeyVacuumFilter }, { VacuumFilter }] = await Promise.all([
+      import("@/lib/security/api-key-vacuum-filter"),
+      import("@/lib/vacuum-filter/vacuum-filter"),
+    ]);
+
+    // `VacuumFilter` is a runtime binding obtained from the dynamic import, so it
+    // cannot be named directly in a type position; derive the instance type from it.
+    type VacuumFilterInstance = InstanceType<typeof VacuumFilter>;
+    type FilterInternals = {
+      vf: VacuumFilterInstance;
+      loadingPromise: Promise<void> | null;
+    };
+
+    const vf = new VacuumFilter({
+      maxItems: 16,
+      fingerprintBits: 32,
+      maxKickSteps: 100,
+      seed: "unit-test-reloading",
+    });
+    expect(vf.add("k1")).toBe(true);
+
+    // Inject a populated filter plus a never-settling promise to simulate an in-flight reload.
+    const internals = apiKeyVacuumFilter as unknown as FilterInternals;
+    internals.vf = vf;
+    internals.loadingPromise = new Promise<void>(() => {});
+
+    expect(apiKeyVacuumFilter.isDefinitelyNotPresent("k1")).toBeNull();
+    expect(apiKeyVacuumFilter.isDefinitelyNotPresent("missing")).toBeNull();
+  });
+
+  // The "0" spelling disables the filter: stats report disabled and lookups answer null.
+  test("ENABLE_API_KEY_VACUUM_FILTER=0:应禁用过滤器(不短路)", async () => {
+    setEnv({ ENABLE_API_KEY_VACUUM_FILTER: "0" });
+    const filterModule = await import("@/lib/security/api-key-vacuum-filter");
+    const filter = filterModule.apiKeyVacuumFilter;
+
+    expect(filter.getStats().enabled).toBe(false);
+    expect(filter.isDefinitelyNotPresent("missing")).toBeNull();
+  });
+
+  // With the variable unset, a freshly imported filter reports itself as enabled.
+  test("未设置 ENABLE_API_KEY_VACUUM_FILTER:应默认启用(仅负向短路)", async () => {
+    vi.resetModules();
+    const globalWithFilter = globalThis as unknown as { __CCH_API_KEY_VACUUM_FILTER__?: unknown };
+    delete globalWithFilter.__CCH_API_KEY_VACUUM_FILTER__;
+
+    setEnv({ NEXT_RUNTIME: "nodejs", ENABLE_API_KEY_VACUUM_FILTER: undefined });
+    const filterModule = await import("@/lib/security/api-key-vacuum-filter");
+    const stats = filterModule.apiKeyVacuumFilter.getStats();
+
+    expect(stats.enabled).toBe(true);
+  });
+});

+ 41 - 0
tests/unit/security/api-key-vacuum-filter-shortcircuit.test.ts

@@ -0,0 +1,41 @@
+import { describe, expect, test, vi } from "vitest";
+
+// Filter spy that always answers "definitely not present".
+const isDefinitelyNotPresent = vi.fn(() => true);
+
+// Replace the vacuum-filter singleton with an object built around that spy.
+vi.mock("@/lib/security/api-key-vacuum-filter", () => ({
+  apiKeyVacuumFilter: {
+    isDefinitelyNotPresent,
+    noteExistingKey: vi.fn(),
+    startBackgroundReload: vi.fn(),
+    getStats: vi.fn(),
+  },
+}));
+
+// If the Vacuum Filter fails to short-circuit, these DB calls fire and make the test fail.
+vi.mock("@/drizzle/db", () => ({
+  db: {
+    select: vi.fn(() => {
+      throw new Error("DB_ACCESS");
+    }),
+    insert: vi.fn(() => {
+      throw new Error("DB_ACCESS");
+    }),
+    update: vi.fn(() => {
+      throw new Error("DB_ACCESS");
+    }),
+  },
+}));
+
+describe("API Key Vacuum Filter:负向短路(避免打 DB)", () => {
+  // The filter is consulted with the raw key and its negative verdict ends validation with null.
+  test("validateApiKeyAndGetUser:definitely not present 时应直接返回 null", async () => {
+    const candidate = "invalid_key";
+    const keyRepository = await import("@/repository/key");
+
+    const auth = keyRepository.validateApiKeyAndGetUser(candidate);
+
+    await expect(auth).resolves.toBeNull();
+    expect(isDefinitelyNotPresent).toHaveBeenCalledWith(candidate);
+  });
+
+  // The filter is consulted with the raw key and its negative verdict ends the lookup with null.
+  test("findActiveKeyByKeyString:definitely not present 时应直接返回 null", async () => {
+    const candidate = "invalid_key";
+    const keyRepository = await import("@/repository/key");
+
+    const found = keyRepository.findActiveKeyByKeyString(candidate);
+
+    await expect(found).resolves.toBeNull();
+    expect(isDefinitelyNotPresent).toHaveBeenCalledWith(candidate);
+  });
+});

+ 161 - 0
tests/unit/security/auth-validateKey-cache.test.ts

@@ -0,0 +1,161 @@
+import { beforeEach, describe, expect, test, vi } from "vitest";
+import type { Key } from "@/types/key";
+import type { User } from "@/types/user";
+
+// Spies exposed through the mocked vacuum-filter module below.
+const isDefinitelyNotPresent = vi.fn(() => false);
+const noteExistingKey = vi.fn();
+
+// Cache getters whose results each test queues explicitly.
+const getCachedActiveKey = vi.fn();
+const getCachedUser = vi.fn();
+
+// If the cache path misses, these DB calls fire and make the test fail.
+vi.mock("@/drizzle/db", () => ({
+  db: {
+    select: vi.fn(() => {
+      throw new Error("DB_ACCESS");
+    }),
+    insert: vi.fn(() => {
+      throw new Error("DB_ACCESS");
+    }),
+    update: vi.fn(() => {
+      throw new Error("DB_ACCESS");
+    }),
+  },
+}));
+
+// Replace the vacuum-filter singleton with an object built from the spies above.
+vi.mock("@/lib/security/api-key-vacuum-filter", () => ({
+  apiKeyVacuumFilter: {
+    isDefinitelyNotPresent,
+    noteExistingKey,
+    startBackgroundReload: vi.fn(),
+    invalidateAndReload: vi.fn(),
+    getStats: vi.fn(),
+  },
+}));
+
+// Replace the auth-cache module: the getters are the shared spies, the writers resolve to undefined.
+vi.mock("@/lib/security/api-key-auth-cache", () => ({
+  getCachedActiveKey,
+  getCachedUser,
+  cacheActiveKey: vi.fn(async () => {}),
+  cacheAuthResult: vi.fn(async () => {}),
+  cacheUser: vi.fn(async () => {}),
+  invalidateCachedKey: vi.fn(async () => {}),
+  invalidateCachedUser: vi.fn(async () => {}),
+}));
+
+/**
+ * Creates a Key fixture with fixed default values and a shared fixed timestamp
+ * for `createdAt`/`updatedAt`.
+ *
+ * @param overrides - Fields that replace the defaults; may be omitted.
+ * @returns A new Key object on every call.
+ */
+function buildKey(overrides?: Partial<Key>): Key {
+  const fixedNow = new Date("2026-02-08T00:00:00.000Z");
+  const defaults: Key = {
+    id: 1,
+    userId: 10,
+    name: "k1",
+    key: "sk-user-login",
+    isEnabled: true,
+    expiresAt: undefined,
+    canLoginWebUi: true,
+    limit5hUsd: null,
+    limitDailyUsd: null,
+    dailyResetMode: "fixed",
+    dailyResetTime: "00:00",
+    limitWeeklyUsd: null,
+    limitMonthlyUsd: null,
+    limitTotalUsd: null,
+    limitConcurrentSessions: 0,
+    providerGroup: null,
+    cacheTtlPreference: null,
+    createdAt: fixedNow,
+    updatedAt: fixedNow,
+    deletedAt: undefined,
+  };
+
+  return { ...defaults, ...overrides };
+}
+
+/**
+ * Creates a User fixture with fixed default values and a shared fixed timestamp
+ * for `createdAt`/`updatedAt`.
+ *
+ * @param overrides - Fields that replace the defaults; may be omitted.
+ * @returns A new User object on every call.
+ */
+function buildUser(overrides?: Partial<User>): User {
+  const fixedNow = new Date("2026-02-08T00:00:00.000Z");
+  const defaults: User = {
+    id: 10,
+    name: "u1",
+    description: "",
+    role: "user",
+    rpm: null,
+    dailyQuota: null,
+    providerGroup: null,
+    tags: [],
+    createdAt: fixedNow,
+    updatedAt: fixedNow,
+    deletedAt: undefined,
+    dailyResetMode: "fixed",
+    dailyResetTime: "00:00",
+    isEnabled: true,
+    expiresAt: null,
+    allowedClients: [],
+    allowedModels: [],
+  };
+
+  return { ...defaults, ...overrides };
+}
+
+describe("auth.ts:validateKey(Vacuum Filter -> Redis -> DB)", () => {
+  // Baseline for every test: call history cleared, both caches miss, filter says "maybe present".
+  beforeEach(() => {
+    vi.clearAllMocks();
+
+    getCachedUser.mockResolvedValue(null);
+    getCachedActiveKey.mockResolvedValue(null);
+    isDefinitelyNotPresent.mockReturnValue(false);
+  });
+
+  // Key and user both served from the cache: validateKey returns them as the session.
+  test("Redis key+user 命中时:validateKey 应不访问 DB 且返回 session(保护 login 侧热路径)", async () => {
+    const keyString = "sk-user-login";
+    const keyFromCache = buildKey({ key: keyString, canLoginWebUi: true, userId: 10 });
+    const userFromCache = buildUser({ id: 10 });
+    getCachedActiveKey.mockResolvedValueOnce(keyFromCache);
+    getCachedUser.mockResolvedValueOnce(userFromCache);
+
+    const authModule = await import("@/lib/auth");
+    const session = authModule.validateKey(keyString);
+
+    await expect(session).resolves.toEqual({ user: userFromCache, key: keyFromCache });
+    expect(isDefinitelyNotPresent).toHaveBeenCalledWith(keyString);
+  });
+
+  // A cached user with isEnabled=false must be rejected even though the key is cached.
+  test("用户禁用:缓存命中也应拒绝(保护登录/会话)", async () => {
+    getCachedActiveKey.mockResolvedValueOnce(
+      buildKey({ key: "sk-user-disabled", canLoginWebUi: true, userId: 10 })
+    );
+    getCachedUser.mockResolvedValueOnce(buildUser({ id: 10, isEnabled: false }));
+
+    const authModule = await import("@/lib/auth");
+    const session = authModule.validateKey("sk-user-disabled");
+
+    await expect(session).resolves.toBeNull();
+  });
+
+  // A cached user whose expiresAt lies in the past must be rejected.
+  test("用户过期:缓存命中也应拒绝(保护登录/会话)", async () => {
+    const longAgo = new Date("2000-01-01T00:00:00.000Z");
+    getCachedActiveKey.mockResolvedValueOnce(
+      buildKey({ key: "sk-user-expired", canLoginWebUi: true, userId: 10 })
+    );
+    getCachedUser.mockResolvedValueOnce(buildUser({ id: 10, expiresAt: longAgo }));
+
+    const authModule = await import("@/lib/auth");
+    const session = authModule.validateKey("sk-user-expired");
+
+    await expect(session).resolves.toBeNull();
+  });
+
+  // A key without web-UI login permission is rejected when read-only access is not allowed.
+  test("canLoginWebUi=false 且 allowReadOnlyAccess=false:缓存命中也应拒绝", async () => {
+    getCachedActiveKey.mockResolvedValueOnce(
+      buildKey({ key: "sk-no-webui", canLoginWebUi: false, userId: 10 })
+    );
+    getCachedUser.mockResolvedValueOnce(buildUser({ id: 10 }));
+
+    const authModule = await import("@/lib/auth");
+    const session = authModule.validateKey("sk-no-webui", { allowReadOnlyAccess: false });
+
+    await expect(session).resolves.toBeNull();
+  });
+
+  // The same kind of key is accepted once the caller opts into read-only access.
+  test("allowReadOnlyAccess=true:应允许 canLoginWebUi=false 的 key 登录只读页面", async () => {
+    const keyFromCache = buildKey({ key: "sk-readonly", canLoginWebUi: false, userId: 10 });
+    const userFromCache = buildUser({ id: 10 });
+    getCachedActiveKey.mockResolvedValueOnce(keyFromCache);
+    getCachedUser.mockResolvedValueOnce(userFromCache);
+
+    const authModule = await import("@/lib/auth");
+    const session = authModule.validateKey("sk-readonly", { allowReadOnlyAccess: true });
+
+    await expect(session).resolves.toEqual({ user: userFromCache, key: keyFromCache });
+  });
+});

+ 133 - 0
tests/unit/vacuum-filter/vacuum-filter.test.ts

@@ -0,0 +1,133 @@
+import { describe, expect, test } from "vitest";
+import { VacuumFilter } from "@/lib/vacuum-filter/vacuum-filter";
+
+describe("VacuumFilter", () => {
+  // Walks one key through absent -> added -> deleted -> deleted again.
+  test("add/has/delete 基本语义正确", () => {
+    const filter = new VacuumFilter({
+      maxItems: 1000,
+      fingerprintBits: 32,
+      maxKickSteps: 500,
+      seed: "unit-test-seed",
+    });
+    const key = "k1";
+
+    // Unknown until inserted.
+    expect(filter.has(key)).toBe(false);
+    expect(filter.add(key)).toBe(true);
+    expect(filter.has(key)).toBe(true);
+
+    // First removal succeeds and the key is gone afterwards.
+    expect(filter.delete(key)).toBe(true);
+    expect(filter.has(key)).toBe(false);
+
+    // Deleting a key that is no longer stored reports false.
+    expect(filter.delete(key)).toBe(false);
+  });
+
+  // Fills the filter to its declared capacity, then checks every key is found
+  // and that a small prefix can be deleted again.
+  test("高负载下插入与查询稳定(无假阴性)", () => {
+    const total = 20_000;
+    const filter = new VacuumFilter({
+      maxItems: total,
+      fingerprintBits: 32,
+      maxKickSteps: 1000,
+      seed: "unit-test-high-load",
+    });
+
+    let index = 0;
+    while (index < total) {
+      expect(filter.add(`key_${index}`)).toBe(true);
+      index++;
+    }
+
+    for (index = 0; index < total; index++) {
+      const found = filter.has(`key_${index}`);
+      expect(found).toBe(true);
+    }
+
+    // Delete a small subset (collisions are extremely unlikely; the 32-bit
+    // fingerprint keeps the test from being flaky).
+    for (index = 0; index < 200; index++) {
+      const key = `key_${index}`;
+      expect(filter.delete(key)).toBe(true);
+      expect(filter.has(key)).toBe(false);
+    }
+  });
+
+  // Overfills a tiny filter until add() fails, then checks that the failed
+  // insert left size and membership of the earlier keys intact.
+  test("插入失败必须回滚(不丢元素,不引入假阴性)", () => {
+    const filter = new VacuumFilter({
+      maxItems: 10,
+      fingerprintBits: 32,
+      maxKickSteps: 50,
+      seed: "unit-test-rollback-on-failure",
+    });
+
+    const accepted: string[] = [];
+    let sawFailure = false;
+
+    for (let i = 0; i < 5000 && !sawFailure; i++) {
+      const candidate = `key_${i}`;
+      if (filter.add(candidate)) {
+        accepted.push(candidate);
+      } else {
+        sawFailure = true;
+      }
+    }
+
+    expect(sawFailure).toBe(true);
+    expect(filter.size()).toBe(accepted.length);
+
+    // Every key accepted before the failure must still be found (no false negatives).
+    accepted.forEach((key) => {
+      expect(filter.has(key)).toBe(true);
+    });
+  });
+
+  // Constructs a filter with NaN for three numeric options and checks that a
+  // basic add/has round trip still works.
+  test("构造参数包含 NaN 时应使用默认值(不崩溃)", () => {
+    const vf = new VacuumFilter({
+      maxItems: 1000,
+      // @ts-expect-error: exercises a runtime edge case
+      fingerprintBits: Number.NaN,
+      // @ts-expect-error: exercises a runtime edge case
+      maxKickSteps: Number.NaN,
+      // @ts-expect-error: exercises a runtime edge case
+      targetLoadFactor: Number.NaN,
+      seed: "unit-test-nan-options",
+    });
+
+    expect(vf.add("k1")).toBe(true);
+    expect(vf.has("k1")).toBe(true);
+  });
+
+  // Chinese, Cyrillic and Japanese keys can each be added and found again.
+  test("非 ASCII 字符串也应可用(UTF-8 编码路径)", () => {
+    const filter = new VacuumFilter({
+      maxItems: 1000,
+      fingerprintBits: 32,
+      maxKickSteps: 500,
+      seed: "unit-test-non-ascii",
+    });
+
+    ["你好", "ключ", "テスト"].forEach((text) => {
+      expect(filter.add(text)).toBe(true);
+      expect(filter.has(text)).toBe(true);
+    });
+  });
+
+  // For many (bucket, tag) pairs, the alternate bucket stays within range and
+  // applying altIndex twice with the same tag returns to the starting bucket.
+  test("alternate index 应为可逆映射(alt(alt(i,tag),tag)=i)且不越界", () => {
+    const vf = new VacuumFilter({
+      maxItems: 50_000,
+      fingerprintBits: 32,
+      maxKickSteps: 1000,
+      seed: "unit-test-alt-index-involution",
+    });
+
+    const bucketCount = vf.capacitySlots() / 4;
+    // @ts-expect-error: the unit test needs to cover a core invariant of a private method
+    const altIndex = (index: number, tag: number) => vf.altIndex(index, tag) as number;
+
+    for (let step = 0; step < 10_000; step++) {
+      const start = step % bucketCount;
+      const tag = (step * 2654435761) >>> 0;
+
+      const alternate = altIndex(start, tag);
+      expect(alternate).toBeGreaterThanOrEqual(0);
+      expect(alternate).toBeLessThan(bucketCount);
+
+      const roundTrip = altIndex(alternate, tag);
+      expect(roundTrip).toBe(start);
+    }
+  });
+});