
fix(probe): remove success log filtering to enable latency curve rendering (#636)

* fix(providers): auto-backfill vendor aggregation for legacy providers

When upgrading from older versions, existing providers were incorrectly
grouped under "Unknown Vendor #0" instead of being auto-aggregated by
their website domain.

Changes:
- Add backfillProviderVendorsFromProviders() to auto-create vendors from
  provider URLs during startup
- Add deriveDisplayNameFromDomain() to generate display names from domains
- Integrate vendor backfill into instrumentation.ts startup flow
- Fix providerVendorId type to allow null (Schema, Type, Transformer)
- Add vendorId=-1 protection for orphaned providers in frontend
- Add i18n keys for orphanedProviders in 5 languages

The backfill runs idempotently on startup, processing providers with
null or 0 vendorId, extracting domains from websiteUrl (preferred) or
url, and creating/associating vendors accordingly.
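The two function names below are taken from the change list above; everything else (repository helpers, types, the display-name heuristic) is a minimal illustrative sketch of the described flow, not the project's actual implementation:

    // Sketch only: assumed types and repository helpers are marked below.
    type Provider = { id: number; url: string; websiteUrl?: string | null; vendorId: number | null };
    type Vendor = { id: number; displayName: string };

    declare function listProvidersMissingVendor(): Promise<Provider[]>;                     // assumed helper
    declare function findOrCreateVendorByDomain(d: string, name: string): Promise<Vendor>;  // assumed helper
    declare function updateProviderVendorId(providerId: number, vendorId: number): Promise<void>; // assumed

    function deriveDisplayNameFromDomain(domain: string): string {
      // e.g. "api.openai.com" -> "openai" -> "Openai" (illustrative heuristic only)
      const core = domain.replace(/^www\./, "").split(".").slice(-2)[0] ?? domain;
      return core.charAt(0).toUpperCase() + core.slice(1);
    }

    async function backfillProviderVendorsFromProviders(): Promise<void> {
      // Idempotent: only touches providers whose vendorId is null or 0.
      for (const provider of await listProvidersMissingVendor()) {
        const raw = provider.websiteUrl || provider.url; // websiteUrl preferred, url as fallback
        let domain: string;
        try {
          domain = new URL(raw).hostname;
        } catch {
          continue; // skip providers with unparseable URLs
        }
        const vendor = await findOrCreateVendorByDomain(domain, deriveDisplayNameFromDomain(domain));
        await updateProviderVendorId(provider.id, vendor.id);
      }
    }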

Co-Authored-By: Claude Opus 4.5 <[email protected]>

* fix(probe): remove success log filtering to enable latency curve rendering

Problem:
- The endpoint probe scheduler runs correctly every 10s
- But successful scheduled probes were filtered by the 60s sampling threshold
- As a result, roughly 5 out of 6 successful probes were never recorded to the history table
- Without those rows, the frontend latency curve could not be rendered

Solution:
- Remove the shouldLogScheduledSuccess() filtering function
- Remove the SUCCESS_LOG_MIN_INTERVAL_MS constant
- Always call recordProviderEndpointProbeResult() for every probe
- Change the default retention from 7 days to 1 day (storage: ~40MB/day; see the back-of-envelope estimate below)
- Update .env.example to reflect the new behavior
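For context on the retention change: at a 10s cadence each endpoint writes 8,640 history rows per day, so ~40MB/day is consistent with, say, ~100 endpoints at roughly 48 bytes per row. Both figures below are assumptions used only to sanity-check the estimate:

    // Back-of-envelope for the ~40MB/day figure; endpoint count and row
    // size are assumptions, not measurements from this repository.
    const PROBE_INTERVAL_S = 10;
    const ROWS_PER_ENDPOINT_PER_DAY = (24 * 60 * 60) / PROBE_INTERVAL_S; // 8,640
    const ASSUMED_ENDPOINT_COUNT = 100;
    const ASSUMED_BYTES_PER_ROW = 48;
    const bytesPerDay = ROWS_PER_ENDPOINT_PER_DAY * ASSUMED_ENDPOINT_COUNT * ASSUMED_BYTES_PER_ROW;
    console.log(`${(bytesPerDay / 1024 / 1024).toFixed(1)} MB/day`); // ~39.6 MB/day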
Ding committed 3 weeks ago
commit 72c6341e16

+ 3 - 6
.env.example

@@ -128,11 +128,8 @@ ENDPOINT_PROBE_CONCURRENCY=10
 ENDPOINT_PROBE_CYCLE_JITTER_MS=1000
 ENDPOINT_PROBE_LOCK_TTL_MS=30000
 
-# Probe logging policy
-# - scheduled success logs are sampled at a minimum interval (default: at most 1 entry per endpoint per minute)
-# - failure logs are always recorded in full
-ENDPOINT_PROBE_SUCCESS_LOG_MIN_INTERVAL_MS=60000
-
 # Probe log retention and cleanup
-ENDPOINT_PROBE_LOG_RETENTION_DAYS=7
+# - all probe results (success and failure) are recorded to the history table
+# - the automatic cleanup job runs every 24 hours and deletes expired records
+ENDPOINT_PROBE_LOG_RETENTION_DAYS=1
 ENDPOINT_PROBE_LOG_CLEANUP_BATCH_SIZE=10000

+ 1 - 1
src/lib/provider-endpoints/probe-log-cleanup.ts

@@ -15,7 +15,7 @@ function parseIntWithDefault(value: string | undefined, fallback: number): number
 
 const RETENTION_DAYS = Math.max(
   0,
-  parseIntWithDefault(process.env.ENDPOINT_PROBE_LOG_RETENTION_DAYS, 7)
+  parseIntWithDefault(process.env.ENDPOINT_PROBE_LOG_RETENTION_DAYS, 1)
 );
 const CLEANUP_BATCH_SIZE = Math.max(
   1,
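The cleanup job that consumes these two constants is not shown in this diff; below is a minimal sketch of a batched retention sweep under that contract (deleteProbeLogsOlderThan is an assumed helper, not the project's actual repository API):

    // Sketch: delete expired rows in chunks so a large backlog never
    // turns into one giant DELETE. Only the retention/batch-size contract
    // mirrors probe-log-cleanup.ts; the helper below is an assumption.
    declare function deleteProbeLogsOlderThan(cutoff: Date, limit: number): Promise<number>;

    async function cleanupExpiredProbeLogs(retentionDays: number, batchSize: number): Promise<void> {
      const cutoff = new Date(Date.now() - retentionDays * 24 * 60 * 60 * 1000);
      let deleted: number;
      do {
        deleted = await deleteProbeLogsOlderThan(cutoff, batchSize);
      } while (deleted === batchSize); // a short batch means the backlog is drained
    }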

+ 4 - 43
src/lib/provider-endpoints/probe.ts

@@ -2,11 +2,7 @@ import "server-only";
 
 import { recordEndpointFailure } from "@/lib/endpoint-circuit-breaker";
 import { logger } from "@/lib/logger";
-import {
-  findProviderEndpointById,
-  recordProviderEndpointProbeResult,
-  updateProviderEndpointProbeSnapshot,
-} from "@/repository";
+import { findProviderEndpointById, recordProviderEndpointProbeResult } from "@/repository";
 import type { ProviderEndpoint, ProviderEndpointProbeSource } from "@/types/provider";
 
 export type EndpointProbeMethod = "HEAD" | "GET";
@@ -29,10 +25,6 @@ const DEFAULT_TIMEOUT_MS = Math.max(
   1,
   parseIntWithDefault(process.env.ENDPOINT_PROBE_TIMEOUT_MS, 5_000)
 );
-const SUCCESS_LOG_MIN_INTERVAL_MS = Math.max(
-  0,
-  parseIntWithDefault(process.env.ENDPOINT_PROBE_SUCCESS_LOG_MIN_INTERVAL_MS, 60_000)
-);
 
 function safeUrlForLog(rawUrl: string): string {
   try {
@@ -139,19 +131,6 @@ export async function probeEndpointUrl(
 
 type ProbeTarget = Pick<ProviderEndpoint, "id" | "url" | "lastProbedAt" | "lastProbeOk">;
 
-function shouldLogScheduledSuccess(endpoint: ProbeTarget, probedAt: Date): boolean {
-  if (!endpoint.lastProbedAt) {
-    return true;
-  }
-
-  if (endpoint.lastProbeOk !== true) {
-    return true;
-  }
-
-  const elapsedMs = probedAt.getTime() - endpoint.lastProbedAt.getTime();
-  return elapsedMs >= SUCCESS_LOG_MIN_INTERVAL_MS;
-}
-
 export async function probeProviderEndpointAndRecordByEndpoint(input: {
   endpoint: ProbeTarget;
   source: ProviderEndpointProbeSource;
@@ -169,28 +148,10 @@ export async function probeProviderEndpointAndRecordByEndpoint(input: {
     await recordEndpointFailure(input.endpoint.id, new Error(message));
   }
 
-  const shouldWriteLog =
-    input.source !== "scheduled" ||
-    !result.ok ||
-    shouldLogScheduledSuccess(input.endpoint, probedAt);
-
-  if (shouldWriteLog) {
-    await recordProviderEndpointProbeResult({
-      endpointId: input.endpoint.id,
-      source: input.source,
-      ok: result.ok,
-      statusCode: result.statusCode,
-      latencyMs: result.latencyMs,
-      errorType: result.errorType,
-      errorMessage: result.errorMessage,
-      probedAt,
-    });
-
-    return result;
-  }
-
-  await updateProviderEndpointProbeSnapshot({
+  // Always record probe results to history table (removed filtering logic)
+  await recordProviderEndpointProbeResult({
     endpointId: input.endpoint.id,
+    source: input.source,
     ok: result.ok,
     statusCode: result.statusCode,
     latencyMs: result.latencyMs,
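The rendered hunk is cut off after latencyMs; reconstructing the remaining fields from the removed block above, the recording path in probeProviderEndpointAndRecordByEndpoint now reads:

    // Reconstructed from this diff: errorType, errorMessage and probedAt are
    // carried over from the removed block, since the hunk is truncated here.
    await recordProviderEndpointProbeResult({
      endpointId: input.endpoint.id,
      source: input.source,
      ok: result.ok,
      statusCode: result.statusCode,
      latencyMs: result.latencyMs,
      errorType: result.errorType,
      errorMessage: result.errorMessage,
      probedAt,
    });

    return result;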

+ 2 - 8
tests/unit/lib/provider-endpoints/probe.test.ts

@@ -329,14 +329,13 @@ describe("provider-endpoints: probe", () => {
     expect(recordFailureMock).not.toHaveBeenCalled();
   });
 
-  test("probeProviderEndpointAndRecord: scheduled 成功按间隔采样写日志,其余仅更新快照", async () => {
+  test("probeProviderEndpointAndRecord: scheduled 成功总是写入探测日志记录", async () => {
     vi.useFakeTimers();
     vi.setSystemTime(new Date("2026-01-01T00:00:30.000Z"));
 
     vi.resetModules();
 
     const recordMock = vi.fn(async () => {});
-    const snapshotMock = vi.fn(async () => {});
     const recordFailureMock = vi.fn(async () => {});
 
     const endpoint = makeEndpoint({
@@ -359,7 +358,6 @@ describe("provider-endpoints: probe", () => {
     vi.doMock("@/repository", () => ({
       findProviderEndpointById: vi.fn(async () => endpoint),
       recordProviderEndpointProbeResult: recordMock,
-      updateProviderEndpointProbeSnapshot: snapshotMock,
     }));
     vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
       recordEndpointFailure: recordFailureMock,
@@ -374,8 +372,7 @@ describe("provider-endpoints: probe", () => {
     const result = await probeProviderEndpointAndRecord({ endpointId: 1, source: "scheduled" });
 
     expect(result).toEqual(expect.objectContaining({ ok: true, statusCode: 200 }));
-    expect(recordMock).not.toHaveBeenCalled();
-    expect(snapshotMock).toHaveBeenCalledTimes(1);
+    expect(recordMock).toHaveBeenCalledTimes(1);
     expect(recordFailureMock).not.toHaveBeenCalled();
   });
 
@@ -383,7 +380,6 @@ describe("provider-endpoints: probe", () => {
     vi.resetModules();
 
     const recordMock = vi.fn(async () => {});
-    const snapshotMock = vi.fn(async () => {});
     const recordFailureMock = vi.fn(async () => {});
 
     vi.doMock("@/lib/logger", () => ({
@@ -401,7 +397,6 @@ describe("provider-endpoints: probe", () => {
         makeEndpoint({ id: 123, url: "https://example.com" })
       ),
       recordProviderEndpointProbeResult: recordMock,
-      updateProviderEndpointProbeSnapshot: snapshotMock,
     }));
     vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
       recordEndpointFailure: recordFailureMock,
@@ -419,6 +414,5 @@ describe("provider-endpoints: probe", () => {
 
     expect(recordFailureMock).toHaveBeenCalledTimes(2);
     expect(recordMock).toHaveBeenCalledTimes(2);
-    expect(snapshotMock).not.toHaveBeenCalled();
   });
 });