Преглед изворни кода

feat(probe): switch endpoint probing to TCP by default and improve circuit reset UX

Replace HTTP HEAD/GET endpoint probes with pure TCP connection probes
(net.createConnection) to avoid leaving access logs on upstream servers.
ENDPOINT_PROBE_METHOD env var controls the method (TCP/HEAD/GET, default TCP).

Move circuit reset button from dropdown menu to action bar for faster access
when endpoints are tripped. Add optimistic update to immediately reflect
closed state on reset.
ding113 пре 1 недеља
родитељ
комит
1291f850

+ 2 - 0
.env.example

@@ -150,6 +150,8 @@ ENDPOINT_PROBE_TIMEOUT_MS=5000
 ENDPOINT_PROBE_CONCURRENCY=10
 ENDPOINT_PROBE_CYCLE_JITTER_MS=1000
 ENDPOINT_PROBE_LOCK_TTL_MS=30000
+# Probe method: TCP (default; no HTTP request, so no upstream access log). HEAD or GET both select HTTP probing (HEAD first, GET as fallback).
+ENDPOINT_PROBE_METHOD=TCP
 
 # 探测日志保留与清理
 # - 所有探测结果(成功/失败)均记录到历史表

+ 1 - 1
src/actions/provider-endpoints.ts

@@ -404,7 +404,7 @@ export async function probeProviderEndpoint(input: unknown): Promise<
     endpoint: ProviderEndpoint;
     result: {
       ok: boolean;
-      method: "HEAD" | "GET";
+      method: "HEAD" | "GET" | "TCP";
       statusCode: number | null;
       latencyMs: number | null;
       errorType: string | null;

+ 35 - 9
src/app/[locale]/settings/providers/_components/provider-endpoints-table.tsx

@@ -286,12 +286,24 @@ function EndpointRow({
       if (!res.ok) throw new Error(res.error);
       return res;
     },
+    onMutate: () => {
+      // Optimistic update: immediately set circuit state to closed
+      queryClient.setQueriesData<Record<number, EndpointCircuitState>>(
+        { queryKey: ["endpoint-circuit-info"] },
+        (old) => {
+          if (!old) return old;
+          return { ...old, [endpoint.id]: "closed" as EndpointCircuitState };
+        }
+      );
+    },
     onSuccess: () => {
       queryClient.invalidateQueries({ queryKey: ["endpoint-circuit-info"] });
       queryClient.invalidateQueries({ queryKey: ["provider-endpoints"] });
       toast.success(tStatus("resetCircuitSuccess"));
     },
     onError: () => {
+      // Revert optimistic update on failure
+      queryClient.invalidateQueries({ queryKey: ["endpoint-circuit-info"] });
       toast.error(tStatus("resetCircuitFailed"));
     },
   });
@@ -378,6 +390,29 @@ function EndpointRow({
               )}
             </Button>
 
+            {isCircuitTripped && (
+              <TooltipProvider>
+                <Tooltip delayDuration={200}>
+                  <TooltipTrigger asChild>
+                    <Button
+                      variant="ghost"
+                      size="icon"
+                      className="h-8 w-8"
+                      onClick={() => resetCircuitMutation.mutate()}
+                      disabled={resetCircuitMutation.isPending}
+                    >
+                      {resetCircuitMutation.isPending ? (
+                        <Loader2 className="h-4 w-4 animate-spin" />
+                      ) : (
+                        <RotateCcw className="h-4 w-4" />
+                      )}
+                    </Button>
+                  </TooltipTrigger>
+                  <TooltipContent>{tStatus("resetCircuit")}</TooltipContent>
+                </Tooltip>
+              </TooltipProvider>
+            )}
+
             <EditEndpointDialog endpoint={endpoint} />
 
             <DropdownMenu>
@@ -387,15 +422,6 @@ function EndpointRow({
                 </Button>
               </DropdownMenuTrigger>
               <DropdownMenuContent align="end">
-                {isCircuitTripped && (
-                  <DropdownMenuItem
-                    onClick={() => resetCircuitMutation.mutate()}
-                    disabled={resetCircuitMutation.isPending}
-                  >
-                    <RotateCcw className="mr-2 h-4 w-4" />
-                    {tStatus("resetCircuit")}
-                  </DropdownMenuItem>
-                )}
                 <DropdownMenuItem
                   className="text-destructive focus:text-destructive"
                   onClick={() => {

+ 84 - 1
src/lib/provider-endpoints/probe.ts

@@ -1,11 +1,12 @@
 import "server-only";
 
+import net from "node:net";
 import { recordEndpointFailure } from "@/lib/endpoint-circuit-breaker";
 import { logger } from "@/lib/logger";
 import { findProviderEndpointById, recordProviderEndpointProbeResult } from "@/repository";
 import type { ProviderEndpoint, ProviderEndpointProbeSource } from "@/types/provider";
 
-export type EndpointProbeMethod = "HEAD" | "GET";
+export type EndpointProbeMethod = "HEAD" | "GET" | "TCP";
 
 export interface EndpointProbeResult {
   ok: boolean;
@@ -26,6 +27,12 @@ const DEFAULT_TIMEOUT_MS = Math.max(
   parseIntWithDefault(process.env.ENDPOINT_PROBE_TIMEOUT_MS, 5_000)
 );
 
+/** Reads ENDPOINT_PROBE_METHOD case-insensitively; any value other than HEAD/GET means TCP. */
+function resolveProbeMethod(): EndpointProbeMethod {
+  const configured = (process.env.ENDPOINT_PROBE_METHOD ?? "").toUpperCase();
+  return configured === "HEAD" || configured === "GET" ? configured : "TCP";
+}
+
 function safeUrlForLog(rawUrl: string): string {
   try {
     // Avoid leaking credentials/querystring in logs.
@@ -71,6 +78,75 @@ function toErrorInfo(error: unknown): { type: string; message: string } {
   return { type: "unknown_error", message: String(error) };
 }
 
+/**
+ * Probes an endpoint by opening, then immediately destroying, a TCP connection to the URL's
+ * host and port (default 443 for `https:`, otherwise 80). Nothing is written to the socket.
+ * IPv6 literals are unbracketed before dialling; URLs without a host count as invalid.
+ *
+ * @param rawUrl - Endpoint URL; only its protocol, hostname and port are used.
+ * @param timeoutMs - Socket inactivity timeout handed to `net.createConnection`.
+ * @returns A result with `statusCode` null; invalid URLs, timeouts and socket errors resolve.
+ */
+async function probeEndpointTcp(rawUrl: string, timeoutMs: number): Promise<EndpointProbeResult> {
+  const failure = (
+    errorType: string,
+    errorMessage: string,
+    latencyMs: number | null
+  ): EndpointProbeResult => {
+    return { ok: false, method: "TCP", statusCode: null, latencyMs, errorType, errorMessage };
+  };
+
+  let parsed: URL;
+  try {
+    parsed = new URL(rawUrl);
+  } catch {
+    return failure("invalid_url", "invalid_url", null);
+  }
+
+  // WHATWG URL keeps the brackets on IPv6 literals ("[::1]"); net needs the bare address.
+  const host = parsed.hostname.replace(/^\[|\]$/g, "");
+  // "api.example.com:8443" parses with an empty host; net would then dial localhost instead.
+  if (!host) return failure("invalid_url", "invalid_url", null);
+
+  const defaultPort = parsed.protocol === "https:" ? 443 : 80;
+  const port = parsed.port ? Number.parseInt(parsed.port, 10) : defaultPort;
+  const start = Date.now();
+
+  return new Promise<EndpointProbeResult>((resolve) => {
+    let settled = false;
+    // Destroy the socket and resolve exactly once; later socket events become no-ops.
+    const settle = (result: EndpointProbeResult): void => {
+      if (settled) return;
+      settled = true;
+      socket.destroy();
+      resolve(result);
+    };
+
+    const socket = net.createConnection({ host, port, timeout: timeoutMs }, () => {
+      settle({
+        ok: true,
+        method: "TCP",
+        statusCode: null,
+        latencyMs: Date.now() - start,
+        errorType: null,
+        errorMessage: null,
+      });
+    });
+
+    // "timeout" only signals inactivity; the socket has to be destroyed explicitly.
+    socket.on("timeout", () => settle(failure("timeout", "timeout", null)));
+
+    socket.on("error", (error) => {
+      const latencyMs = Date.now() - start;
+      logger.debug("[EndpointProbe] TCP probe failed", {
+        url: safeUrlForLog(rawUrl),
+        errorMessage: error.message,
+      });
+      settle(failure("network_error", error.message, latencyMs));
+    });
+  });
+}
+
 async function tryProbe(
   url: string,
   method: EndpointProbeMethod,
@@ -122,6 +198,13 @@ export async function probeEndpointUrl(
   url: string,
   timeoutMs: number = DEFAULT_TIMEOUT_MS
 ): Promise<EndpointProbeResult> {
+  const method = resolveProbeMethod();
+
+  if (method === "TCP") {
+    return probeEndpointTcp(url, timeoutMs);
+  }
+
+  // HTTP-based probing: try HEAD first, fallback to GET on network failure
   const head = await tryProbe(url, "HEAD", timeoutMs);
   if (head.statusCode === null) {
     return tryProbe(url, "GET", timeoutMs);

+ 183 - 0
tests/unit/lib/provider-endpoints/probe.test.ts

@@ -26,10 +26,12 @@ function makeEndpoint(overrides: Partial<ProviderEndpoint>): ProviderEndpoint {
 afterEach(() => {
   vi.unstubAllGlobals();
   vi.useRealTimers();
+  delete process.env.ENDPOINT_PROBE_METHOD;
 });
 
 describe("provider-endpoints: probe", () => {
   test("probeEndpointUrl: HEAD 成功时直接返回,不触发 GET", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "HEAD";
     vi.resetModules();
 
     const logger = {
@@ -69,6 +71,7 @@ describe("provider-endpoints: probe", () => {
   });
 
   test("probeEndpointUrl: HEAD 网络错误时回退 GET", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "HEAD";
     vi.resetModules();
 
     const logger = {
@@ -111,6 +114,7 @@ describe("provider-endpoints: probe", () => {
   });
 
   test("probeEndpointUrl: 5xx 返回 ok=false 且标注 http_5xx", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "HEAD";
     vi.resetModules();
 
     const logger = {
@@ -148,6 +152,7 @@ describe("provider-endpoints: probe", () => {
   });
 
   test("probeEndpointUrl: 4xx 仍视为 ok=true", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "HEAD";
     vi.resetModules();
 
     const logger = {
@@ -183,6 +188,7 @@ describe("provider-endpoints: probe", () => {
   });
 
   test("probeEndpointUrl: AbortError 归类为 timeout", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "HEAD";
     vi.resetModules();
 
     const logger = {
@@ -264,6 +270,7 @@ describe("provider-endpoints: probe", () => {
   });
 
   test("probeProviderEndpointAndRecord: 记录入库字段包含 source/ok/statusCode/latency/probedAt", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "HEAD";
     vi.useFakeTimers();
     vi.setSystemTime(new Date("2026-01-01T00:00:00.000Z"));
 
@@ -330,6 +337,7 @@ describe("provider-endpoints: probe", () => {
   });
 
   test("probeProviderEndpointAndRecord: scheduled 成功总是写入探测日志记录", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "HEAD";
     vi.useFakeTimers();
     vi.setSystemTime(new Date("2026-01-01T00:00:30.000Z"));
 
@@ -377,6 +385,7 @@ describe("provider-endpoints: probe", () => {
   });
 
   test("probeProviderEndpointAndRecord: 失败会计入端点熔断计数(scheduled 与 manual)", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "HEAD";
     vi.resetModules();
 
     const recordMock = vi.fn(async () => {});
@@ -415,4 +424,178 @@ describe("provider-endpoints: probe", () => {
     expect(recordFailureMock).toHaveBeenCalledTimes(2);
     expect(recordMock).toHaveBeenCalledTimes(2);
   });
+
+  test("probeEndpointUrl: TCP mode connects to host:port without HTTP request", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "TCP";
+    vi.resetModules();
+
+    const logger = {
+      debug: vi.fn(),
+      info: vi.fn(),
+      warn: vi.fn(),
+      trace: vi.fn(),
+      error: vi.fn(),
+      fatal: vi.fn(),
+    };
+
+    vi.doMock("@/lib/logger", () => ({ logger }));
+    vi.doMock("@/repository", () => ({
+      findProviderEndpointById: vi.fn(),
+      recordProviderEndpointProbeResult: vi.fn(),
+    }));
+    vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
+      recordEndpointFailure: vi.fn(async () => {}),
+    }));
+
+    // Fake socket whose connect callback fires on the next macrotask, i.e. a successful dial.
+    const mockSocket = { destroy: vi.fn(), on: vi.fn() };
+    const createConnection = vi.fn((_opts: unknown, onConnect: () => void) => {
+      setTimeout(onConnect, 0);
+      return mockSocket;
+    });
+    vi.doMock("node:net", () => ({ default: { createConnection } }));
+
+    const fetchMock = vi.fn();
+    vi.stubGlobal("fetch", fetchMock);
+
+    const { probeEndpointUrl } = await import("@/lib/provider-endpoints/probe");
+    const result = await probeEndpointUrl("https://api.example.com:8443/v1", 5000);
+
+    // The host and the explicit port from the URL must be the ones dialled, exactly once.
+    expect(createConnection).toHaveBeenCalledTimes(1);
+    expect(createConnection).toHaveBeenCalledWith(
+      expect.objectContaining({ host: "api.example.com", port: 8443 }),
+      expect.any(Function)
+    );
+    expect(mockSocket.destroy).toHaveBeenCalled();
+
+    expect(result.ok).toBe(true);
+    expect(result.method).toBe("TCP");
+    expect(result.statusCode).toBeNull();
+    expect(result.errorType).toBeNull();
+    expect(result.latencyMs).toBeTypeOf("number");
+    // fetch should never be called in TCP mode
+    expect(fetchMock).not.toHaveBeenCalled();
+  });
+
+  test("probeEndpointUrl: TCP mode defaults to port 80 for http URLs", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "TCP";
+    vi.resetModules();
+
+    vi.doMock("@/lib/logger", () => ({
+      logger: {
+        debug: vi.fn(),
+        info: vi.fn(),
+        warn: vi.fn(),
+        trace: vi.fn(),
+        error: vi.fn(),
+        fatal: vi.fn(),
+      },
+    }));
+    vi.doMock("@/repository", () => ({
+      findProviderEndpointById: vi.fn(),
+      recordProviderEndpointProbeResult: vi.fn(),
+    }));
+    vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
+      recordEndpointFailure: vi.fn(async () => {}),
+    }));
+
+    // Fake socket that reports a successful connection on the next macrotask.
+    const mockSocket = { destroy: vi.fn(), on: vi.fn() };
+    const createConnection = vi.fn((_opts: unknown, onConnect: () => void) => {
+      setTimeout(onConnect, 0);
+      return mockSocket;
+    });
+    vi.doMock("node:net", () => ({ default: { createConnection } }));
+
+    const { probeEndpointUrl } = await import("@/lib/provider-endpoints/probe");
+    const result = await probeEndpointUrl("http://api.example.com/v1/messages", 5000);
+
+    // No port in the URL: the http scheme default (80) must be the one dialled.
+    expect(createConnection).toHaveBeenCalledWith(
+      expect.objectContaining({ host: "api.example.com", port: 80 }),
+      expect.any(Function)
+    );
+
+    // TCP connection succeeds, no HTTP status code
+    expect(result.ok).toBe(true);
+    expect(result.method).toBe("TCP");
+    expect(result.statusCode).toBeNull();
+  });
+
+  test("probeEndpointUrl: TCP mode returns invalid_url for bad URLs", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "TCP";
+    vi.resetModules();
+
+    // Replace the logger, repository and circuit-breaker modules with inert stubs.
+    const logger = {
+      debug: vi.fn(),
+      info: vi.fn(),
+      warn: vi.fn(),
+      trace: vi.fn(),
+      error: vi.fn(),
+      fatal: vi.fn(),
+    };
+
+    vi.doMock("@/lib/logger", () => ({ logger }));
+    vi.doMock("@/repository", () => ({
+      findProviderEndpointById: vi.fn(),
+      recordProviderEndpointProbeResult: vi.fn(),
+    }));
+    vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
+      recordEndpointFailure: vi.fn(async () => {}),
+    }));
+
+    const probeModule = await import("@/lib/provider-endpoints/probe");
+    const outcome = await probeModule.probeEndpointUrl("not-a-valid-url", 5000);
+
+    // An unparseable URL must be reported as a failed TCP probe.
+    expect(outcome).toMatchObject({ ok: false, method: "TCP", errorType: "invalid_url" });
+  });
+
+  test("probeEndpointUrl: defaults to TCP when ENDPOINT_PROBE_METHOD is not set", async () => {
+    // Make sure no value leaks in from the environment or an earlier test.
+    delete process.env.ENDPOINT_PROBE_METHOD;
+    vi.resetModules();
+
+    const logger = {
+      debug: vi.fn(),
+      info: vi.fn(),
+      warn: vi.fn(),
+      trace: vi.fn(),
+      error: vi.fn(),
+      fatal: vi.fn(),
+    };
+
+    vi.doMock("@/lib/logger", () => ({ logger }));
+    vi.doMock("@/repository", () => ({
+      findProviderEndpointById: vi.fn(),
+      recordProviderEndpointProbeResult: vi.fn(),
+    }));
+    vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
+      recordEndpointFailure: vi.fn(async () => {}),
+    }));
+
+    // Fake socket: the connect callback runs on the next macrotask to mimic a successful dial.
+    const fakeSocket = { destroy: vi.fn(), on: vi.fn() };
+    const connectStub = vi.fn((_opts: unknown, onConnect: () => void) => {
+      setTimeout(() => onConnect(), 0);
+      return fakeSocket;
+    });
+    vi.doMock("node:net", () => ({
+      default: { createConnection: connectStub },
+    }));
+
+    // fetch must stay untouched when the TCP path is taken.
+    const fetchSpy = vi.fn();
+    vi.stubGlobal("fetch", fetchSpy);
+
+    const probeModule = await import("@/lib/provider-endpoints/probe");
+    const outcome = await probeModule.probeEndpointUrl("https://example.com", 5000);
+
+    // With the variable unset the probe reports the TCP method and never issues a fetch.
+    expect(outcome.method).toBe("TCP");
+
+    expect(fetchSpy).not.toHaveBeenCalled();
+  });
 });