Răsfoiți Sursa

feat(probe): switch endpoint probing to TCP by default and improve circuit reset UX

Replace HTTP HEAD/GET endpoint probes with pure TCP connection probes
(net.createConnection) to avoid leaving access logs on upstream servers.
ENDPOINT_PROBE_METHOD env var controls the method (TCP/HEAD/GET, default TCP).

Move circuit reset button from dropdown menu to action bar for faster access
when endpoints are tripped. Add optimistic update to immediately reflect
closed state on reset.
ding113 1 săptămână în urmă
părinte
comite
1291f850

+ 2 - 0
.env.example

@@ -150,6 +150,8 @@ ENDPOINT_PROBE_TIMEOUT_MS=5000
 ENDPOINT_PROBE_CONCURRENCY=10
 ENDPOINT_PROBE_CYCLE_JITTER_MS=1000
 ENDPOINT_PROBE_LOCK_TTL_MS=30000
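+# Probe method: TCP (default; plain TCP connect, no HTTP request, no upstream access log), or HEAD / GET for HTTP probing (HEAD is tried first, GET is the fallback). Unrecognized values fall back to TCP.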
+ENDPOINT_PROBE_METHOD=TCP
 
 # 探测日志保留与清理
 # - 所有探测结果(成功/失败)均记录到历史表

+ 1 - 1
src/actions/provider-endpoints.ts

@@ -404,7 +404,7 @@ export async function probeProviderEndpoint(input: unknown): Promise<
     endpoint: ProviderEndpoint;
     result: {
       ok: boolean;
-      method: "HEAD" | "GET";
+      method: "HEAD" | "GET" | "TCP";
       statusCode: number | null;
       latencyMs: number | null;
       errorType: string | null;

+ 35 - 9
src/app/[locale]/settings/providers/_components/provider-endpoints-table.tsx

@@ -286,12 +286,24 @@ function EndpointRow({
       if (!res.ok) throw new Error(res.error);
       return res;
     },
+    onMutate: () => {
+      // Optimistic update: immediately set circuit state to closed
+      queryClient.setQueriesData<Record<number, EndpointCircuitState>>(
+        { queryKey: ["endpoint-circuit-info"] },
+        (old) => {
+          if (!old) return old;
+          return { ...old, [endpoint.id]: "closed" as EndpointCircuitState };
+        }
+      );
+    },
     onSuccess: () => {
       queryClient.invalidateQueries({ queryKey: ["endpoint-circuit-info"] });
       queryClient.invalidateQueries({ queryKey: ["provider-endpoints"] });
       toast.success(tStatus("resetCircuitSuccess"));
     },
     onError: () => {
+      // Revert optimistic update on failure
+      queryClient.invalidateQueries({ queryKey: ["endpoint-circuit-info"] });
       toast.error(tStatus("resetCircuitFailed"));
     },
   });
@@ -378,6 +390,29 @@ function EndpointRow({
               )}
             </Button>
 
+            {isCircuitTripped && (
+              <TooltipProvider>
+                <Tooltip delayDuration={200}>
+                  <TooltipTrigger asChild>
+                    <Button
+                      variant="ghost"
+                      size="icon"
+                      className="h-8 w-8"
+                      onClick={() => resetCircuitMutation.mutate()}
+                      disabled={resetCircuitMutation.isPending}
+                    >
+                      {resetCircuitMutation.isPending ? (
+                        <Loader2 className="h-4 w-4 animate-spin" />
+                      ) : (
+                        <RotateCcw className="h-4 w-4" />
+                      )}
+                    </Button>
+                  </TooltipTrigger>
+                  <TooltipContent>{tStatus("resetCircuit")}</TooltipContent>
+                </Tooltip>
+              </TooltipProvider>
+            )}
+
             <EditEndpointDialog endpoint={endpoint} />
 
             <DropdownMenu>
@@ -387,15 +422,6 @@ function EndpointRow({
                 </Button>
               </DropdownMenuTrigger>
               <DropdownMenuContent align="end">
-                {isCircuitTripped && (
-                  <DropdownMenuItem
-                    onClick={() => resetCircuitMutation.mutate()}
-                    disabled={resetCircuitMutation.isPending}
-                  >
-                    <RotateCcw className="mr-2 h-4 w-4" />
-                    {tStatus("resetCircuit")}
-                  </DropdownMenuItem>
-                )}
                 <DropdownMenuItem
                   className="text-destructive focus:text-destructive"
                   onClick={() => {

+ 84 - 1
src/lib/provider-endpoints/probe.ts

@@ -1,11 +1,12 @@
 import "server-only";
 
+import net from "node:net";
 import { recordEndpointFailure } from "@/lib/endpoint-circuit-breaker";
 import { logger } from "@/lib/logger";
 import { findProviderEndpointById, recordProviderEndpointProbeResult } from "@/repository";
 import type { ProviderEndpoint, ProviderEndpointProbeSource } from "@/types/provider";
 
-export type EndpointProbeMethod = "HEAD" | "GET";
+export type EndpointProbeMethod = "HEAD" | "GET" | "TCP";
 
 export interface EndpointProbeResult {
   ok: boolean;
@@ -26,6 +27,12 @@ const DEFAULT_TIMEOUT_MS = Math.max(
   parseIntWithDefault(process.env.ENDPOINT_PROBE_TIMEOUT_MS, 5_000)
 );
 
+/** Reads ENDPOINT_PROBE_METHOD (trimmed, upper-cased); any value but HEAD/GET, or unset, is TCP. */
+function resolveProbeMethod(): EndpointProbeMethod {
+  const raw = process.env.ENDPOINT_PROBE_METHOD?.trim().toUpperCase();
+  return raw === "HEAD" || raw === "GET" ? raw : "TCP";
+}
+
 function safeUrlForLog(rawUrl: string): string {
   try {
     // Avoid leaking credentials/querystring in logs.
@@ -71,6 +78,75 @@ function toErrorInfo(error: unknown): { type: string; message: string } {
   return { type: "unknown_error", message: String(error) };
 }
 
+/**
+ * Probes an endpoint by opening, then immediately destroying, a raw TCP connection.
+ *
+ * Nothing is written to the socket, so no HTTP request is sent and `statusCode` is always null.
+ * @param rawUrl - Endpoint URL; only its protocol, hostname and port are used.
+ * @param timeoutMs - Passed as the socket `timeout` option of `net.createConnection`.
+ * @returns Result with method "TCP"; on failure `errorType` is "invalid_url", "timeout"
+ *   or "network_error".
+ */
+async function probeEndpointTcp(rawUrl: string, timeoutMs: number): Promise<EndpointProbeResult> {
+  const fail = (
+    errorType: string,
+    errorMessage: string,
+    latencyMs: number | null
+  ): EndpointProbeResult => ({
+    ok: false,
+    method: "TCP",
+    statusCode: null,
+    latencyMs,
+    errorType,
+    errorMessage,
+  });
+
+  let parsed: URL;
+  try {
+    parsed = new URL(rawUrl);
+  } catch {
+    return fail("invalid_url", "invalid_url", null);
+  }
+
+  const defaultPort = parsed.protocol === "https:" ? 443 : 80;
+  const port = parsed.port ? Number.parseInt(parsed.port, 10) : defaultPort;
+  // URL.hostname keeps the brackets of IPv6 literals ("[::1]"); net needs the bare address.
+  const host = parsed.hostname.replace(/^\[|\]$/g, "");
+
+  const start = Date.now();
+
+  return new Promise<EndpointProbeResult>((resolve) => {
+    const socket = net.createConnection({ host, port, timeout: timeoutMs }, () => {
+      const latencyMs = Date.now() - start;
+      socket.destroy();
+      resolve({
+        ok: true,
+        method: "TCP",
+        statusCode: null,
+        latencyMs,
+        errorType: null,
+        errorMessage: null,
+      });
+    });
+
+    // The "timeout" event only notifies; the socket has to be destroyed explicitly.
+    socket.on("timeout", () => {
+      socket.destroy();
+      resolve(fail("timeout", "timeout", null));
+    });
+
+    socket.on("error", (error) => {
+      const latencyMs = Date.now() - start;
+      logger.debug("[EndpointProbe] TCP probe failed", {
+        url: safeUrlForLog(rawUrl),
+        errorMessage: error.message,
+      });
+      socket.destroy();
+      resolve(fail("network_error", error.message, latencyMs));
+    });
+  });
+}
+
 async function tryProbe(
   url: string,
   method: EndpointProbeMethod,
@@ -122,6 +198,13 @@ export async function probeEndpointUrl(
   url: string,
   timeoutMs: number = DEFAULT_TIMEOUT_MS
 ): Promise<EndpointProbeResult> {
+  const method = resolveProbeMethod();
+
+  if (method === "TCP") {
+    return probeEndpointTcp(url, timeoutMs);
+  }
+
+  // HTTP-based probing: try HEAD first, fallback to GET on network failure
   const head = await tryProbe(url, "HEAD", timeoutMs);
   if (head.statusCode === null) {
     return tryProbe(url, "GET", timeoutMs);

+ 183 - 0
tests/unit/lib/provider-endpoints/probe.test.ts

@@ -26,10 +26,12 @@ function makeEndpoint(overrides: Partial<ProviderEndpoint>): ProviderEndpoint {
 afterEach(() => {
   vi.unstubAllGlobals();
   vi.useRealTimers();
+  delete process.env.ENDPOINT_PROBE_METHOD;
 });
 
 describe("provider-endpoints: probe", () => {
   test("probeEndpointUrl: HEAD 成功时直接返回,不触发 GET", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "HEAD";
     vi.resetModules();
 
     const logger = {
@@ -69,6 +71,7 @@ describe("provider-endpoints: probe", () => {
   });
 
   test("probeEndpointUrl: HEAD 网络错误时回退 GET", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "HEAD";
     vi.resetModules();
 
     const logger = {
@@ -111,6 +114,7 @@ describe("provider-endpoints: probe", () => {
   });
 
   test("probeEndpointUrl: 5xx 返回 ok=false 且标注 http_5xx", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "HEAD";
     vi.resetModules();
 
     const logger = {
@@ -148,6 +152,7 @@ describe("provider-endpoints: probe", () => {
   });
 
   test("probeEndpointUrl: 4xx 仍视为 ok=true", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "HEAD";
     vi.resetModules();
 
     const logger = {
@@ -183,6 +188,7 @@ describe("provider-endpoints: probe", () => {
   });
 
   test("probeEndpointUrl: AbortError 归类为 timeout", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "HEAD";
     vi.resetModules();
 
     const logger = {
@@ -264,6 +270,7 @@ describe("provider-endpoints: probe", () => {
   });
 
   test("probeProviderEndpointAndRecord: 记录入库字段包含 source/ok/statusCode/latency/probedAt", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "HEAD";
     vi.useFakeTimers();
     vi.setSystemTime(new Date("2026-01-01T00:00:00.000Z"));
 
@@ -330,6 +337,7 @@ describe("provider-endpoints: probe", () => {
   });
 
   test("probeProviderEndpointAndRecord: scheduled 成功总是写入探测日志记录", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "HEAD";
     vi.useFakeTimers();
     vi.setSystemTime(new Date("2026-01-01T00:00:30.000Z"));
 
@@ -377,6 +385,7 @@ describe("provider-endpoints: probe", () => {
   });
 
   test("probeProviderEndpointAndRecord: 失败会计入端点熔断计数(scheduled 与 manual)", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "HEAD";
     vi.resetModules();
 
     const recordMock = vi.fn(async () => {});
@@ -415,4 +424,178 @@ describe("provider-endpoints: probe", () => {
     expect(recordFailureMock).toHaveBeenCalledTimes(2);
     expect(recordMock).toHaveBeenCalledTimes(2);
   });
+
+  test("probeEndpointUrl: TCP mode connects to host:port without HTTP request", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "TCP";
+    vi.resetModules();
+
+    const logger = {
+      debug: vi.fn(),
+      info: vi.fn(),
+      warn: vi.fn(),
+      trace: vi.fn(),
+      error: vi.fn(),
+      fatal: vi.fn(),
+    };
+
+    vi.doMock("@/lib/logger", () => ({ logger }));
+    vi.doMock("@/repository", () => ({
+      findProviderEndpointById: vi.fn(),
+      recordProviderEndpointProbeResult: vi.fn(),
+    }));
+    vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
+      recordEndpointFailure: vi.fn(async () => {}),
+    }));
+
+    // Mock net.createConnection to simulate successful TCP connection
+    const mockSocket = {
+      destroy: vi.fn(),
+      on: vi.fn(),
+    };
+
+    vi.doMock("node:net", () => ({
+      default: {
+        createConnection: vi.fn((_opts: unknown, cb: () => void) => {
+          // Simulate immediate successful connection
+          setTimeout(() => cb(), 0);
+          return mockSocket;
+        }),
+      },
+    }));
+
+    const fetchMock = vi.fn();
+    vi.stubGlobal("fetch", fetchMock);
+
+    const { probeEndpointUrl } = await import("@/lib/provider-endpoints/probe");
+    const result = await probeEndpointUrl("https://api.example.com:8443/v1", 5000);
+
+    expect(result.ok).toBe(true);
+    expect(result.method).toBe("TCP");
+    expect(result.statusCode).toBeNull();
+    expect(result.errorType).toBeNull();
+    expect(result.latencyMs).toBeTypeOf("number");
+    // fetch should never be called in TCP mode
+    expect(fetchMock).not.toHaveBeenCalled();
+  });
+
+  test("probeEndpointUrl: TCP mode defaults to port 80 for http URLs", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "TCP";
+    vi.resetModules();
+
+    vi.doMock("@/lib/logger", () => ({
+      logger: {
+        debug: vi.fn(),
+        info: vi.fn(),
+        warn: vi.fn(),
+        trace: vi.fn(),
+        error: vi.fn(),
+        fatal: vi.fn(),
+      },
+    }));
+    vi.doMock("@/repository", () => ({
+      findProviderEndpointById: vi.fn(),
+      recordProviderEndpointProbeResult: vi.fn(),
+    }));
+    vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
+      recordEndpointFailure: vi.fn(async () => {}),
+    }));
+
+    const mockSocket = {
+      destroy: vi.fn(),
+      on: vi.fn(),
+    };
+
+    vi.doMock("node:net", () => ({
+      default: {
+        createConnection: vi.fn((_opts: unknown, cb: () => void) => {
+          setTimeout(() => cb(), 0);
+          return mockSocket;
+        }),
+      },
+    }));
+
+    const { probeEndpointUrl } = await import("@/lib/provider-endpoints/probe");
+    const result = await probeEndpointUrl("http://api.example.com/v1/messages", 5000);
+
+    // TCP connection succeeds, no HTTP status code
+    expect(result.ok).toBe(true);
+    expect(result.method).toBe("TCP");
+    expect(result.statusCode).toBeNull();
+  });
+
+  test("probeEndpointUrl: TCP mode returns invalid_url for bad URLs", async () => {
+    process.env.ENDPOINT_PROBE_METHOD = "TCP";
+    vi.resetModules();
+
+    vi.doMock("@/lib/logger", () => ({
+      logger: {
+        debug: vi.fn(),
+        info: vi.fn(),
+        warn: vi.fn(),
+        trace: vi.fn(),
+        error: vi.fn(),
+        fatal: vi.fn(),
+      },
+    }));
+    vi.doMock("@/repository", () => ({
+      findProviderEndpointById: vi.fn(),
+      recordProviderEndpointProbeResult: vi.fn(),
+    }));
+    vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
+      recordEndpointFailure: vi.fn(async () => {}),
+    }));
+
+    const { probeEndpointUrl } = await import("@/lib/provider-endpoints/probe");
+    const result = await probeEndpointUrl("not-a-valid-url", 5000);
+
+    expect(result.ok).toBe(false);
+    expect(result.method).toBe("TCP");
+    expect(result.errorType).toBe("invalid_url");
+  });
+
+  test("probeEndpointUrl: defaults to TCP when ENDPOINT_PROBE_METHOD is not set", async () => {
+    delete process.env.ENDPOINT_PROBE_METHOD;
+    vi.resetModules();
+
+    vi.doMock("@/lib/logger", () => ({
+      logger: {
+        debug: vi.fn(),
+        info: vi.fn(),
+        warn: vi.fn(),
+        trace: vi.fn(),
+        error: vi.fn(),
+        fatal: vi.fn(),
+      },
+    }));
+    vi.doMock("@/repository", () => ({
+      findProviderEndpointById: vi.fn(),
+      recordProviderEndpointProbeResult: vi.fn(),
+    }));
+    vi.doMock("@/lib/endpoint-circuit-breaker", () => ({
+      recordEndpointFailure: vi.fn(async () => {}),
+    }));
+
+    const mockSocket = {
+      destroy: vi.fn(),
+      on: vi.fn(),
+    };
+
+    vi.doMock("node:net", () => ({
+      default: {
+        createConnection: vi.fn((_opts: unknown, cb: () => void) => {
+          setTimeout(() => cb(), 0);
+          return mockSocket;
+        }),
+      },
+    }));
+
+    const fetchMock = vi.fn();
+    vi.stubGlobal("fetch", fetchMock);
+
+    const { probeEndpointUrl } = await import("@/lib/provider-endpoints/probe");
+    const result = await probeEndpointUrl("https://example.com", 5000);
+
+    expect(result.method).toBe("TCP");
+    expect(fetchMock).not.toHaveBeenCalled();
+  });
 });