proxy-forwarder-hedge-first-byte.test.ts 55 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641
  1. import { beforeEach, describe, expect, test, vi } from "vitest";
  2. import { resolveEndpointPolicy } from "@/app/v1/_lib/proxy/endpoint-policy";
  3. const mocks = vi.hoisted(() => ({
  4. pickRandomProviderWithExclusion: vi.fn(),
  5. recordSuccess: vi.fn(),
  6. recordFailure: vi.fn(async () => {}),
  7. getCircuitState: vi.fn(() => "closed"),
  8. getProviderHealthInfo: vi.fn(async () => ({
  9. health: { failureCount: 0 },
  10. config: { failureThreshold: 3 },
  11. })),
  12. updateSessionBindingSmart: vi.fn(async () => ({ updated: true, reason: "test" })),
  13. updateSessionProvider: vi.fn(async () => {}),
  14. clearSessionProvider: vi.fn(async () => {}),
  15. isHttp2Enabled: vi.fn(async () => false),
  16. getPreferredProviderEndpoints: vi.fn(async () => []),
  17. getEndpointFilterStats: vi.fn(async () => null),
  18. recordEndpointSuccess: vi.fn(async () => {}),
  19. recordEndpointFailure: vi.fn(async () => {}),
  20. isVendorTypeCircuitOpen: vi.fn(async () => false),
  21. recordVendorTypeAllEndpointsTimeout: vi.fn(async () => {}),
  22. categorizeErrorAsync: vi.fn(async () => 0),
  23. getErrorDetectionResultAsync: vi.fn(async () => ({ matched: false })),
  24. getCachedSystemSettings: vi.fn(async () => ({
  25. enableThinkingSignatureRectifier: true,
  26. enableThinkingBudgetRectifier: true,
  27. })),
  28. storeSessionSpecialSettings: vi.fn(async () => {}),
  29. }));
  30. vi.mock("@/lib/logger", () => ({
  31. logger: {
  32. debug: vi.fn(),
  33. info: vi.fn(),
  34. warn: vi.fn(),
  35. trace: vi.fn(),
  36. error: vi.fn(),
  37. fatal: vi.fn(),
  38. },
  39. }));
  40. vi.mock("@/lib/config", async (importOriginal) => {
  41. const actual = await importOriginal<typeof import("@/lib/config")>();
  42. return {
  43. ...actual,
  44. getCachedSystemSettings: mocks.getCachedSystemSettings,
  45. isHttp2Enabled: mocks.isHttp2Enabled,
  46. };
  47. });
  48. vi.mock("@/lib/provider-endpoints/endpoint-selector", () => ({
  49. getPreferredProviderEndpoints: mocks.getPreferredProviderEndpoints,
  50. getEndpointFilterStats: mocks.getEndpointFilterStats,
  51. }));
  52. vi.mock("@/lib/endpoint-circuit-breaker", () => ({
  53. recordEndpointSuccess: mocks.recordEndpointSuccess,
  54. recordEndpointFailure: mocks.recordEndpointFailure,
  55. }));
  56. vi.mock("@/lib/circuit-breaker", () => ({
  57. getCircuitState: mocks.getCircuitState,
  58. getProviderHealthInfo: mocks.getProviderHealthInfo,
  59. recordFailure: mocks.recordFailure,
  60. recordSuccess: mocks.recordSuccess,
  61. }));
  62. vi.mock("@/lib/vendor-type-circuit-breaker", () => ({
  63. isVendorTypeCircuitOpen: mocks.isVendorTypeCircuitOpen,
  64. recordVendorTypeAllEndpointsTimeout: mocks.recordVendorTypeAllEndpointsTimeout,
  65. }));
  66. vi.mock("@/lib/session-manager", () => ({
  67. SessionManager: {
  68. updateSessionBindingSmart: mocks.updateSessionBindingSmart,
  69. updateSessionProvider: mocks.updateSessionProvider,
  70. clearSessionProvider: mocks.clearSessionProvider,
  71. storeSessionSpecialSettings: mocks.storeSessionSpecialSettings,
  72. },
  73. }));
  74. vi.mock("@/app/v1/_lib/proxy/provider-selector", () => ({
  75. ProxyProviderResolver: {
  76. pickRandomProviderWithExclusion: mocks.pickRandomProviderWithExclusion,
  77. },
  78. }));
  79. vi.mock("@/app/v1/_lib/proxy/errors", async (importOriginal) => {
  80. const actual = await importOriginal<typeof import("@/app/v1/_lib/proxy/errors")>();
  81. return {
  82. ...actual,
  83. categorizeErrorAsync: mocks.categorizeErrorAsync,
  84. getErrorDetectionResultAsync: mocks.getErrorDetectionResultAsync,
  85. };
  86. });
  87. import {
  88. ErrorCategory as ProxyErrorCategory,
  89. ProxyError as UpstreamProxyError,
  90. getErrorDetectionResultAsync,
  91. } from "@/app/v1/_lib/proxy/errors";
  92. import { ProxyForwarder } from "@/app/v1/_lib/proxy/forwarder";
  93. import { ModelRedirector } from "@/app/v1/_lib/proxy/model-redirector";
  94. import { ProxySession } from "@/app/v1/_lib/proxy/session";
  95. import { logger } from "@/lib/logger";
  96. import type { Provider } from "@/types/provider";
  97. type AttemptRuntime = {
  98. clearResponseTimeout?: () => void;
  99. responseController?: AbortController;
  100. };
  101. function createProvider(overrides: Partial<Provider> = {}): Provider {
  102. return {
  103. id: 1,
  104. name: "p1",
  105. url: "https://provider.example.com",
  106. key: "k",
  107. providerVendorId: null,
  108. isEnabled: true,
  109. weight: 1,
  110. priority: 0,
  111. groupPriorities: null,
  112. costMultiplier: 1,
  113. groupTag: null,
  114. providerType: "claude",
  115. preserveClientIp: false,
  116. modelRedirects: null,
  117. allowedModels: null,
  118. mcpPassthroughType: "none",
  119. mcpPassthroughUrl: null,
  120. limit5hUsd: null,
  121. limitDailyUsd: null,
  122. dailyResetMode: "fixed",
  123. dailyResetTime: "00:00",
  124. limitWeeklyUsd: null,
  125. limitMonthlyUsd: null,
  126. limitTotalUsd: null,
  127. totalCostResetAt: null,
  128. limitConcurrentSessions: 0,
  129. maxRetryAttempts: 1,
  130. circuitBreakerFailureThreshold: 5,
  131. circuitBreakerOpenDuration: 1_800_000,
  132. circuitBreakerHalfOpenSuccessThreshold: 2,
  133. proxyUrl: null,
  134. proxyFallbackToDirect: false,
  135. firstByteTimeoutStreamingMs: 100,
  136. streamingIdleTimeoutMs: 0,
  137. requestTimeoutNonStreamingMs: 0,
  138. websiteUrl: null,
  139. faviconUrl: null,
  140. cacheTtlPreference: null,
  141. context1mPreference: null,
  142. codexReasoningEffortPreference: null,
  143. codexReasoningSummaryPreference: null,
  144. codexTextVerbosityPreference: null,
  145. codexParallelToolCallsPreference: null,
  146. codexServiceTierPreference: null,
  147. anthropicMaxTokensPreference: null,
  148. anthropicThinkingBudgetPreference: null,
  149. anthropicAdaptiveThinking: null,
  150. geminiGoogleSearchPreference: null,
  151. tpm: 0,
  152. rpm: 0,
  153. rpd: 0,
  154. cc: 0,
  155. createdAt: new Date(),
  156. updatedAt: new Date(),
  157. deletedAt: null,
  158. ...overrides,
  159. };
  160. }
  161. function createSession(clientAbortSignal: AbortSignal | null = null): ProxySession {
  162. const headers = new Headers();
  163. const session = Object.create(ProxySession.prototype);
  164. Object.assign(session, {
  165. startTime: Date.now(),
  166. method: "POST",
  167. requestUrl: new URL("https://example.com/v1/messages"),
  168. headers,
  169. originalHeaders: new Headers(headers),
  170. headerLog: JSON.stringify(Object.fromEntries(headers.entries())),
  171. request: {
  172. model: "claude-test",
  173. log: "(test)",
  174. message: {
  175. model: "claude-test",
  176. stream: true,
  177. messages: [{ role: "user", content: "hi" }],
  178. },
  179. },
  180. userAgent: null,
  181. context: null,
  182. clientAbortSignal,
  183. userName: "test-user",
  184. authState: { success: true, user: null, key: null, apiKey: null },
  185. provider: null,
  186. messageContext: null,
  187. sessionId: "sess-hedge",
  188. requestSequence: 1,
  189. originalFormat: "claude",
  190. providerType: null,
  191. originalModelName: null,
  192. originalUrlPathname: null,
  193. providerChain: [],
  194. cacheTtlResolved: null,
  195. context1mApplied: false,
  196. specialSettings: [],
  197. cachedPriceData: undefined,
  198. cachedBillingModelSource: undefined,
  199. endpointPolicy: resolveEndpointPolicy("/v1/messages"),
  200. isHeaderModified: () => false,
  201. });
  202. return session as ProxySession;
  203. }
  204. function createStreamingResponse(params: {
  205. label: string;
  206. firstChunkDelayMs: number;
  207. controller: AbortController;
  208. }): Response {
  209. const encoder = new TextEncoder();
  210. let timeoutId: ReturnType<typeof setTimeout> | null = null;
  211. const stream = new ReadableStream<Uint8Array>({
  212. start(controller) {
  213. const onAbort = () => {
  214. if (timeoutId) {
  215. clearTimeout(timeoutId);
  216. }
  217. controller.close();
  218. };
  219. if (params.controller.signal.aborted) {
  220. onAbort();
  221. return;
  222. }
  223. params.controller.signal.addEventListener("abort", onAbort, { once: true });
  224. timeoutId = setTimeout(() => {
  225. if (params.controller.signal.aborted) {
  226. controller.close();
  227. return;
  228. }
  229. controller.enqueue(encoder.encode(`data: {"provider":"${params.label}"}\n\n`));
  230. controller.close();
  231. }, params.firstChunkDelayMs);
  232. },
  233. });
  234. return new Response(stream, {
  235. status: 200,
  236. headers: { "content-type": "text/event-stream" },
  237. });
  238. }
  239. function createDelayedFailure(params: {
  240. delayMs: number;
  241. error: Error;
  242. controller: AbortController;
  243. }): Promise<Response> {
  244. return new Promise((_, reject) => {
  245. let timeoutId: ReturnType<typeof setTimeout> | null = null;
  246. const rejectWithError = () => {
  247. if (timeoutId) {
  248. clearTimeout(timeoutId);
  249. }
  250. reject(params.error);
  251. };
  252. if (params.controller.signal.aborted) {
  253. rejectWithError();
  254. return;
  255. }
  256. params.controller.signal.addEventListener("abort", rejectWithError, { once: true });
  257. timeoutId = setTimeout(() => {
  258. params.controller.signal.removeEventListener("abort", rejectWithError);
  259. reject(params.error);
  260. }, params.delayMs);
  261. });
  262. }
  263. function withThinkingBlocks(session: ProxySession): void {
  264. session.request.message = {
  265. model: "claude-test",
  266. stream: true,
  267. messages: [
  268. {
  269. role: "assistant",
  270. content: [
  271. { type: "thinking", thinking: "t", signature: "sig_thinking" },
  272. { type: "text", text: "hello", signature: "sig_text_should_remove" },
  273. { type: "redacted_thinking", data: "r", signature: "sig_redacted" },
  274. ],
  275. },
  276. ],
  277. };
  278. }
  279. describe("ProxyForwarder - first-byte hedge scheduling", () => {
  280. beforeEach(() => {
  281. vi.clearAllMocks();
  282. });
  283. test("shadow session redirect should not overwrite initial provider redirect and winner should keep its own redirect", () => {
  284. const requestedModel = "claude-haiku-4-5-20251001";
  285. const fireworksRedirect = "accounts/fireworks/routers/kimi-k2p5-turbo";
  286. const minimaxRedirect = "MiniMax-M2.7-highspeed";
  287. const fireworks = createProvider({
  288. id: 383,
  289. name: "fireworks",
  290. modelRedirects: [{ matchType: "exact", source: requestedModel, target: fireworksRedirect }],
  291. });
  292. const minimax = createProvider({
  293. id: 206,
  294. name: "Minimax Max",
  295. modelRedirects: [{ matchType: "exact", source: requestedModel, target: minimaxRedirect }],
  296. });
  297. const session = createSession();
  298. session.request.model = requestedModel;
  299. session.request.message.model = requestedModel;
  300. session.setProvider(fireworks);
  301. session.addProviderToChain(fireworks, { reason: "initial_selection" });
  302. expect(ModelRedirector.apply(session, fireworks)).toBe(true);
  303. expect(session.request.model).toBe(fireworksRedirect);
  304. expect(session.getProviderChain()[0].modelRedirect).toMatchObject({
  305. originalModel: requestedModel,
  306. redirectedModel: fireworksRedirect,
  307. billingModel: requestedModel,
  308. });
  309. const shadow = (
  310. ProxyForwarder as unknown as {
  311. createStreamingShadowSession: (session: ProxySession, provider: Provider) => ProxySession;
  312. }
  313. ).createStreamingShadowSession(session, minimax);
  314. expect(shadow.request.model).toBe(fireworksRedirect);
  315. expect(ModelRedirector.apply(shadow, minimax)).toBe(true);
  316. expect(shadow.request.model).toBe(minimaxRedirect);
  317. // The hedge fallback provider's redirect may only affect its own attempt; it must not pollute the initial provider's chain entry.
  318. expect(session.getProviderChain()[0].modelRedirect).toMatchObject({
  319. originalModel: requestedModel,
  320. redirectedModel: fireworksRedirect,
  321. billingModel: requestedModel,
  322. });
  323. (
  324. ProxyForwarder as unknown as {
  325. syncWinningAttemptSession: (target: ProxySession, source: ProxySession) => void;
  326. }
  327. ).syncWinningAttemptSession(session, shadow);
  328. session.setProvider(minimax);
  329. session.addProviderToChain(minimax, {
  330. reason: "hedge_winner",
  331. attemptNumber: 2,
  332. statusCode: 200,
  333. });
  334. const hedgeWinner = session
  335. .getProviderChain()
  336. .find((item) => item.id === minimax.id && item.reason === "hedge_winner");
  337. expect(hedgeWinner?.modelRedirect).toMatchObject({
  338. originalModel: requestedModel,
  339. redirectedModel: minimaxRedirect,
  340. billingModel: requestedModel,
  341. });
  342. });
  343. test("shadow session should clone current model redirect snapshot instead of sharing it", () => {
  344. const requestedModel = "claude-haiku-4-5-20251001";
  345. const fireworksRedirect = "accounts/fireworks/routers/kimi-k2p5-turbo";
  346. const fireworks = createProvider({
  347. id: 383,
  348. name: "fireworks",
  349. modelRedirects: [{ matchType: "exact", source: requestedModel, target: fireworksRedirect }],
  350. });
  351. const fallback = createProvider({
  352. id: 206,
  353. name: "Minimax Max",
  354. });
  355. const session = createSession();
  356. session.request.model = requestedModel;
  357. session.request.message.model = requestedModel;
  358. session.setProvider(fireworks);
  359. session.addProviderToChain(fireworks, { reason: "initial_selection" });
  360. expect(ModelRedirector.apply(session, fireworks)).toBe(true);
  361. const shadow = (
  362. ProxyForwarder as unknown as {
  363. createStreamingShadowSession: (session: ProxySession, provider: Provider) => ProxySession;
  364. }
  365. ).createStreamingShadowSession(session, fallback);
  366. const sessionState = session as unknown as {
  367. currentModelRedirect: {
  368. providerId: number;
  369. redirect: {
  370. originalModel: string;
  371. redirectedModel: string;
  372. billingModel: string;
  373. };
  374. } | null;
  375. };
  376. const shadowState = shadow as unknown as {
  377. currentModelRedirect: {
  378. providerId: number;
  379. redirect: {
  380. originalModel: string;
  381. redirectedModel: string;
  382. billingModel: string;
  383. };
  384. } | null;
  385. };
  386. expect(shadowState.currentModelRedirect).toEqual(sessionState.currentModelRedirect);
  387. if (!sessionState.currentModelRedirect || !shadowState.currentModelRedirect) {
  388. throw new Error("expected currentModelRedirect to be copied into shadow session");
  389. }
  390. shadowState.currentModelRedirect.redirect.redirectedModel = "shadow-only-model";
  391. expect(sessionState.currentModelRedirect.redirect.redirectedModel).toBe(fireworksRedirect);
  392. });
  393. test("switching to provider without redirect should clear stale redirect snapshot", () => {
  394. const requestedModel = "claude-haiku-4-5-20251001";
  395. const fireworksRedirect = "accounts/fireworks/routers/kimi-k2p5-turbo";
  396. const fireworks = createProvider({
  397. id: 383,
  398. name: "fireworks",
  399. modelRedirects: [{ matchType: "exact", source: requestedModel, target: fireworksRedirect }],
  400. });
  401. const plainProvider = createProvider({
  402. id: 520,
  403. name: "plain provider",
  404. modelRedirects: null,
  405. });
  406. const session = createSession();
  407. session.request.model = requestedModel;
  408. session.request.message.model = requestedModel;
  409. session.setProvider(fireworks);
  410. session.addProviderToChain(fireworks, { reason: "initial_selection" });
  411. expect(ModelRedirector.apply(session, fireworks)).toBe(true);
  412. expect(ModelRedirector.apply(session, plainProvider)).toBe(false);
  413. expect(session.request.model).toBe(requestedModel);
  414. const sessionState = session as unknown as {
  415. currentModelRedirect: unknown;
  416. };
  417. expect(sessionState.currentModelRedirect).toBeNull();
  418. session.setProvider(plainProvider);
  419. session.addProviderToChain(plainProvider, {
  420. reason: "retry_success",
  421. attemptNumber: 2,
  422. statusCode: 200,
  423. });
  424. const plainEntry = session
  425. .getProviderChain()
  426. .find((item) => item.id === plainProvider.id && item.reason === "retry_success");
  427. expect(plainEntry?.modelRedirect).toBeUndefined();
  428. });
  429. test("public hedge path should preserve redirect details for winner and loser attempts", async () => {
  430. vi.useFakeTimers();
  431. try {
  432. const requestedModel = "claude-haiku-4-5-20251001";
  433. const fireworksRedirect = "accounts/fireworks/routers/kimi-k2p5-turbo";
  434. const minimaxRedirect = "MiniMax-M2.7-highspeed";
  435. const fireworks = createProvider({
  436. id: 383,
  437. name: "fireworks",
  438. firstByteTimeoutStreamingMs: 100,
  439. modelRedirects: [{ matchType: "exact", source: requestedModel, target: fireworksRedirect }],
  440. });
  441. const minimax = createProvider({
  442. id: 206,
  443. name: "Minimax Max",
  444. firstByteTimeoutStreamingMs: 100,
  445. modelRedirects: [{ matchType: "exact", source: requestedModel, target: minimaxRedirect }],
  446. });
  447. const session = createSession();
  448. session.request.model = requestedModel;
  449. session.request.message.model = requestedModel;
  450. session.setProvider(fireworks);
  451. session.addProviderToChain(fireworks, { reason: "initial_selection" });
  452. mocks.pickRandomProviderWithExclusion.mockResolvedValueOnce(minimax);
  453. const doForward = vi.spyOn(
  454. ProxyForwarder as unknown as {
  455. doForward: (...args: unknown[]) => Promise<Response>;
  456. },
  457. "doForward"
  458. );
  459. const controller1 = new AbortController();
  460. const controller2 = new AbortController();
  461. doForward.mockImplementationOnce(async (attemptSession, providerForRequest) => {
  462. const runtime = attemptSession as ProxySession & AttemptRuntime;
  463. runtime.responseController = controller1;
  464. runtime.clearResponseTimeout = vi.fn();
  465. expect(
  466. ModelRedirector.apply(attemptSession as ProxySession, providerForRequest as Provider)
  467. ).toBe(true);
  468. return createStreamingResponse({
  469. label: "fireworks",
  470. firstChunkDelayMs: 220,
  471. controller: controller1,
  472. });
  473. });
  474. doForward.mockImplementationOnce(async (attemptSession, providerForRequest) => {
  475. const runtime = attemptSession as ProxySession & AttemptRuntime;
  476. runtime.responseController = controller2;
  477. runtime.clearResponseTimeout = vi.fn();
  478. expect(
  479. ModelRedirector.apply(attemptSession as ProxySession, providerForRequest as Provider)
  480. ).toBe(true);
  481. return createStreamingResponse({
  482. label: "minimax",
  483. firstChunkDelayMs: 40,
  484. controller: controller2,
  485. });
  486. });
  487. const responsePromise = ProxyForwarder.send(session);
  488. await vi.advanceTimersByTimeAsync(100);
  489. expect(doForward).toHaveBeenCalledTimes(2);
  490. await vi.advanceTimersByTimeAsync(50);
  491. const response = await responsePromise;
  492. expect(await response.text()).toContain('"provider":"minimax"');
  493. const chain = session.getProviderChain();
  494. expect(
  495. chain.find((item) => item.id === minimax.id && item.reason === "hedge_winner")
  496. ?.modelRedirect
  497. ).toMatchObject({
  498. originalModel: requestedModel,
  499. redirectedModel: minimaxRedirect,
  500. billingModel: requestedModel,
  501. });
  502. expect(
  503. chain.find((item) => item.id === fireworks.id && item.reason === "hedge_loser_cancelled")
  504. ?.modelRedirect
  505. ).toMatchObject({
  506. originalModel: requestedModel,
  507. redirectedModel: fireworksRedirect,
  508. billingModel: requestedModel,
  509. });
  510. } finally {
  511. vi.useRealTimers();
  512. }
  513. });
  514. test("public hedge path should retain redirect on shadow retry_failed entries", async () => {
  515. vi.useFakeTimers();
  516. try {
  517. const requestedModel = "claude-haiku-4-5-20251001";
  518. const fireworksRedirect = "accounts/fireworks/routers/kimi-k2p5-turbo";
  519. const minimaxRedirect = "MiniMax-M2.7-highspeed";
  520. const fireworks = createProvider({
  521. id: 383,
  522. name: "fireworks",
  523. firstByteTimeoutStreamingMs: 100,
  524. modelRedirects: [{ matchType: "exact", source: requestedModel, target: fireworksRedirect }],
  525. });
  526. const minimax = createProvider({
  527. id: 206,
  528. name: "Minimax Max",
  529. firstByteTimeoutStreamingMs: 100,
  530. modelRedirects: [{ matchType: "exact", source: requestedModel, target: minimaxRedirect }],
  531. });
  532. const session = createSession();
  533. session.request.model = requestedModel;
  534. session.request.message.model = requestedModel;
  535. session.setProvider(fireworks);
  536. session.addProviderToChain(fireworks, { reason: "initial_selection" });
  537. mocks.pickRandomProviderWithExclusion
  538. .mockResolvedValueOnce(minimax)
  539. .mockResolvedValueOnce(null);
  540. mocks.categorizeErrorAsync.mockResolvedValue(ProxyErrorCategory.PROVIDER_ERROR);
  541. const doForward = vi.spyOn(
  542. ProxyForwarder as unknown as {
  543. doForward: (...args: unknown[]) => Promise<Response>;
  544. },
  545. "doForward"
  546. );
  547. const controller1 = new AbortController();
  548. doForward.mockImplementationOnce(async (attemptSession, providerForRequest) => {
  549. const runtime = attemptSession as ProxySession & AttemptRuntime;
  550. runtime.responseController = controller1;
  551. runtime.clearResponseTimeout = vi.fn();
  552. expect(
  553. ModelRedirector.apply(attemptSession as ProxySession, providerForRequest as Provider)
  554. ).toBe(true);
  555. return createStreamingResponse({
  556. label: "fireworks",
  557. firstChunkDelayMs: 220,
  558. controller: controller1,
  559. });
  560. });
  561. doForward.mockImplementationOnce(async (attemptSession, providerForRequest) => {
  562. expect(
  563. ModelRedirector.apply(attemptSession as ProxySession, providerForRequest as Provider)
  564. ).toBe(true);
  565. throw new UpstreamProxyError("minimax upstream failed", 500);
  566. });
  567. const responsePromise = ProxyForwarder.send(session);
  568. await vi.advanceTimersByTimeAsync(100);
  569. await vi.advanceTimersByTimeAsync(150);
  570. const response = await responsePromise;
  571. expect(await response.text()).toContain('"provider":"fireworks"');
  572. const retryFailed = session
  573. .getProviderChain()
  574. .find((item) => item.id === minimax.id && item.reason === "retry_failed");
  575. expect(retryFailed?.modelRedirect).toMatchObject({
  576. originalModel: requestedModel,
  577. redirectedModel: minimaxRedirect,
  578. billingModel: requestedModel,
  579. });
  580. } finally {
  581. vi.useRealTimers();
  582. }
  583. });
  584. test("first provider exceeds first-byte threshold, second provider starts and wins by first chunk", async () => {
  585. vi.useFakeTimers();
  586. try {
  587. const provider1 = createProvider({ id: 1, name: "p1", firstByteTimeoutStreamingMs: 100 });
  588. const provider2 = createProvider({ id: 2, name: "p2", firstByteTimeoutStreamingMs: 100 });
  589. const session = createSession();
  590. session.setProvider(provider1);
  591. mocks.pickRandomProviderWithExclusion.mockResolvedValueOnce(provider2);
  592. const doForward = vi.spyOn(
  593. ProxyForwarder as unknown as {
  594. doForward: (...args: unknown[]) => Promise<Response>;
  595. },
  596. "doForward"
  597. );
  598. const controller1 = new AbortController();
  599. const controller2 = new AbortController();
  600. doForward.mockImplementationOnce(async (attemptSession) => {
  601. const runtime = attemptSession as ProxySession & AttemptRuntime;
  602. runtime.responseController = controller1;
  603. runtime.clearResponseTimeout = vi.fn();
  604. return createStreamingResponse({
  605. label: "p1",
  606. firstChunkDelayMs: 220,
  607. controller: controller1,
  608. });
  609. });
  610. doForward.mockImplementationOnce(async (attemptSession) => {
  611. const runtime = attemptSession as ProxySession & AttemptRuntime;
  612. runtime.responseController = controller2;
  613. runtime.clearResponseTimeout = vi.fn();
  614. return createStreamingResponse({
  615. label: "p2",
  616. firstChunkDelayMs: 40,
  617. controller: controller2,
  618. });
  619. });
  620. const responsePromise = ProxyForwarder.send(session);
  621. await vi.advanceTimersByTimeAsync(100);
  622. expect(doForward).toHaveBeenCalledTimes(2);
  623. await vi.advanceTimersByTimeAsync(50);
  624. const response = await responsePromise;
  625. expect(await response.text()).toContain('"provider":"p2"');
  626. expect(controller1.signal.aborted).toBe(true);
  627. expect(controller2.signal.aborted).toBe(false);
  628. expect(mocks.recordFailure).not.toHaveBeenCalled();
  629. expect(mocks.recordSuccess).not.toHaveBeenCalled();
  630. expect(session.provider?.id).toBe(2);
  631. expect(mocks.updateSessionBindingSmart).toHaveBeenCalledWith("sess-hedge", 2, 0, false, true);
  632. } finally {
  633. vi.useRealTimers();
  634. }
  635. });
  // High-concurrency mode: once the second queued attempt ("p2") has produced
  // the response, updateSessionProvider must not have been called at all.
  test("高并发模式:hedge winner 成功后不应写 session provider 观测信息", async () => {
    type ForwardHost = { doForward: (...args: unknown[]) => Promise<Response> };

    vi.useFakeTimers();
    try {
      const primary = createProvider({ id: 1, name: "p1", firstByteTimeoutStreamingMs: 100 });
      const fallback = createProvider({ id: 2, name: "p2", firstByteTimeoutStreamingMs: 100 });

      const session = createSession();
      session.setHighConcurrencyModeEnabled(true);
      session.setProvider(primary);
      mocks.pickRandomProviderWithExclusion.mockResolvedValueOnce(fallback);

      const forwardSpy = vi.spyOn(ProxyForwarder as unknown as ForwardHost, "doForward");

      // Queues one doForward call: it attaches a fresh abort controller to the
      // attempt session and returns a streaming response configured with the
      // given label and first-chunk delay.
      const queueStreamingAttempt = (label: string, firstChunkDelayMs: number): void => {
        const controller = new AbortController();
        forwardSpy.mockImplementationOnce(async (attemptSession) => {
          const attempt = attemptSession as ProxySession & AttemptRuntime;
          attempt.responseController = controller;
          attempt.clearResponseTimeout = vi.fn();
          return createStreamingResponse({ label, firstChunkDelayMs, controller });
        });
      };
      queueStreamingAttempt("p1", 220);
      queueStreamingAttempt("p2", 40);

      const pending = ProxyForwarder.send(session);
      await vi.advanceTimersByTimeAsync(100);
      await vi.advanceTimersByTimeAsync(50);

      const response = await pending;
      const payload = await response.text();
      expect(payload).toContain('"provider":"p2"');
      expect(mocks.updateSessionProvider).not.toHaveBeenCalled();
    } finally {
      vi.useRealTimers();
    }
  });
  // Characterization: both providers are configured with maxRetryAttempts = 3.
  // After 100ms of fake time a second doForward call and exactly one
  // alternative-provider pick are expected, and "p2" ends up serving the response.
  test("characterization: hedge still launches alternative provider when maxRetryAttempts > 1", async () => {
    type ForwardHost = { doForward: (...args: unknown[]) => Promise<Response> };

    vi.useFakeTimers();
    try {
      const sharedLimits = { maxRetryAttempts: 3, firstByteTimeoutStreamingMs: 100 };
      const primary = createProvider({ id: 1, name: "p1", ...sharedLimits });
      const fallback = createProvider({ id: 2, name: "p2", ...sharedLimits });

      const session = createSession();
      session.setProvider(primary);
      mocks.pickRandomProviderWithExclusion.mockResolvedValueOnce(fallback);

      const forwardSpy = vi.spyOn(ProxyForwarder as unknown as ForwardHost, "doForward");
      const primaryAbort = new AbortController();
      const fallbackAbort = new AbortController();

      // Queues one doForward call that binds `controller` to the attempt
      // session and returns a streaming response built from the arguments.
      const queueStreamingAttempt = (
        label: string,
        firstChunkDelayMs: number,
        controller: AbortController
      ): void => {
        forwardSpy.mockImplementationOnce(async (attemptSession) => {
          const attempt = attemptSession as ProxySession & AttemptRuntime;
          attempt.responseController = controller;
          attempt.clearResponseTimeout = vi.fn();
          return createStreamingResponse({ label, firstChunkDelayMs, controller });
        });
      };
      queueStreamingAttempt("p1", 220, primaryAbort);
      queueStreamingAttempt("p2", 40, fallbackAbort);

      const pending = ProxyForwarder.send(session);
      await vi.advanceTimersByTimeAsync(100);
      expect(forwardSpy).toHaveBeenCalledTimes(2);
      expect(mocks.pickRandomProviderWithExclusion).toHaveBeenCalledTimes(1);

      // Snapshot of the provider chain taken before any more fake time elapses.
      expect(session.getProviderChain()).toEqual(
        expect.arrayContaining([
          expect.objectContaining({ reason: "hedge_triggered", id: 1 }),
          expect.objectContaining({ reason: "hedge_launched", id: 2 }),
        ])
      );

      await vi.advanceTimersByTimeAsync(50);
      const response = await pending;
      const payload = await response.text();
      expect(payload).toContain('"provider":"p2"');
      expect(primaryAbort.signal.aborted).toBe(true);
      expect(session.provider?.id).toBe(2);
    } finally {
      vi.useRealTimers();
    }
  });
  // The first attempt ("p1", first-chunk delay 140) must still be able to serve
  // the response after a second attempt ("p2", first-chunk delay 120) has been
  // started; the second attempt's controller is then expected to be aborted.
  test("first provider can still win after hedge started if it emits first chunk earlier than fallback", async () => {
    type ForwardHost = { doForward: (...args: unknown[]) => Promise<Response> };

    vi.useFakeTimers();
    try {
      const primary = createProvider({ id: 1, name: "p1", firstByteTimeoutStreamingMs: 100 });
      const fallback = createProvider({ id: 2, name: "p2", firstByteTimeoutStreamingMs: 100 });

      const session = createSession();
      session.setProvider(primary);
      mocks.pickRandomProviderWithExclusion.mockResolvedValueOnce(fallback);

      const forwardSpy = vi.spyOn(ProxyForwarder as unknown as ForwardHost, "doForward");
      const primaryAbort = new AbortController();
      const fallbackAbort = new AbortController();

      // Queues one doForward call that binds `controller` to the attempt
      // session and returns a streaming response built from the arguments.
      const queueStreamingAttempt = (
        label: string,
        firstChunkDelayMs: number,
        controller: AbortController
      ): void => {
        forwardSpy.mockImplementationOnce(async (attemptSession) => {
          const attempt = attemptSession as ProxySession & AttemptRuntime;
          attempt.responseController = controller;
          attempt.clearResponseTimeout = vi.fn();
          return createStreamingResponse({ label, firstChunkDelayMs, controller });
        });
      };
      queueStreamingAttempt("p1", 140, primaryAbort);
      queueStreamingAttempt("p2", 120, fallbackAbort);

      const pending = ProxyForwarder.send(session);
      await vi.advanceTimersByTimeAsync(100);
      expect(forwardSpy).toHaveBeenCalledTimes(2);
      await vi.advanceTimersByTimeAsync(45);

      const response = await pending;
      const payload = await response.text();
      expect(payload).toContain('"provider":"p1"');

      // Winner keeps its connection; the other attempt is cancelled.
      expect(primaryAbort.signal.aborted).toBe(false);
      expect(fallbackAbort.signal.aborted).toBe(true);

      // No circuit-breaker style bookkeeping is expected from this path.
      expect(mocks.recordFailure).not.toHaveBeenCalled();
      expect(mocks.recordSuccess).not.toHaveBeenCalled();
      expect(session.provider?.id).toBe(1);
    } finally {
      vi.useRealTimers();
    }
  });
  // Three providers: the first two are configured with a 400ms first-chunk
  // delay, the third with 20ms. After 200ms of fake time three doForward calls
  // are expected, and "p3" must serve the response while the other two
  // controllers end up aborted.
  test("when multiple providers all exceed threshold, hedge scheduler keeps expanding until a later provider wins", async () => {
    type ForwardHost = { doForward: (...args: unknown[]) => Promise<Response> };

    vi.useFakeTimers();
    try {
      const first = createProvider({ id: 1, name: "p1", firstByteTimeoutStreamingMs: 100 });
      const second = createProvider({ id: 2, name: "p2", firstByteTimeoutStreamingMs: 100 });
      const third = createProvider({ id: 3, name: "p3", firstByteTimeoutStreamingMs: 100 });

      const session = createSession();
      session.setProvider(first);
      mocks.pickRandomProviderWithExclusion.mockResolvedValueOnce(second);
      mocks.pickRandomProviderWithExclusion.mockResolvedValueOnce(third);

      const forwardSpy = vi.spyOn(ProxyForwarder as unknown as ForwardHost, "doForward");

      // One entry per expected doForward call, in call order.
      const plannedAttempts = [
        { label: "p1", firstChunkDelayMs: 400, controller: new AbortController() },
        { label: "p2", firstChunkDelayMs: 400, controller: new AbortController() },
        { label: "p3", firstChunkDelayMs: 20, controller: new AbortController() },
      ];
      for (const { label, firstChunkDelayMs, controller } of plannedAttempts) {
        forwardSpy.mockImplementationOnce(async (attemptSession) => {
          const attempt = attemptSession as ProxySession & AttemptRuntime;
          attempt.responseController = controller;
          attempt.clearResponseTimeout = vi.fn();
          return createStreamingResponse({ label, firstChunkDelayMs, controller });
        });
      }

      const pending = ProxyForwarder.send(session);
      await vi.advanceTimersByTimeAsync(200);
      expect(forwardSpy).toHaveBeenCalledTimes(3);
      await vi.advanceTimersByTimeAsync(25);

      const response = await pending;
      const payload = await response.text();
      expect(payload).toContain('"provider":"p3"');

      // p1 and p2 aborted, p3 (the winner) left untouched.
      const [abortedFirst, abortedSecond, abortedThird] = plannedAttempts.map(
        (planned) => planned.controller.signal.aborted
      );
      expect(abortedFirst).toBe(true);
      expect(abortedSecond).toBe(true);
      expect(abortedThird).toBe(false);

      expect(mocks.recordFailure).not.toHaveBeenCalled();
      expect(mocks.recordSuccess).not.toHaveBeenCalled();
      expect(session.provider?.id).toBe(3);
    } finally {
      vi.useRealTimers();
    }
  });
  // The client aborts while two attempts are in flight. Expected outcome: the
  // send promise rejects with statusCode 499, both attempt controllers are
  // aborted, the session provider binding is cleared, and each provider's
  // "client_abort" chain entry carries that provider's own model redirect.
  test("client abort before any winner should abort all in-flight attempts, return 499, and clear sticky provider binding", async () => {
    type ForwardHost = { doForward: (...args: unknown[]) => Promise<Response> };

    vi.useFakeTimers();
    try {
      const requestedModel = "claude-haiku-4-5-20251001";
      const primaryTarget = "accounts/fireworks/routers/kimi-k2p5-turbo";
      const fallbackTarget = "MiniMax-M2.7-highspeed";

      const primary = createProvider({
        id: 1,
        name: "p1",
        firstByteTimeoutStreamingMs: 100,
        modelRedirects: [{ matchType: "exact", source: requestedModel, target: primaryTarget }],
      });
      const fallback = createProvider({
        id: 2,
        name: "p2",
        firstByteTimeoutStreamingMs: 100,
        modelRedirects: [{ matchType: "exact", source: requestedModel, target: fallbackTarget }],
      });

      const clientAbort = new AbortController();
      const session = createSession(clientAbort.signal);
      session.request.model = requestedModel;
      session.request.message.model = requestedModel;
      session.setProvider(primary);
      mocks.pickRandomProviderWithExclusion.mockResolvedValueOnce(fallback);

      const forwardSpy = vi.spyOn(ProxyForwarder as unknown as ForwardHost, "doForward");
      const primaryAbort = new AbortController();
      const fallbackAbort = new AbortController();

      // Queues one doForward call that applies the provider's model redirect
      // (asserting it took effect) and returns a streaming response with a
      // 500ms first-chunk delay.
      const queueRedirectedAttempt = (label: string, controller: AbortController): void => {
        forwardSpy.mockImplementationOnce(async (attemptSession, providerForRequest) => {
          const attempt = attemptSession as ProxySession & AttemptRuntime;
          attempt.responseController = controller;
          attempt.clearResponseTimeout = vi.fn();
          const redirected = ModelRedirector.apply(
            attemptSession as ProxySession,
            providerForRequest as Provider
          );
          expect(redirected).toBe(true);
          return createStreamingResponse({ label, firstChunkDelayMs: 500, controller });
        });
      };
      queueRedirectedAttempt("p1", primaryAbort);
      queueRedirectedAttempt("p2", fallbackAbort);

      const pending = ProxyForwarder.send(session);
      // Attach the rejection expectation up front so the rejection is observed
      // as soon as it happens while timers are being driven.
      const rejection = expect(pending).rejects.toMatchObject({ statusCode: 499 });

      await vi.advanceTimersByTimeAsync(100);
      expect(forwardSpy).toHaveBeenCalledTimes(2);

      clientAbort.abort(new Error("client_cancelled"));
      await vi.runAllTimersAsync();
      await rejection;

      expect(primaryAbort.signal.aborted).toBe(true);
      expect(fallbackAbort.signal.aborted).toBe(true);
      expect(mocks.clearSessionProvider).toHaveBeenCalledWith("sess-hedge");
      expect(mocks.recordFailure).not.toHaveBeenCalled();
      expect(mocks.recordSuccess).not.toHaveBeenCalled();

      const chain = session.getProviderChain();
      const expectedRedirects = [
        [primary, primaryTarget],
        [fallback, fallbackTarget],
      ] as const;
      for (const [provider, redirectedModel] of expectedRedirects) {
        const abortEntry = chain.find(
          (item) => item.id === provider.id && item.reason === "client_abort"
        );
        expect(abortEntry?.modelRedirect).toMatchObject({
          originalModel: requestedModel,
          redirectedModel,
          billingModel: requestedModel,
        });
      }
    } finally {
      vi.useRealTimers();
    }
  });
  // If picking the alternative provider rejects ("selector down"), the request
  // must still settle: the send promise rejects with statusCode 503 and the
  // only in-flight attempt's controller is aborted.
  test("hedge launcher rejection should settle request instead of hanging", async () => {
    type ForwardHost = { doForward: (...args: unknown[]) => Promise<Response> };

    vi.useFakeTimers();
    try {
      const onlyProvider = createProvider({
        id: 1,
        name: "p1",
        firstByteTimeoutStreamingMs: 100,
      });
      const session = createSession();
      session.setProvider(onlyProvider);
      mocks.pickRandomProviderWithExclusion.mockRejectedValueOnce(new Error("selector down"));

      const forwardSpy = vi.spyOn(ProxyForwarder as unknown as ForwardHost, "doForward");
      const attemptAbort = new AbortController();
      forwardSpy.mockImplementationOnce(async (attemptSession) => {
        const attempt = attemptSession as ProxySession & AttemptRuntime;
        attempt.responseController = attemptAbort;
        attempt.clearResponseTimeout = vi.fn();
        return createStreamingResponse({
          label: "p1",
          firstChunkDelayMs: 500,
          controller: attemptAbort,
        });
      });

      const pending = ProxyForwarder.send(session);
      // Register the expectation before driving timers so the rejection is
      // handled the moment it occurs.
      const rejection = expect(pending).rejects.toMatchObject({ statusCode: 503 });

      await vi.advanceTimersByTimeAsync(100);
      await vi.runAllTimersAsync();
      await rejection;

      expect(attemptAbort.signal.aborted).toBe(true);
    } finally {
      vi.useRealTimers();
    }
  });
  // Endpoint lookup rejects ("Redis connection lost") and no alternative
  // provider is available (picker resolves null). The resulting error must be
  // the generic 503 UpstreamProxyError rather than a provider-specific one.
  test("strict endpoint pool exhaustion should converge to terminal fallback instead of provider-specific error", async () => {
    vi.useFakeTimers();
    try {
      const vendorProvider = createProvider({
        id: 1,
        name: "p1",
        providerType: "claude",
        providerVendorId: 123,
        firstByteTimeoutStreamingMs: 100,
      });

      const session = createSession();
      session.requestUrl = new URL("https://example.com/v1/messages");
      session.setProvider(vendorProvider);

      mocks.getPreferredProviderEndpoints.mockRejectedValueOnce(new Error("Redis connection lost"));
      mocks.pickRandomProviderWithExclusion.mockResolvedValueOnce(null);

      // Convert the rejection into a resolved value so it can be inspected.
      const settled = ProxyForwarder.send(session).catch(
        (rejection) => rejection as UpstreamProxyError
      );
      await vi.runAllTimersAsync();
      const failure = await settled;

      expect(mocks.pickRandomProviderWithExclusion).toHaveBeenCalled();
      expect(failure).toBeInstanceOf(UpstreamProxyError);
      expect(failure.statusCode).toBe(503);
      expect(failure.message).toBe("所有供应商暂时不可用,请稍后重试");
    } finally {
      vi.useRealTimers();
    }
  });
  // Table-driven: both queued attempts fail with an error produced by the
  // case's factory, and categorizeErrorAsync is stubbed to return the case's
  // category twice. Whatever the category, the caller must receive the generic
  // 503 UpstreamProxyError and the session provider binding must be cleared.
  test.each([
    {
      name: "provider error",
      category: ProxyErrorCategory.PROVIDER_ERROR,
      errorFactory: (provider: Provider) =>
        new UpstreamProxyError("Provider returned 401: invalid key", 401, {
          body: '{"error":"invalid_api_key"}',
          providerId: provider.id,
          providerName: provider.name,
        }),
    },
    {
      name: "resource not found",
      category: ProxyErrorCategory.RESOURCE_NOT_FOUND,
      errorFactory: (provider: Provider) =>
        new UpstreamProxyError("Provider returned 404: model not found", 404, {
          body: '{"error":"model_not_found"}',
          providerId: provider.id,
          providerName: provider.name,
        }),
    },
    {
      name: "system error",
      category: ProxyErrorCategory.SYSTEM_ERROR,
      errorFactory: () => new Error("fetch failed"),
    },
  ])("when a real hedge race ends with only $name, terminal error should be generic fallback", async ({
    category,
    errorFactory,
  }) => {
    type ForwardHost = { doForward: (...args: unknown[]) => Promise<Response> };

    vi.useFakeTimers();
    try {
      const primary = createProvider({ id: 1, name: "p1", firstByteTimeoutStreamingMs: 100 });
      const fallback = createProvider({ id: 2, name: "p2", firstByteTimeoutStreamingMs: 100 });

      const session = createSession();
      session.setProvider(primary);

      // First pick yields the fallback; the second pick finds nobody left.
      mocks.pickRandomProviderWithExclusion.mockResolvedValueOnce(fallback);
      mocks.pickRandomProviderWithExclusion.mockResolvedValueOnce(null);
      mocks.categorizeErrorAsync.mockResolvedValueOnce(category);
      mocks.categorizeErrorAsync.mockResolvedValueOnce(category);

      const forwardSpy = vi.spyOn(ProxyForwarder as unknown as ForwardHost, "doForward");
      const primaryAbort = new AbortController();
      const fallbackAbort = new AbortController();

      // Queues one doForward call that binds `controller` to the attempt
      // session and returns a delayed failure carrying the case's error.
      const queueFailingAttempt = (
        provider: Provider,
        delayMs: number,
        controller: AbortController
      ): void => {
        forwardSpy.mockImplementationOnce(async (attemptSession) => {
          const attempt = attemptSession as ProxySession & AttemptRuntime;
          attempt.responseController = controller;
          attempt.clearResponseTimeout = vi.fn();
          return createDelayedFailure({ delayMs, error: errorFactory(provider), controller });
        });
      };
      queueFailingAttempt(primary, 150, primaryAbort);
      queueFailingAttempt(fallback, 160, fallbackAbort);

      // Convert the rejection into a resolved value so it can be inspected.
      const settled = ProxyForwarder.send(session).catch(
        (rejection) => rejection as UpstreamProxyError
      );
      await vi.advanceTimersByTimeAsync(100);
      expect(forwardSpy).toHaveBeenCalledTimes(2);
      await vi.runAllTimersAsync();

      const failure = await settled;
      expect(failure).toBeInstanceOf(UpstreamProxyError);
      expect(failure.statusCode).toBe(503);
      expect(failure.message).toBe("所有供应商暂时不可用,请稍后重试");
      // Provider-specific wording must not leak into the terminal message.
      expect(failure.message).not.toContain("invalid key");
      expect(failure.message).not.toContain("model not found");
      expect(mocks.clearSessionProvider).toHaveBeenCalledWith("sess-hedge");
    } finally {
      vi.useRealTimers();
    }
  });
  // doForward rejects immediately with a 400 that is categorized as a
  // non-retryable client error. The exact same error object must reach the
  // caller, no alternative provider may be picked, the session binding is
  // cleared, and the matched detection rule shows up in both the provider
  // chain entry and the warning log. Runs on real timers.
  test("non-retryable client errors should stop hedge immediately and preserve original error", async () => {
    type ForwardHost = { doForward: (...args: unknown[]) => Promise<Response> };

    const primary = createProvider({ id: 1, name: "p1", firstByteTimeoutStreamingMs: 100 });
    const fallback = createProvider({ id: 2, name: "p2", firstByteTimeoutStreamingMs: 100 });
    const session = createSession();
    session.setProvider(primary);

    const upstreamFailure = new UpstreamProxyError("prompt too long", 400, {
      body: '{"error":"prompt_too_long"}',
      providerId: primary.id,
      providerName: primary.name,
    });

    mocks.pickRandomProviderWithExclusion.mockResolvedValueOnce(fallback);
    mocks.categorizeErrorAsync.mockResolvedValueOnce(ProxyErrorCategory.NON_RETRYABLE_CLIENT_ERROR);
    vi.mocked(getErrorDetectionResultAsync).mockResolvedValueOnce({
      matched: true,
      ruleId: 42,
      category: "thinking_error",
      pattern: "prompt too long",
      matchType: "contains",
      description: "Prompt too long",
      overrideStatusCode: 400,
    });

    const forwardSpy = vi.spyOn(ProxyForwarder as unknown as ForwardHost, "doForward");
    forwardSpy.mockRejectedValueOnce(upstreamFailure);

    // Convert the rejection into a resolved value so it can be inspected.
    const settled = ProxyForwarder.send(session).catch(
      (rejection) => rejection as UpstreamProxyError
    );
    const failure = await settled;

    // Identity check: the original error is passed through, not wrapped.
    expect(failure).toBe(upstreamFailure);
    expect(failure.message).toBe("prompt too long");
    expect(forwardSpy).toHaveBeenCalledTimes(1);
    expect(mocks.pickRandomProviderWithExclusion).not.toHaveBeenCalled();
    expect(mocks.clearSessionProvider).toHaveBeenCalledWith("sess-hedge");

    const expectedChainEntry = expect.objectContaining({
      reason: "client_error_non_retryable",
      statusCode: 400,
      errorDetails: expect.objectContaining({
        matchedRule: expect.objectContaining({
          ruleId: 42,
        }),
      }),
    });
    expect(session.getProviderChain()).toEqual(expect.arrayContaining([expectedChainEntry]));

    const expectedLogFields = expect.objectContaining({
      matchedRuleId: 42,
      matchedRuleName: "Prompt too long",
      matchedRulePattern: "prompt too long",
      matchedRuleCategory: "thinking_error",
      matchedRuleMatchType: "contains",
      matchedRuleHasOverrideResponse: false,
      matchedRuleHasOverrideStatusCode: true,
    });
    expect(vi.mocked(logger.warn)).toHaveBeenCalledWith(
      "ProxyForwarder: Non-retryable client error in hedge, aborting all attempts",
      expectedLogFields
    );
  });
  // When the hedge alternative provider (p2) fails with a thinking-signature
  // error, a third doForward call is expected whose request body no longer
  // contains thinking / redacted_thinking blocks or `signature` fields. That
  // retry must serve the response, and the rectifier hit must be persisted via
  // storeSessionSpecialSettings with providerId 2.
  test("hedge 备选供应商命中 thinking signature 错误时,应整流后在同供应商重试并保留审计", async () => {
    type ForwardHost = { doForward: (...args: unknown[]) => Promise<Response> };
    type MessageBody = { messages: Array<{ content: Array<Record<string, unknown>> }> };

    vi.useFakeTimers();
    try {
      const primary = createProvider({ id: 1, name: "p1", firstByteTimeoutStreamingMs: 100 });
      const fallback = createProvider({ id: 2, name: "p2", firstByteTimeoutStreamingMs: 100 });

      const session = createSession();
      session.setProvider(primary);
      withThinkingBlocks(session);

      mocks.pickRandomProviderWithExclusion.mockResolvedValueOnce(fallback);
      mocks.categorizeErrorAsync.mockResolvedValueOnce(
        ProxyErrorCategory.NON_RETRYABLE_CLIENT_ERROR
      );

      const signatureFailure = new UpstreamProxyError(
        "Invalid `signature` in `thinking` block",
        400,
        {
          body: '{"error":"invalid_signature"}',
          providerId: fallback.id,
          providerName: fallback.name,
        }
      );

      const forwardSpy = vi.spyOn(ProxyForwarder as unknown as ForwardHost, "doForward");
      const primaryAbort = new AbortController();
      const fallbackFirstAbort = new AbortController();
      const fallbackRetryAbort = new AbortController();

      // Binds a controller and a stub timeout-clearer onto the attempt session.
      const bindRuntime = (
        attemptSession: unknown,
        controller: AbortController
      ): ProxySession & AttemptRuntime => {
        const attempt = attemptSession as ProxySession & AttemptRuntime;
        attempt.responseController = controller;
        attempt.clearResponseTimeout = vi.fn();
        return attempt;
      };

      // Call 1: p1 streaming response configured with a 600ms first-chunk delay.
      forwardSpy.mockImplementationOnce(async (attemptSession) => {
        bindRuntime(attemptSession, primaryAbort);
        return createStreamingResponse({
          label: "p1",
          firstChunkDelayMs: 600,
          controller: primaryAbort,
        });
      });

      // Call 2: p2 delayed failure (delayMs 50) carrying the signature error.
      forwardSpy.mockImplementationOnce(async (attemptSession) => {
        bindRuntime(attemptSession, fallbackFirstAbort);
        return createDelayedFailure({
          delayMs: 50,
          error: signatureFailure,
          controller: fallbackFirstAbort,
        });
      });

      // Call 3: the retry. The request body is verified before the runtime
      // fields are bound, then a streaming response is returned.
      forwardSpy.mockImplementationOnce(async (attemptSession) => {
        const attempt = attemptSession as ProxySession & AttemptRuntime;
        const contentBlocks = (attempt.request.message as MessageBody).messages[0].content;
        expect(contentBlocks.some((block) => block.type === "thinking")).toBe(false);
        expect(contentBlocks.some((block) => block.type === "redacted_thinking")).toBe(false);
        expect(contentBlocks.some((block) => "signature" in block)).toBe(false);
        bindRuntime(attemptSession, fallbackRetryAbort);
        return createStreamingResponse({
          label: "p2-rectified",
          firstChunkDelayMs: 180,
          controller: fallbackRetryAbort,
        });
      });

      const pending = ProxyForwarder.send(session);
      await vi.advanceTimersByTimeAsync(100);
      expect(forwardSpy).toHaveBeenCalledTimes(2);
      await vi.advanceTimersByTimeAsync(55);
      expect(forwardSpy).toHaveBeenCalledTimes(3);
      await vi.advanceTimersByTimeAsync(200);

      const response = await pending;
      const payload = await response.text();
      expect(payload).toContain('"provider":"p2-rectified"');
      expect(session.provider?.id).toBe(2);
      expect(primaryAbort.signal.aborted).toBe(true);
      expect(mocks.pickRandomProviderWithExclusion).toHaveBeenCalled();

      const rectifierAudit = expect.objectContaining({
        type: "thinking_signature_rectifier",
        hit: true,
        providerId: 2,
      });
      expect(mocks.storeSessionSpecialSettings).toHaveBeenCalledWith(
        "sess-hedge",
        expect.arrayContaining([rectifierAudit]),
        1
      );
    } finally {
      vi.useRealTimers();
    }
  });
  // The initial provider (p1) fails with a thinking-budget validation error
  // while a second attempt (p2) is already queued. A third doForward call is
  // expected with max_tokens 64000 and thinking.budget_tokens 32000; that retry
  // must serve the response, with only one alternative-provider pick overall
  // and a "thinking_budget_rectifier" special setting recorded for provider 1.
  test("hedge 路径命中 thinking budget 错误时,应整流后在同供应商重试", async () => {
    type ForwardHost = { doForward: (...args: unknown[]) => Promise<Response> };
    type BudgetBody = {
      max_tokens: number;
      thinking: { type: string; budget_tokens: number };
    };

    vi.useFakeTimers();
    try {
      const primary = createProvider({ id: 1, name: "p1", firstByteTimeoutStreamingMs: 100 });
      const fallback = createProvider({ id: 2, name: "p2", firstByteTimeoutStreamingMs: 100 });

      const session = createSession();
      session.setProvider(primary);
      // Request starts with budget_tokens 500 and max_tokens 1000.
      session.request.message = {
        model: "claude-test",
        stream: true,
        max_tokens: 1000,
        thinking: { type: "enabled", budget_tokens: 500 },
        messages: [{ role: "user", content: "hi" }],
      };

      mocks.pickRandomProviderWithExclusion.mockResolvedValueOnce(fallback);
      mocks.categorizeErrorAsync.mockResolvedValueOnce(
        ProxyErrorCategory.NON_RETRYABLE_CLIENT_ERROR
      );

      const budgetFailure = new UpstreamProxyError(
        "thinking.enabled.budget_tokens: Input should be greater than or equal to 1024",
        400,
        {
          body: '{"error":"budget_too_low"}',
          providerId: primary.id,
          providerName: primary.name,
        }
      );

      const forwardSpy = vi.spyOn(ProxyForwarder as unknown as ForwardHost, "doForward");
      const primaryFirstAbort = new AbortController();
      const primaryRetryAbort = new AbortController();
      const fallbackAbort = new AbortController();

      // Binds a controller and a stub timeout-clearer onto the attempt session.
      const bindRuntime = (
        attemptSession: unknown,
        controller: AbortController
      ): ProxySession & AttemptRuntime => {
        const attempt = attemptSession as ProxySession & AttemptRuntime;
        attempt.responseController = controller;
        attempt.clearResponseTimeout = vi.fn();
        return attempt;
      };

      // Call 1: p1 delayed failure (delayMs 140) carrying the budget error.
      forwardSpy.mockImplementationOnce(async (attemptSession) => {
        bindRuntime(attemptSession, primaryFirstAbort);
        return createDelayedFailure({
          delayMs: 140,
          error: budgetFailure,
          controller: primaryFirstAbort,
        });
      });

      // Call 2: p2 streaming response configured with a 500ms first-chunk delay.
      forwardSpy.mockImplementationOnce(async (attemptSession) => {
        bindRuntime(attemptSession, fallbackAbort);
        return createStreamingResponse({
          label: "p2",
          firstChunkDelayMs: 500,
          controller: fallbackAbort,
        });
      });

      // Call 3: the retry. The adjusted body is verified before the runtime
      // fields are bound, then a streaming response is returned.
      forwardSpy.mockImplementationOnce(async (attemptSession) => {
        const attempt = attemptSession as ProxySession & AttemptRuntime;
        const { max_tokens: maxTokens, thinking } = attempt.request.message as BudgetBody;
        expect(maxTokens).toBe(64000);
        expect(thinking.type).toBe("enabled");
        expect(thinking.budget_tokens).toBe(32000);
        bindRuntime(attemptSession, primaryRetryAbort);
        return createStreamingResponse({
          label: "p1-budget-rectified",
          firstChunkDelayMs: 40,
          controller: primaryRetryAbort,
        });
      });

      const pending = ProxyForwarder.send(session);
      await vi.advanceTimersByTimeAsync(100);
      expect(forwardSpy).toHaveBeenCalledTimes(2);
      await vi.advanceTimersByTimeAsync(45);
      expect(forwardSpy).toHaveBeenCalledTimes(3);
      await vi.advanceTimersByTimeAsync(50);

      const response = await pending;
      const payload = await response.text();
      expect(payload).toContain('"provider":"p1-budget-rectified"');
      expect(session.provider?.id).toBe(1);
      expect(mocks.pickRandomProviderWithExclusion).toHaveBeenCalledTimes(1);

      const rectifierSetting = expect.objectContaining({
        type: "thinking_budget_rectifier",
        hit: true,
        providerId: 1,
      });
      expect(session.getSpecialSettings()).toEqual(expect.arrayContaining([rectifierSetting]));
    } finally {
      vi.useRealTimers();
    }
  });
  // Provider 1's endpoint lookup rejects, so only provider 2 is expected to
  // reach doForward (a single implementation is queued). The winning chain
  // entry must then be labelled "request_success" rather than "hedge_winner".
  test("endpoint resolution failure should not inflate launchedProviderCount, winner gets request_success not hedge_winner", async () => {
    type ForwardHost = { doForward: (...args: unknown[]) => Promise<Response> };

    vi.useFakeTimers();
    try {
      const vendorProvider = createProvider({
        id: 1,
        name: "p1",
        providerVendorId: 123,
        firstByteTimeoutStreamingMs: 100,
      });
      const plainProvider = createProvider({
        id: 2,
        name: "p2",
        providerVendorId: null,
        firstByteTimeoutStreamingMs: 100,
      });

      const session = createSession();
      session.requestUrl = new URL("https://example.com/v1/messages");
      session.setProvider(vendorProvider);

      // Make the endpoint lookup fail once.
      mocks.getPreferredProviderEndpoints.mockRejectedValueOnce(
        new Error("Endpoint resolution failed")
      );
      // Offer provider 2 as the alternative.
      mocks.pickRandomProviderWithExclusion.mockResolvedValueOnce(plainProvider);

      const forwardSpy = vi.spyOn(ProxyForwarder as unknown as ForwardHost, "doForward");
      const winnerAbort = new AbortController();

      // The single queued doForward implementation, labelled for provider 2.
      forwardSpy.mockImplementationOnce(async (attemptSession) => {
        const attempt = attemptSession as ProxySession & AttemptRuntime;
        attempt.responseController = winnerAbort;
        attempt.clearResponseTimeout = vi.fn();
        return createStreamingResponse({
          label: "p2",
          firstChunkDelayMs: 10,
          controller: winnerAbort,
        });
      });

      const pending = ProxyForwarder.send(session);
      await vi.advanceTimersByTimeAsync(200);

      const response = await pending;
      const payload = await response.text();
      expect(payload).toContain('"provider":"p2"');
      expect(session.provider?.id).toBe(2);

      // Key assertion: with only one provider actually launched, the winner
      // counts as a plain success, not as the winner of a hedge race.
      const winningReasons = ["request_success", "hedge_winner"];
      const winnerEntry = session
        .getProviderChain()
        .find((entry) => winningReasons.includes(entry.reason as string));
      expect(winnerEntry).toBeDefined();
      expect(winnerEntry?.reason).toBe("request_success");
    } finally {
      vi.useRealTimers();
    }
  });
  1423. });