// billing-model-source.test.ts
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { ModelPrice, ModelPriceData } from "@/types/model-price";
import type { SystemSettings } from "@/types/system-config";
// Side-effect capture buffers shared with the mock factories below; reset in beforeEach.
const asyncTasks: Promise<void>[] = [];
const cloudPriceSyncRequests: Array<{ reason: string }> = [];
// Record background tasks instead of detaching them so tests can await
// billing finalization deterministically (see drainAsyncTasks()).
vi.mock("@/lib/async-task-manager", () => ({
  AsyncTaskManager: {
    register: (_taskId: string, promise: Promise<void>) => {
      asyncTasks.push(promise);
      return new AbortController();
    },
    cleanup: () => {},
    cancel: () => {},
  },
}));
// Silence all logging during tests.
vi.mock("@/lib/logger", () => ({
  logger: {
    debug: () => {},
    info: () => {},
    warn: () => {},
    error: () => {},
    trace: () => {},
  },
}));
// Record cloud price-table sync requests instead of performing them.
vi.mock("@/lib/price-sync/cloud-price-updater", () => ({
  requestCloudPriceTableSync: (payload: { reason: string }) => {
    cloudPriceSyncRequests.push(payload);
  },
}));
// Repository and service layers are stubbed with vi.fn(); individual tests
// install per-scenario behavior via vi.mocked(...).mockImplementation(...).
vi.mock("@/repository/model-price", () => ({
  findLatestPriceByModel: vi.fn(),
}));
vi.mock("@/repository/system-config", () => ({
  getSystemSettings: vi.fn(),
}));
vi.mock("@/repository/message", () => ({
  updateMessageRequestCost: vi.fn(),
  updateMessageRequestDetails: vi.fn(),
  updateMessageRequestDuration: vi.fn(),
}));
vi.mock("@/lib/session-manager", () => ({
  SessionManager: {
    updateSessionUsage: vi.fn(),
    updateSessionProvider: vi.fn(),
    storeSessionResponse: vi.fn(),
    extractCodexPromptCacheKey: vi.fn(),
    updateSessionWithCodexCacheKey: vi.fn(),
  },
}));
vi.mock("@/lib/rate-limit", () => ({
  RateLimitService: {
    trackCost: vi.fn(),
    trackUserDailyCost: vi.fn(),
  },
}));
vi.mock("@/lib/session-tracker", () => ({
  SessionTracker: {
    refreshSession: vi.fn(),
  },
}));
vi.mock("@/lib/proxy-status-tracker", () => ({
  ProxyStatusTracker: {
    getInstance: () => ({
      endRequest: () => {},
    }),
  },
}));
// These imports intentionally appear after the vi.mock declarations — vitest
// hoists the mocks above them at runtime, so the imported symbols are mocked.
import { ProxyResponseHandler } from "@/app/v1/_lib/proxy/response-handler";
import { ProxySession } from "@/app/v1/_lib/proxy/session";
import { getCachedSystemSettings, invalidateSystemSettingsCache } from "@/lib/config";
import { SessionManager } from "@/lib/session-manager";
import { RateLimitService } from "@/lib/rate-limit";
import { SessionTracker } from "@/lib/session-tracker";
import {
  updateMessageRequestCost,
  updateMessageRequestDetails,
  updateMessageRequestDuration,
} from "@/repository/message";
import { findLatestPriceByModel } from "@/repository/model-price";
import { getSystemSettings } from "@/repository/system-config";
  81. beforeEach(() => {
  82. vi.clearAllMocks();
  83. cloudPriceSyncRequests.splice(0, cloudPriceSyncRequests.length);
  84. invalidateSystemSettingsCache();
  85. });
  86. function makeSystemSettings(
  87. billingModelSource: SystemSettings["billingModelSource"],
  88. codexPriorityBillingSource: SystemSettings["codexPriorityBillingSource"] = "requested",
  89. enableHighConcurrencyMode: boolean = false
  90. ): SystemSettings {
  91. const now = new Date();
  92. return {
  93. id: 1,
  94. siteTitle: "test",
  95. allowGlobalUsageView: false,
  96. currencyDisplay: "USD",
  97. billingModelSource,
  98. codexPriorityBillingSource,
  99. timezone: null,
  100. enableAutoCleanup: false,
  101. cleanupRetentionDays: 30,
  102. cleanupSchedule: "0 2 * * *",
  103. cleanupBatchSize: 10000,
  104. enableClientVersionCheck: false,
  105. verboseProviderError: false,
  106. enableHttp2: false,
  107. enableHighConcurrencyMode,
  108. interceptAnthropicWarmupRequests: false,
  109. enableThinkingSignatureRectifier: true,
  110. enableThinkingBudgetRectifier: true,
  111. enableBillingHeaderRectifier: true,
  112. enableResponseInputRectifier: true,
  113. enableCodexSessionIdCompletion: true,
  114. enableClaudeMetadataUserIdInjection: true,
  115. enableResponseFixer: true,
  116. responseFixerConfig: {
  117. fixTruncatedJson: true,
  118. fixSseFormat: true,
  119. fixEncoding: true,
  120. maxJsonDepth: 200,
  121. maxFixSize: 1024 * 1024,
  122. },
  123. createdAt: now,
  124. updatedAt: now,
  125. };
  126. }
  127. function makePriceRecord(modelName: string, priceData: ModelPriceData): ModelPrice {
  128. const now = new Date();
  129. return {
  130. id: 1,
  131. modelName,
  132. priceData,
  133. createdAt: now,
  134. updatedAt: now,
  135. };
  136. }
/**
 * Builds a ProxySession the way the proxy pipeline would: a POST to
 * /v1/messages carrying the redirected model, then original model, session id,
 * high-concurrency flag, provider, auth state, and the message-row context
 * that billing writes back to.
 *
 * NOTE(review): ProxySession's constructor is not publicly typed here, so it
 * is invoked through a structural cast describing the init object the
 * pipeline passes — keep this shape in sync with the real constructor.
 */
function createSession({
  originalModel,
  redirectedModel,
  sessionId,
  messageId,
  enableHighConcurrencyMode = false,
  providerOverrides,
  requestMessage,
}: {
  originalModel: string;
  redirectedModel: string;
  sessionId: string;
  messageId: number;
  enableHighConcurrencyMode?: boolean;
  providerOverrides?: Record<string, unknown>;
  requestMessage?: Record<string, unknown>;
}): ProxySession {
  const session = new (
    ProxySession as unknown as {
      new (init: {
        startTime: number;
        method: string;
        requestUrl: URL;
        headers: Headers;
        headerLog: string;
        request: { message: Record<string, unknown>; log: string; model: string | null };
        userAgent: string | null;
        context: unknown;
        clientAbortSignal: AbortSignal | null;
      }): ProxySession;
    }
  )({
    startTime: Date.now(),
    method: "POST",
    requestUrl: new URL("http://localhost/v1/messages"),
    headers: new Headers(),
    headerLog: "",
    // `request.model` carries the redirected model; the original is attached below.
    request: { message: requestMessage ?? {}, log: "(test)", model: redirectedModel },
    userAgent: null,
    context: {},
    clientAbortSignal: null,
  });
  session.setOriginalModel(originalModel);
  session.setSessionId(sessionId);
  session.setHighConcurrencyModeEnabled(enableHighConcurrencyMode);
  // Minimal provider/user/key fixtures; `as any` keeps them small instead of
  // satisfying the full repository row types. providerOverrides lets codex
  // scenarios swap in a ChatGPT-shaped provider.
  const provider = {
    id: 99,
    name: "test-provider",
    url: "https://api.anthropic.com",
    providerType: "claude",
    costMultiplier: 1.0,
    streamingIdleTimeoutMs: 0,
    ...providerOverrides,
  } as any;
  const user = {
    id: 123,
    name: "test-user",
    dailyResetTime: "00:00",
    dailyResetMode: "fixed",
  } as any;
  const key = {
    id: 456,
    name: "test-key",
    dailyResetTime: "00:00",
    dailyResetMode: "fixed",
  } as any;
  session.setProvider(provider);
  session.setAuthState({
    user,
    key,
    apiKey: "sk-test",
    success: true,
  });
  // messageId is the DB row the cost writers target (see updateMessageRequestCost mocks).
  session.setMessageContext({
    id: messageId,
    createdAt: new Date(),
    user,
    key,
    apiKey: "sk-test",
  });
  return session;
}
  219. function createNonStreamResponse(
  220. usage: { input_tokens: number; output_tokens: number },
  221. extras?: Record<string, unknown>
  222. ): Response {
  223. return new Response(
  224. JSON.stringify({
  225. type: "message",
  226. usage,
  227. ...(extras ?? {}),
  228. }),
  229. {
  230. status: 200,
  231. headers: { "content-type": "application/json" },
  232. }
  233. );
  234. }
  235. function createStreamResponse(usage: { input_tokens: number; output_tokens: number }): Response {
  236. const sseText = `event: message_delta\ndata: ${JSON.stringify({ usage })}\n\n`;
  237. const encoder = new TextEncoder();
  238. const stream = new ReadableStream<Uint8Array>({
  239. start(controller) {
  240. controller.enqueue(encoder.encode(sseText));
  241. controller.close();
  242. },
  243. });
  244. return new Response(stream, {
  245. status: 200,
  246. headers: { "content-type": "text/event-stream" },
  247. });
  248. }
  249. async function drainAsyncTasks(): Promise<void> {
  250. const tasks = asyncTasks.splice(0, asyncTasks.length);
  251. await Promise.all(tasks);
  252. }
  253. function captureRateLimitCosts(): number[] {
  254. const rateLimitCosts: number[] = [];
  255. vi.mocked(RateLimitService.trackCost).mockImplementation(
  256. async (_keyId: number, _providerId: number, _sessionId: string, costUsd: number) => {
  257. rateLimitCosts.push(costUsd);
  258. }
  259. );
  260. return rateLimitCosts;
  261. }
  262. async function runScenario({
  263. billingModelSource,
  264. isStream,
  265. enableHighConcurrencyMode = false,
  266. }: {
  267. billingModelSource: SystemSettings["billingModelSource"];
  268. isStream: boolean;
  269. enableHighConcurrencyMode?: boolean;
  270. }): Promise<{ dbCostUsd: string; sessionCostUsd: string; rateLimitCost: number }> {
  271. invalidateSystemSettingsCache();
  272. const usage = { input_tokens: 2, output_tokens: 3 };
  273. const originalModel = "original-model";
  274. const redirectedModel = "redirected-model";
  275. const originalPriceData: ModelPriceData = { input_cost_per_token: 1, output_cost_per_token: 1 };
  276. const redirectedPriceData: ModelPriceData = {
  277. input_cost_per_token: 10,
  278. output_cost_per_token: 10,
  279. };
  280. vi.mocked(getSystemSettings).mockResolvedValue(
  281. makeSystemSettings(billingModelSource, "requested", enableHighConcurrencyMode)
  282. );
  283. vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
  284. if (modelName === originalModel) {
  285. return makePriceRecord(modelName, originalPriceData);
  286. }
  287. if (modelName === redirectedModel) {
  288. return makePriceRecord(modelName, redirectedPriceData);
  289. }
  290. return null;
  291. });
  292. vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
  293. vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
  294. vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
  295. vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
  296. vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);
  297. const dbCosts: string[] = [];
  298. vi.mocked(updateMessageRequestCost).mockImplementation(async (_id: number, costUsd: unknown) => {
  299. dbCosts.push(String(costUsd));
  300. });
  301. const sessionCosts: string[] = [];
  302. vi.mocked(SessionManager.updateSessionUsage).mockImplementation(
  303. async (_sessionId: string, payload: Record<string, unknown>) => {
  304. if (typeof payload.costUsd === "string") {
  305. sessionCosts.push(payload.costUsd);
  306. }
  307. }
  308. );
  309. const rateLimitCosts: number[] = [];
  310. vi.mocked(RateLimitService.trackCost).mockImplementation(
  311. async (_keyId: number, _providerId: number, _sessionId: string, costUsd: number) => {
  312. rateLimitCosts.push(costUsd);
  313. }
  314. );
  315. const session = createSession({
  316. originalModel,
  317. redirectedModel,
  318. sessionId: `sess-${billingModelSource}-${isStream ? "s" : "n"}`,
  319. messageId: isStream ? 2001 : 2000,
  320. enableHighConcurrencyMode,
  321. });
  322. const response = isStream ? createStreamResponse(usage) : createNonStreamResponse(usage);
  323. const clientResponse = await ProxyResponseHandler.dispatch(session, response);
  324. if (isStream) {
  325. await clientResponse.text();
  326. }
  327. await drainAsyncTasks();
  328. const dbCostUsd = dbCosts[0] ?? "";
  329. const sessionCostUsd = sessionCosts[0] ?? "";
  330. const rateLimitCost = rateLimitCosts[0] ?? Number.NaN;
  331. return { dbCostUsd, sessionCostUsd, rateLimitCost };
  332. }
  333. describe("Billing model source - Redis session cost vs DB cost", () => {
  334. it("非流式响应:配置 = original 时 Session 成本与数据库一致", async () => {
  335. const result = await runScenario({ billingModelSource: "original", isStream: false });
  336. expect(result.dbCostUsd).toBe("5");
  337. expect(result.sessionCostUsd).toBe("5");
  338. expect(result.rateLimitCost).toBe(5);
  339. });
  340. it("非流式响应:配置 = redirected 时 Session 成本与数据库一致", async () => {
  341. const result = await runScenario({ billingModelSource: "redirected", isStream: false });
  342. expect(result.dbCostUsd).toBe("50");
  343. expect(result.sessionCostUsd).toBe("50");
  344. expect(result.rateLimitCost).toBe(50);
  345. });
  346. it("流式响应:配置 = original 时 Session 成本与数据库一致", async () => {
  347. const result = await runScenario({ billingModelSource: "original", isStream: true });
  348. expect(result.dbCostUsd).toBe("5");
  349. expect(result.sessionCostUsd).toBe("5");
  350. expect(result.rateLimitCost).toBe(5);
  351. });
  352. it("流式响应:配置 = redirected 时 Session 成本与数据库一致", async () => {
  353. const result = await runScenario({ billingModelSource: "redirected", isStream: true });
  354. expect(result.dbCostUsd).toBe("50");
  355. expect(result.sessionCostUsd).toBe("50");
  356. expect(result.rateLimitCost).toBe(50);
  357. });
  358. it("从 original 切换到 redirected 后应生效", async () => {
  359. const original = await runScenario({ billingModelSource: "original", isStream: false });
  360. const redirected = await runScenario({ billingModelSource: "redirected", isStream: false });
  361. expect(original.sessionCostUsd).toBe("5");
  362. expect(redirected.sessionCostUsd).toBe("50");
  363. expect(original.sessionCostUsd).not.toBe(redirected.sessionCostUsd);
  364. });
  365. it("高并发模式:仍更新 DB cost 与限流 cost,但跳过 session usage / session refresh 观测写入", async () => {
  366. const result = await runScenario({
  367. billingModelSource: "redirected",
  368. enableHighConcurrencyMode: true,
  369. isStream: false,
  370. });
  371. expect(result.dbCostUsd).toBe("50");
  372. expect(result.rateLimitCost).toBe(50);
  373. expect(result.sessionCostUsd).toBe("");
  374. expect(vi.mocked(SessionManager.storeSessionResponse)).not.toHaveBeenCalled();
  375. expect(vi.mocked(SessionManager.updateSessionUsage)).not.toHaveBeenCalled();
  376. expect(vi.mocked(SessionTracker.refreshSession)).not.toHaveBeenCalled();
  377. });
  378. it("高并发模式:流式成功收尾时不应更新 session provider 观测信息", async () => {
  379. const result = await runScenario({
  380. billingModelSource: "redirected",
  381. enableHighConcurrencyMode: true,
  382. isStream: true,
  383. });
  384. expect(result.dbCostUsd).toBe("50");
  385. expect(result.rateLimitCost).toBe(50);
  386. expect(vi.mocked(SessionManager.updateSessionProvider)).not.toHaveBeenCalled();
  387. });
  388. it("nested pricing: gpt-5.4 alias model should bill from pricing.openai when provider is chatgpt", async () => {
  389. vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected"));
  390. vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
  391. vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
  392. vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
  393. vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
  394. vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);
  395. vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
  396. if (modelName === "gpt-5.4") {
  397. return makePriceRecord(modelName, {
  398. mode: "responses",
  399. model_family: "gpt",
  400. litellm_provider: "chatgpt",
  401. pricing: {
  402. openai: {
  403. input_cost_per_token: 2.5,
  404. output_cost_per_token: 15,
  405. },
  406. },
  407. });
  408. }
  409. return null;
  410. });
  411. const dbCosts: string[] = [];
  412. vi.mocked(updateMessageRequestCost).mockImplementation(
  413. async (_id: number, costUsd: unknown) => {
  414. dbCosts.push(String(costUsd));
  415. }
  416. );
  417. const rateLimitCosts = captureRateLimitCosts();
  418. const sessionCosts: string[] = [];
  419. vi.mocked(SessionManager.updateSessionUsage).mockImplementation(
  420. async (_sessionId: string, payload: Record<string, unknown>) => {
  421. if (typeof payload.costUsd === "string") {
  422. sessionCosts.push(payload.costUsd);
  423. }
  424. }
  425. );
  426. const session = createSession({
  427. originalModel: "gpt-5.4",
  428. redirectedModel: "gpt-5.4",
  429. sessionId: "sess-gpt54-chatgpt",
  430. messageId: 3100,
  431. providerOverrides: {
  432. name: "ChatGPT",
  433. url: "https://chatgpt.com/backend-api/codex",
  434. providerType: "codex",
  435. },
  436. });
  437. const response = createNonStreamResponse({ input_tokens: 2, output_tokens: 3 });
  438. await ProxyResponseHandler.dispatch(session, response);
  439. await drainAsyncTasks();
  440. expect(dbCosts[0]).toBe("50");
  441. expect(sessionCosts[0]).toBe("50");
  442. });
  // requested mode: the billing tier comes from the request's service_tier
  // ("default" here), so the response's "priority" tier is ignored →
  // default-tier pricing: 2 × 1 + 3 × 10 = 32.
  it("codex fast: requested mode ignores actual priority when request tier is default", async () => {
    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected"));
    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);
    // Dual-tier price table: default 1/10, priority 2/20 per input/output token.
    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
      if (modelName === "gpt-5.4") {
        return makePriceRecord(modelName, {
          mode: "responses",
          model_family: "gpt",
          litellm_provider: "chatgpt",
          pricing: {
            openai: {
              input_cost_per_token: 1,
              output_cost_per_token: 10,
              input_cost_per_token_priority: 2,
              output_cost_per_token_priority: 20,
            },
          },
        });
      }
      return null;
    });
    const dbCosts: string[] = [];
    vi.mocked(updateMessageRequestCost).mockImplementation(
      async (_id: number, costUsd: unknown) => {
        dbCosts.push(String(costUsd));
      }
    );
    const rateLimitCosts = captureRateLimitCosts();
    const sessionCosts: string[] = [];
    vi.mocked(SessionManager.updateSessionUsage).mockImplementation(
      async (_sessionId: string, payload: Record<string, unknown>) => {
        if (typeof payload.costUsd === "string") {
          sessionCosts.push(payload.costUsd);
        }
      }
    );
    const session = createSession({
      originalModel: "gpt-5.4",
      redirectedModel: "gpt-5.4",
      sessionId: "sess-gpt54-priority-actual",
      messageId: 3200,
      providerOverrides: {
        name: "ChatGPT",
        url: "https://chatgpt.com/backend-api/codex",
        providerType: "codex",
      },
      requestMessage: { service_tier: "default" },
    });
    // Upstream reports priority, but requested mode must not honor it.
    const response = createNonStreamResponse(
      { input_tokens: 2, output_tokens: 3 },
      { service_tier: "priority" }
    );
    await ProxyResponseHandler.dispatch(session, response);
    await drainAsyncTasks();
    expect(dbCosts[0]).toBe("32");
    expect(sessionCosts[0]).toBe("32");
    expect(rateLimitCosts[0]).toBe(32);
  });
  // Request asked for priority and the response omits service_tier → billing
  // falls back to the requested tier → priority pricing: 2 × 2 + 3 × 20 = 64.
  it("codex fast: falls back to requested priority pricing when response omits service_tier", async () => {
    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected"));
    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);
    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
      if (modelName === "gpt-5.4") {
        return makePriceRecord(modelName, {
          mode: "responses",
          model_family: "gpt",
          litellm_provider: "chatgpt",
          pricing: {
            openai: {
              input_cost_per_token: 1,
              output_cost_per_token: 10,
              input_cost_per_token_priority: 2,
              output_cost_per_token_priority: 20,
            },
          },
        });
      }
      return null;
    });
    const dbCosts: string[] = [];
    vi.mocked(updateMessageRequestCost).mockImplementation(
      async (_id: number, costUsd: unknown) => {
        dbCosts.push(String(costUsd));
      }
    );
    const rateLimitCosts = captureRateLimitCosts();
    const session = createSession({
      originalModel: "gpt-5.4",
      redirectedModel: "gpt-5.4",
      sessionId: "sess-gpt54-priority-requested",
      messageId: 3201,
      providerOverrides: {
        name: "ChatGPT",
        url: "https://chatgpt.com/backend-api/codex",
        providerType: "codex",
      },
      requestMessage: { service_tier: "priority" },
    });
    // No service_tier in the response body.
    const response = createNonStreamResponse({ input_tokens: 2, output_tokens: 3 });
    await ProxyResponseHandler.dispatch(session, response);
    await drainAsyncTasks();
    expect(dbCosts[0]).toBe("64");
    expect(rateLimitCosts[0]).toBe(64);
  });
  // Long-context + priority: input of 272001 tokens with above-272k priority
  // rates (in 7, out 70). Expected total matches 272001 × 7 + 2 × 70 = 1904147,
  // i.e. the whole input billed at the above-272k priority rate.
  it("codex fast: uses long-context priority pricing when request is priority and response omits service_tier", async () => {
    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected"));
    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);
    // Four-way price table: {default, priority} × {base, above-272k}.
    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
      if (modelName === "gpt-5.4") {
        return makePriceRecord(modelName, {
          mode: "responses",
          model_family: "gpt",
          litellm_provider: "chatgpt",
          pricing: {
            openai: {
              input_cost_per_token: 1,
              output_cost_per_token: 10,
              input_cost_per_token_priority: 2,
              output_cost_per_token_priority: 20,
              input_cost_per_token_above_272k_tokens: 5,
              output_cost_per_token_above_272k_tokens: 50,
              input_cost_per_token_above_272k_tokens_priority: 7,
              output_cost_per_token_above_272k_tokens_priority: 70,
            },
          },
        });
      }
      return null;
    });
    const dbCosts: string[] = [];
    vi.mocked(updateMessageRequestCost).mockImplementation(
      async (_id: number, costUsd: unknown) => {
        dbCosts.push(String(costUsd));
      }
    );
    const rateLimitCosts = captureRateLimitCosts();
    const sessionCosts: string[] = [];
    vi.mocked(SessionManager.updateSessionUsage).mockImplementation(
      async (_sessionId: string, payload: Record<string, unknown>) => {
        if (typeof payload.costUsd === "string") {
          sessionCosts.push(payload.costUsd);
        }
      }
    );
    const session = createSession({
      originalModel: "gpt-5.4",
      redirectedModel: "gpt-5.4",
      sessionId: "sess-gpt54-priority-requested-long-context",
      messageId: 3203,
      providerOverrides: {
        name: "ChatGPT",
        url: "https://chatgpt.com/backend-api/codex",
        providerType: "codex",
      },
      requestMessage: { service_tier: "priority" },
    });
    // 272001 input tokens crosses the 272k long-context threshold.
    const response = createNonStreamResponse({ input_tokens: 272001, output_tokens: 2 });
    await ProxyResponseHandler.dispatch(session, response);
    await drainAsyncTasks();
    expect(dbCosts[0]).toBe("1904147");
    expect(sessionCosts[0]).toBe("1904147");
    expect(rateLimitCosts[0]).toBe(1904147);
  });
  // requested mode: even when the upstream downgrades to "default", billing
  // keeps the requested "priority" tier → 2 × 2 + 3 × 20 = 64.
  it("codex fast: requested mode keeps priority pricing even when actual tier is downgraded", async () => {
    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected"));
    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);
    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
      if (modelName === "gpt-5.4") {
        return makePriceRecord(modelName, {
          mode: "responses",
          model_family: "gpt",
          litellm_provider: "chatgpt",
          pricing: {
            openai: {
              input_cost_per_token: 1,
              output_cost_per_token: 10,
              input_cost_per_token_priority: 2,
              output_cost_per_token_priority: 20,
            },
          },
        });
      }
      return null;
    });
    const dbCosts: string[] = [];
    vi.mocked(updateMessageRequestCost).mockImplementation(
      async (_id: number, costUsd: unknown) => {
        dbCosts.push(String(costUsd));
      }
    );
    const rateLimitCosts = captureRateLimitCosts();
    const session = createSession({
      originalModel: "gpt-5.4",
      redirectedModel: "gpt-5.4",
      sessionId: "sess-gpt54-priority-downgraded",
      messageId: 3202,
      providerOverrides: {
        name: "ChatGPT",
        url: "https://chatgpt.com/backend-api/codex",
        providerType: "codex",
      },
      requestMessage: { service_tier: "priority" },
    });
    // Upstream reports a downgraded tier.
    const response = createNonStreamResponse(
      { input_tokens: 2, output_tokens: 3 },
      { service_tier: "default" }
    );
    await ProxyResponseHandler.dispatch(session, response);
    await drainAsyncTasks();
    expect(dbCosts[0]).toBe("64");
    expect(rateLimitCosts[0]).toBe(64);
  });
  // actual mode: the response's service_tier ("priority") wins over the
  // requested "default" → priority pricing: 2 × 2 + 3 × 20 = 64.
  it("codex fast: actual mode uses priority pricing when response reports service_tier=priority", async () => {
    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected", "actual"));
    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);
    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
      if (modelName === "gpt-5.4") {
        return makePriceRecord(modelName, {
          mode: "responses",
          model_family: "gpt",
          litellm_provider: "chatgpt",
          pricing: {
            openai: {
              input_cost_per_token: 1,
              output_cost_per_token: 10,
              input_cost_per_token_priority: 2,
              output_cost_per_token_priority: 20,
            },
          },
        });
      }
      return null;
    });
    const dbCosts: string[] = [];
    vi.mocked(updateMessageRequestCost).mockImplementation(
      async (_id: number, costUsd: unknown) => {
        dbCosts.push(String(costUsd));
      }
    );
    const rateLimitCosts = captureRateLimitCosts();
    const session = createSession({
      originalModel: "gpt-5.4",
      redirectedModel: "gpt-5.4",
      sessionId: "sess-gpt54-priority-actual-mode-upgrade",
      messageId: 3204,
      providerOverrides: {
        name: "ChatGPT",
        url: "https://chatgpt.com/backend-api/codex",
        providerType: "codex",
      },
      requestMessage: { service_tier: "default" },
    });
    const response = createNonStreamResponse(
      { input_tokens: 2, output_tokens: 3 },
      { service_tier: "priority" }
    );
    await ProxyResponseHandler.dispatch(session, response);
    await drainAsyncTasks();
    expect(dbCosts[0]).toBe("64");
    expect(rateLimitCosts[0]).toBe(64);
  });
  // actual mode: an explicit non-priority tier in the response overrides the
  // requested "priority" → default pricing: 2 × 1 + 3 × 10 = 32.
  it("codex fast: actual mode does not use priority pricing when response explicitly reports non-priority tier", async () => {
    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected", "actual"));
    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);
    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
      if (modelName === "gpt-5.4") {
        return makePriceRecord(modelName, {
          mode: "responses",
          model_family: "gpt",
          litellm_provider: "chatgpt",
          pricing: {
            openai: {
              input_cost_per_token: 1,
              output_cost_per_token: 10,
              input_cost_per_token_priority: 2,
              output_cost_per_token_priority: 20,
            },
          },
        });
      }
      return null;
    });
    const dbCosts: string[] = [];
    vi.mocked(updateMessageRequestCost).mockImplementation(
      async (_id: number, costUsd: unknown) => {
        dbCosts.push(String(costUsd));
      }
    );
    const rateLimitCosts = captureRateLimitCosts();
    const session = createSession({
      originalModel: "gpt-5.4",
      redirectedModel: "gpt-5.4",
      sessionId: "sess-gpt54-priority-actual-mode-downgrade",
      messageId: 3205,
      providerOverrides: {
        name: "ChatGPT",
        url: "https://chatgpt.com/backend-api/codex",
        providerType: "codex",
      },
      requestMessage: { service_tier: "priority" },
    });
    const response = createNonStreamResponse(
      { input_tokens: 2, output_tokens: 3 },
      { service_tier: "default" }
    );
    await ProxyResponseHandler.dispatch(session, response);
    await drainAsyncTasks();
    expect(dbCosts[0]).toBe("32");
    expect(rateLimitCosts[0]).toBe(32);
  });
  777. it("codex fast: actual mode falls back to requested priority pricing when response omits service_tier", async () => {
  778. vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected", "actual"));
  779. vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
  780. vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
  781. vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
  782. vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
  783. vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);
  784. vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
  785. if (modelName === "gpt-5.4") {
  786. return makePriceRecord(modelName, {
  787. mode: "responses",
  788. model_family: "gpt",
  789. litellm_provider: "chatgpt",
  790. pricing: {
  791. openai: {
  792. input_cost_per_token: 1,
  793. output_cost_per_token: 10,
  794. input_cost_per_token_priority: 2,
  795. output_cost_per_token_priority: 20,
  796. },
  797. },
  798. });
  799. }
  800. return null;
  801. });
  802. const dbCosts: string[] = [];
  803. vi.mocked(updateMessageRequestCost).mockImplementation(
  804. async (_id: number, costUsd: unknown) => {
  805. dbCosts.push(String(costUsd));
  806. }
  807. );
  808. const rateLimitCosts = captureRateLimitCosts();
  809. const session = createSession({
  810. originalModel: "gpt-5.4",
  811. redirectedModel: "gpt-5.4",
  812. sessionId: "sess-gpt54-priority-actual-mode-fallback",
  813. messageId: 3206,
  814. providerOverrides: {
  815. name: "ChatGPT",
  816. url: "https://chatgpt.com/backend-api/codex",
  817. providerType: "codex",
  818. },
  819. requestMessage: { service_tier: "priority" },
  820. });
  821. const response = createNonStreamResponse({ input_tokens: 2, output_tokens: 3 });
  822. await ProxyResponseHandler.dispatch(session, response);
  823. await drainAsyncTasks();
  824. expect(dbCosts[0]).toBe("64");
  825. expect(rateLimitCosts[0]).toBe(64);
  826. });
  827. it("codex fast: actual mode reuses cached system setting when direct settings read fails", async () => {
  828. vi.mocked(getSystemSettings).mockResolvedValueOnce(makeSystemSettings("redirected", "actual"));
  829. await getCachedSystemSettings();
  830. vi.mocked(getSystemSettings).mockRejectedValueOnce(new Error("db down"));
  831. vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
  832. vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
  833. vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
  834. vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
  835. vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);
  836. vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
  837. if (modelName === "gpt-5.4") {
  838. return makePriceRecord(modelName, {
  839. mode: "responses",
  840. model_family: "gpt",
  841. litellm_provider: "chatgpt",
  842. pricing: {
  843. openai: {
  844. input_cost_per_token: 1,
  845. output_cost_per_token: 10,
  846. input_cost_per_token_priority: 2,
  847. output_cost_per_token_priority: 20,
  848. },
  849. },
  850. });
  851. }
  852. return null;
  853. });
  854. const dbCosts: string[] = [];
  855. vi.mocked(updateMessageRequestCost).mockImplementation(
  856. async (_id: number, costUsd: unknown) => {
  857. dbCosts.push(String(costUsd));
  858. }
  859. );
  860. const rateLimitCosts = captureRateLimitCosts();
  861. const session = createSession({
  862. originalModel: "gpt-5.4",
  863. redirectedModel: "gpt-5.4",
  864. sessionId: "sess-gpt54-priority-actual-mode-cached-settings",
  865. messageId: 3207,
  866. providerOverrides: {
  867. name: "ChatGPT",
  868. url: "https://chatgpt.com/backend-api/codex",
  869. providerType: "codex",
  870. },
  871. requestMessage: { service_tier: "priority" },
  872. });
  873. const response = createNonStreamResponse(
  874. { input_tokens: 2, output_tokens: 3 },
  875. { service_tier: "default" }
  876. );
  877. await ProxyResponseHandler.dispatch(session, response);
  878. await drainAsyncTasks();
  879. expect(dbCosts[0]).toBe("32");
  880. expect(rateLimitCosts[0]).toBe(32);
  881. });
  882. });
  883. describe("价格表缺失/查询失败:不计费放行", () => {
  884. async function runNoPriceScenario(options: {
  885. billingModelSource: SystemSettings["billingModelSource"];
  886. isStream: boolean;
  887. priceLookup: "none" | "throws";
  888. }): Promise<{ dbCostCalls: number; rateLimitCalls: number }> {
  889. const usage = { input_tokens: 2, output_tokens: 3 };
  890. const originalModel = "original-model";
  891. const redirectedModel = "redirected-model";
  892. vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings(options.billingModelSource));
  893. if (options.priceLookup === "none") {
  894. vi.mocked(findLatestPriceByModel).mockResolvedValue(null);
  895. } else {
  896. vi.mocked(findLatestPriceByModel).mockImplementation(async () => {
  897. throw new Error("db query failed");
  898. });
  899. }
  900. vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
  901. vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
  902. vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
  903. vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
  904. vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);
  905. vi.mocked(updateMessageRequestCost).mockResolvedValue(undefined);
  906. vi.mocked(RateLimitService.trackCost).mockResolvedValue(undefined);
  907. vi.mocked(SessionManager.updateSessionUsage).mockResolvedValue(undefined);
  908. const session = createSession({
  909. originalModel,
  910. redirectedModel,
  911. sessionId: `sess-no-price-${options.billingModelSource}-${options.isStream ? "s" : "n"}`,
  912. messageId: options.isStream ? 3001 : 3000,
  913. });
  914. const response = options.isStream
  915. ? createStreamResponse(usage)
  916. : createNonStreamResponse(usage);
  917. const clientResponse = await ProxyResponseHandler.dispatch(session, response);
  918. await clientResponse.text();
  919. await drainAsyncTasks();
  920. return {
  921. dbCostCalls: vi.mocked(updateMessageRequestCost).mock.calls.length,
  922. rateLimitCalls: vi.mocked(RateLimitService.trackCost).mock.calls.length,
  923. };
  924. }
  925. it("无价格:不写入 DB cost,不追踪限流 cost,并触发一次异步同步", async () => {
  926. const result = await runNoPriceScenario({
  927. billingModelSource: "redirected",
  928. isStream: false,
  929. priceLookup: "none",
  930. });
  931. expect(result.dbCostCalls).toBe(0);
  932. expect(result.rateLimitCalls).toBe(0);
  933. expect(cloudPriceSyncRequests).toEqual([{ reason: "missing-model" }]);
  934. });
  935. it("价格查询抛错:不应影响响应,不写入 DB cost,不追踪限流 cost", async () => {
  936. const result = await runNoPriceScenario({
  937. billingModelSource: "original",
  938. isStream: true,
  939. priceLookup: "throws",
  940. });
  941. expect(result.dbCostCalls).toBe(0);
  942. expect(result.rateLimitCalls).toBe(0);
  943. });
  944. });