billing-model-source.test.ts 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071
  1. import { beforeEach, describe, expect, it, vi } from "vitest";
  2. import type { ModelPrice, ModelPriceData } from "@/types/model-price";
  3. import type { SystemSettings } from "@/types/system-config";
  // Promises passed to the mocked AsyncTaskManager.register; tests await them via drainAsyncTasks.
  const asyncTasks: Promise<void>[] = [];
  // Payloads passed to the mocked requestCloudPriceTableSync.
  const cloudPriceSyncRequests: { reason: string }[] = [];

  // Record registered background promises instead of handing them to the real task manager.
  vi.mock("@/lib/async-task-manager", () => {
    return {
      AsyncTaskManager: {
        register(_taskId: string, promise: Promise<void>) {
          asyncTasks.push(promise);
          return new AbortController();
        },
        cleanup() {},
        cancel() {},
      },
    };
  });

  // Silent logger: every level is a no-op.
  vi.mock("@/lib/logger", () => {
    return {
      logger: {
        debug() {},
        info() {},
        warn() {},
        error() {},
        trace() {},
      },
    };
  });

  // Record cloud price sync requests so tests can inspect them.
  vi.mock("@/lib/price-sync/cloud-price-updater", () => {
    return {
      requestCloudPriceTableSync(payload: { reason: string }) {
        cloudPriceSyncRequests.push(payload);
      },
    };
  });

  // Repository and service modules are replaced by bare vi.fn() stubs configured per test.
  vi.mock("@/repository/model-price", () => {
    return { findLatestPriceByModel: vi.fn() };
  });

  vi.mock("@/repository/system-config", () => {
    return { getSystemSettings: vi.fn() };
  });

  vi.mock("@/repository/message", () => {
    return {
      updateMessageRequestCostWithBreakdown: vi.fn(),
      updateMessageRequestDetails: vi.fn(),
      updateMessageRequestDuration: vi.fn(),
    };
  });

  vi.mock("@/lib/session-manager", () => {
    return {
      SessionManager: {
        updateSessionUsage: vi.fn(),
        updateSessionProvider: vi.fn(),
        storeSessionResponse: vi.fn(),
        extractCodexPromptCacheKey: vi.fn(),
        updateSessionWithCodexCacheKey: vi.fn(),
      },
    };
  });

  vi.mock("@/lib/rate-limit", () => {
    return {
      RateLimitService: {
        trackCost: vi.fn(),
        trackUserDailyCost: vi.fn(),
      },
    };
  });

  vi.mock("@/lib/session-tracker", () => {
    return {
      SessionTracker: {
        refreshSession: vi.fn(),
      },
    };
  });

  // getInstance hands back a fresh object whose endRequest does nothing.
  vi.mock("@/lib/proxy-status-tracker", () => {
    return {
      ProxyStatusTracker: {
        getInstance() {
          return {
            endRequest() {},
          };
        },
      },
    };
  });
  68. import { ProxyResponseHandler } from "@/app/v1/_lib/proxy/response-handler";
  69. import { ProxySession } from "@/app/v1/_lib/proxy/session";
  70. import { getCachedSystemSettings, invalidateSystemSettingsCache } from "@/lib/config";
  71. import { SessionManager } from "@/lib/session-manager";
  72. import { RateLimitService } from "@/lib/rate-limit";
  73. import { SessionTracker } from "@/lib/session-tracker";
  74. import {
  75. updateMessageRequestCostWithBreakdown,
  76. updateMessageRequestDetails,
  77. updateMessageRequestDuration,
  78. } from "@/repository/message";
  79. import { findLatestPriceByModel } from "@/repository/model-price";
  80. import { getSystemSettings } from "@/repository/system-config";
  // Reset all shared state before each test so no case observes leftovers from a previous one.
  beforeEach(() => {
    vi.clearAllMocks();
    // Discard promises a previous test registered but never drained (e.g. it failed before
    // calling drainAsyncTasks); otherwise the next test would await, and possibly fail on, them.
    asyncTasks.length = 0;
    cloudPriceSyncRequests.length = 0;
    invalidateSystemSettingsCache();
  });
  /**
   * Builds a fully populated SystemSettings fixture.
   *
   * @param billingModelSource value stored in `billingModelSource`
   * @param codexPriorityBillingSource value stored in `codexPriorityBillingSource` (default "requested")
   * @param enableHighConcurrencyMode value stored in `enableHighConcurrencyMode` (default false)
   * @returns settings whose `createdAt` and `updatedAt` share a single Date instance
   */
  function makeSystemSettings(
    billingModelSource: SystemSettings["billingModelSource"],
    codexPriorityBillingSource: SystemSettings["codexPriorityBillingSource"] = "requested",
    enableHighConcurrencyMode: boolean = false
  ): SystemSettings {
    const timestamp = new Date();
    const responseFixerConfig: SystemSettings["responseFixerConfig"] = {
      fixTruncatedJson: true,
      fixSseFormat: true,
      fixEncoding: true,
      maxJsonDepth: 200,
      maxFixSize: 1024 * 1024,
    };
    const settings: SystemSettings = {
      id: 1,
      siteTitle: "test",
      allowGlobalUsageView: false,
      currencyDisplay: "USD",
      billingModelSource,
      codexPriorityBillingSource,
      timezone: null,
      enableAutoCleanup: false,
      cleanupRetentionDays: 30,
      cleanupSchedule: "0 2 * * *",
      cleanupBatchSize: 10000,
      enableClientVersionCheck: false,
      verboseProviderError: false,
      enableHttp2: false,
      enableHighConcurrencyMode,
      interceptAnthropicWarmupRequests: false,
      enableThinkingSignatureRectifier: true,
      enableThinkingBudgetRectifier: true,
      enableBillingHeaderRectifier: true,
      enableResponseInputRectifier: true,
      enableCodexSessionIdCompletion: true,
      enableClaudeMetadataUserIdInjection: true,
      enableResponseFixer: true,
      responseFixerConfig,
      createdAt: timestamp,
      updatedAt: timestamp,
    };
    return settings;
  }
  /**
   * Wraps price data in a ModelPrice record with id 1.
   *
   * @param modelName model name stored on the record
   * @param priceData price data stored on the record (by reference, not copied)
   * @returns record whose `createdAt` and `updatedAt` share a single Date instance
   */
  function makePriceRecord(modelName: string, priceData: ModelPriceData): ModelPrice {
    const timestamp = new Date();
    const record: ModelPrice = {
      id: 1,
      modelName,
      priceData,
      createdAt: timestamp,
      updatedAt: timestamp,
    };
    return record;
  }
  /**
   * Creates a ProxySession for a POST to http://localhost/v1/messages and attaches a
   * provider, an authenticated user/key pair and a message context to it.
   *
   * @param originalModel passed to `setOriginalModel`
   * @param redirectedModel used as the request's `model`
   * @param sessionId passed to `setSessionId`
   * @param messageId id of the message context
   * @param enableHighConcurrencyMode passed to `setHighConcurrencyModeEnabled` (default false)
   * @param providerOverrides fields spread over the default test provider
   * @param requestMessage request body; defaults to an empty object
   */
  function createSession({
    originalModel,
    redirectedModel,
    sessionId,
    messageId,
    enableHighConcurrencyMode = false,
    providerOverrides,
    requestMessage,
  }: {
    originalModel: string;
    redirectedModel: string;
    sessionId: string;
    messageId: number;
    enableHighConcurrencyMode?: boolean;
    providerOverrides?: Record<string, unknown>;
    requestMessage?: Record<string, unknown>;
  }): ProxySession {
    // The cast lets the test invoke the constructor directly with this init shape.
    type ProxySessionCtor = {
      new (init: {
        startTime: number;
        method: string;
        requestUrl: URL;
        headers: Headers;
        headerLog: string;
        request: { message: Record<string, unknown>; log: string; model: string | null };
        userAgent: string | null;
        context: unknown;
        clientAbortSignal: AbortSignal | null;
      }): ProxySession;
    };
    const SessionCtor = ProxySession as unknown as ProxySessionCtor;

    const proxySession = new SessionCtor({
      startTime: Date.now(),
      method: "POST",
      requestUrl: new URL("http://localhost/v1/messages"),
      headers: new Headers(),
      headerLog: "",
      request: { message: requestMessage ?? {}, log: "(test)", model: redirectedModel },
      userAgent: null,
      context: {},
      clientAbortSignal: null,
    });
    proxySession.setOriginalModel(originalModel);
    proxySession.setSessionId(sessionId);
    proxySession.setHighConcurrencyModeEnabled(enableHighConcurrencyMode);

    const testProvider = {
      id: 99,
      name: "test-provider",
      url: "https://api.anthropic.com",
      providerType: "claude",
      costMultiplier: 1.0,
      streamingIdleTimeoutMs: 0,
      ...providerOverrides,
    } as any;
    const testUser = {
      id: 123,
      name: "test-user",
      dailyResetTime: "00:00",
      dailyResetMode: "fixed",
    } as any;
    const testKey = {
      id: 456,
      name: "test-key",
      dailyResetTime: "00:00",
      dailyResetMode: "fixed",
    } as any;

    proxySession.setProvider(testProvider);
    proxySession.setAuthState({
      user: testUser,
      key: testKey,
      apiKey: "sk-test",
      success: true,
    });
    proxySession.setMessageContext({
      id: messageId,
      createdAt: new Date(),
      user: testUser,
      key: testKey,
      apiKey: "sk-test",
    });

    return proxySession;
  }
  /**
   * Builds a 200 JSON response whose body is `{ type: "message", usage, ...extras }`.
   *
   * @param usage token counts placed under `usage`
   * @param extras optional extra top-level fields, spread after `type` and `usage`
   */
  function createNonStreamResponse(
    usage: { input_tokens: number; output_tokens: number },
    extras?: Record<string, unknown>
  ): Response {
    // Spreading an undefined `extras` contributes no properties.
    const payload = { type: "message", usage, ...extras };
    const init: ResponseInit = {
      status: 200,
      headers: { "content-type": "application/json" },
    };
    return new Response(JSON.stringify(payload), init);
  }
  /**
   * Builds a 200 `text/event-stream` response whose body is a single `message_delta`
   * event carrying `{ usage }`, after which the stream closes.
   */
  function createStreamResponse(usage: { input_tokens: number; output_tokens: number }): Response {
    const frame = `event: message_delta\ndata: ${JSON.stringify({ usage })}\n\n`;
    const frameBytes = new TextEncoder().encode(frame);
    const body = new ReadableStream<Uint8Array>({
      start(controller) {
        controller.enqueue(frameBytes);
        controller.close();
      },
    });
    const init: ResponseInit = {
      status: 200,
      headers: { "content-type": "text/event-stream" },
    };
    return new Response(body, init);
  }
  /**
   * Awaits every promise collected in `asyncTasks` and leaves the queue empty.
   *
   * Keeps draining in batches until no promises remain, so tasks registered while an
   * earlier batch was settling are awaited too instead of leaking into a later test.
   * Rejects with the first rejection of the batch being awaited (Promise.all semantics).
   */
  async function drainAsyncTasks(): Promise<void> {
    // do/while: always await at least once, even when the queue starts empty.
    do {
      const batch = asyncTasks.splice(0, asyncTasks.length);
      await Promise.all(batch);
    } while (asyncTasks.length > 0);
  }
  /**
   * Replaces the mocked `RateLimitService.trackCost` implementation with one that
   * records its `costUsd` argument.
   *
   * @returns the live array that receives one entry per `trackCost` call
   */
  function captureRateLimitCosts(): number[] {
    const captured: number[] = [];
    const trackCostMock = vi.mocked(RateLimitService.trackCost);
    trackCostMock.mockImplementation(
      async (_keyId: number, _providerId: number, _sessionId: string, costUsd: number) => {
        captured.push(costUsd);
      }
    );
    return captured;
  }
  /**
   * Dispatches one response (2 input / 3 output tokens) through ProxyResponseHandler for a
   * session whose original model is priced at 1 per token and whose redirected model is
   * priced at 10 per token, then reports the first cost each mocked sink received.
   *
   * @param billingModelSource value placed in the mocked system settings
   * @param isStream whether the upstream response is SSE (true) or JSON (false)
   * @param enableHighConcurrencyMode applied to both the settings and the session (default false)
   * @returns `dbCostUsd` from updateMessageRequestCostWithBreakdown, `sessionCostUsd` from
   *   SessionManager.updateSessionUsage, and `rateLimitCost` from RateLimitService.trackCost;
   *   a sink that was never called yields "" (strings) or NaN (number)
   */
  async function runScenario({
    billingModelSource,
    isStream,
    enableHighConcurrencyMode = false,
  }: {
    billingModelSource: SystemSettings["billingModelSource"];
    isStream: boolean;
    enableHighConcurrencyMode?: boolean;
  }): Promise<{ dbCostUsd: string; sessionCostUsd: string; rateLimitCost: number }> {
    // Invalidate here as well: a single test may run several scenarios with different settings.
    invalidateSystemSettingsCache();

    const usage = { input_tokens: 2, output_tokens: 3 };
    const originalModel = "original-model";
    const redirectedModel = "redirected-model";
    const originalPriceData: ModelPriceData = { input_cost_per_token: 1, output_cost_per_token: 1 };
    const redirectedPriceData: ModelPriceData = {
      input_cost_per_token: 10,
      output_cost_per_token: 10,
    };

    vi.mocked(getSystemSettings).mockResolvedValue(
      makeSystemSettings(billingModelSource, "requested", enableHighConcurrencyMode)
    );
    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
      if (modelName === originalModel) {
        return makePriceRecord(modelName, originalPriceData);
      }
      if (modelName === redirectedModel) {
        return makePriceRecord(modelName, redirectedPriceData);
      }
      return null;
    });
    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);

    const dbCosts: string[] = [];
    vi.mocked(updateMessageRequestCostWithBreakdown).mockImplementation(
      async (_id: number, costUsd: unknown) => {
        dbCosts.push(String(costUsd));
      }
    );

    const sessionCosts: string[] = [];
    vi.mocked(SessionManager.updateSessionUsage).mockImplementation(
      async (_sessionId: string, payload: Record<string, unknown>) => {
        if (typeof payload.costUsd === "string") {
          sessionCosts.push(payload.costUsd);
        }
      }
    );

    // Shared helper instead of a duplicated inline trackCost mock.
    const rateLimitCosts = captureRateLimitCosts();

    const session = createSession({
      originalModel,
      redirectedModel,
      sessionId: `sess-${billingModelSource}-${isStream ? "s" : "n"}`,
      messageId: isStream ? 2001 : 2000,
      enableHighConcurrencyMode,
    });

    const response = isStream ? createStreamResponse(usage) : createNonStreamResponse(usage);
    const clientResponse = await ProxyResponseHandler.dispatch(session, response);
    if (isStream) {
      // Consume the client-facing body before draining the background tasks.
      await clientResponse.text();
    }
    await drainAsyncTasks();

    return {
      dbCostUsd: dbCosts[0] ?? "",
      sessionCostUsd: sessionCosts[0] ?? "",
      rateLimitCost: rateLimitCosts[0] ?? Number.NaN,
    };
  }
  335. describe("Billing model source - Redis session cost vs DB cost", () => {
  // Non-stream response, billingModelSource = original: all three sinks see 5 (5 tokens at 1 each).
  it("非流式响应:配置 = original 时 Session 成本与数据库一致", async () => {
    const { dbCostUsd, sessionCostUsd, rateLimitCost } = await runScenario({
      isStream: false,
      billingModelSource: "original",
    });

    expect(dbCostUsd).toBe("5");
    expect(sessionCostUsd).toBe("5");
    expect(rateLimitCost).toBe(5);
  });
  // Non-stream response, billingModelSource = redirected: all three sinks see 50 (5 tokens at 10 each).
  it("非流式响应:配置 = redirected 时 Session 成本与数据库一致", async () => {
    const { dbCostUsd, sessionCostUsd, rateLimitCost } = await runScenario({
      isStream: false,
      billingModelSource: "redirected",
    });

    expect(dbCostUsd).toBe("50");
    expect(sessionCostUsd).toBe("50");
    expect(rateLimitCost).toBe(50);
  });
  // Stream response, billingModelSource = original: all three sinks see 5.
  it("流式响应:配置 = original 时 Session 成本与数据库一致", async () => {
    const { dbCostUsd, sessionCostUsd, rateLimitCost } = await runScenario({
      isStream: true,
      billingModelSource: "original",
    });

    expect(dbCostUsd).toBe("5");
    expect(sessionCostUsd).toBe("5");
    expect(rateLimitCost).toBe(5);
  });
  // Stream response, billingModelSource = redirected: all three sinks see 50.
  it("流式响应:配置 = redirected 时 Session 成本与数据库一致", async () => {
    const { dbCostUsd, sessionCostUsd, rateLimitCost } = await runScenario({
      isStream: true,
      billingModelSource: "redirected",
    });

    expect(dbCostUsd).toBe("50");
    expect(sessionCostUsd).toBe("50");
    expect(rateLimitCost).toBe(50);
  });
  // Running "original" and then "redirected" in the same test must yield different session costs.
  it("从 original 切换到 redirected 后应生效", async () => {
    const { sessionCostUsd: costBeforeSwitch } = await runScenario({
      isStream: false,
      billingModelSource: "original",
    });
    const { sessionCostUsd: costAfterSwitch } = await runScenario({
      isStream: false,
      billingModelSource: "redirected",
    });

    expect(costBeforeSwitch).toBe("5");
    expect(costAfterSwitch).toBe("50");
    expect(costBeforeSwitch).not.toBe(costAfterSwitch);
  });
  // High-concurrency mode: DB cost and rate-limit cost are still written, while session
  // response storage, session usage and session refresh are never called.
  it("高并发模式:仍更新 DB cost 与限流 cost,但跳过 session usage / session refresh 观测写入", async () => {
    const { dbCostUsd, rateLimitCost, sessionCostUsd } = await runScenario({
      isStream: false,
      enableHighConcurrencyMode: true,
      billingModelSource: "redirected",
    });

    expect(dbCostUsd).toBe("50");
    expect(rateLimitCost).toBe(50);
    expect(sessionCostUsd).toBe("");

    expect(SessionManager.storeSessionResponse).not.toHaveBeenCalled();
    expect(SessionManager.updateSessionUsage).not.toHaveBeenCalled();
    expect(SessionTracker.refreshSession).not.toHaveBeenCalled();
  });
  // High-concurrency mode, streaming: costs are still recorded but updateSessionProvider is never called.
  it("高并发模式:流式成功收尾时不应更新 session provider 观测信息", async () => {
    const { dbCostUsd, rateLimitCost } = await runScenario({
      isStream: true,
      enableHighConcurrencyMode: true,
      billingModelSource: "redirected",
    });

    expect(dbCostUsd).toBe("50");
    expect(rateLimitCost).toBe(50);
    expect(SessionManager.updateSessionProvider).not.toHaveBeenCalled();
  });
  // Price data holds no top-level rates, only `pricing.openai`; with a ChatGPT/codex provider
  // the expected cost is 2 * 2.5 + 3 * 15 = 50 in every sink.
  it("nested pricing: gpt-5.4 alias model should bill from pricing.openai when provider is chatgpt", async () => {
    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected"));
    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);

    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
      if (modelName === "gpt-5.4") {
        return makePriceRecord(modelName, {
          mode: "responses",
          model_family: "gpt",
          litellm_provider: "chatgpt",
          pricing: {
            openai: {
              input_cost_per_token: 2.5,
              output_cost_per_token: 15,
            },
          },
        });
      }
      return null;
    });

    const dbCosts: string[] = [];
    vi.mocked(updateMessageRequestCostWithBreakdown).mockImplementation(
      async (_id: number, costUsd: unknown) => {
        dbCosts.push(String(costUsd));
      }
    );

    const rateLimitCosts = captureRateLimitCosts();

    const sessionCosts: string[] = [];
    vi.mocked(SessionManager.updateSessionUsage).mockImplementation(
      async (_sessionId: string, payload: Record<string, unknown>) => {
        if (typeof payload.costUsd === "string") {
          sessionCosts.push(payload.costUsd);
        }
      }
    );

    const session = createSession({
      originalModel: "gpt-5.4",
      redirectedModel: "gpt-5.4",
      sessionId: "sess-gpt54-chatgpt",
      messageId: 3100,
      providerOverrides: {
        name: "ChatGPT",
        url: "https://chatgpt.com/backend-api/codex",
        providerType: "codex",
      },
    });

    const response = createNonStreamResponse({ input_tokens: 2, output_tokens: 3 });
    await ProxyResponseHandler.dispatch(session, response);
    await drainAsyncTasks();

    expect(dbCosts[0]).toBe("50");
    expect(sessionCosts[0]).toBe("50");
    // Previously captured but never checked; the rate-limit sink must agree with the others.
    expect(rateLimitCosts[0]).toBe(50);
  });
  // Settings use the default "requested" source; request tier is "default" while the response
  // reports "priority". Expected cost uses the standard rates: 2 * 1 + 3 * 10 = 32.
  it("codex fast: requested mode ignores actual priority when request tier is default", async () => {
    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected"));
    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);

    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
      if (modelName !== "gpt-5.4") {
        return null;
      }
      return makePriceRecord(modelName, {
        mode: "responses",
        model_family: "gpt",
        litellm_provider: "chatgpt",
        pricing: {
          openai: {
            input_cost_per_token: 1,
            output_cost_per_token: 10,
            input_cost_per_token_priority: 2,
            output_cost_per_token_priority: 20,
          },
        },
      });
    });

    const recordedDbCosts: string[] = [];
    vi.mocked(updateMessageRequestCostWithBreakdown).mockImplementation(
      async (_id: number, costUsd: unknown) => {
        recordedDbCosts.push(String(costUsd));
      }
    );

    const recordedRateLimitCosts = captureRateLimitCosts();

    const recordedSessionCosts: string[] = [];
    vi.mocked(SessionManager.updateSessionUsage).mockImplementation(
      async (_sessionId: string, payload: Record<string, unknown>) => {
        const { costUsd } = payload;
        if (typeof costUsd === "string") {
          recordedSessionCosts.push(costUsd);
        }
      }
    );

    const codexSession = createSession({
      originalModel: "gpt-5.4",
      redirectedModel: "gpt-5.4",
      sessionId: "sess-gpt54-priority-actual",
      messageId: 3200,
      providerOverrides: {
        name: "ChatGPT",
        url: "https://chatgpt.com/backend-api/codex",
        providerType: "codex",
      },
      requestMessage: { service_tier: "default" },
    });

    const upstreamResponse = createNonStreamResponse(
      { input_tokens: 2, output_tokens: 3 },
      { service_tier: "priority" }
    );
    await ProxyResponseHandler.dispatch(codexSession, upstreamResponse);
    await drainAsyncTasks();

    expect(recordedDbCosts[0]).toBe("32");
    expect(recordedSessionCosts[0]).toBe("32");
    expect(recordedRateLimitCosts[0]).toBe(32);
  });
  // Request tier is "priority" and the response carries no service_tier.
  // Expected cost uses the priority rates: 2 * 2 + 3 * 20 = 64.
  it("codex fast: falls back to requested priority pricing when response omits service_tier", async () => {
    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected"));
    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);

    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
      if (modelName !== "gpt-5.4") {
        return null;
      }
      return makePriceRecord(modelName, {
        mode: "responses",
        model_family: "gpt",
        litellm_provider: "chatgpt",
        pricing: {
          openai: {
            input_cost_per_token: 1,
            output_cost_per_token: 10,
            input_cost_per_token_priority: 2,
            output_cost_per_token_priority: 20,
          },
        },
      });
    });

    const recordedDbCosts: string[] = [];
    vi.mocked(updateMessageRequestCostWithBreakdown).mockImplementation(
      async (_id: number, costUsd: unknown) => {
        recordedDbCosts.push(String(costUsd));
      }
    );

    const recordedRateLimitCosts = captureRateLimitCosts();

    const codexSession = createSession({
      originalModel: "gpt-5.4",
      redirectedModel: "gpt-5.4",
      sessionId: "sess-gpt54-priority-requested",
      messageId: 3201,
      providerOverrides: {
        name: "ChatGPT",
        url: "https://chatgpt.com/backend-api/codex",
        providerType: "codex",
      },
      requestMessage: { service_tier: "priority" },
    });

    const upstreamResponse = createNonStreamResponse({ input_tokens: 2, output_tokens: 3 });
    await ProxyResponseHandler.dispatch(codexSession, upstreamResponse);
    await drainAsyncTasks();

    expect(recordedDbCosts[0]).toBe("64");
    expect(recordedRateLimitCosts[0]).toBe(64);
  });
  // Request tier is "priority", the response carries no service_tier, and input is 272001 tokens.
  // Expected cost uses the above-272k priority rates: 272001 * 7 + 2 * 70 = 1904147.
  it("codex fast: uses long-context priority pricing when request is priority and response omits service_tier", async () => {
    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected"));
    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);

    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
      if (modelName !== "gpt-5.4") {
        return null;
      }
      return makePriceRecord(modelName, {
        mode: "responses",
        model_family: "gpt",
        litellm_provider: "chatgpt",
        pricing: {
          openai: {
            input_cost_per_token: 1,
            output_cost_per_token: 10,
            input_cost_per_token_priority: 2,
            output_cost_per_token_priority: 20,
            input_cost_per_token_above_272k_tokens: 5,
            output_cost_per_token_above_272k_tokens: 50,
            input_cost_per_token_above_272k_tokens_priority: 7,
            output_cost_per_token_above_272k_tokens_priority: 70,
          },
        },
      });
    });

    const recordedDbCosts: string[] = [];
    vi.mocked(updateMessageRequestCostWithBreakdown).mockImplementation(
      async (_id: number, costUsd: unknown) => {
        recordedDbCosts.push(String(costUsd));
      }
    );

    const recordedRateLimitCosts = captureRateLimitCosts();

    const recordedSessionCosts: string[] = [];
    vi.mocked(SessionManager.updateSessionUsage).mockImplementation(
      async (_sessionId: string, payload: Record<string, unknown>) => {
        const { costUsd } = payload;
        if (typeof costUsd === "string") {
          recordedSessionCosts.push(costUsd);
        }
      }
    );

    const codexSession = createSession({
      originalModel: "gpt-5.4",
      redirectedModel: "gpt-5.4",
      sessionId: "sess-gpt54-priority-requested-long-context",
      messageId: 3203,
      providerOverrides: {
        name: "ChatGPT",
        url: "https://chatgpt.com/backend-api/codex",
        providerType: "codex",
      },
      requestMessage: { service_tier: "priority" },
    });

    const upstreamResponse = createNonStreamResponse({ input_tokens: 272001, output_tokens: 2 });
    await ProxyResponseHandler.dispatch(codexSession, upstreamResponse);
    await drainAsyncTasks();

    expect(recordedDbCosts[0]).toBe("1904147");
    expect(recordedSessionCosts[0]).toBe("1904147");
    expect(recordedRateLimitCosts[0]).toBe(1904147);
  });
  // Default "requested" source; request tier is "priority" while the response reports "default".
  // Expected cost still uses the priority rates: 2 * 2 + 3 * 20 = 64.
  it("codex fast: requested mode keeps priority pricing even when actual tier is downgraded", async () => {
    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected"));
    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);

    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
      if (modelName !== "gpt-5.4") {
        return null;
      }
      return makePriceRecord(modelName, {
        mode: "responses",
        model_family: "gpt",
        litellm_provider: "chatgpt",
        pricing: {
          openai: {
            input_cost_per_token: 1,
            output_cost_per_token: 10,
            input_cost_per_token_priority: 2,
            output_cost_per_token_priority: 20,
          },
        },
      });
    });

    const recordedDbCosts: string[] = [];
    vi.mocked(updateMessageRequestCostWithBreakdown).mockImplementation(
      async (_id: number, costUsd: unknown) => {
        recordedDbCosts.push(String(costUsd));
      }
    );

    const recordedRateLimitCosts = captureRateLimitCosts();

    const codexSession = createSession({
      originalModel: "gpt-5.4",
      redirectedModel: "gpt-5.4",
      sessionId: "sess-gpt54-priority-downgraded",
      messageId: 3202,
      providerOverrides: {
        name: "ChatGPT",
        url: "https://chatgpt.com/backend-api/codex",
        providerType: "codex",
      },
      requestMessage: { service_tier: "priority" },
    });

    const upstreamResponse = createNonStreamResponse(
      { input_tokens: 2, output_tokens: 3 },
      { service_tier: "default" }
    );
    await ProxyResponseHandler.dispatch(codexSession, upstreamResponse);
    await drainAsyncTasks();

    expect(recordedDbCosts[0]).toBe("64");
    expect(recordedRateLimitCosts[0]).toBe(64);
  });
  // codexPriorityBillingSource = "actual"; request tier is "default" while the response
  // reports "priority". Expected cost uses the priority rates: 2 * 2 + 3 * 20 = 64.
  it("codex fast: actual mode uses priority pricing when response reports service_tier=priority", async () => {
    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected", "actual"));
    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);

    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
      if (modelName !== "gpt-5.4") {
        return null;
      }
      return makePriceRecord(modelName, {
        mode: "responses",
        model_family: "gpt",
        litellm_provider: "chatgpt",
        pricing: {
          openai: {
            input_cost_per_token: 1,
            output_cost_per_token: 10,
            input_cost_per_token_priority: 2,
            output_cost_per_token_priority: 20,
          },
        },
      });
    });

    const recordedDbCosts: string[] = [];
    vi.mocked(updateMessageRequestCostWithBreakdown).mockImplementation(
      async (_id: number, costUsd: unknown) => {
        recordedDbCosts.push(String(costUsd));
      }
    );

    const recordedRateLimitCosts = captureRateLimitCosts();

    const codexSession = createSession({
      originalModel: "gpt-5.4",
      redirectedModel: "gpt-5.4",
      sessionId: "sess-gpt54-priority-actual-mode-upgrade",
      messageId: 3204,
      providerOverrides: {
        name: "ChatGPT",
        url: "https://chatgpt.com/backend-api/codex",
        providerType: "codex",
      },
      requestMessage: { service_tier: "default" },
    });

    const upstreamResponse = createNonStreamResponse(
      { input_tokens: 2, output_tokens: 3 },
      { service_tier: "priority" }
    );
    await ProxyResponseHandler.dispatch(codexSession, upstreamResponse);
    await drainAsyncTasks();

    expect(recordedDbCosts[0]).toBe("64");
    expect(recordedRateLimitCosts[0]).toBe(64);
  });
  // codexPriorityBillingSource = "actual"; request tier is "priority" while the response
  // reports "default". Expected cost uses the standard rates: 2 * 1 + 3 * 10 = 32.
  it("codex fast: actual mode does not use priority pricing when response explicitly reports non-priority tier", async () => {
    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected", "actual"));
    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);

    vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => {
      if (modelName !== "gpt-5.4") {
        return null;
      }
      return makePriceRecord(modelName, {
        mode: "responses",
        model_family: "gpt",
        litellm_provider: "chatgpt",
        pricing: {
          openai: {
            input_cost_per_token: 1,
            output_cost_per_token: 10,
            input_cost_per_token_priority: 2,
            output_cost_per_token_priority: 20,
          },
        },
      });
    });

    const recordedDbCosts: string[] = [];
    vi.mocked(updateMessageRequestCostWithBreakdown).mockImplementation(
      async (_id: number, costUsd: unknown) => {
        recordedDbCosts.push(String(costUsd));
      }
    );

    const recordedRateLimitCosts = captureRateLimitCosts();

    const codexSession = createSession({
      originalModel: "gpt-5.4",
      redirectedModel: "gpt-5.4",
      sessionId: "sess-gpt54-priority-actual-mode-downgrade",
      messageId: 3205,
      providerOverrides: {
        name: "ChatGPT",
        url: "https://chatgpt.com/backend-api/codex",
        providerType: "codex",
      },
      requestMessage: { service_tier: "priority" },
    });

    const upstreamResponse = createNonStreamResponse(
      { input_tokens: 2, output_tokens: 3 },
      { service_tier: "default" }
    );
    await ProxyResponseHandler.dispatch(codexSession, upstreamResponse);
    await drainAsyncTasks();

    expect(recordedDbCosts[0]).toBe("32");
    expect(recordedRateLimitCosts[0]).toBe(32);
  });
  // The response carries no service_tier, so the requested "priority" tier is
  // expected to be billed: 2 * 2 + 3 * 20 = 64.
  it("codex fast: actual mode falls back to requested priority pricing when response omits service_tier", async () => {
    const pricedModel = "gpt-5.4";
    const tokenUsage = { input_tokens: 2, output_tokens: 3 };

    // Only the model under test has a price record; every other lookup misses.
    vi.mocked(findLatestPriceByModel).mockImplementation(async (requestedModel: string) =>
      requestedModel === pricedModel
        ? makePriceRecord(requestedModel, {
            mode: "responses",
            model_family: "gpt",
            litellm_provider: "chatgpt",
            pricing: {
              openai: {
                input_cost_per_token: 1,
                output_cost_per_token: 10,
                input_cost_per_token_priority: 2,
                output_cost_per_token_priority: 20,
              },
            },
          })
        : null
    );

    vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected", "actual"));

    // Collaborators that are irrelevant to the cost assertion become no-ops.
    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);

    // Record every cost handed to the DB writer, stringified for comparison.
    const persistedCosts: string[] = [];
    vi.mocked(updateMessageRequestCostWithBreakdown).mockImplementation(
      async (_messageId: number, cost: unknown) => {
        persistedCosts.push(String(cost));
      }
    );
    const trackedCosts = captureRateLimitCosts();

    const codexSession = createSession({
      originalModel: pricedModel,
      redirectedModel: pricedModel,
      sessionId: "sess-gpt54-priority-actual-mode-fallback",
      messageId: 3206,
      providerOverrides: {
        name: "ChatGPT",
        url: "https://chatgpt.com/backend-api/codex",
        providerType: "codex",
      },
      requestMessage: { service_tier: "priority" },
    });
    // No second argument: the response body omits service_tier.
    const upstreamResponse = createNonStreamResponse(tokenUsage);

    await ProxyResponseHandler.dispatch(codexSession, upstreamResponse);
    await drainAsyncTasks();

    expect(persistedCosts[0]).toBe("64");
    expect(trackedCosts[0]).toBe(64);
  });
  // The settings read succeeds once (during the getCachedSystemSettings call)
  // and then rejects once. The response reports "default", and the expected
  // cost is the standard-rate total, 2 * 1 + 3 * 10 = 32.
  it("codex fast: actual mode reuses cached system setting when direct settings read fails", async () => {
    // Order matters here: resolve once, read through the cache, then reject once.
    const settingsMock = vi.mocked(getSystemSettings);
    settingsMock.mockResolvedValueOnce(makeSystemSettings("redirected", "actual"));
    await getCachedSystemSettings();
    settingsMock.mockRejectedValueOnce(new Error("db down"));

    const pricedModel = "gpt-5.4";
    const tokenUsage = { input_tokens: 2, output_tokens: 3 };

    // Collaborators that are irrelevant to the cost assertion become no-ops.
    vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
    vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
    vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
    vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
    vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);

    // Only the model under test has a price record; every other lookup misses.
    vi.mocked(findLatestPriceByModel).mockImplementation(async (requestedModel: string) =>
      requestedModel === pricedModel
        ? makePriceRecord(requestedModel, {
            mode: "responses",
            model_family: "gpt",
            litellm_provider: "chatgpt",
            pricing: {
              openai: {
                input_cost_per_token: 1,
                output_cost_per_token: 10,
                input_cost_per_token_priority: 2,
                output_cost_per_token_priority: 20,
              },
            },
          })
        : null
    );

    // Record every cost handed to the DB writer, stringified for comparison.
    const persistedCosts: string[] = [];
    vi.mocked(updateMessageRequestCostWithBreakdown).mockImplementation(
      async (_messageId: number, cost: unknown) => {
        persistedCosts.push(String(cost));
      }
    );
    const trackedCosts = captureRateLimitCosts();

    const codexSession = createSession({
      originalModel: pricedModel,
      redirectedModel: pricedModel,
      sessionId: "sess-gpt54-priority-actual-mode-cached-settings",
      messageId: 3207,
      providerOverrides: {
        name: "ChatGPT",
        url: "https://chatgpt.com/backend-api/codex",
        providerType: "codex",
      },
      requestMessage: { service_tier: "priority" },
    });
    const upstreamResponse = createNonStreamResponse(tokenUsage, { service_tier: "default" });

    await ProxyResponseHandler.dispatch(codexSession, upstreamResponse);
    await drainAsyncTasks();

    expect(persistedCosts[0]).toBe("32");
    expect(trackedCosts[0]).toBe(32);
  });
  884. });
  // Suite title in English: "Price table missing / lookup failed: pass through
  // without billing".
  describe("价格表缺失/查询失败:不计费放行", () => {
    type NoPriceScenario = {
      billingModelSource: SystemSettings["billingModelSource"];
      isStream: boolean;
      priceLookup: "none" | "throws";
    };

    /**
     * Dispatches a single response while the price lookup either resolves to
     * `null` ("none") or throws ("throws"), consumes the client response body,
     * drains async tasks, and reports how often the cost sinks were called.
     *
     * @param options - billing model source, stream vs. non-stream response,
     *   and the price-lookup failure mode to simulate.
     * @returns call counts of the DB cost writer and the rate-limit cost tracker.
     */
    async function runNoPriceScenario(
      options: NoPriceScenario
    ): Promise<{ dbCostCalls: number; rateLimitCalls: number }> {
      const { billingModelSource, isStream, priceLookup } = options;
      const tokenUsage = { input_tokens: 2, output_tokens: 3 };

      vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings(billingModelSource));

      const priceLookupMock = vi.mocked(findLatestPriceByModel);
      if (priceLookup === "throws") {
        priceLookupMock.mockImplementation(async () => {
          throw new Error("db query failed");
        });
      } else {
        priceLookupMock.mockResolvedValue(null);
      }

      // Every remaining collaborator resolves to undefined.
      vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined);
      vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined);
      vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined);
      vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined);
      vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined);

      // The two cost sinks whose call counts are returned below.
      const dbCostWriter = vi.mocked(updateMessageRequestCostWithBreakdown);
      const rateLimitTracker = vi.mocked(RateLimitService.trackCost);
      dbCostWriter.mockResolvedValue(undefined);
      rateLimitTracker.mockResolvedValue(undefined);
      vi.mocked(SessionManager.updateSessionUsage).mockResolvedValue(undefined);

      const streamTag = isStream ? "s" : "n";
      const session = createSession({
        originalModel: "original-model",
        redirectedModel: "redirected-model",
        sessionId: `sess-no-price-${billingModelSource}-${streamTag}`,
        messageId: isStream ? 3001 : 3000,
      });

      const upstreamResponse = isStream
        ? createStreamResponse(tokenUsage)
        : createNonStreamResponse(tokenUsage);

      const clientResponse = await ProxyResponseHandler.dispatch(session, upstreamResponse);
      // Read the body to completion before draining the async tasks.
      await clientResponse.text();
      await drainAsyncTasks();

      return {
        dbCostCalls: dbCostWriter.mock.calls.length,
        rateLimitCalls: rateLimitTracker.mock.calls.length,
      };
    }

    // Title in English: "No price: no DB cost written, no rate-limit cost
    // tracked, and exactly one async sync triggered".
    it("无价格:不写入 DB cost,不追踪限流 cost,并触发一次异步同步", async () => {
      const { dbCostCalls, rateLimitCalls } = await runNoPriceScenario({
        billingModelSource: "redirected",
        isStream: false,
        priceLookup: "none",
      });

      expect(dbCostCalls).toBe(0);
      expect(rateLimitCalls).toBe(0);
      expect(cloudPriceSyncRequests).toEqual([{ reason: "missing-model" }]);
    });

    // Title in English: "Price lookup throws: the response must be unaffected,
    // no DB cost written, no rate-limit cost tracked".
    it("价格查询抛错:不应影响响应,不写入 DB cost,不追踪限流 cost", async () => {
      const { dbCostCalls, rateLimitCalls } = await runNoPriceScenario({
        billingModelSource: "original",
        isStream: true,
        priceLookup: "throws",
      });

      expect(dbCostCalls).toBe(0);
      expect(rateLimitCalls).toBe(0);
    });
  });