cost-calculation-swap-cache-ttl.test.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. import { describe, expect, test } from "vitest";
  2. import { calculateRequestCostBreakdown } from "@/lib/utils/cost-calculation";
  3. import { applySwapCacheTtlBilling } from "@/app/v1/_lib/proxy/response-handler";
  4. import type { UsageMetrics } from "@/app/v1/_lib/proxy/response-handler";
  5. import type { ModelPriceData } from "@/types/model-price";
  /**
   * Builds the model price fixture shared by the cache-billing tests.
   *
   * Defaults: input $3/MTok, output $15/MTok, 5m cache write at 1.25x input,
   * 1h cache write at 2x input, cache read at 0.1x input.
   *
   * @param overrides - Fields that replace the matching defaults.
   * @returns A fresh price object on every call.
   */
  function makePriceData(overrides: Partial<ModelPriceData> = {}): ModelPriceData {
    const defaults: ModelPriceData = {
      // $3 per million input tokens
      input_cost_per_token: 0.000003,
      // $15 per million output tokens
      output_cost_per_token: 0.000015,
      // 5m cache write: 1.25x the input rate
      cache_creation_input_token_cost: 0.00000375,
      // cache read: 0.1x the input rate
      cache_read_input_token_cost: 0.0000003,
      // 1h cache write: 2x the input rate
      cache_creation_input_token_cost_above_1hr: 0.000006,
    };

    // Overrides are spread last so they win over the defaults.
    return { ...defaults, ...overrides };
  }
  /**
   * Non-mutating test wrapper around the production `applySwapCacheTtlBilling`.
   *
   * The production function mutates the object it receives, so this helper
   * hands it a shallow copy of `usage` and returns only the three cache fields
   * read back from that copy. The caller's object is never modified.
   *
   * @param usage - Starting cache-creation token buckets and optional TTL.
   * @param swap - Forwarded unchanged as the second argument of `applySwapCacheTtlBilling`.
   * @returns The 5m bucket, the 1h bucket and `cache_ttl` as they stand after the call.
   */
  function applySwap(
    usage: {
      cache_creation_5m_input_tokens?: number;
      cache_creation_1h_input_tokens?: number;
      cache_ttl?: "5m" | "1h";
    },
    swap: boolean
  ) {
    // Annotated instead of asserted with `as`, so the compiler checks that the
    // copy really is a valid UsageMetrics rather than taking it on trust.
    const copy: UsageMetrics = { ...usage };
    applySwapCacheTtlBilling(copy, swap);

    const { cache_creation_5m_input_tokens, cache_creation_1h_input_tokens, cache_ttl } = copy;
    return { cache_creation_5m_input_tokens, cache_creation_1h_input_tokens, cache_ttl };
  }
  // End-to-end checks: run usage through the swap wrapper, then price the
  // resulting buckets with the default fixture rates.
  describe("swap cache TTL billing", () => {
    type CacheBuckets = Partial<ReturnType<typeof applySwap>>;

    /** Cache-creation cost of the given buckets with zero input/output tokens, at default prices. */
    const cacheCreationCost = (buckets: CacheBuckets) =>
      calculateRequestCostBreakdown(
        { input_tokens: 0, output_tokens: 0, ...buckets },
        makePriceData()
      ).cache_creation;

    test("swap=false: normal billing (5m tokens at 5m rate, 1h tokens at 1h rate)", () => {
      const reported = { cache_creation_5m_input_tokens: 1000, cache_creation_1h_input_tokens: 0 };

      const cost = cacheCreationCost(applySwap(reported, false));

      // 1000 * 0.00000375 (5m rate)
      expect(cost).toBeCloseTo(0.00375, 6);
    });

    test("swap=true: 1h tokens billed at 5m rate (cheaper)", () => {
      // Provider reports 1h, but actually bills at 5m rate
      const reported = { cache_creation_5m_input_tokens: 0, cache_creation_1h_input_tokens: 1000 };

      const cost = cacheCreationCost(applySwap(reported, true));

      // After swap: 1h tokens (1000) moved to 5m bucket -> 1000 * 0.00000375
      expect(cost).toBeCloseTo(0.00375, 6);
    });

    test("swap=true: 5m tokens billed at 1h rate (more expensive)", () => {
      // Provider reports 5m, but actually bills at 1h rate
      const reported = { cache_creation_5m_input_tokens: 1000, cache_creation_1h_input_tokens: 0 };

      const cost = cacheCreationCost(applySwap(reported, true));

      // After swap: 5m tokens (1000) moved to 1h bucket -> 1000 * 0.000006
      expect(cost).toBeCloseTo(0.006, 6);
    });

    test("swap inverts both buckets when both have tokens", () => {
      const reported = { cache_creation_5m_input_tokens: 200, cache_creation_1h_input_tokens: 800 };

      const normalCost = cacheCreationCost(applySwap(reported, false));
      const swappedCost = cacheCreationCost(applySwap(reported, true));

      // Normal: 200 * 0.00000375 + 800 * 0.000006 = 0.00075 + 0.0048 = 0.00555
      expect(normalCost).toBeCloseTo(0.00555, 6);
      // Swapped: 800 * 0.00000375 + 200 * 0.000006 = 0.003 + 0.0012 = 0.0042
      expect(swappedCost).toBeCloseTo(0.0042, 6);
      // Swapped is cheaper because more tokens went to the cheaper 5m rate
      expect(swappedCost).toBeLessThan(normalCost);
    });

    test("swap exchanges buckets when only one bucket has tokens", () => {
      const only5m = { cache_creation_5m_input_tokens: 500, cache_creation_1h_input_tokens: 0 };

      const untouched = applySwap(only5m, false);
      const flipped = applySwap(only5m, true);

      // Normal: 500 at 5m rate
      expect(untouched.cache_creation_5m_input_tokens).toBe(500);
      expect(untouched.cache_creation_1h_input_tokens).toBe(0);
      // Swapped: 500 moved to 1h bucket, 0 moved to 5m bucket
      expect(flipped.cache_creation_5m_input_tokens).toBe(0);
      expect(flipped.cache_creation_1h_input_tokens).toBe(500);
    });

    test("swap with undefined tokens treats them as undefined (no crash)", () => {
      const flipped = applySwap(
        { cache_creation_5m_input_tokens: undefined, cache_creation_1h_input_tokens: 1000 },
        true
      );

      expect(flipped.cache_creation_5m_input_tokens).toBe(1000);
      expect(flipped.cache_creation_1h_input_tokens).toBeUndefined();

      // Should not crash when passed to cost calculation; 1000 at 5m rate
      expect(cacheCreationCost(flipped)).toBeCloseTo(0.00375, 6);
    });

    test("swap also inverts cache_ttl value", () => {
      const from5m = applySwap(
        { cache_creation_5m_input_tokens: 100, cache_creation_1h_input_tokens: 0, cache_ttl: "5m" },
        true
      );
      const from1h = applySwap(
        { cache_creation_5m_input_tokens: 0, cache_creation_1h_input_tokens: 100, cache_ttl: "1h" },
        true
      );

      expect(from5m.cache_ttl).toBe("1h");
      expect(from1h.cache_ttl).toBe("5m");
    });

    test("swap with only cache_creation_input_tokens (total) and cache_ttl=1h routes total to 5m bucket", () => {
      // Upstream sends total without explicit buckets + cache_ttl: "1h"
      // After swap: cache_ttl becomes "5m", so total should go to 5m bucket (not 1h)
      const flipped = applySwap({ cache_ttl: "1h" }, true);

      // cache_ttl should be inverted
      expect(flipped.cache_ttl).toBe("5m");

      // Simulate how response-handler resolves buckets after swap:
      // the resolved TTL is the swapped one and the total (1000) goes to
      // whichever bucket that TTL names.
      const totalTokens = 1000;
      let cache5m: number | undefined;
      let cache1h: number | undefined;
      if (flipped.cache_ttl === "1h") {
        cache1h = totalTokens;
      } else {
        cache5m = totalTokens;
      }

      // Total should land in 5m bucket (cheaper), not 1h
      expect(cache5m).toBe(1000);
      expect(cache1h).toBeUndefined();

      const cost = cacheCreationCost({
        cache_creation_5m_input_tokens: cache5m,
        cache_creation_1h_input_tokens: cache1h,
      });

      // 1000 * 0.00000375 (5m rate)
      expect(cost).toBeCloseTo(0.00375, 6);
    });
  });
  // Direct checks of the in-place production function: each test builds a
  // UsageMetrics object, calls applySwapCacheTtlBilling on it and inspects
  // that same object afterwards.
  describe("applySwapCacheTtlBilling (direct)", () => {
    test("swap=false is a no-op", () => {
      const usage: UsageMetrics = {
        input_tokens: 100,
        output_tokens: 50,
        cache_creation_5m_input_tokens: 200,
        cache_creation_1h_input_tokens: 300,
        cache_ttl: "5m",
      };
      // Shallow snapshot taken before the call, compared field-by-field after.
      const before = { ...usage };
      applySwapCacheTtlBilling(usage, false);
      expect(usage).toEqual(before);
    });

    test("swap=undefined is a no-op", () => {
      const usage: UsageMetrics = {
        cache_creation_5m_input_tokens: 200,
        cache_creation_1h_input_tokens: 300,
        cache_ttl: "1h",
      };
      const before = { ...usage };
      applySwapCacheTtlBilling(usage, undefined);
      expect(usage).toEqual(before);
    });

    test("swap=true swaps bucket values", () => {
      const usage: UsageMetrics = {
        cache_creation_5m_input_tokens: 200,
        cache_creation_1h_input_tokens: 300,
      };
      applySwapCacheTtlBilling(usage, true);
      expect(usage.cache_creation_5m_input_tokens).toBe(300);
      expect(usage.cache_creation_1h_input_tokens).toBe(200);
    });

    test("swap=true inverts cache_ttl 5m->1h", () => {
      const usage: UsageMetrics = { cache_ttl: "5m" };
      applySwapCacheTtlBilling(usage, true);
      expect(usage.cache_ttl).toBe("1h");
    });

    test("swap=true inverts cache_ttl 1h->5m", () => {
      const usage: UsageMetrics = { cache_ttl: "1h" };
      applySwapCacheTtlBilling(usage, true);
      expect(usage.cache_ttl).toBe("5m");
    });

    test("swap=true leaves undefined cache_ttl as undefined", () => {
      const usage: UsageMetrics = { cache_creation_5m_input_tokens: 100 };
      applySwapCacheTtlBilling(usage, true);
      expect(usage.cache_ttl).toBeUndefined();
    });

    test("swap=true with undefined bucket values does not crash", () => {
      const usage: UsageMetrics = {};
      applySwapCacheTtlBilling(usage, true);
      expect(usage.cache_creation_5m_input_tokens).toBeUndefined();
      expect(usage.cache_creation_1h_input_tokens).toBeUndefined();
    });

    test("swap=true preserves non-cache fields", () => {
      const usage: UsageMetrics = {
        input_tokens: 100,
        output_tokens: 50,
        cache_read_input_tokens: 75,
        cache_creation_5m_input_tokens: 200,
        cache_creation_1h_input_tokens: 300,
        cache_ttl: "5m",
      };
      applySwapCacheTtlBilling(usage, true);
      expect(usage.input_tokens).toBe(100);
      expect(usage.output_tokens).toBe(50);
      expect(usage.cache_read_input_tokens).toBe(75);
    });

    test("swap=true does not touch mixed cache_ttl", () => {
      const usage: UsageMetrics = {
        cache_creation_5m_input_tokens: 100,
        cache_creation_1h_input_tokens: 200,
        cache_ttl: "mixed",
      };
      applySwapCacheTtlBilling(usage, true);
      // Buckets swap
      expect(usage.cache_creation_5m_input_tokens).toBe(200);
      expect(usage.cache_creation_1h_input_tokens).toBe(100);
      // "mixed" is not "5m" or "1h", so stays unchanged
      expect(usage.cache_ttl).toBe("mixed");
    });

    test("applySwapCacheTtlBilling does not affect a pre-cloned copy (caller isolation)", () => {
      const original: UsageMetrics = {
        cache_creation_5m_input_tokens: 200,
        cache_creation_1h_input_tokens: 800,
        cache_ttl: "5m",
      };
      const snapshot = { ...original };
      // Clone then swap (mimics the fixed normalizeUsageWithSwap pattern).
      // Only the clone is handed to the mutating function.
      const clone = { ...original };
      applySwapCacheTtlBilling(clone, true);
      // Original must be untouched
      expect(original.cache_creation_5m_input_tokens).toBe(snapshot.cache_creation_5m_input_tokens);
      expect(original.cache_creation_1h_input_tokens).toBe(snapshot.cache_creation_1h_input_tokens);
      expect(original.cache_ttl).toBe(snapshot.cache_ttl);
      // Clone should have swapped values
      expect(clone.cache_creation_5m_input_tokens).toBe(800);
      expect(clone.cache_creation_1h_input_tokens).toBe(200);
      expect(clone.cache_ttl).toBe("1h");
    });

    // Applying the swap twice must undo it. That property is an involution
    // (self-inverse), not idempotency: an idempotent swap would leave the
    // values inverted after the second call.
    test("double swap returns to original values (involution)", () => {
      const usage: UsageMetrics = {
        input_tokens: 500,
        output_tokens: 250,
        cache_creation_5m_input_tokens: 200,
        cache_creation_1h_input_tokens: 800,
        cache_read_input_tokens: 150,
        cache_ttl: "5m",
      };
      const original = { ...usage };
      applySwapCacheTtlBilling(usage, true);
      // After first swap, values are inverted
      expect(usage.cache_creation_5m_input_tokens).toBe(800);
      expect(usage.cache_creation_1h_input_tokens).toBe(200);
      expect(usage.cache_ttl).toBe("1h");
      applySwapCacheTtlBilling(usage, true);
      // After second swap, values return to original
      expect(usage.cache_creation_5m_input_tokens).toBe(original.cache_creation_5m_input_tokens);
      expect(usage.cache_creation_1h_input_tokens).toBe(original.cache_creation_1h_input_tokens);
      expect(usage.cache_ttl).toBe(original.cache_ttl);
      // Non-cache fields unchanged throughout
      expect(usage.input_tokens).toBe(original.input_tokens);
      expect(usage.output_tokens).toBe(original.output_tokens);
      expect(usage.cache_read_input_tokens).toBe(original.cache_read_input_tokens);
    });
  });