// cost-calculation-breakdown.test.ts

  import { describe, expect, test } from "vitest";

  import {
    calculateRequestCost,
    calculateRequestCostBreakdown,
    matchLongContextPricing,
    type CostBreakdown,
  } from "@/lib/utils/cost-calculation";
  import type { ModelPriceData } from "@/types/model-price";
  /**
   * Builds a `ModelPriceData` fixture with baseline per-token prices.
   *
   * The baseline is $3/MTok input and $15/MTok output, with cache creation
   * priced at 1.25x input and cache reads at 0.1x input.
   *
   * @param overrides - Fields spread over the baseline; a key given here
   *   replaces the baseline value or adds an extra pricing field.
   * @returns A new object containing the baseline prices merged with `overrides`.
   */
  function makePriceData(overrides: Partial<ModelPriceData> = {}): ModelPriceData {
    return {
      input_cost_per_token: 0.000003, // $3/MTok
      output_cost_per_token: 0.000015, // $15/MTok
      cache_creation_input_token_cost: 0.00000375, // 1.25x input
      cache_read_input_token_cost: 0.0000003, // 0.1x input
      // Spread last so callers can override any baseline price.
      ...overrides,
    };
  }
  /**
   * Tests for the per-category cost breakdown (`input`, `output`,
   * `cache_creation`, `cache_read`, `total`) and for the long-context pricing
   * helpers `matchLongContextPricing` / `calculateRequestCost`.
   *
   * Unless a test overrides them, prices come from `makePriceData()`:
   * $3/MTok input, $15/MTok output, 1.25x input for cache creation and
   * 0.1x input for cache reads.
   */
  describe("calculateRequestCostBreakdown", () => {
    test("basic input + output tokens", () => {
      const result = calculateRequestCostBreakdown(
        { input_tokens: 1000, output_tokens: 500 },
        makePriceData()
      );

      expect(result.input).toBeCloseTo(0.003, 6); // 1000 * 0.000003
      expect(result.output).toBeCloseTo(0.0075, 6); // 500 * 0.000015
      expect(result.cache_creation).toBe(0);
      expect(result.cache_read).toBe(0);
      expect(result.total).toBeCloseTo(0.0105, 6);
    });

    test("cache creation (5m + 1h) + cache read", () => {
      const result = calculateRequestCostBreakdown(
        {
          input_tokens: 100,
          output_tokens: 50,
          cache_creation_5m_input_tokens: 200,
          cache_creation_1h_input_tokens: 300,
          cache_read_input_tokens: 1000,
        },
        makePriceData({
          cache_creation_input_token_cost_above_1hr: 0.000006, // 2x input
        })
      );

      // cache_creation = 200 * 0.00000375 + 300 * 0.000006
      expect(result.cache_creation).toBeCloseTo(0.00255, 6);
      // cache_read = 1000 * 0.0000003
      expect(result.cache_read).toBeCloseTo(0.0003, 6);
      expect(result.total).toBeCloseTo(
        result.input + result.output + result.cache_creation + result.cache_read,
        10
      );
    });

    test("image tokens go to input/output buckets", () => {
      const result = calculateRequestCostBreakdown(
        {
          input_tokens: 100,
          output_tokens: 50,
          input_image_tokens: 500,
          output_image_tokens: 200,
        },
        makePriceData({
          input_cost_per_image_token: 0.00001,
          output_cost_per_image_token: 0.00005,
        })
      );

      // input = 100 * 0.000003 + 500 * 0.00001
      expect(result.input).toBeCloseTo(0.0053, 6);
      // output = 50 * 0.000015 + 200 * 0.00005
      expect(result.output).toBeCloseTo(0.01075, 6);
    });

    test("tiered pricing with context1mApplied", () => {
      const result = calculateRequestCostBreakdown(
        {
          input_tokens: 300000, // crosses 200k threshold
          output_tokens: 100,
        },
        makePriceData(),
        true // context1mApplied
      );

      // input: 300000 * 0.000003 * 2.0 = 1.8 (all tokens at premium when context > 200K)
      expect(result.input).toBeCloseTo(1.8, 4);
      // output: 100 * 0.000015 * 1.5 = 0.00225 (output also at premium when context > 200K)
      expect(result.output).toBeCloseTo(0.00225, 6);
    });

    test("200k tier pricing (Gemini style)", () => {
      const result = calculateRequestCostBreakdown(
        {
          input_tokens: 300000, // crosses 200k threshold
          output_tokens: 100,
        },
        makePriceData({
          input_cost_per_token_above_200k_tokens: 0.000006, // 2x base for >200k
        })
      );

      // input: 300000 * 0.000006 = 1.8 (all tokens at above-200k rate when context > 200K)
      expect(result.input).toBeCloseTo(1.8, 4);
    });

    test("uses priority long-context pricing fields in breakdown when available", () => {
      const result = calculateRequestCostBreakdown(
        {
          input_tokens: 272001,
          output_tokens: 2,
          cache_read_input_tokens: 10,
        },
        makePriceData({
          mode: "responses",
          model_family: "gpt",
          input_cost_per_token_priority: 2,
          output_cost_per_token_priority: 20,
          cache_read_input_token_cost_priority: 0.2,
          input_cost_per_token_above_272k_tokens: 5,
          output_cost_per_token_above_272k_tokens: 50,
          cache_read_input_token_cost_above_272k_tokens: 0.5,
          input_cost_per_token_above_272k_tokens_priority: 7,
          output_cost_per_token_above_272k_tokens_priority: 70,
          cache_read_input_token_cost_above_272k_tokens_priority: 0.7,
        }),
        false, // context1mApplied
        // NOTE(review): presumably the priority-service-tier flag — confirm
        // against the calculateRequestCostBreakdown signature.
        true
      );

      // Expected values use the *_above_272k_tokens_priority rates:
      // 272001 * 7, 2 * 70 and 10 * 0.7.
      expect(result.input).toBe(1904007);
      expect(result.output).toBe(140);
      expect(result.cache_read).toBe(7);
      expect(result.total).toBe(1904154);
    });

    test("falls back to regular long-context pricing in breakdown when priority long-context fields are absent", () => {
      const result = calculateRequestCostBreakdown(
        {
          input_tokens: 272001,
          output_tokens: 2,
          cache_read_input_tokens: 10,
        },
        makePriceData({
          mode: "responses",
          model_family: "gpt",
          input_cost_per_token_priority: 2,
          output_cost_per_token_priority: 20,
          cache_read_input_token_cost_priority: 0.2,
          input_cost_per_token_above_272k_tokens: 5,
          output_cost_per_token_above_272k_tokens: 50,
          cache_read_input_token_cost_above_272k_tokens: 0.5,
          // Explicitly unset so only the non-priority long-context rates remain.
          input_cost_per_token_above_272k_tokens_priority: undefined,
          output_cost_per_token_above_272k_tokens_priority: undefined,
          cache_read_input_token_cost_above_272k_tokens_priority: undefined,
        }),
        false, // context1mApplied
        // NOTE(review): presumably the priority-service-tier flag — confirm
        // against the calculateRequestCostBreakdown signature.
        true
      );

      // Expected values use the plain *_above_272k_tokens rates (not the base
      // priority rates): 272001 * 5, 2 * 50 and 10 * 0.5.
      expect(result.input).toBe(1360005);
      expect(result.output).toBe(100);
      expect(result.cache_read).toBe(5);
      expect(result.total).toBe(1360110);
    });

    test("categories sum to total", () => {
      const result = calculateRequestCostBreakdown(
        {
          input_tokens: 5000,
          output_tokens: 2000,
          cache_creation_input_tokens: 1000,
          cache_read_input_tokens: 3000,
        },
        makePriceData()
      );

      const sum = result.input + result.output + result.cache_creation + result.cache_read;
      expect(result.total).toBeCloseTo(sum, 10);
    });

    test("zero usage returns all zeros", () => {
      const result = calculateRequestCostBreakdown({}, makePriceData());

      expect(result).toEqual({
        input: 0,
        output: 0,
        cache_creation: 0,
        cache_read: 0,
        total: 0,
      });
    });

    test("per-request cost goes to input bucket", () => {
      // No tokens are billed here, so the flat per-request fee is the only cost.
      const result = calculateRequestCostBreakdown(
        { input_tokens: 0 },
        makePriceData({ input_cost_per_request: 0.01 })
      );

      expect(result.input).toBeCloseTo(0.01, 6);
      expect(result.total).toBeCloseTo(0.01, 6);
    });

    test("cache_creation_input_tokens distributed by cache_ttl", () => {
      // When only cache_creation_input_tokens is set (no 5m/1h split),
      // it should be assigned based on cache_ttl
      const result = calculateRequestCostBreakdown(
        {
          input_tokens: 0,
          output_tokens: 0,
          cache_creation_input_tokens: 1000,
          cache_ttl: "1h",
        },
        makePriceData({
          cache_creation_input_token_cost_above_1hr: 0.000006,
        })
      );

      // 1000 tokens should go to 1h tier at 0.000006
      expect(result.cache_creation).toBeCloseTo(0.006, 6);
    });

    test("long_context_pricing uses full-request premium prices after threshold", () => {
      const priceData = makePriceData({
        long_context_pricing: {
          threshold_tokens: 272000,
          scope: "request",
          input_multiplier: 2,
          output_multiplier: 1.5,
          cache_read_input_multiplier: 2,
        },
      });
      const usage = {
        input_tokens: 272001,
        output_tokens: 100,
        cache_read_input_tokens: 50,
      };

      const match = matchLongContextPricing(usage, priceData);
      expect(match).not.toBeNull();

      const cost = calculateRequestCost(usage, priceData, {
        multiplier: 1,
        context1mApplied: false,
        longContextPricing: match?.pricing ?? null,
      });

      // 272001 * 0.000003 * 2 + 100 * 0.000015 * 1.5 + 50 * 0.0000003 * 2
      expect(cost.toNumber()).toBeCloseTo(1.634286, 6);
    });

    test("long_context_pricing threshold is exclusive at exactly 272000 tokens", () => {
      const priceData = makePriceData({
        long_context_pricing: {
          threshold_tokens: 272000,
          scope: "request",
          input_multiplier: 2,
        },
      });

      // input_tokens equals threshold_tokens exactly, so no match is expected.
      const match = matchLongContextPricing(
        {
          input_tokens: 272000,
          output_tokens: 10,
        },
        priceData
      );

      expect(match).toBeNull();
    });
  });