// Source file: tiered_settle_test.go (23 KB)
  1. package service
  2. import (
  3. "math"
  4. "math/rand"
  5. "sync"
  6. "testing"
  7. "github.com/QuantumNous/new-api/dto"
  8. "github.com/QuantumNous/new-api/pkg/billingexpr"
  9. relaycommon "github.com/QuantumNous/new-api/relay/common"
  10. "github.com/shopspring/decimal"
  11. )
  // Billing expressions and pricing constants shared by the tests in this file.
  const (
  	// sonnetTieredExpr is a Claude Sonnet-style tiered expression: prompts of
  	// at most 200000 tokens match "standard", larger ones "long_context".
  	sonnetTieredExpr = `p <= 200000 ? tier("standard", p * 1.5 + c * 7.5) : tier("long_context", p * 3 + c * 11.25)`

  	// flatExpr is a simple single-tier expression.
  	flatExpr = `tier("default", p * 2 + c * 10)`

  	// cacheExpr extends the flat pricing with the cache variables cr, cc and cc1h.
  	cacheExpr = `tier("default", p * 2 + c * 10 + cr * 0.2 + cc * 2.5 + cc1h * 4)`

  	// probeExpr picks its tier from the request's "service_tier" parameter.
  	probeExpr = `param("service_tier") == "fast" ? tier("fast", p * 4 + c * 20) : tier("normal", p * 2 + c * 10)`

  	// testQuotaPerUnit is the quota-per-unit value used by the snapshots and
  	// the expected-quota arithmetic in these tests.
  	testQuotaPerUnit = 500_000.0
  )
  21. func makeSnapshot(expr string, groupRatio float64, estPrompt, estCompletion int) *billingexpr.BillingSnapshot {
  22. return &billingexpr.BillingSnapshot{
  23. BillingMode: "tiered_expr",
  24. ExprString: expr,
  25. ExprHash: billingexpr.ExprHashString(expr),
  26. GroupRatio: groupRatio,
  27. EstimatedPromptTokens: estPrompt,
  28. EstimatedCompletionTokens: estCompletion,
  29. QuotaPerUnit: testQuotaPerUnit,
  30. }
  31. }
  32. func makeRelayInfo(expr string, groupRatio float64, estPrompt, estCompletion int) *relaycommon.RelayInfo {
  33. snap := makeSnapshot(expr, groupRatio, estPrompt, estCompletion)
  34. cost, trace, _ := billingexpr.RunExpr(expr, billingexpr.TokenParams{P: float64(estPrompt), C: float64(estCompletion)})
  35. quotaBeforeGroup := cost / 1_000_000 * testQuotaPerUnit
  36. snap.EstimatedQuotaBeforeGroup = quotaBeforeGroup
  37. snap.EstimatedQuotaAfterGroup = billingexpr.QuotaRound(quotaBeforeGroup * groupRatio)
  38. snap.EstimatedTier = trace.MatchedTier
  39. return &relaycommon.RelayInfo{
  40. TieredBillingSnapshot: snap,
  41. FinalPreConsumedQuota: snap.EstimatedQuotaAfterGroup,
  42. }
  43. }
  44. // ---------------------------------------------------------------------------
  45. // Existing tests (preserved)
  46. // ---------------------------------------------------------------------------
  47. func TestTryTieredSettleUsesFrozenRequestInput(t *testing.T) {
  48. exprStr := `param("service_tier") == "fast" ? tier("fast", p * 2) : tier("normal", p)`
  49. relayInfo := &relaycommon.RelayInfo{
  50. TieredBillingSnapshot: &billingexpr.BillingSnapshot{
  51. BillingMode: "tiered_expr",
  52. ExprString: exprStr,
  53. ExprHash: billingexpr.ExprHashString(exprStr),
  54. GroupRatio: 1.0,
  55. EstimatedPromptTokens: 100,
  56. EstimatedCompletionTokens: 0,
  57. EstimatedQuotaAfterGroup: 50,
  58. QuotaPerUnit: testQuotaPerUnit,
  59. },
  60. BillingRequestInput: &billingexpr.RequestInput{
  61. Body: []byte(`{"service_tier":"fast"}`),
  62. },
  63. }
  64. ok, quota, result := TryTieredSettle(relayInfo, billingexpr.TokenParams{P: 100})
  65. if !ok {
  66. t.Fatal("expected tiered settle to apply")
  67. }
  68. // fast: p*2 = 200; quota = 200 / 1M * 500K = 100
  69. if quota != 100 {
  70. t.Fatalf("quota = %d, want 100", quota)
  71. }
  72. if result == nil || result.MatchedTier != "fast" {
  73. t.Fatalf("matched tier = %v, want fast", result)
  74. }
  75. }
  76. func TestTryTieredSettleFallsBackToFrozenPreConsumeOnExprError(t *testing.T) {
  77. relayInfo := &relaycommon.RelayInfo{
  78. FinalPreConsumedQuota: 321,
  79. TieredBillingSnapshot: &billingexpr.BillingSnapshot{
  80. BillingMode: "tiered_expr",
  81. ExprString: `invalid +-+ expr`,
  82. ExprHash: billingexpr.ExprHashString(`invalid +-+ expr`),
  83. GroupRatio: 1.0,
  84. EstimatedQuotaAfterGroup: 123,
  85. },
  86. }
  87. ok, quota, result := TryTieredSettle(relayInfo, billingexpr.TokenParams{P: 100})
  88. if !ok {
  89. t.Fatal("expected tiered settle to apply")
  90. }
  91. if quota != 321 {
  92. t.Fatalf("quota = %d, want 321", quota)
  93. }
  94. if result != nil {
  95. t.Fatalf("result = %#v, want nil", result)
  96. }
  97. }
  98. // ---------------------------------------------------------------------------
  99. // Pre-consume vs Post-consume consistency
  100. // ---------------------------------------------------------------------------
  101. func TestTryTieredSettle_PreConsumeMatchesPostConsume(t *testing.T) {
  102. info := makeRelayInfo(flatExpr, 1.0, 1000, 500)
  103. params := billingexpr.TokenParams{P: 1000, C: 500}
  104. ok, quota, _ := TryTieredSettle(info, params)
  105. if !ok {
  106. t.Fatal("expected tiered settle")
  107. }
  108. // p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500
  109. if quota != 3500 {
  110. t.Fatalf("quota = %d, want 3500", quota)
  111. }
  112. if quota != info.FinalPreConsumedQuota {
  113. t.Fatalf("pre-consume %d != post-consume %d", info.FinalPreConsumedQuota, quota)
  114. }
  115. }
  116. func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) {
  117. info := makeRelayInfo(flatExpr, 1.0, 1000, 500)
  118. preConsumed := info.FinalPreConsumedQuota // 3500
  119. // Actual usage is higher than estimated
  120. params := billingexpr.TokenParams{P: 2000, C: 1000}
  121. ok, quota, _ := TryTieredSettle(info, params)
  122. if !ok {
  123. t.Fatal("expected tiered settle")
  124. }
  125. // p*2 + c*10 = 14000; quota = 14000 / 1M * 500K = 7000
  126. if quota != 7000 {
  127. t.Fatalf("quota = %d, want 7000", quota)
  128. }
  129. if quota <= preConsumed {
  130. t.Fatalf("expected supplement: actual %d should > pre-consumed %d", quota, preConsumed)
  131. }
  132. }
  133. func TestTryTieredSettle_PostConsumeUnderPreConsume(t *testing.T) {
  134. info := makeRelayInfo(flatExpr, 1.0, 1000, 500)
  135. preConsumed := info.FinalPreConsumedQuota // 3500
  136. // Actual usage is lower than estimated
  137. params := billingexpr.TokenParams{P: 100, C: 50}
  138. ok, quota, _ := TryTieredSettle(info, params)
  139. if !ok {
  140. t.Fatal("expected tiered settle")
  141. }
  142. // p*2 + c*10 = 700; quota = 700 / 1M * 500K = 350
  143. if quota != 350 {
  144. t.Fatalf("quota = %d, want 350", quota)
  145. }
  146. if quota >= preConsumed {
  147. t.Fatalf("expected refund: actual %d should < pre-consumed %d", quota, preConsumed)
  148. }
  149. }
  150. // ---------------------------------------------------------------------------
  151. // Tiered boundary conditions
  152. // ---------------------------------------------------------------------------
  153. func TestTryTieredSettle_ExactBoundary(t *testing.T) {
  154. info := makeRelayInfo(sonnetTieredExpr, 1.0, 200000, 1000)
  155. // p == 200000 => standard tier (p <= 200000)
  156. ok, quota, result := TryTieredSettle(info, billingexpr.TokenParams{P: 200000, C: 1000})
  157. if !ok {
  158. t.Fatal("expected tiered settle")
  159. }
  160. // standard: p*1.5 + c*7.5 = 307500; quota = 307500 / 1M * 500K = 153750
  161. if quota != 153750 {
  162. t.Fatalf("quota = %d, want 153750", quota)
  163. }
  164. if result.MatchedTier != "standard" {
  165. t.Fatalf("tier = %s, want standard", result.MatchedTier)
  166. }
  167. }
  168. func TestTryTieredSettle_BoundaryPlusOne(t *testing.T) {
  169. info := makeRelayInfo(sonnetTieredExpr, 1.0, 200000, 1000)
  170. // p == 200001 => crosses to long_context tier
  171. ok, quota, result := TryTieredSettle(info, billingexpr.TokenParams{P: 200001, C: 1000})
  172. if !ok {
  173. t.Fatal("expected tiered settle")
  174. }
  175. // long_context: p*3 + c*11.25 = 611253; quota = round(611253 / 1M * 500K) = 305627
  176. if quota != 305627 {
  177. t.Fatalf("quota = %d, want 305627", quota)
  178. }
  179. if result.MatchedTier != "long_context" {
  180. t.Fatalf("tier = %s, want long_context", result.MatchedTier)
  181. }
  182. if !result.CrossedTier {
  183. t.Fatal("expected CrossedTier = true")
  184. }
  185. }
  186. func TestTryTieredSettle_ZeroTokens(t *testing.T) {
  187. info := makeRelayInfo(flatExpr, 1.0, 0, 0)
  188. ok, quota, result := TryTieredSettle(info, billingexpr.TokenParams{P: 0, C: 0})
  189. if !ok {
  190. t.Fatal("expected tiered settle")
  191. }
  192. if quota != 0 {
  193. t.Fatalf("quota = %d, want 0", quota)
  194. }
  195. if result == nil {
  196. t.Fatal("result should not be nil")
  197. }
  198. }
  199. func TestTryTieredSettle_HugeTokens(t *testing.T) {
  200. info := makeRelayInfo(flatExpr, 1.0, 10000000, 5000000)
  201. ok, quota, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 10000000, C: 5000000})
  202. if !ok {
  203. t.Fatal("expected tiered settle")
  204. }
  205. // p*2 + c*10 = 70000000; quota = 70000000 / 1M * 500K = 35000000
  206. if quota != 35000000 {
  207. t.Fatalf("quota = %d, want 35000000", quota)
  208. }
  209. }
  210. func TestTryTieredSettle_CacheTokensAffectSettlement(t *testing.T) {
  211. info := makeRelayInfo(cacheExpr, 1.0, 1000, 500)
  212. // Without cache tokens
  213. ok1, quota1, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500})
  214. if !ok1 {
  215. t.Fatal("expected tiered settle")
  216. }
  217. // p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500
  218. // With cache tokens
  219. ok2, quota2, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500, CR: 10000, CC: 5000, CC1h: 2000})
  220. if !ok2 {
  221. t.Fatal("expected tiered settle")
  222. }
  223. // 2000 + 5000 + 2000 + 12500 + 8000 = 29500; quota = 29500 / 1M * 500K = 14750
  224. if quota2 <= quota1 {
  225. t.Fatalf("cache tokens should increase quota: without=%d, with=%d", quota1, quota2)
  226. }
  227. if quota1 != 3500 {
  228. t.Fatalf("no-cache quota = %d, want 3500", quota1)
  229. }
  230. if quota2 != 14750 {
  231. t.Fatalf("cache quota = %d, want 14750", quota2)
  232. }
  233. }
  234. // ---------------------------------------------------------------------------
  235. // Request probe tests
  236. // ---------------------------------------------------------------------------
  237. func TestTryTieredSettle_RequestProbeInfluencesBilling(t *testing.T) {
  238. info := makeRelayInfo(probeExpr, 1.0, 1000, 500)
  239. info.BillingRequestInput = &billingexpr.RequestInput{
  240. Body: []byte(`{"service_tier":"fast"}`),
  241. }
  242. ok, quota, result := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500})
  243. if !ok {
  244. t.Fatal("expected tiered settle")
  245. }
  246. // fast: p*4 + c*20 = 14000; quota = 14000 / 1M * 500K = 7000
  247. if quota != 7000 {
  248. t.Fatalf("quota = %d, want 7000", quota)
  249. }
  250. if result.MatchedTier != "fast" {
  251. t.Fatalf("tier = %s, want fast", result.MatchedTier)
  252. }
  253. }
  254. func TestTryTieredSettle_NoRequestInput_FallsBackToDefault(t *testing.T) {
  255. info := makeRelayInfo(probeExpr, 1.0, 1000, 500)
  256. // No BillingRequestInput set — param("service_tier") returns nil, not "fast"
  257. ok, quota, result := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500})
  258. if !ok {
  259. t.Fatal("expected tiered settle")
  260. }
  261. // normal: p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500
  262. if quota != 3500 {
  263. t.Fatalf("quota = %d, want 3500", quota)
  264. }
  265. if result.MatchedTier != "normal" {
  266. t.Fatalf("tier = %s, want normal", result.MatchedTier)
  267. }
  268. }
  269. // ---------------------------------------------------------------------------
  270. // Group ratio tests
  271. // ---------------------------------------------------------------------------
  272. func TestTryTieredSettle_GroupRatioScaling(t *testing.T) {
  273. info := makeRelayInfo(flatExpr, 1.5, 1000, 500)
  274. ok, quota, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500})
  275. if !ok {
  276. t.Fatal("expected tiered settle")
  277. }
  278. // exprCost = 7000, quotaBeforeGroup = 3500, afterGroup = round(3500 * 1.5) = 5250
  279. if quota != 5250 {
  280. t.Fatalf("quota = %d, want 5250", quota)
  281. }
  282. }
  283. func TestTryTieredSettle_GroupRatioZero(t *testing.T) {
  284. info := makeRelayInfo(flatExpr, 0, 1000, 500)
  285. ok, quota, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500})
  286. if !ok {
  287. t.Fatal("expected tiered settle")
  288. }
  289. if quota != 0 {
  290. t.Fatalf("quota = %d, want 0 (group ratio = 0)", quota)
  291. }
  292. }
  293. // ---------------------------------------------------------------------------
  294. // Ratio mode (negative tests) — TryTieredSettle must return false
  295. // ---------------------------------------------------------------------------
  296. func TestTryTieredSettle_RatioMode_NilSnapshot(t *testing.T) {
  297. info := &relaycommon.RelayInfo{
  298. TieredBillingSnapshot: nil,
  299. }
  300. ok, _, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500})
  301. if ok {
  302. t.Fatal("expected TryTieredSettle to return false when snapshot is nil")
  303. }
  304. }
  305. func TestTryTieredSettle_RatioMode_WrongBillingMode(t *testing.T) {
  306. info := &relaycommon.RelayInfo{
  307. TieredBillingSnapshot: &billingexpr.BillingSnapshot{
  308. BillingMode: "ratio",
  309. ExprString: flatExpr,
  310. ExprHash: billingexpr.ExprHashString(flatExpr),
  311. GroupRatio: 1.0,
  312. },
  313. }
  314. ok, _, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500})
  315. if ok {
  316. t.Fatal("expected TryTieredSettle to return false for ratio billing mode")
  317. }
  318. }
  319. func TestTryTieredSettle_RatioMode_EmptyBillingMode(t *testing.T) {
  320. info := &relaycommon.RelayInfo{
  321. TieredBillingSnapshot: &billingexpr.BillingSnapshot{
  322. BillingMode: "",
  323. ExprString: flatExpr,
  324. ExprHash: billingexpr.ExprHashString(flatExpr),
  325. GroupRatio: 1.0,
  326. },
  327. }
  328. ok, _, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500})
  329. if ok {
  330. t.Fatal("expected TryTieredSettle to return false for empty billing mode")
  331. }
  332. }
  333. // ---------------------------------------------------------------------------
  334. // Fallback tests
  335. // ---------------------------------------------------------------------------
  336. func TestTryTieredSettle_ErrorFallbackToEstimatedQuotaAfterGroup(t *testing.T) {
  337. info := &relaycommon.RelayInfo{
  338. FinalPreConsumedQuota: 0,
  339. TieredBillingSnapshot: &billingexpr.BillingSnapshot{
  340. BillingMode: "tiered_expr",
  341. ExprString: `invalid expr!!!`,
  342. ExprHash: billingexpr.ExprHashString(`invalid expr!!!`),
  343. GroupRatio: 1.0,
  344. EstimatedQuotaAfterGroup: 999,
  345. },
  346. }
  347. ok, quota, result := TryTieredSettle(info, billingexpr.TokenParams{P: 100})
  348. if !ok {
  349. t.Fatal("expected tiered settle to apply")
  350. }
  351. // FinalPreConsumedQuota is 0, should fall back to EstimatedQuotaAfterGroup
  352. if quota != 999 {
  353. t.Fatalf("quota = %d, want 999", quota)
  354. }
  355. if result != nil {
  356. t.Fatal("result should be nil on error fallback")
  357. }
  358. }
  359. // ---------------------------------------------------------------------------
  360. // BuildTieredTokenParams: token normalization and ratio parity tests
  361. // ---------------------------------------------------------------------------
  362. func tieredQuota(exprStr string, usage *dto.Usage, isClaudeSemantic bool, groupRatio float64) float64 {
  363. usedVars := billingexpr.UsedVars(exprStr)
  364. params := BuildTieredTokenParams(usage, isClaudeSemantic, usedVars)
  365. cost, _, _ := billingexpr.RunExpr(exprStr, params)
  366. return cost / 1_000_000 * testQuotaPerUnit * groupRatio
  367. }
  368. func ratioQuota(usage *dto.Usage, isClaudeSemantic bool, modelRatio, completionRatio, cacheRatio, imageRatio, groupRatio float64) float64 {
  369. dPromptTokens := decimal.NewFromInt(int64(usage.PromptTokens))
  370. dCacheTokens := decimal.NewFromInt(int64(usage.PromptTokensDetails.CachedTokens))
  371. dCcTokens := decimal.NewFromInt(int64(usage.PromptTokensDetails.CachedCreationTokens))
  372. dImgTokens := decimal.NewFromInt(int64(usage.PromptTokensDetails.ImageTokens))
  373. dCompletionTokens := decimal.NewFromInt(int64(usage.CompletionTokens))
  374. dModelRatio := decimal.NewFromFloat(modelRatio)
  375. dCompletionRatio := decimal.NewFromFloat(completionRatio)
  376. dCacheRatio := decimal.NewFromFloat(cacheRatio)
  377. dImageRatio := decimal.NewFromFloat(imageRatio)
  378. dGroupRatio := decimal.NewFromFloat(groupRatio)
  379. baseTokens := dPromptTokens
  380. if !isClaudeSemantic {
  381. baseTokens = baseTokens.Sub(dCacheTokens)
  382. baseTokens = baseTokens.Sub(dCcTokens)
  383. baseTokens = baseTokens.Sub(dImgTokens)
  384. }
  385. cachedTokensWithRatio := dCacheTokens.Mul(dCacheRatio)
  386. imageTokensWithRatio := dImgTokens.Mul(dImageRatio)
  387. promptQuota := baseTokens.Add(cachedTokensWithRatio).Add(imageTokensWithRatio)
  388. completionQuota := dCompletionTokens.Mul(dCompletionRatio)
  389. ratio := dModelRatio.Mul(dGroupRatio)
  390. result := promptQuota.Add(completionQuota).Mul(ratio)
  391. f, _ := result.Float64()
  392. return f
  393. }
  394. func TestBuildTieredTokenParams_GPT_WithCache(t *testing.T) {
  395. usage := &dto.Usage{
  396. PromptTokens: 1000,
  397. CompletionTokens: 500,
  398. PromptTokensDetails: dto.InputTokenDetails{
  399. CachedTokens: 200,
  400. TextTokens: 800,
  401. },
  402. }
  403. expr := `tier("base", p * 2.5 + c * 15 + cr * 0.25)`
  404. got := tieredQuota(expr, usage, false, 1.0)
  405. // P=800, C=500, CR=200 → (800*2.5 + 500*15 + 200*0.25) * 0.5 = 4775
  406. want := 4775.0
  407. if math.Abs(got-want) > 0.01 {
  408. t.Fatalf("quota = %f, want %f", got, want)
  409. }
  410. }
  411. func TestBuildTieredTokenParams_GPT_NoCacheVar(t *testing.T) {
  412. usage := &dto.Usage{
  413. PromptTokens: 1000,
  414. CompletionTokens: 500,
  415. PromptTokensDetails: dto.InputTokenDetails{
  416. CachedTokens: 200,
  417. TextTokens: 800,
  418. },
  419. }
  420. expr := `tier("base", p * 2.5 + c * 15)`
  421. got := tieredQuota(expr, usage, false, 1.0)
  422. // No cr → P=1000 (cache stays in P), C=500 → (1000*2.5 + 500*15) * 0.5 = 5000
  423. want := 5000.0
  424. if math.Abs(got-want) > 0.01 {
  425. t.Fatalf("quota = %f, want %f", got, want)
  426. }
  427. }
  428. func TestBuildTieredTokenParams_GPT_WithImage(t *testing.T) {
  429. usage := &dto.Usage{
  430. PromptTokens: 1000,
  431. CompletionTokens: 500,
  432. PromptTokensDetails: dto.InputTokenDetails{
  433. ImageTokens: 200,
  434. TextTokens: 800,
  435. },
  436. }
  437. expr := `tier("base", p * 2 + c * 8 + img * 2.5)`
  438. got := tieredQuota(expr, usage, false, 1.0)
  439. // P=800, C=500, Img=200 → (800*2 + 500*8 + 200*2.5) * 0.5 = 3050
  440. want := 3050.0
  441. if math.Abs(got-want) > 0.01 {
  442. t.Fatalf("quota = %f, want %f", got, want)
  443. }
  444. }
  445. func TestBuildTieredTokenParams_Claude_WithCache(t *testing.T) {
  446. usage := &dto.Usage{
  447. PromptTokens: 800,
  448. CompletionTokens: 500,
  449. PromptTokensDetails: dto.InputTokenDetails{
  450. CachedTokens: 200,
  451. TextTokens: 800,
  452. },
  453. }
  454. expr := `tier("base", p * 3 + c * 15 + cr * 0.3)`
  455. got := tieredQuota(expr, usage, true, 1.0)
  456. // Claude: P=800 (no subtraction), C=500, CR=200 → (800*3 + 500*15 + 200*0.3) * 0.5 = 4980
  457. want := 4980.0
  458. if math.Abs(got-want) > 0.01 {
  459. t.Fatalf("quota = %f, want %f", got, want)
  460. }
  461. }
  462. func TestBuildTieredTokenParams_GPT_AudioOutput(t *testing.T) {
  463. usage := &dto.Usage{
  464. PromptTokens: 1000,
  465. CompletionTokens: 600,
  466. CompletionTokenDetails: dto.OutputTokenDetails{
  467. AudioTokens: 100,
  468. TextTokens: 500,
  469. },
  470. }
  471. expr := `tier("base", p * 2 + c * 10 + ao * 50)`
  472. got := tieredQuota(expr, usage, false, 1.0)
  473. // C=600-100=500, AO=100 → (1000*2 + 500*10 + 100*50) * 0.5 = 6000
  474. want := 6000.0
  475. if math.Abs(got-want) > 0.01 {
  476. t.Fatalf("quota = %f, want %f", got, want)
  477. }
  478. }
  479. func TestBuildTieredTokenParams_GPT_AudioOutputNoVar(t *testing.T) {
  480. usage := &dto.Usage{
  481. PromptTokens: 1000,
  482. CompletionTokens: 600,
  483. CompletionTokenDetails: dto.OutputTokenDetails{
  484. AudioTokens: 100,
  485. TextTokens: 500,
  486. },
  487. }
  488. expr := `tier("base", p * 2 + c * 10)`
  489. got := tieredQuota(expr, usage, false, 1.0)
  490. // No ao → C=600 (audio stays in C) → (1000*2 + 600*10) * 0.5 = 4000
  491. want := 4000.0
  492. if math.Abs(got-want) > 0.01 {
  493. t.Fatalf("quota = %f, want %f", got, want)
  494. }
  495. }
  496. func TestBuildTieredTokenParams_ParityWithRatio(t *testing.T) {
  497. // GPT-5.4 prices: input=$2.5, output=$15, cacheRead=$0.25
  498. // Ratio equivalents: modelRatio=1.25, completionRatio=6, cacheRatio=0.1
  499. usage := &dto.Usage{
  500. PromptTokens: 10000,
  501. CompletionTokens: 2000,
  502. PromptTokensDetails: dto.InputTokenDetails{
  503. CachedTokens: 3000,
  504. TextTokens: 7000,
  505. },
  506. }
  507. expr := `tier("base", p * 2.5 + c * 15 + cr * 0.25)`
  508. for _, gr := range []float64{1.0, 1.5, 2.0, 0.5} {
  509. tq := tieredQuota(expr, usage, false, gr)
  510. rq := ratioQuota(usage, false, 1.25, 6, 0.1, 0, gr)
  511. if math.Abs(tq-rq) > 0.01 {
  512. t.Fatalf("groupRatio=%v: tiered=%f ratio=%f (mismatch)", gr, tq, rq)
  513. }
  514. }
  515. }
  516. func TestBuildTieredTokenParams_ParityWithRatio_Image(t *testing.T) {
  517. // gpt-image-1-mini prices: input=$2, output=$8, image=$2.5
  518. // Ratio equivalents: modelRatio=1, completionRatio=4, imageRatio=1.25
  519. usage := &dto.Usage{
  520. PromptTokens: 5000,
  521. CompletionTokens: 4000,
  522. PromptTokensDetails: dto.InputTokenDetails{
  523. ImageTokens: 1000,
  524. TextTokens: 4000,
  525. },
  526. }
  527. expr := `tier("base", p * 2 + c * 8 + img * 2.5)`
  528. tq := tieredQuota(expr, usage, false, 1.0)
  529. rq := ratioQuota(usage, false, 1.0, 4, 0, 1.25, 1.0)
  530. if math.Abs(tq-rq) > 0.01 {
  531. t.Fatalf("tiered=%f ratio=%f (mismatch)", tq, rq)
  532. }
  533. }
  534. // ---------------------------------------------------------------------------
  535. // Stress test: 1000 concurrent goroutines, complex tiered expr vs ratio,
  536. // random token counts, verify correctness and measure performance
  537. // ---------------------------------------------------------------------------
  // complexTieredExpr is a two-tier expression that prices every token variable
  // used in this file (p, c, cr, cc, cc1h, img, img_o, ai, ao); the
  // "long_context" branch applies when p exceeds 200000. The literal is split
  // across lines for readability only; the concatenated value is a single
  // constant string.
  const complexTieredExpr = `p <= 200000 ? ` +
  	`tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6 + img * 3 + img_o * 30 + ai * 10 + ao * 40)` +
  	` : ` +
  	`tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12 + img * 6 + img_o * 60 + ai * 20 + ao * 80)`
  539. func randomUsage(rng *rand.Rand) *dto.Usage {
  540. cacheRead := int(rng.Float64() * 50000)
  541. cacheCreate := int(rng.Float64() * 10000)
  542. imgIn := int(rng.Float64() * 5000)
  543. audioIn := int(rng.Float64() * 3000)
  544. prompt := int(rng.Float64()*300000) + cacheRead + cacheCreate + imgIn + audioIn
  545. imgOut := int(rng.Float64() * 2000)
  546. audioOut := int(rng.Float64() * 1000)
  547. completion := int(rng.Float64()*50000) + imgOut + audioOut
  548. return &dto.Usage{
  549. PromptTokens: prompt,
  550. CompletionTokens: completion,
  551. PromptTokensDetails: dto.InputTokenDetails{
  552. CachedTokens: cacheRead,
  553. CachedCreationTokens: cacheCreate,
  554. ImageTokens: imgIn,
  555. AudioTokens: audioIn,
  556. TextTokens: prompt - cacheRead - cacheCreate - imgIn - audioIn,
  557. },
  558. CompletionTokenDetails: dto.OutputTokenDetails{
  559. ImageTokens: imgOut,
  560. AudioTokens: audioOut,
  561. TextTokens: completion - imgOut - audioOut,
  562. },
  563. }
  564. }
  565. func TestStress_TieredBilling_1000Concurrent(t *testing.T) {
  566. usedVars := billingexpr.UsedVars(complexTieredExpr)
  567. var wg sync.WaitGroup
  568. errCh := make(chan string, 1000)
  569. for i := 0; i < 1000; i++ {
  570. wg.Add(1)
  571. go func(seed int64) {
  572. defer wg.Done()
  573. rng := rand.New(rand.NewSource(seed))
  574. for j := 0; j < 100; j++ {
  575. usage := randomUsage(rng)
  576. groupRatio := 0.5 + rng.Float64()*2.0
  577. params := BuildTieredTokenParams(usage, false, usedVars)
  578. cost, trace, err := billingexpr.RunExpr(complexTieredExpr, params)
  579. if err != nil {
  580. errCh <- err.Error()
  581. return
  582. }
  583. if cost < 0 {
  584. errCh <- "negative cost"
  585. return
  586. }
  587. quota := billingexpr.QuotaRound(cost / 1_000_000 * testQuotaPerUnit * groupRatio)
  588. if quota < 0 {
  589. errCh <- "negative quota"
  590. return
  591. }
  592. _ = trace.MatchedTier
  593. }
  594. }(int64(i))
  595. }
  596. wg.Wait()
  597. close(errCh)
  598. for e := range errCh {
  599. t.Fatal(e)
  600. }
  601. }
  602. func BenchmarkTieredBilling_ComplexExpr(b *testing.B) {
  603. rng := rand.New(rand.NewSource(42))
  604. usedVars := billingexpr.UsedVars(complexTieredExpr)
  605. usages := make([]*dto.Usage, 1000)
  606. for i := range usages {
  607. usages[i] = randomUsage(rng)
  608. }
  609. b.ResetTimer()
  610. for i := 0; i < b.N; i++ {
  611. usage := usages[i%len(usages)]
  612. params := BuildTieredTokenParams(usage, false, usedVars)
  613. billingexpr.RunExpr(complexTieredExpr, params)
  614. }
  615. }
  616. func BenchmarkRatioBilling_Equivalent(b *testing.B) {
  617. rng := rand.New(rand.NewSource(42))
  618. usages := make([]*dto.Usage, 1000)
  619. for i := range usages {
  620. usages[i] = randomUsage(rng)
  621. }
  622. b.ResetTimer()
  623. for i := 0; i < b.N; i++ {
  624. usage := usages[i%len(usages)]
  625. ratioQuota(usage, false, 1.5, 5.0, 0.1, 1.0, 1.5)
  626. }
  627. }
  628. func BenchmarkTieredBilling_Parallel(b *testing.B) {
  629. usedVars := billingexpr.UsedVars(complexTieredExpr)
  630. b.RunParallel(func(pb *testing.PB) {
  631. rng := rand.New(rand.NewSource(rand.Int63()))
  632. for pb.Next() {
  633. usage := randomUsage(rng)
  634. params := BuildTieredTokenParams(usage, false, usedVars)
  635. billingexpr.RunExpr(complexTieredExpr, params)
  636. }
  637. })
  638. }
  639. func BenchmarkRatioBilling_Parallel(b *testing.B) {
  640. b.RunParallel(func(pb *testing.PB) {
  641. rng := rand.New(rand.NewSource(rand.Int63()))
  642. for pb.Next() {
  643. usage := randomUsage(rng)
  644. ratioQuota(usage, false, 1.5, 5.0, 0.1, 1.0, 1.5)
  645. }
  646. })
  647. }