tiered_settle.go 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. package service
  2. import (
  3. "github.com/QuantumNous/new-api/dto"
  4. "github.com/QuantumNous/new-api/pkg/billingexpr"
  5. relaycommon "github.com/QuantumNous/new-api/relay/common"
  6. )
  7. // TieredResultWrapper is a type alias (not a distinct wrapper type) for billingexpr.TieredResult, exposing it under a service-layer name.
  8. type TieredResultWrapper = billingexpr.TieredResult
  9. // BuildTieredTokenParams constructs billingexpr.TokenParams from a dto.Usage,
  10. // normalizing P and C so they mean "tokens not separately priced by the
  11. // expression". Sub-categories (cache, image, audio) are only subtracted
  12. // when the expression references them via their own variable.
  13. //
  14. // GPT-format APIs report prompt_tokens / completion_tokens as totals that
  15. // include all sub-categories (cache, image, audio). Claude-format APIs
  16. // report them as text-only. This function normalizes to text-only when
  17. // sub-categories are separately priced.
  18. func BuildTieredTokenParams(usage *dto.Usage, isClaudeUsageSemantic bool, usedVars map[string]bool) billingexpr.TokenParams {
  19. p := float64(usage.PromptTokens)
  20. c := float64(usage.CompletionTokens)
  21. cr := float64(usage.PromptTokensDetails.CachedTokens)
  22. ccTotal := float64(usage.PromptTokensDetails.CachedCreationTokens)
  23. cc1h := float64(usage.ClaudeCacheCreation1hTokens)
  24. img := float64(usage.PromptTokensDetails.ImageTokens)
  25. ai := float64(usage.PromptTokensDetails.AudioTokens)
  26. imgO := float64(usage.CompletionTokenDetails.ImageTokens)
  27. ao := float64(usage.CompletionTokenDetails.AudioTokens)
  28. if !isClaudeUsageSemantic {
  29. if usedVars["cr"] {
  30. p -= cr
  31. }
  32. if usedVars["cc"] || usedVars["cc1h"] {
  33. p -= ccTotal
  34. }
  35. if usedVars["img"] {
  36. p -= img
  37. }
  38. if usedVars["ai"] {
  39. p -= ai
  40. }
  41. if usedVars["img_o"] {
  42. c -= imgO
  43. }
  44. if usedVars["ao"] {
  45. c -= ao
  46. }
  47. }
  48. if p < 0 {
  49. p = 0
  50. }
  51. if c < 0 {
  52. c = 0
  53. }
  54. return billingexpr.TokenParams{
  55. P: p,
  56. C: c,
  57. CR: cr,
  58. CC: ccTotal - cc1h,
  59. CC1h: cc1h,
  60. Img: img,
  61. ImgO: imgO,
  62. AI: ai,
  63. AO: ao,
  64. }
  65. }
  66. // TryTieredSettle checks if the request uses tiered_expr billing and, if so,
  67. // computes the actual quota using the frozen BillingSnapshot. Returns:
  68. // - ok=true, quota, result when tiered billing applies
  69. // - ok=false, 0, nil when it doesn't (caller should fall through to existing logic)
  70. func TryTieredSettle(relayInfo *relaycommon.RelayInfo, params billingexpr.TokenParams) (ok bool, quota int, result *billingexpr.TieredResult) {
  71. snap := relayInfo.TieredBillingSnapshot
  72. if snap == nil || snap.BillingMode != "tiered_expr" {
  73. return false, 0, nil
  74. }
  75. requestInput := billingexpr.RequestInput{}
  76. if relayInfo.BillingRequestInput != nil {
  77. requestInput = *relayInfo.BillingRequestInput
  78. }
  79. tr, err := billingexpr.ComputeTieredQuotaWithRequest(snap, params, requestInput)
  80. if err != nil {
  81. quota = relayInfo.FinalPreConsumedQuota
  82. if quota <= 0 {
  83. quota = snap.EstimatedQuotaAfterGroup
  84. }
  85. return true, quota, nil
  86. }
  87. return true, tr.ActualQuotaAfterGroup, &tr
  88. }