| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798 |
- package service
- import (
- "github.com/QuantumNous/new-api/dto"
- "github.com/QuantumNous/new-api/pkg/billingexpr"
- relaycommon "github.com/QuantumNous/new-api/relay/common"
- )
// TieredResultWrapper is a type alias for billingexpr.TieredResult, so the two
// names denote the same type and values are interchangeable without
// conversion. It lets service-layer code refer to the result type under a
// local name.
type TieredResultWrapper = billingexpr.TieredResult
- // BuildTieredTokenParams constructs billingexpr.TokenParams from a dto.Usage,
- // normalizing P and C so they mean "tokens not separately priced by the
- // expression". Sub-categories (cache, image, audio) are only subtracted
- // when the expression references them via their own variable.
- //
- // GPT-format APIs report prompt_tokens / completion_tokens as totals that
- // include all sub-categories (cache, image, audio). Claude-format APIs
- // report them as text-only. This function normalizes to text-only when
- // sub-categories are separately priced.
- func BuildTieredTokenParams(usage *dto.Usage, isClaudeUsageSemantic bool, usedVars map[string]bool) billingexpr.TokenParams {
- p := float64(usage.PromptTokens)
- c := float64(usage.CompletionTokens)
- cr := float64(usage.PromptTokensDetails.CachedTokens)
- ccTotal := float64(usage.PromptTokensDetails.CachedCreationTokens)
- cc1h := float64(usage.ClaudeCacheCreation1hTokens)
- img := float64(usage.PromptTokensDetails.ImageTokens)
- ai := float64(usage.PromptTokensDetails.AudioTokens)
- imgO := float64(usage.CompletionTokenDetails.ImageTokens)
- ao := float64(usage.CompletionTokenDetails.AudioTokens)
- if !isClaudeUsageSemantic {
- if usedVars["cr"] {
- p -= cr
- }
- if usedVars["cc"] || usedVars["cc1h"] {
- p -= ccTotal
- }
- if usedVars["img"] {
- p -= img
- }
- if usedVars["ai"] {
- p -= ai
- }
- if usedVars["img_o"] {
- c -= imgO
- }
- if usedVars["ao"] {
- c -= ao
- }
- }
- if p < 0 {
- p = 0
- }
- if c < 0 {
- c = 0
- }
- return billingexpr.TokenParams{
- P: p,
- C: c,
- CR: cr,
- CC: ccTotal - cc1h,
- CC1h: cc1h,
- Img: img,
- ImgO: imgO,
- AI: ai,
- AO: ao,
- }
- }
- // TryTieredSettle checks if the request uses tiered_expr billing and, if so,
- // computes the actual quota using the frozen BillingSnapshot. Returns:
- // - ok=true, quota, result when tiered billing applies
- // - ok=false, 0, nil when it doesn't (caller should fall through to existing logic)
- func TryTieredSettle(relayInfo *relaycommon.RelayInfo, params billingexpr.TokenParams) (ok bool, quota int, result *billingexpr.TieredResult) {
- snap := relayInfo.TieredBillingSnapshot
- if snap == nil || snap.BillingMode != "tiered_expr" {
- return false, 0, nil
- }
- requestInput := billingexpr.RequestInput{}
- if relayInfo.BillingRequestInput != nil {
- requestInput = *relayInfo.BillingRequestInput
- }
- tr, err := billingexpr.ComputeTieredQuotaWithRequest(snap, params, requestInput)
- if err != nil {
- quota = relayInfo.FinalPreConsumedQuota
- if quota <= 0 {
- quota = snap.EstimatedQuotaAfterGroup
- }
- return true, quota, nil
- }
- return true, tr.ActualQuotaAfterGroup, &tr
- }
|