|
|
@@ -396,15 +396,20 @@ export namespace Session {
|
|
|
read: cachedInputTokens,
|
|
|
},
|
|
|
}
|
|
|
+
|
|
|
+ const costInfo =
|
|
|
+ input.model.cost?.context_over_200k && tokens.input + tokens.cache.read > 200_000
|
|
|
+ ? input.model.cost.context_over_200k
|
|
|
+ : input.model.cost
|
|
|
return {
|
|
|
cost: new Decimal(0)
|
|
|
- .add(new Decimal(tokens.input).mul(input.model.cost?.input ?? 0).div(1_000_000))
|
|
|
- .add(new Decimal(tokens.output).mul(input.model.cost?.output ?? 0).div(1_000_000))
|
|
|
- .add(new Decimal(tokens.cache.read).mul(input.model.cost?.cache_read ?? 0).div(1_000_000))
|
|
|
- .add(new Decimal(tokens.cache.write).mul(input.model.cost?.cache_write ?? 0).div(1_000_000))
|
|
|
+ .add(new Decimal(tokens.input).mul(costInfo?.input ?? 0).div(1_000_000))
|
|
|
+ .add(new Decimal(tokens.output).mul(costInfo?.output ?? 0).div(1_000_000))
|
|
|
+ .add(new Decimal(tokens.cache.read).mul(costInfo?.cache_read ?? 0).div(1_000_000))
|
|
|
+ .add(new Decimal(tokens.cache.write).mul(costInfo?.cache_write ?? 0).div(1_000_000))
|
|
|
// TODO: update models.dev to have better pricing model, for now:
|
|
|
// charge reasoning tokens at the same rate as output tokens
|
|
|
- .add(new Decimal(tokens.reasoning).mul(input.model.cost?.output ?? 0).div(1_000_000))
|
|
|
+ .add(new Decimal(tokens.reasoning).mul(costInfo?.output ?? 0).div(1_000_000))
|
|
|
.toNumber(),
|
|
|
tokens,
|
|
|
}
|