
Merge pull request #1451 from dtrugman/feat/add-openai-style-cost-calculation

Add openai style cost calculation
Matt Rubens 9 months ago
Parent
Commit
6644202b55

+ 18 - 8
src/api/providers/__tests__/requesty.test.ts

@@ -22,8 +22,10 @@ describe("RequestyHandler", () => {
 			contextWindow: 4000,
 			supportsPromptCache: false,
 			supportsImages: true,
-			inputPrice: 0,
-			outputPrice: 0,
+			inputPrice: 1,
+			outputPrice: 10,
+			cacheReadsPrice: 0.1,
+			cacheWritesPrice: 1.5,
 		},
 		openAiStreamingEnabled: true,
 		includeMaxTokens: true, // Add this to match the implementation
@@ -83,8 +85,12 @@ describe("RequestyHandler", () => {
 						yield {
 							choices: [{ delta: { content: " world" } }],
 							usage: {
-								prompt_tokens: 10,
-								completion_tokens: 5,
+								prompt_tokens: 30,
+								completion_tokens: 10,
+								prompt_tokens_details: {
+									cached_tokens: 15,
+									caching_tokens: 5,
+								},
 							},
 						}
 					},
@@ -105,10 +111,11 @@ describe("RequestyHandler", () => {
 					{ type: "text", text: " world" },
 					{
 						type: "usage",
-						inputTokens: 10,
-						outputTokens: 5,
-						cacheWriteTokens: undefined,
-						cacheReadTokens: undefined,
+						inputTokens: 30,
+						outputTokens: 10,
+						cacheWriteTokens: 5,
+						cacheReadTokens: 15,
+						totalCost: 0.000119, // (10 * 1 / 1,000,000) + (5 * 1.5 / 1,000,000) + (15 * 0.1 / 1,000,000) + (10 * 10 / 1,000,000)
 					},
 				])
 
@@ -182,6 +189,9 @@ describe("RequestyHandler", () => {
 						type: "usage",
 						inputTokens: 10,
 						outputTokens: 5,
+						cacheWriteTokens: 0,
+						cacheReadTokens: 0,
+						totalCost: 0.00006, // (10 * 1 / 1,000,000) + (5 * 10 / 1,000,000)
 					},
 				])
 
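For reference, the expected totalCost values above follow the OpenAI accounting convention, in which prompt_tokens already includes the cached tokens. A minimal TypeScript sketch of the 0.000119 case, using this test's mock pricing (input $1/M, output $10/M, cache reads $0.10/M, cache writes $1.50/M); the variable names are illustrative only:

	// Non-cached input: 30 prompt_tokens - 5 cache writes - 15 cache reads = 10
	const inputCost = (10 * 1) / 1_000_000 // 0.00001
	const cacheWriteCost = (5 * 1.5) / 1_000_000 // 0.0000075
	const cacheReadCost = (15 * 0.1) / 1_000_000 // 0.0000015
	const outputCost = (10 * 10) / 1_000_000 // 0.0001
	const totalCost = inputCost + cacheWriteCost + cacheReadCost + outputCost // 0.000119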

+ 3 - 3
src/api/providers/openai.ts

@@ -116,7 +116,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 					}
 				}
 				if (chunk.usage) {
-					yield this.processUsageMetrics(chunk.usage)
+					yield this.processUsageMetrics(chunk.usage, modelInfo)
 				}
 			}
 		} else {
@@ -139,11 +139,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				type: "text",
 				text: response.choices[0]?.message.content || "",
 			}
-			yield this.processUsageMetrics(response.usage)
+			yield this.processUsageMetrics(response.usage, modelInfo)
 		}
 	}
 
-	protected processUsageMetrics(usage: any): ApiStreamUsageChunk {
+	protected processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
 		return {
 			type: "usage",
 			inputTokens: usage?.prompt_tokens || 0,

+ 26 - 6
src/api/providers/requesty.ts

@@ -1,9 +1,20 @@
 import axios from "axios"
 
 import { ModelInfo, requestyModelInfoSaneDefaults, requestyDefaultModelId } from "../../shared/api"
-import { parseApiPrice } from "../../utils/cost"
+import { calculateApiCostOpenAI, parseApiPrice } from "../../utils/cost"
 import { ApiStreamUsageChunk } from "../transform/stream"
 import { OpenAiHandler, OpenAiHandlerOptions } from "./openai"
+import OpenAI from "openai"
+
+// Requesty usage includes extra fields for Anthropic-style prompt caching and cost reporting.
+// Cast the usage object so the prompt token details can be read safely.
+interface RequestyUsage extends OpenAI.CompletionUsage {
+	prompt_tokens_details?: {
+		caching_tokens?: number
+		cached_tokens?: number
+	}
+	total_cost?: number
+}
 
 export class RequestyHandler extends OpenAiHandler {
 	constructor(options: OpenAiHandlerOptions) {
@@ -27,13 +38,22 @@ export class RequestyHandler extends OpenAiHandler {
 		}
 	}
 
-	protected override processUsageMetrics(usage: any): ApiStreamUsageChunk {
+	protected override processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
+		const requestyUsage = usage as RequestyUsage
+		const inputTokens = requestyUsage?.prompt_tokens || 0
+		const outputTokens = requestyUsage?.completion_tokens || 0
+		const cacheWriteTokens = requestyUsage?.prompt_tokens_details?.caching_tokens || 0
+		const cacheReadTokens = requestyUsage?.prompt_tokens_details?.cached_tokens || 0
+		const totalCost = modelInfo
+			? calculateApiCostOpenAI(modelInfo, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+			: 0
 		return {
 			type: "usage",
-			inputTokens: usage?.prompt_tokens || 0,
-			outputTokens: usage?.completion_tokens || 0,
-			cacheWriteTokens: usage?.cache_creation_input_tokens,
-			cacheReadTokens: usage?.cache_read_input_tokens,
+			inputTokens: inputTokens,
+			outputTokens: outputTokens,
+			cacheWriteTokens: cacheWriteTokens,
+			cacheReadTokens: cacheReadTokens,
+			totalCost: totalCost,
 		}
 	}
 }
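To make the field mapping concrete, here is a hypothetical usage payload and the chunk the override above would emit for it (assuming RequestyUsage were in scope; total_tokens is required by OpenAI.CompletionUsage and its value here is illustrative):

	const usage: RequestyUsage = {
		prompt_tokens: 30, // includes both cached and cache-write tokens
		completion_tokens: 10,
		total_tokens: 40,
		prompt_tokens_details: {
			cached_tokens: 15, // served from the prompt cache -> cacheReadTokens
			caching_tokens: 5, // written to the prompt cache -> cacheWriteTokens
		},
	}
	// processUsageMetrics(usage, modelInfo) yields:
	// { type: "usage", inputTokens: 30, outputTokens: 10,
	//   cacheWriteTokens: 5, cacheReadTokens: 15,
	//   totalCost: calculateApiCostOpenAI(modelInfo, 30, 10, 5, 15) }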

+ 2 - 2
src/api/providers/vscode-lm.ts

@@ -2,7 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import * as vscode from "vscode"
 
 import { SingleCompletionHandler } from "../"
-import { calculateApiCost } from "../../utils/cost"
+import { calculateApiCostAnthropic } from "../../utils/cost"
 import { ApiStream } from "../transform/stream"
 import { convertToVsCodeLmMessages } from "../transform/vscode-lm-format"
 import { SELECTOR_SEPARATOR, stringifyVsCodeLmModelSelector } from "../../shared/vsCodeSelectorUtils"
@@ -462,7 +462,7 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 				type: "usage",
 				inputTokens: totalInputTokens,
 				outputTokens: totalOutputTokens,
-				totalCost: calculateApiCost(this.getModel().info, totalInputTokens, totalOutputTokens),
+				totalCost: calculateApiCostAnthropic(this.getModel().info, totalInputTokens, totalOutputTokens),
 			}
 		} catch (error: unknown) {
 			this.ensureCleanState()

+ 3 - 3
src/core/Cline.ts

@@ -55,7 +55,7 @@ import { ClineAskResponse } from "../shared/WebviewMessage"
 import { GlobalFileNames } from "../shared/globalFileNames"
 import { defaultModeSlug, getModeBySlug, getFullModeDetails } from "../shared/modes"
 import { EXPERIMENT_IDS, experiments as Experiments, ExperimentId } from "../shared/experiments"
-import { calculateApiCost } from "../utils/cost"
+import { calculateApiCostAnthropic } from "../utils/cost"
 import { fileExistsAtPath } from "../utils/fs"
 import { arePathsEqual, getReadablePath } from "../utils/path"
 import { parseMentions } from "./mentions"
@@ -875,7 +875,7 @@ export class Cline {
 			//  The way this agentic loop works: Cline is given a task, which he then works on by calling tools. Unless there's an attempt_completion call, we keep responding back to him with his tools' responses until he either calls attempt_completion or stops using tools. If he stops using tools, we ask him to consider whether he has completed the task and then call attempt_completion; otherwise he proceeds with completing the task.
 			// There is a MAX_REQUESTS_PER_TASK limit to prevent infinite requests, but Cline is prompted to finish the task as efficiently as he can.
 
-			//const totalCost = this.calculateApiCost(totalInputTokens, totalOutputTokens)
+			//const totalCost = this.calculateApiCostAnthropic(totalInputTokens, totalOutputTokens)
 			if (didEndLoop) {
 				// For now a task never 'completes'. This will only happen if the user hits max requests and denies resetting the count.
 				//this.say("task_completed", `Task completed. Total API usage cost: ${totalCost}`)
@@ -3173,7 +3173,7 @@ export class Cline {
 					cacheReads: cacheReadTokens,
 					cost:
 						totalCost ??
-						calculateApiCost(
+						calculateApiCostAnthropic(
 							this.api.getModel().info,
 							inputTokens,
 							outputTokens,

+ 103 - 10
src/utils/__tests__/cost.test.ts

@@ -1,8 +1,8 @@
-import { calculateApiCost } from "../cost"
+import { calculateApiCostAnthropic, calculateApiCostOpenAI } from "../cost"
 import { ModelInfo } from "../../shared/api"
 
 describe("Cost Utility", () => {
-	describe("calculateApiCost", () => {
+	describe("calculateApiCostAnthropic", () => {
 		const mockModelInfo: ModelInfo = {
 			maxTokens: 8192,
 			contextWindow: 200_000,
@@ -14,7 +14,7 @@ describe("Cost Utility", () => {
 		}
 
 		it("should calculate basic input/output costs correctly", () => {
-			const cost = calculateApiCost(mockModelInfo, 1000, 500)
+			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500)
 
 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
@@ -23,7 +23,7 @@ describe("Cost Utility", () => {
 		})
 
 		it("should handle cache writes cost", () => {
-			const cost = calculateApiCost(mockModelInfo, 1000, 500, 2000)
+			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500, 2000)
 
 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
@@ -33,7 +33,7 @@ describe("Cost Utility", () => {
 		})
 
 		it("should handle cache reads cost", () => {
-			const cost = calculateApiCost(mockModelInfo, 1000, 500, undefined, 3000)
+			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500, undefined, 3000)
 
 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
@@ -43,7 +43,7 @@ describe("Cost Utility", () => {
 		})
 
 		it("should handle all cost components together", () => {
-			const cost = calculateApiCost(mockModelInfo, 1000, 500, 2000, 3000)
+			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500, 2000, 3000)
 
 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
@@ -60,17 +60,17 @@ describe("Cost Utility", () => {
 				supportsPromptCache: true,
 			}
 
-			const cost = calculateApiCost(modelWithoutPrices, 1000, 500, 2000, 3000)
+			const cost = calculateApiCostAnthropic(modelWithoutPrices, 1000, 500, 2000, 3000)
 			expect(cost).toBe(0)
 		})
 
 		it("should handle zero tokens", () => {
-			const cost = calculateApiCost(mockModelInfo, 0, 0, 0, 0)
+			const cost = calculateApiCostAnthropic(mockModelInfo, 0, 0, 0, 0)
 			expect(cost).toBe(0)
 		})
 
 		it("should handle undefined cache values", () => {
-			const cost = calculateApiCost(mockModelInfo, 1000, 500)
+			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500)
 
 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
@@ -85,7 +85,7 @@ describe("Cost Utility", () => {
 				cacheReadsPrice: undefined,
 			}
 
-			const cost = calculateApiCost(modelWithoutCachePrices, 1000, 500, 2000, 3000)
+			const cost = calculateApiCostAnthropic(modelWithoutCachePrices, 1000, 500, 2000, 3000)
 
 			// Should only include input and output costs
 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
@@ -94,4 +94,97 @@ describe("Cost Utility", () => {
 			expect(cost).toBe(0.0105)
 		})
 	})
+
+	describe("calculateApiCostOpenAI", () => {
+		const mockModelInfo: ModelInfo = {
+			maxTokens: 8192,
+			contextWindow: 200_000,
+			supportsPromptCache: true,
+			inputPrice: 3.0, // $3 per million tokens
+			outputPrice: 15.0, // $15 per million tokens
+			cacheWritesPrice: 3.75, // $3.75 per million tokens
+			cacheReadsPrice: 0.3, // $0.30 per million tokens
+		}
+
+		it("should calculate basic input/output costs correctly", () => {
+			const cost = calculateApiCostOpenAI(mockModelInfo, 1000, 500)
+
+			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
+			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
+			// Total: 0.003 + 0.0075 = 0.0105
+			expect(cost).toBe(0.0105)
+		})
+
+		it("should handle cache writes cost", () => {
+			const cost = calculateApiCostOpenAI(mockModelInfo, 3000, 500, 2000)
+
+			// Input cost: (3.0 / 1_000_000) * (3000 - 2000) = 0.003
+			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
+			// Cache writes: (3.75 / 1_000_000) * 2000 = 0.0075
+			// Total: 0.003 + 0.0075 + 0.0075 = 0.018
+			expect(cost).toBeCloseTo(0.018, 6)
+		})
+
+		it("should handle cache reads cost", () => {
+			const cost = calculateApiCostOpenAI(mockModelInfo, 4000, 500, undefined, 3000)
+
+			// Input cost: (3.0 / 1_000_000) * (4000 - 3000) = 0.003
+			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
+			// Cache reads: (0.3 / 1_000_000) * 3000 = 0.0009
+			// Total: 0.003 + 0.0075 + 0.0009 = 0.0114
+			expect(cost).toBe(0.0114)
+		})
+
+		it("should handle all cost components together", () => {
+			const cost = calculateApiCostOpenAI(mockModelInfo, 6000, 500, 2000, 3000)
+
+			// Input cost: (3.0 / 1_000_000) * (6000 - 2000 - 3000) = 0.003
+			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
+			// Cache writes: (3.75 / 1_000_000) * 2000 = 0.0075
+			// Cache reads: (0.3 / 1_000_000) * 3000 = 0.0009
+			// Total: 0.003 + 0.0075 + 0.0075 + 0.0009 = 0.0189
+			expect(cost).toBe(0.0189)
+		})
+
+		it("should handle missing prices gracefully", () => {
+			const modelWithoutPrices: ModelInfo = {
+				maxTokens: 8192,
+				contextWindow: 200_000,
+				supportsPromptCache: true,
+			}
+
+			const cost = calculateApiCostOpenAI(modelWithoutPrices, 1000, 500, 2000, 3000)
+			expect(cost).toBe(0)
+		})
+
+		it("should handle zero tokens", () => {
+			const cost = calculateApiCostOpenAI(mockModelInfo, 0, 0, 0, 0)
+			expect(cost).toBe(0)
+		})
+
+		it("should handle undefined cache values", () => {
+			const cost = calculateApiCostOpenAI(mockModelInfo, 1000, 500)
+
+			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
+			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
+			// Total: 0.003 + 0.0075 = 0.0105
+			expect(cost).toBe(0.0105)
+		})
+
+		it("should handle missing cache prices", () => {
+			const modelWithoutCachePrices: ModelInfo = {
+				...mockModelInfo,
+				cacheWritesPrice: undefined,
+				cacheReadsPrice: undefined,
+			}
+
+			const cost = calculateApiCostOpenAI(modelWithoutCachePrices, 6000, 500, 2000, 3000)
+
+			// Should only include input and output costs
+			// Input cost: (3.0 / 1_000_000) * (6000 - 2000 - 3000) = 0.003
+			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
+			// Total: 0.003 + 0.0075 = 0.0105
+			expect(cost).toBe(0.0105)
+		})
+	})
 })

+ 44 - 13
src/utils/cost.ts

@@ -1,26 +1,57 @@
 import { ModelInfo } from "../shared/api"
 
-export function calculateApiCost(
+function calculateApiCostInternal(
 	modelInfo: ModelInfo,
 	inputTokens: number,
 	outputTokens: number,
-	cacheCreationInputTokens?: number,
-	cacheReadInputTokens?: number,
+	cacheCreationInputTokens: number,
+	cacheReadInputTokens: number,
 ): number {
-	const modelCacheWritesPrice = modelInfo.cacheWritesPrice
-	let cacheWritesCost = 0
-	if (cacheCreationInputTokens && modelCacheWritesPrice) {
-		cacheWritesCost = (modelCacheWritesPrice / 1_000_000) * cacheCreationInputTokens
-	}
-	const modelCacheReadsPrice = modelInfo.cacheReadsPrice
-	let cacheReadsCost = 0
-	if (cacheReadInputTokens && modelCacheReadsPrice) {
-		cacheReadsCost = (modelCacheReadsPrice / 1_000_000) * cacheReadInputTokens
-	}
+	const cacheWritesCost = ((modelInfo.cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens
+	const cacheReadsCost = ((modelInfo.cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens
 	const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
 	const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
 	const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
 	return totalCost
 }
 
+// For Anthropic compliant usage, the input tokens count does NOT include the cached tokens
+export function calculateApiCostAnthropic(
+	modelInfo: ModelInfo,
+	inputTokens: number,
+	outputTokens: number,
+	cacheCreationInputTokens?: number,
+	cacheReadInputTokens?: number,
+): number {
+	const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
+	const cacheReadInputTokensNum = cacheReadInputTokens || 0
+	return calculateApiCostInternal(
+		modelInfo,
+		inputTokens,
+		outputTokens,
+		cacheCreationInputTokensNum,
+		cacheReadInputTokensNum,
+	)
+}
+
+// For OpenAI compliant usage, the input tokens count INCLUDES the cached tokens
+export function calculateApiCostOpenAI(
+	modelInfo: ModelInfo,
+	inputTokens: number,
+	outputTokens: number,
+	cacheCreationInputTokens?: number,
+	cacheReadInputTokens?: number,
+): number {
+	const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
+	const cacheReadInputTokensNum = cacheReadInputTokens || 0
+	const nonCachedInputTokens = Math.max(0, inputTokens - cacheCreationInputTokensNum - cacheReadInputTokensNum)
+	return calculateApiCostInternal(
+		modelInfo,
+		nonCachedInputTokens,
+		outputTokens,
+		cacheCreationInputTokensNum,
+		cacheReadInputTokensNum,
+	)
+}
+
 export const parseApiPrice = (price: any) => (price ? parseFloat(price) * 1_000_000 : undefined)
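
The only difference between the two exported helpers is the input-token convention. Anthropic-style usage reports input tokens excluding the cached tokens, so they are priced as given; OpenAI-style usage reports prompt tokens including the cached tokens, so the cached portion is subtracted before pricing the non-cached input. A worked contrast, assuming info is the mock ModelInfo from the tests above ($3/M input, $15/M output, $3.75/M cache writes, $0.30/M cache reads):

	// Same underlying usage expressed in each provider's convention:
	// 1000 non-cached input, 500 output, 2000 cache writes, 3000 cache reads.
	calculateApiCostAnthropic(info, 1000, 500, 2000, 3000)
	// = (3.0 * 1000 + 15.0 * 500 + 3.75 * 2000 + 0.3 * 3000) / 1_000_000 = 0.0189
	calculateApiCostOpenAI(info, 6000, 500, 2000, 3000)
	// = (3.0 * (6000 - 2000 - 3000) + 15.0 * 500 + 3.75 * 2000 + 0.3 * 3000) / 1_000_000 = 0.0189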