Fix cost and token tracking between provider styles (#8954)

Matt Rubens, 2 months ago
parent commit 416fa5727e

+ 9 - 7
src/api/providers/anthropic.ts

@@ -230,17 +230,19 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 		}
 
 		if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) {
+			const { totalCost } = calculateApiCostAnthropic(
+				this.getModel().info,
+				inputTokens,
+				outputTokens,
+				cacheWriteTokens,
+				cacheReadTokens,
+			)
+
 			yield {
 				type: "usage",
 				inputTokens: 0,
 				outputTokens: 0,
-				totalCost: calculateApiCostAnthropic(
-					this.getModel().info,
-					inputTokens,
-					outputTokens,
-					cacheWriteTokens,
-					cacheReadTokens,
-				),
+				totalCost,
 			}
 		}
 	}
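
The pattern repeated across the provider handlers in this commit is the same everywhere: calculateApiCostAnthropic and calculateApiCostOpenAI now return a structured ApiCostResult (defined in the src/shared/cost.ts hunk below) instead of a bare number, so each call site destructures totalCost. A minimal sketch of the new call shape, with illustrative pricing values borrowed from the test fixtures at the bottom of this commit:

	import type { ModelInfo } from "@roo-code/types"
	import { calculateApiCostAnthropic } from "../../shared/cost"

	const info = {
		contextWindow: 200_000,
		supportsPromptCache: true,
		inputPrice: 3.0, // $ per million tokens
		outputPrice: 15.0,
		cacheWritesPrice: 3.75,
		cacheReadsPrice: 0.3,
	} as ModelInfo

	// Before this commit the call returned a number; now the result also
	// carries provider-aware token totals alongside the cost.
	const { totalInputTokens, totalOutputTokens, totalCost } = calculateApiCostAnthropic(info, 1000, 500, 2000, 3000)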

+ 2 - 1
src/api/providers/cerebras.ts

@@ -331,6 +331,7 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
 		const { info } = this.getModel()
 		// Use actual token usage from the last request
 		const { inputTokens, outputTokens } = this.lastUsage
-		return calculateApiCostOpenAI(info, inputTokens, outputTokens)
+		const { totalCost } = calculateApiCostOpenAI(info, inputTokens, outputTokens)
+		return totalCost
 	}
 }

+ 2 - 2
src/api/providers/deepinfra.ts

@@ -131,9 +131,9 @@ export class DeepInfraHandler extends RouterProvider implements SingleCompletion
 		const cacheWriteTokens = usage?.prompt_tokens_details?.cache_write_tokens || 0
 		const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || 0
 
-		const totalCost = modelInfo
+		const { totalCost } = modelInfo
 			? calculateApiCostOpenAI(modelInfo, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
-			: 0
+			: { totalCost: 0 }
 
 		return {
 			type: "usage",

+ 1 - 1
src/api/providers/groq.ts

@@ -64,7 +64,7 @@ export class GroqHandler extends BaseOpenAiCompatibleProvider<GroqModelId> {
 		const cacheWriteTokens = 0
 
 		// Calculate cost using OpenAI-compatible cost calculation
-		const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+		const { totalCost } = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
 
 		yield {
 			type: "usage",

+ 9 - 8
src/api/providers/lite-llm.ts

@@ -165,22 +165,23 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 					(lastUsage as any).prompt_cache_hit_tokens ||
 					0
 
+				const { totalCost } = calculateApiCostOpenAI(
+					info,
+					lastUsage.prompt_tokens || 0,
+					lastUsage.completion_tokens || 0,
+					cacheWriteTokens,
+					cacheReadTokens,
+				)
+
 				const usageData: ApiStreamUsageChunk = {
 					type: "usage",
 					inputTokens: lastUsage.prompt_tokens || 0,
 					outputTokens: lastUsage.completion_tokens || 0,
 					cacheWriteTokens: cacheWriteTokens > 0 ? cacheWriteTokens : undefined,
 					cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined,
+					totalCost,
 				}
 
-				usageData.totalCost = calculateApiCostOpenAI(
-					info,
-					usageData.inputTokens,
-					usageData.outputTokens,
-					usageData.cacheWriteTokens || 0,
-					usageData.cacheReadTokens || 0,
-				)
-
 				yield usageData
 			}
 		} catch (error) {

+ 2 - 2
src/api/providers/openai-native.ts

@@ -99,8 +99,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		const effectiveInfo = this.applyServiceTierPricing(model.info, effectiveTier)
 
 		// Pass total input tokens directly to calculateApiCostOpenAI
-		// The function handles subtracting both cache reads and writes internally (see shared/cost.ts:46)
-		const totalCost = calculateApiCostOpenAI(
+		// The function handles subtracting both cache reads and writes internally
+		const { totalCost } = calculateApiCostOpenAI(
 			effectiveInfo,
 			totalInputTokens,
 			totalOutputTokens,

+ 2 - 2
src/api/providers/requesty.ts

@@ -85,9 +85,9 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
 		const outputTokens = requestyUsage?.completion_tokens || 0
 		const cacheWriteTokens = requestyUsage?.prompt_tokens_details?.caching_tokens || 0
 		const cacheReadTokens = requestyUsage?.prompt_tokens_details?.cached_tokens || 0
-		const totalCost = modelInfo
+		const { totalCost } = modelInfo
 			? calculateApiCostOpenAI(modelInfo, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
-			: 0
+			: { totalCost: 0 }
 
 		return {
 			type: "usage",

+ 51 - 24
src/core/task/Task.ts

@@ -74,7 +74,7 @@ import { RooTerminalProcess } from "../../integrations/terminal/types"
 import { TerminalRegistry } from "../../integrations/terminal/TerminalRegistry"
 
 // utils
-import { calculateApiCostAnthropic } from "../../shared/cost"
+import { calculateApiCostAnthropic, calculateApiCostOpenAI } from "../../shared/cost"
 import { getWorkspacePath } from "../../utils/path"
 
 // prompts
@@ -1886,21 +1886,35 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 					}
 
 					const existingData = JSON.parse(this.clineMessages[lastApiReqIndex].text || "{}")
+
+					// Calculate total tokens and cost using provider-aware function
+					const modelId = getModelId(this.apiConfiguration)
+					const apiProtocol = getApiProtocol(this.apiConfiguration.apiProvider, modelId)
+
+					const costResult =
+						apiProtocol === "anthropic"
+							? calculateApiCostAnthropic(
+									this.api.getModel().info,
+									inputTokens,
+									outputTokens,
+									cacheWriteTokens,
+									cacheReadTokens,
+								)
+							: calculateApiCostOpenAI(
+									this.api.getModel().info,
+									inputTokens,
+									outputTokens,
+									cacheWriteTokens,
+									cacheReadTokens,
+								)
+
 					this.clineMessages[lastApiReqIndex].text = JSON.stringify({
 						...existingData,
-						tokensIn: inputTokens,
-						tokensOut: outputTokens,
+						tokensIn: costResult.totalInputTokens,
+						tokensOut: costResult.totalOutputTokens,
 						cacheWrites: cacheWriteTokens,
 						cacheReads: cacheReadTokens,
-						cost:
-							totalCost ??
-							calculateApiCostAnthropic(
-								this.api.getModel().info,
-								inputTokens,
-								outputTokens,
-								cacheWriteTokens,
-								cacheReadTokens,
-							),
+						cost: totalCost ?? costResult.totalCost,
 						cancelReason,
 						streamingFailedMessage,
 					} satisfies ClineApiReqInfo)
@@ -2104,21 +2118,34 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 									await this.updateClineMessage(apiReqMessage)
 								}
 
-								// Capture telemetry
+								// Capture telemetry with provider-aware cost calculation
+								const modelId = getModelId(this.apiConfiguration)
+								const apiProtocol = getApiProtocol(this.apiConfiguration.apiProvider, modelId)
+
+								// Use the appropriate cost function based on the API protocol
+								const costResult =
+									apiProtocol === "anthropic"
+										? calculateApiCostAnthropic(
+												this.api.getModel().info,
+												tokens.input,
+												tokens.output,
+												tokens.cacheWrite,
+												tokens.cacheRead,
+											)
+										: calculateApiCostOpenAI(
+												this.api.getModel().info,
+												tokens.input,
+												tokens.output,
+												tokens.cacheWrite,
+												tokens.cacheRead,
+											)
+
 								TelemetryService.instance.captureLlmCompletion(this.taskId, {
-									inputTokens: tokens.input,
-									outputTokens: tokens.output,
+									inputTokens: costResult.totalInputTokens,
+									outputTokens: costResult.totalOutputTokens,
 									cacheWriteTokens: tokens.cacheWrite,
 									cacheReadTokens: tokens.cacheRead,
-									cost:
-										tokens.total ??
-										calculateApiCostAnthropic(
-											this.api.getModel().info,
-											tokens.input,
-											tokens.output,
-											tokens.cacheWrite,
-											tokens.cacheRead,
-										),
+									cost: tokens.total ?? costResult.totalCost,
 								})
 							}
 						}
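
Both Task.ts call sites apply the same rule: when the API protocol is "anthropic", the usage report excludes cache tokens from inputTokens, so calculateApiCostAnthropic reconstructs the total; otherwise inputTokens already includes them and calculateApiCostOpenAI is used. A sketch of that dispatch as a standalone helper (computeCostResult is a hypothetical name for illustration, not part of the commit):

	import type { ModelInfo } from "@roo-code/types"
	import { calculateApiCostAnthropic, calculateApiCostOpenAI, type ApiCostResult } from "../../shared/cost"

	function computeCostResult(
		apiProtocol: "anthropic" | "openai",
		info: ModelInfo,
		inputTokens: number,
		outputTokens: number,
		cacheWriteTokens: number,
		cacheReadTokens: number,
	): ApiCostResult {
		// Anthropic-style usage: inputTokens excludes cache tokens.
		// OpenAI-style usage: inputTokens already includes them.
		return apiProtocol === "anthropic"
			? calculateApiCostAnthropic(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
			: calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
	}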

+ 32 - 6
src/shared/cost.ts

@@ -1,18 +1,31 @@
 import type { ModelInfo } from "@roo-code/types"
 
+export interface ApiCostResult {
+	totalInputTokens: number
+	totalOutputTokens: number
+	totalCost: number
+}
+
 function calculateApiCostInternal(
 	modelInfo: ModelInfo,
 	inputTokens: number,
 	outputTokens: number,
 	cacheCreationInputTokens: number,
 	cacheReadInputTokens: number,
-): number {
+	totalInputTokens: number,
+	totalOutputTokens: number,
+): ApiCostResult {
 	const cacheWritesCost = ((modelInfo.cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens
 	const cacheReadsCost = ((modelInfo.cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens
 	const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
 	const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
 	const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
-	return totalCost
+
+	return {
+		totalInputTokens,
+		totalOutputTokens,
+		totalCost,
+	}
 }
 
 // For Anthropic compliant usage, the input tokens count does NOT include the
@@ -23,13 +36,22 @@ export function calculateApiCostAnthropic(
 	outputTokens: number,
 	cacheCreationInputTokens?: number,
 	cacheReadInputTokens?: number,
-): number {
+): ApiCostResult {
+	const cacheCreation = cacheCreationInputTokens || 0
+	const cacheRead = cacheReadInputTokens || 0
+
+	// For Anthropic: inputTokens does NOT include cached tokens
+	// Total input = base input + cache creation + cache reads
+	const totalInputTokens = inputTokens + cacheCreation + cacheRead
+
 	return calculateApiCostInternal(
 		modelInfo,
 		inputTokens,
 		outputTokens,
-		cacheCreationInputTokens || 0,
-		cacheReadInputTokens || 0,
+		cacheCreation,
+		cacheRead,
+		totalInputTokens,
+		outputTokens,
 	)
 }
 
@@ -40,17 +62,21 @@ export function calculateApiCostOpenAI(
 	outputTokens: number,
 	cacheCreationInputTokens?: number,
 	cacheReadInputTokens?: number,
-): number {
+): ApiCostResult {
 	const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
 	const cacheReadInputTokensNum = cacheReadInputTokens || 0
 	const nonCachedInputTokens = Math.max(0, inputTokens - cacheCreationInputTokensNum - cacheReadInputTokensNum)
 
+	// For OpenAI: inputTokens ALREADY includes all tokens (cached + non-cached)
+	// So we pass the original inputTokens as the total
 	return calculateApiCostInternal(
 		modelInfo,
 		nonCachedInputTokens,
 		outputTokens,
 		cacheCreationInputTokensNum,
 		cacheReadInputTokensNum,
+		inputTokens,
+		outputTokens,
 	)
 }
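
A worked example of the two conventions, assuming the info object from the earlier sketch (input 3.0, output 15.0, cache writes 3.75, cache reads 0.3, all $ per million tokens). The same request (1,000 non-cached input tokens, 2,000 cache writes, 3,000 cache reads, 500 output tokens) reported both ways:

	// Anthropic-style: inputTokens excludes cache tokens, so totals are reconstructed.
	const a = calculateApiCostAnthropic(info, 1000, 500, 2000, 3000)
	// a.totalInputTokens === 6000 (1000 + 2000 + 3000)
	// a.totalCost === 0.003 + 0.0075 + 0.0075 + 0.0009 === 0.0189

	// OpenAI-style: inputTokens is already the total; the non-cached share is derived internally.
	const o = calculateApiCostOpenAI(info, 6000, 500, 2000, 3000)
	// o.totalInputTokens === 6000 (passed through unchanged)
	// o.totalCost === 0.0189 (identical cost; only the input convention differs)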
 

+ 6 - 9
src/shared/getApiMetrics.ts

@@ -80,15 +80,12 @@ export function getApiMetrics(messages: ClineMessage[]) {
 		if (message.type === "say" && message.say === "api_req_started" && message.text) {
 			try {
 				const parsedText: ParsedApiReqStartedTextType = JSON.parse(message.text)
-				const { tokensIn, tokensOut, cacheWrites, cacheReads, apiProtocol } = parsedText
-
-				// Calculate context tokens based on API protocol.
-				if (apiProtocol === "anthropic") {
-					result.contextTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-				} else {
-					// For OpenAI (or when protocol is not specified).
-					result.contextTokens = (tokensIn || 0) + (tokensOut || 0)
-				}
+				const { tokensIn, tokensOut } = parsedText
+
+				// Since tokensIn now stores TOTAL input tokens (including cache tokens),
+				// we no longer need to add cacheWrites and cacheReads separately.
+				// This applies to both Anthropic and OpenAI protocols.
+				result.contextTokens = (tokensIn || 0) + (tokensOut || 0)
 			} catch (error) {
 				console.error("Error parsing JSON:", error)
 				continue
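
With tokensIn persisted as the provider-aware total, the context-size arithmetic no longer needs a protocol branch. A small illustration using the numbers from the cost example above (hypothetical message payload, assuming the new totals are what got stored):

	// Both protocols now persist tokensIn = 6000 for the request above.
	const parsedText = { tokensIn: 6000, tokensOut: 500 }
	const contextTokens = (parsedText.tokensIn || 0) + (parsedText.tokensOut || 0) // 6500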

+ 64 - 32
src/utils/__tests__/cost.spec.ts

@@ -17,43 +17,51 @@ describe("Cost Utility", () => {
 		}
 
 		it("should calculate basic input/output costs correctly", () => {
-			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500)
+			const result = calculateApiCostAnthropic(mockModelInfo, 1000, 500)
 
 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Total: 0.003 + 0.0075 = 0.0105
-			expect(cost).toBe(0.0105)
+			expect(result.totalCost).toBe(0.0105)
+			expect(result.totalInputTokens).toBe(1000)
+			expect(result.totalOutputTokens).toBe(500)
 		})
 
 		it("should handle cache writes cost", () => {
-			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500, 2000)
+			const result = calculateApiCostAnthropic(mockModelInfo, 1000, 500, 2000)
 
 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Cache writes: (3.75 / 1_000_000) * 2000 = 0.0075
 			// Total: 0.003 + 0.0075 + 0.0075 = 0.018
-			expect(cost).toBeCloseTo(0.018, 6)
+			expect(result.totalCost).toBeCloseTo(0.018, 6)
+			expect(result.totalInputTokens).toBe(3000) // 1000 + 2000
+			expect(result.totalOutputTokens).toBe(500)
 		})
 
 		it("should handle cache reads cost", () => {
-			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500, undefined, 3000)
+			const result = calculateApiCostAnthropic(mockModelInfo, 1000, 500, undefined, 3000)
 
 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Cache reads: (0.3 / 1_000_000) * 3000 = 0.0009
 			// Total: 0.003 + 0.0075 + 0.0009 = 0.0114
-			expect(cost).toBe(0.0114)
+			expect(result.totalCost).toBe(0.0114)
+			expect(result.totalInputTokens).toBe(4000) // 1000 + 3000
+			expect(result.totalOutputTokens).toBe(500)
 		})
 
 		it("should handle all cost components together", () => {
-			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500, 2000, 3000)
+			const result = calculateApiCostAnthropic(mockModelInfo, 1000, 500, 2000, 3000)
 
 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Cache writes: (3.75 / 1_000_000) * 2000 = 0.0075
 			// Cache reads: (0.3 / 1_000_000) * 3000 = 0.0009
 			// Total: 0.003 + 0.0075 + 0.0075 + 0.0009 = 0.0189
-			expect(cost).toBe(0.0189)
+			expect(result.totalCost).toBe(0.0189)
+			expect(result.totalInputTokens).toBe(6000) // 1000 + 2000 + 3000
+			expect(result.totalOutputTokens).toBe(500)
 		})
 
 		it("should handle missing prices gracefully", () => {
@@ -63,22 +71,28 @@ describe("Cost Utility", () => {
 				supportsPromptCache: true,
 			}
 
-			const cost = calculateApiCostAnthropic(modelWithoutPrices, 1000, 500, 2000, 3000)
-			expect(cost).toBe(0)
+			const result = calculateApiCostAnthropic(modelWithoutPrices, 1000, 500, 2000, 3000)
+			expect(result.totalCost).toBe(0)
+			expect(result.totalInputTokens).toBe(6000) // 1000 + 2000 + 3000
+			expect(result.totalOutputTokens).toBe(500)
 		})
 
 		it("should handle zero tokens", () => {
-			const cost = calculateApiCostAnthropic(mockModelInfo, 0, 0, 0, 0)
-			expect(cost).toBe(0)
+			const result = calculateApiCostAnthropic(mockModelInfo, 0, 0, 0, 0)
+			expect(result.totalCost).toBe(0)
+			expect(result.totalInputTokens).toBe(0)
+			expect(result.totalOutputTokens).toBe(0)
 		})
 
 		it("should handle undefined cache values", () => {
-			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500)
+			const result = calculateApiCostAnthropic(mockModelInfo, 1000, 500)
 
 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Total: 0.003 + 0.0075 = 0.0105
-			expect(cost).toBe(0.0105)
+			expect(result.totalCost).toBe(0.0105)
+			expect(result.totalInputTokens).toBe(1000)
+			expect(result.totalOutputTokens).toBe(500)
 		})
 
 		it("should handle missing cache prices", () => {
@@ -88,13 +102,15 @@ describe("Cost Utility", () => {
 				cacheReadsPrice: undefined,
 			}
 
-			const cost = calculateApiCostAnthropic(modelWithoutCachePrices, 1000, 500, 2000, 3000)
+			const result = calculateApiCostAnthropic(modelWithoutCachePrices, 1000, 500, 2000, 3000)
 
 			// Should only include input and output costs
 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Total: 0.003 + 0.0075 = 0.0105
-			expect(cost).toBe(0.0105)
+			expect(result.totalCost).toBe(0.0105)
+			expect(result.totalInputTokens).toBe(6000) // 1000 + 2000 + 3000
+			expect(result.totalOutputTokens).toBe(500)
 		})
 	})
 
@@ -110,43 +126,51 @@ describe("Cost Utility", () => {
 		}
 
 		it("should calculate basic input/output costs correctly", () => {
-			const cost = calculateApiCostOpenAI(mockModelInfo, 1000, 500)
+			const result = calculateApiCostOpenAI(mockModelInfo, 1000, 500)
 
 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Total: 0.003 + 0.0075 = 0.0105
-			expect(cost).toBe(0.0105)
+			expect(result.totalCost).toBe(0.0105)
+			expect(result.totalInputTokens).toBe(1000)
+			expect(result.totalOutputTokens).toBe(500)
 		})
 
 		it("should handle cache writes cost", () => {
-			const cost = calculateApiCostOpenAI(mockModelInfo, 3000, 500, 2000)
+			const result = calculateApiCostOpenAI(mockModelInfo, 3000, 500, 2000)
 
 			// Input cost: (3.0 / 1_000_000) * (3000 - 2000) = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Cache writes: (3.75 / 1_000_000) * 2000 = 0.0075
 			// Total: 0.003 + 0.0075 + 0.0075 = 0.018
-			expect(cost).toBeCloseTo(0.018, 6)
+			expect(result.totalCost).toBeCloseTo(0.018, 6)
+			expect(result.totalInputTokens).toBe(3000) // Total already includes cache
+			expect(result.totalOutputTokens).toBe(500)
 		})
 
 		it("should handle cache reads cost", () => {
-			const cost = calculateApiCostOpenAI(mockModelInfo, 4000, 500, undefined, 3000)
+			const result = calculateApiCostOpenAI(mockModelInfo, 4000, 500, undefined, 3000)
 
 			// Input cost: (3.0 / 1_000_000) * (4000 - 3000) = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Cache reads: (0.3 / 1_000_000) * 3000 = 0.0009
 			// Total: 0.003 + 0.0075 + 0.0009 = 0.0114
-			expect(cost).toBe(0.0114)
+			expect(result.totalCost).toBe(0.0114)
+			expect(result.totalInputTokens).toBe(4000) // Total already includes cache
+			expect(result.totalOutputTokens).toBe(500)
 		})
 
 		it("should handle all cost components together", () => {
-			const cost = calculateApiCostOpenAI(mockModelInfo, 6000, 500, 2000, 3000)
+			const result = calculateApiCostOpenAI(mockModelInfo, 6000, 500, 2000, 3000)
 
 			// Input cost: (3.0 / 1_000_000) * (6000 - 2000 - 3000) = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Cache writes: (3.75 / 1_000_000) * 2000 = 0.0075
 			// Cache reads: (0.3 / 1_000_000) * 3000 = 0.0009
 			// Total: 0.003 + 0.0075 + 0.0075 + 0.0009 = 0.0189
-			expect(cost).toBe(0.0189)
+			expect(result.totalCost).toBe(0.0189)
+			expect(result.totalInputTokens).toBe(6000) // Total already includes cache
+			expect(result.totalOutputTokens).toBe(500)
 		})
 
 		it("should handle missing prices gracefully", () => {
@@ -156,22 +180,28 @@ describe("Cost Utility", () => {
 				supportsPromptCache: true,
 			}
 
-			const cost = calculateApiCostOpenAI(modelWithoutPrices, 1000, 500, 2000, 3000)
-			expect(cost).toBe(0)
+			const result = calculateApiCostOpenAI(modelWithoutPrices, 1000, 500, 2000, 3000)
+			expect(result.totalCost).toBe(0)
+			expect(result.totalInputTokens).toBe(1000) // Total already includes cache
+			expect(result.totalOutputTokens).toBe(500)
 		})
 
 		it("should handle zero tokens", () => {
-			const cost = calculateApiCostOpenAI(mockModelInfo, 0, 0, 0, 0)
-			expect(cost).toBe(0)
+			const result = calculateApiCostOpenAI(mockModelInfo, 0, 0, 0, 0)
+			expect(result.totalCost).toBe(0)
+			expect(result.totalInputTokens).toBe(0)
+			expect(result.totalOutputTokens).toBe(0)
 		})
 
 		it("should handle undefined cache values", () => {
-			const cost = calculateApiCostOpenAI(mockModelInfo, 1000, 500)
+			const result = calculateApiCostOpenAI(mockModelInfo, 1000, 500)
 
 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Total: 0.003 + 0.0075 = 0.0105
-			expect(cost).toBe(0.0105)
+			expect(result.totalCost).toBe(0.0105)
+			expect(result.totalInputTokens).toBe(1000)
+			expect(result.totalOutputTokens).toBe(500)
 		})
 
 		it("should handle missing cache prices", () => {
@@ -181,13 +211,15 @@ describe("Cost Utility", () => {
 				cacheReadsPrice: undefined,
 			}
 
-			const cost = calculateApiCostOpenAI(modelWithoutCachePrices, 6000, 500, 2000, 3000)
+			const result = calculateApiCostOpenAI(modelWithoutCachePrices, 6000, 500, 2000, 3000)
 
 			// Should only include input and output costs
 			// Input cost: (3.0 / 1_000_000) * (6000 - 2000 - 3000) = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
 			// Total: 0.003 + 0.0075 = 0.0105
-			expect(cost).toBe(0.0105)
+			expect(result.totalCost).toBe(0.0105)
+			expect(result.totalInputTokens).toBe(6000) // Total already includes cache
+			expect(result.totalOutputTokens).toBe(500)
 		})
 	})
 })