Browse source

fix: stabilize token/cache accounting across providers and routed Roo metadata (#11448)

Hannes Rudolph 7 hours ago
parent
commit
3965cd9743
35 changed files with 1272 additions and 162 deletions
  1. src/api/providers/__tests__/bedrock.spec.ts (+161 -0)
  2. src/api/providers/__tests__/gemini.spec.ts (+164 -0)
  3. src/api/providers/__tests__/native-ollama.spec.ts (+7 -1)
  4. src/api/providers/__tests__/openai-native-usage.spec.ts (+197 -0)
  5. src/api/providers/__tests__/openai-usage-tracking.spec.ts (+201 -0)
  6. src/api/providers/__tests__/requesty.spec.ts (+2 -0)
  7. src/api/providers/__tests__/roo.spec.ts (+67 -2)
  8. src/api/providers/__tests__/vercel-ai-gateway.spec.ts (+4 -0)
  9. src/api/providers/anthropic-vertex.ts (+3 -0)
  10. src/api/providers/anthropic.ts (+3 -0)
  11. src/api/providers/azure.ts (+21 -6)
  12. src/api/providers/baseten.ts (+16 -3)
  13. src/api/providers/bedrock.ts (+21 -7)
  14. src/api/providers/deepseek.ts (+23 -7)
  15. src/api/providers/fireworks.ts (+23 -7)
  16. src/api/providers/gemini.ts (+13 -2)
  17. src/api/providers/lite-llm.ts (+16 -4)
  18. src/api/providers/minimax.ts (+3 -0)
  19. src/api/providers/mistral.ts (+16 -4)
  20. src/api/providers/moonshot.ts (+18 -4)
  21. src/api/providers/native-ollama.ts (+6 -2)
  22. src/api/providers/openai-codex.ts (+21 -6)
  23. src/api/providers/openai-compatible.ts (+17 -6)
  24. src/api/providers/openai-native.ts (+14 -3)
  25. src/api/providers/openai.ts (+24 -6)
  26. src/api/providers/openrouter.ts (+3 -0)
  27. src/api/providers/requesty.ts (+18 -3)
  28. src/api/providers/roo.ts (+69 -8)
  29. src/api/providers/sambanova.ts (+23 -7)
  30. src/api/providers/vercel-ai-gateway.ts (+22 -4)
  31. src/api/providers/vertex.ts (+13 -2)
  32. src/api/providers/vscode-lm.ts (+2 -0)
  33. src/api/providers/xai.ts (+22 -8)
  34. src/api/transform/stream.ts (+4 -0)
  35. src/core/task/Task.ts (+35 -60)
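
The per-file diffs below apply one shared pattern: cache and reasoning tokens are read from the AI SDK v6 usage object first (top-level fields, then the structured inputTokenDetails/outputTokenDetails), with the legacy usage.details shape and provider metadata as fallbacks, and every usage chunk now also carries totalInputTokens/totalOutputTokens. As a reading aid, here is a minimal standalone sketch of that resolution order; it mirrors the OpenAI-compatible handler in this commit, and the SdkUsage/normalizeUsage names are illustrative only, not identifiers from the diff.

	// Usage shape as the handlers below read it (field names taken from the diff).
	interface SdkUsage {
		inputTokens?: number
		outputTokens?: number
		cachedInputTokens?: number
		reasoningTokens?: number
		inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
		outputTokenDetails?: { reasoningTokens?: number }
		details?: { cachedInputTokens?: number; reasoningTokens?: number }
	}

	// Resolution order: v6 top-level field, then v6 structured details, then legacy usage.details.
	function normalizeUsage(usage: SdkUsage) {
		const inputTokens = usage.inputTokens || 0
		const outputTokens = usage.outputTokens || 0
		return {
			type: "usage" as const,
			inputTokens,
			outputTokens,
			cacheReadTokens:
				usage.cachedInputTokens ?? usage.inputTokenDetails?.cacheReadTokens ?? usage.details?.cachedInputTokens,
			cacheWriteTokens: usage.inputTokenDetails?.cacheWriteTokens,
			reasoningTokens:
				usage.reasoningTokens ?? usage.outputTokenDetails?.reasoningTokens ?? usage.details?.reasoningTokens,
			// OpenAI-style providers: inputTokens already includes cached tokens.
			totalInputTokens: inputTokens,
			totalOutputTokens: outputTokens,
		}
	}
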

+ 161 - 0
src/api/providers/__tests__/bedrock.spec.ts

@@ -1279,4 +1279,165 @@ describe("AwsBedrockHandler", () => {
 			expect(mockCaptureException).toHaveBeenCalled()
 		})
 	})
+
+	describe("AI SDK v6 usage field paths", () => {
+		const systemPrompt = "You are a helpful assistant"
+		const messages: RooMessage[] = [
+			{
+				role: "user",
+				content: "Hello",
+			},
+		]
+
+		function setupStream(usage: Record<string, unknown>, providerMetadata: Record<string, unknown> = {}) {
+			async function* mockFullStream() {
+				yield { type: "text-delta", text: "reply" }
+			}
+
+			mockStreamText.mockReturnValue({
+				fullStream: mockFullStream(),
+				usage: Promise.resolve(usage),
+				providerMetadata: Promise.resolve(providerMetadata),
+			})
+		}
+
+		describe("cache tokens", () => {
+			it("should read cache tokens from v6 top-level cachedInputTokens", async () => {
+				setupStream({ inputTokens: 100, outputTokens: 50, cachedInputTokens: 30 })
+
+				const generator = handler.createMessage(systemPrompt, messages)
+				const chunks: unknown[] = []
+				for await (const chunk of generator) {
+					chunks.push(chunk)
+				}
+
+				const usageChunk = chunks.find((c: any) => c.type === "usage") as any
+				expect(usageChunk).toBeDefined()
+				expect(usageChunk.cacheReadTokens).toBe(30)
+			})
+
+			it("should read cache tokens from v6 inputTokenDetails.cacheReadTokens", async () => {
+				setupStream({
+					inputTokens: 100,
+					outputTokens: 50,
+					inputTokenDetails: { cacheReadTokens: 25 },
+				})
+
+				const generator = handler.createMessage(systemPrompt, messages)
+				const chunks: unknown[] = []
+				for await (const chunk of generator) {
+					chunks.push(chunk)
+				}
+
+				const usageChunk = chunks.find((c: any) => c.type === "usage") as any
+				expect(usageChunk).toBeDefined()
+				expect(usageChunk.cacheReadTokens).toBe(25)
+			})
+
+			it("should prefer v6 top-level cachedInputTokens over providerMetadata.bedrock", async () => {
+				setupStream(
+					{ inputTokens: 100, outputTokens: 50, cachedInputTokens: 30 },
+					{ bedrock: { usage: { cacheReadInputTokens: 20 } } },
+				)
+
+				const generator = handler.createMessage(systemPrompt, messages)
+				const chunks: unknown[] = []
+				for await (const chunk of generator) {
+					chunks.push(chunk)
+				}
+
+				const usageChunk = chunks.find((c: any) => c.type === "usage") as any
+				expect(usageChunk).toBeDefined()
+				expect(usageChunk.cacheReadTokens).toBe(30)
+			})
+
+			it("should fall back to providerMetadata.bedrock.usage.cacheReadInputTokens", async () => {
+				setupStream(
+					{ inputTokens: 100, outputTokens: 50 },
+					{ bedrock: { usage: { cacheReadInputTokens: 20 } } },
+				)
+
+				const generator = handler.createMessage(systemPrompt, messages)
+				const chunks: unknown[] = []
+				for await (const chunk of generator) {
+					chunks.push(chunk)
+				}
+
+				const usageChunk = chunks.find((c: any) => c.type === "usage") as any
+				expect(usageChunk).toBeDefined()
+				expect(usageChunk.cacheReadTokens).toBe(20)
+			})
+
+			it("should read cacheWriteTokens from v6 inputTokenDetails.cacheWriteTokens", async () => {
+				setupStream({
+					inputTokens: 100,
+					outputTokens: 50,
+					inputTokenDetails: { cacheWriteTokens: 15 },
+				})
+
+				const generator = handler.createMessage(systemPrompt, messages)
+				const chunks: unknown[] = []
+				for await (const chunk of generator) {
+					chunks.push(chunk)
+				}
+
+				const usageChunk = chunks.find((c: any) => c.type === "usage") as any
+				expect(usageChunk).toBeDefined()
+				expect(usageChunk.cacheWriteTokens).toBe(15)
+			})
+		})
+
+		describe("reasoning tokens", () => {
+			it("should read reasoning tokens from v6 top-level reasoningTokens", async () => {
+				setupStream({ inputTokens: 100, outputTokens: 50, reasoningTokens: 40 })
+
+				const generator = handler.createMessage(systemPrompt, messages)
+				const chunks: unknown[] = []
+				for await (const chunk of generator) {
+					chunks.push(chunk)
+				}
+
+				const usageChunk = chunks.find((c: any) => c.type === "usage") as any
+				expect(usageChunk).toBeDefined()
+				expect(usageChunk.reasoningTokens).toBe(40)
+			})
+
+			it("should read reasoning tokens from v6 outputTokenDetails.reasoningTokens", async () => {
+				setupStream({
+					inputTokens: 100,
+					outputTokens: 50,
+					outputTokenDetails: { reasoningTokens: 35 },
+				})
+
+				const generator = handler.createMessage(systemPrompt, messages)
+				const chunks: unknown[] = []
+				for await (const chunk of generator) {
+					chunks.push(chunk)
+				}
+
+				const usageChunk = chunks.find((c: any) => c.type === "usage") as any
+				expect(usageChunk).toBeDefined()
+				expect(usageChunk.reasoningTokens).toBe(35)
+			})
+
+			it("should prefer v6 top-level reasoningTokens over outputTokenDetails", async () => {
+				setupStream({
+					inputTokens: 100,
+					outputTokens: 50,
+					reasoningTokens: 40,
+					outputTokenDetails: { reasoningTokens: 15 },
+				})
+
+				const generator = handler.createMessage(systemPrompt, messages)
+				const chunks: unknown[] = []
+				for await (const chunk of generator) {
+					chunks.push(chunk)
+				}
+
+				const usageChunk = chunks.find((c: any) => c.type === "usage") as any
+				expect(usageChunk).toBeDefined()
+				expect(usageChunk.reasoningTokens).toBe(40)
+			})
+		})
+	})
 })

+ 164 - 0
src/api/providers/__tests__/gemini.spec.ts

@@ -472,4 +472,168 @@ describe("GeminiHandler", () => {
 			expect(mockCaptureException).toHaveBeenCalled()
 		})
 	})
+
+	describe("AI SDK v6 usage field paths", () => {
+		const mockMessages: RooMessage[] = [
+			{
+				role: "user",
+				content: "Hello",
+			},
+		]
+		const systemPrompt = "You are a helpful assistant"
+
+		function setupStream(usage: Record<string, unknown>) {
+			const mockFullStream = (async function* () {
+				yield { type: "text-delta", text: "reply" }
+			})()
+
+			mockStreamText.mockReturnValue({
+				fullStream: mockFullStream,
+				usage: Promise.resolve(usage),
+				providerMetadata: Promise.resolve({}),
+			})
+		}
+
+		describe("cache tokens", () => {
+			it("should read cache tokens from v6 top-level cachedInputTokens", async () => {
+				setupStream({ inputTokens: 100, outputTokens: 50, cachedInputTokens: 30 })
+
+				const stream = handler.createMessage(systemPrompt, mockMessages)
+				const chunks = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+
+				const usageChunk = chunks.find((c) => c.type === "usage")
+				expect(usageChunk).toBeDefined()
+				expect(usageChunk!.cacheReadTokens).toBe(30)
+			})
+
+			it("should read cache tokens from v6 inputTokenDetails.cacheReadTokens", async () => {
+				setupStream({
+					inputTokens: 100,
+					outputTokens: 50,
+					inputTokenDetails: { cacheReadTokens: 25 },
+				})
+
+				const stream = handler.createMessage(systemPrompt, mockMessages)
+				const chunks = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+
+				const usageChunk = chunks.find((c) => c.type === "usage")
+				expect(usageChunk).toBeDefined()
+				expect(usageChunk!.cacheReadTokens).toBe(25)
+			})
+
+			it("should prefer v6 top-level cachedInputTokens over legacy details", async () => {
+				setupStream({
+					inputTokens: 100,
+					outputTokens: 50,
+					cachedInputTokens: 30,
+					details: { cachedInputTokens: 20 },
+				})
+
+				const stream = handler.createMessage(systemPrompt, mockMessages)
+				const chunks = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+
+				const usageChunk = chunks.find((c) => c.type === "usage")
+				expect(usageChunk).toBeDefined()
+				expect(usageChunk!.cacheReadTokens).toBe(30)
+			})
+
+			it("should fall back to legacy details.cachedInputTokens", async () => {
+				setupStream({
+					inputTokens: 100,
+					outputTokens: 50,
+					details: { cachedInputTokens: 20 },
+				})
+
+				const stream = handler.createMessage(systemPrompt, mockMessages)
+				const chunks = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+
+				const usageChunk = chunks.find((c) => c.type === "usage")
+				expect(usageChunk).toBeDefined()
+				expect(usageChunk!.cacheReadTokens).toBe(20)
+			})
+		})
+
+		describe("reasoning tokens", () => {
+			it("should read reasoning tokens from v6 top-level reasoningTokens", async () => {
+				setupStream({ inputTokens: 100, outputTokens: 50, reasoningTokens: 40 })
+
+				const stream = handler.createMessage(systemPrompt, mockMessages)
+				const chunks = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+
+				const usageChunk = chunks.find((c) => c.type === "usage")
+				expect(usageChunk).toBeDefined()
+				expect(usageChunk!.reasoningTokens).toBe(40)
+			})
+
+			it("should read reasoning tokens from v6 outputTokenDetails.reasoningTokens", async () => {
+				setupStream({
+					inputTokens: 100,
+					outputTokens: 50,
+					outputTokenDetails: { reasoningTokens: 35 },
+				})
+
+				const stream = handler.createMessage(systemPrompt, mockMessages)
+				const chunks = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+
+				const usageChunk = chunks.find((c) => c.type === "usage")
+				expect(usageChunk).toBeDefined()
+				expect(usageChunk!.reasoningTokens).toBe(35)
+			})
+
+			it("should prefer v6 top-level reasoningTokens over legacy details", async () => {
+				setupStream({
+					inputTokens: 100,
+					outputTokens: 50,
+					reasoningTokens: 40,
+					details: { reasoningTokens: 15 },
+				})
+
+				const stream = handler.createMessage(systemPrompt, mockMessages)
+				const chunks = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+
+				const usageChunk = chunks.find((c) => c.type === "usage")
+				expect(usageChunk).toBeDefined()
+				expect(usageChunk!.reasoningTokens).toBe(40)
+			})
+
+			it("should fall back to legacy details.reasoningTokens", async () => {
+				setupStream({
+					inputTokens: 100,
+					outputTokens: 50,
+					details: { reasoningTokens: 15 },
+				})
+
+				const stream = handler.createMessage(systemPrompt, mockMessages)
+				const chunks = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+
+				const usageChunk = chunks.find((c) => c.type === "usage")
+				expect(usageChunk).toBeDefined()
+				expect(usageChunk!.reasoningTokens).toBe(15)
+			})
+		})
+	})
 })

+ 7 - 1
src/api/providers/__tests__/native-ollama.spec.ts

@@ -84,7 +84,13 @@ describe("NativeOllamaHandler", () => {
 			expect(results).toHaveLength(3)
 			expect(results[0]).toEqual({ type: "text", text: "Hello" })
 			expect(results[1]).toEqual({ type: "text", text: " world" })
-			expect(results[2]).toEqual({ type: "usage", inputTokens: 10, outputTokens: 2 })
+			expect(results[2]).toEqual({
+				type: "usage",
+				inputTokens: 10,
+				outputTokens: 2,
+				totalInputTokens: 10,
+				totalOutputTokens: 2,
+			})
 		})

 		it("should not include providerOptions by default (no num_ctx)", async () => {

+ 197 - 0
src/api/providers/__tests__/openai-native-usage.spec.ts

@@ -353,4 +353,201 @@ describe("OpenAiNativeHandler - usage metrics", () => {
 			expect(callArgs.providerOptions.openai.promptCacheRetention).toBeUndefined()
 		})
 	})
+
+	describe("AI SDK v6 usage field paths", () => {
+		describe("cache tokens", () => {
+			it("should read cache tokens from v6 top-level cachedInputTokens", async () => {
+				async function* mockFullStream() {
+					yield { type: "text-delta", text: "Test" }
+				}
+
+				mockStreamText.mockReturnValue({
+					fullStream: mockFullStream(),
+					usage: Promise.resolve({
+						inputTokens: 100,
+						outputTokens: 50,
+						cachedInputTokens: 30,
+					}),
+					providerMetadata: Promise.resolve({}),
+					content: Promise.resolve([]),
+				})
+
+				const stream = handler.createMessage(systemPrompt, messages)
+				const chunks: any[] = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+
+				const usageChunks = chunks.filter((c) => c.type === "usage")
+				expect(usageChunks).toHaveLength(1)
+				expect(usageChunks[0].cacheReadTokens).toBe(30)
+			})
+
+			it("should read cache tokens from v6 inputTokenDetails.cacheReadTokens", async () => {
+				async function* mockFullStream() {
+					yield { type: "text-delta", text: "Test" }
+				}
+
+				mockStreamText.mockReturnValue({
+					fullStream: mockFullStream(),
+					usage: Promise.resolve({
+						inputTokens: 100,
+						outputTokens: 50,
+						inputTokenDetails: { cacheReadTokens: 25 },
+					}),
+					providerMetadata: Promise.resolve({}),
+					content: Promise.resolve([]),
+				})
+
+				const stream = handler.createMessage(systemPrompt, messages)
+				const chunks: any[] = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+
+				const usageChunks = chunks.filter((c) => c.type === "usage")
+				expect(usageChunks).toHaveLength(1)
+				expect(usageChunks[0].cacheReadTokens).toBe(25)
+			})
+
+			it("should prefer v6 top-level cachedInputTokens over legacy details", async () => {
+				async function* mockFullStream() {
+					yield { type: "text-delta", text: "Test" }
+				}
+
+				mockStreamText.mockReturnValue({
+					fullStream: mockFullStream(),
+					usage: Promise.resolve({
+						inputTokens: 100,
+						outputTokens: 50,
+						cachedInputTokens: 30,
+						details: { cachedInputTokens: 20 },
+					}),
+					providerMetadata: Promise.resolve({}),
+					content: Promise.resolve([]),
+				})
+
+				const stream = handler.createMessage(systemPrompt, messages)
+				const chunks: any[] = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+
+				const usageChunks = chunks.filter((c) => c.type === "usage")
+				expect(usageChunks).toHaveLength(1)
+				expect(usageChunks[0].cacheReadTokens).toBe(30)
+			})
+
+			it("should read cacheWriteTokens from v6 inputTokenDetails.cacheWriteTokens", async () => {
+				async function* mockFullStream() {
+					yield { type: "text-delta", text: "Test" }
+				}
+
+				mockStreamText.mockReturnValue({
+					fullStream: mockFullStream(),
+					usage: Promise.resolve({
+						inputTokens: 100,
+						outputTokens: 50,
+						inputTokenDetails: { cacheWriteTokens: 15 },
+					}),
+					providerMetadata: Promise.resolve({}),
+					content: Promise.resolve([]),
+				})
+
+				const stream = handler.createMessage(systemPrompt, messages)
+				const chunks: any[] = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+
+				const usageChunks = chunks.filter((c) => c.type === "usage")
+				expect(usageChunks).toHaveLength(1)
+				expect(usageChunks[0].cacheWriteTokens).toBe(15)
+			})
+		})
+
+		describe("reasoning tokens", () => {
+			it("should read reasoning tokens from v6 top-level reasoningTokens", async () => {
+				async function* mockFullStream() {
+					yield { type: "text-delta", text: "Test" }
+				}
+
+				mockStreamText.mockReturnValue({
+					fullStream: mockFullStream(),
+					usage: Promise.resolve({
+						inputTokens: 100,
+						outputTokens: 50,
+						reasoningTokens: 40,
+					}),
+					providerMetadata: Promise.resolve({}),
+					content: Promise.resolve([]),
+				})
+
+				const stream = handler.createMessage(systemPrompt, messages)
+				const chunks: any[] = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+
+				const usageChunks = chunks.filter((c) => c.type === "usage")
+				expect(usageChunks).toHaveLength(1)
+				expect(usageChunks[0].reasoningTokens).toBe(40)
+			})
+
+			it("should read reasoning tokens from v6 outputTokenDetails.reasoningTokens", async () => {
+				async function* mockFullStream() {
+					yield { type: "text-delta", text: "Test" }
+				}
+
+				mockStreamText.mockReturnValue({
+					fullStream: mockFullStream(),
+					usage: Promise.resolve({
+						inputTokens: 100,
+						outputTokens: 50,
+						outputTokenDetails: { reasoningTokens: 35 },
+					}),
+					providerMetadata: Promise.resolve({}),
+					content: Promise.resolve([]),
+				})
+
+				const stream = handler.createMessage(systemPrompt, messages)
+				const chunks: any[] = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+
+				const usageChunks = chunks.filter((c) => c.type === "usage")
+				expect(usageChunks).toHaveLength(1)
+				expect(usageChunks[0].reasoningTokens).toBe(35)
+			})
+
+			it("should prefer v6 top-level reasoningTokens over legacy details", async () => {
+				async function* mockFullStream() {
+					yield { type: "text-delta", text: "Test" }
+				}
+
+				mockStreamText.mockReturnValue({
+					fullStream: mockFullStream(),
+					usage: Promise.resolve({
+						inputTokens: 100,
+						outputTokens: 50,
+						reasoningTokens: 40,
+						details: { reasoningTokens: 15 },
+					}),
+					providerMetadata: Promise.resolve({}),
+					content: Promise.resolve([]),
+				})
+
+				const stream = handler.createMessage(systemPrompt, messages)
+				const chunks: any[] = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+
+				const usageChunks = chunks.filter((c) => c.type === "usage")
+				expect(usageChunks).toHaveLength(1)
+				expect(usageChunks[0].reasoningTokens).toBe(40)
+			})
+		})
+	})
 })

+ 201 - 0
src/api/providers/__tests__/openai-usage-tracking.spec.ts

@@ -98,6 +98,8 @@ describe("OpenAiHandler with usage tracking fix", () => {
 				type: "usage",
 				type: "usage",
 				inputTokens: 10,
 				inputTokens: 10,
 				outputTokens: 5,
 				outputTokens: 5,
+				totalInputTokens: 10,
+				totalOutputTokens: 5,
 			})

 			const lastChunk = chunks[chunks.length - 1]
@@ -133,6 +135,8 @@ describe("OpenAiHandler with usage tracking fix", () => {
 				type: "usage",
 				type: "usage",
 				inputTokens: 10,
 				inputTokens: 10,
 				outputTokens: 5,
 				outputTokens: 5,
+				totalInputTokens: 10,
+				totalOutputTokens: 5,
 			})
 		})

@@ -229,5 +233,202 @@ describe("OpenAiHandler with usage tracking fix", () => {
 				}),
 			)
 		})
+
+		describe("AI SDK v6 usage field paths", () => {
+			describe("cache tokens", () => {
+				it("should read cache tokens from v6 top-level cachedInputTokens when providerMetadata is empty", async () => {
+					async function* mockFullStream() {
+						yield { type: "text-delta", text: "Test response" }
+					}
+
+					mockStreamText.mockReturnValueOnce({
+						fullStream: mockFullStream(),
+						usage: Promise.resolve({
+							inputTokens: 100,
+							outputTokens: 50,
+							cachedInputTokens: 30,
+						}),
+						providerMetadata: Promise.resolve(undefined),
+					})
+
+					const stream = handler.createMessage(systemPrompt, messages)
+					const chunks: any[] = []
+					for await (const chunk of stream) {
+						chunks.push(chunk)
+					}
+
+					const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
+					expect(usageChunks).toHaveLength(1)
+					expect(usageChunks[0].cacheReadTokens).toBe(30)
+				})
+
+				it("should read cache tokens from v6 inputTokenDetails.cacheReadTokens when providerMetadata is empty", async () => {
+					async function* mockFullStream() {
+						yield { type: "text-delta", text: "Test response" }
+					}
+
+					mockStreamText.mockReturnValueOnce({
+						fullStream: mockFullStream(),
+						usage: Promise.resolve({
+							inputTokens: 100,
+							outputTokens: 50,
+							inputTokenDetails: { cacheReadTokens: 25 },
+						}),
+						providerMetadata: Promise.resolve(undefined),
+					})
+
+					const stream = handler.createMessage(systemPrompt, messages)
+					const chunks: any[] = []
+					for await (const chunk of stream) {
+						chunks.push(chunk)
+					}
+
+					const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
+					expect(usageChunks).toHaveLength(1)
+					expect(usageChunks[0].cacheReadTokens).toBe(25)
+				})
+
+				it("should prefer providerMetadata.openai.cachedPromptTokens over v6 top-level", async () => {
+					async function* mockFullStream() {
+						yield { type: "text-delta", text: "Test response" }
+					}
+
+					mockStreamText.mockReturnValueOnce({
+						fullStream: mockFullStream(),
+						usage: Promise.resolve({
+							inputTokens: 100,
+							outputTokens: 50,
+							cachedInputTokens: 30,
+						}),
+						providerMetadata: Promise.resolve({
+							openai: {
+								cachedPromptTokens: 80,
+							},
+						}),
+					})
+
+					const stream = handler.createMessage(systemPrompt, messages)
+					const chunks: any[] = []
+					for await (const chunk of stream) {
+						chunks.push(chunk)
+					}
+
+					const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
+					expect(usageChunks).toHaveLength(1)
+					expect(usageChunks[0].cacheReadTokens).toBe(80)
+				})
+
+				it("should prefer v6 top-level cachedInputTokens over legacy details when providerMetadata is empty", async () => {
+					async function* mockFullStream() {
+						yield { type: "text-delta", text: "Test response" }
+					}
+
+					mockStreamText.mockReturnValueOnce({
+						fullStream: mockFullStream(),
+						usage: Promise.resolve({
+							inputTokens: 100,
+							outputTokens: 50,
+							cachedInputTokens: 30,
+							details: { cachedInputTokens: 20 },
+						}),
+						providerMetadata: Promise.resolve(undefined),
+					})
+
+					const stream = handler.createMessage(systemPrompt, messages)
+					const chunks: any[] = []
+					for await (const chunk of stream) {
+						chunks.push(chunk)
+					}
+
+					const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
+					expect(usageChunks).toHaveLength(1)
+					expect(usageChunks[0].cacheReadTokens).toBe(30)
+				})
+			})
+
+			describe("reasoning tokens", () => {
+				it("should read reasoning tokens from v6 top-level reasoningTokens when providerMetadata is empty", async () => {
+					async function* mockFullStream() {
+						yield { type: "text-delta", text: "Test response" }
+					}
+
+					mockStreamText.mockReturnValueOnce({
+						fullStream: mockFullStream(),
+						usage: Promise.resolve({
+							inputTokens: 100,
+							outputTokens: 50,
+							reasoningTokens: 40,
+						}),
+						providerMetadata: Promise.resolve(undefined),
+					})
+
+					const stream = handler.createMessage(systemPrompt, messages)
+					const chunks: any[] = []
+					for await (const chunk of stream) {
+						chunks.push(chunk)
+					}
+
+					const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
+					expect(usageChunks).toHaveLength(1)
+					expect(usageChunks[0].reasoningTokens).toBe(40)
+				})
+
+				it("should read reasoning tokens from v6 outputTokenDetails.reasoningTokens when providerMetadata is empty", async () => {
+					async function* mockFullStream() {
+						yield { type: "text-delta", text: "Test response" }
+					}
+
+					mockStreamText.mockReturnValueOnce({
+						fullStream: mockFullStream(),
+						usage: Promise.resolve({
+							inputTokens: 100,
+							outputTokens: 50,
+							outputTokenDetails: { reasoningTokens: 35 },
+						}),
+						providerMetadata: Promise.resolve(undefined),
+					})
+
+					const stream = handler.createMessage(systemPrompt, messages)
+					const chunks: any[] = []
+					for await (const chunk of stream) {
+						chunks.push(chunk)
+					}
+
+					const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
+					expect(usageChunks).toHaveLength(1)
+					expect(usageChunks[0].reasoningTokens).toBe(35)
+				})
+
+				it("should prefer providerMetadata.openai.reasoningTokens over v6 top-level", async () => {
+					async function* mockFullStream() {
+						yield { type: "text-delta", text: "Test response" }
+					}
+
+					mockStreamText.mockReturnValueOnce({
+						fullStream: mockFullStream(),
+						usage: Promise.resolve({
+							inputTokens: 100,
+							outputTokens: 50,
+							reasoningTokens: 40,
+						}),
+						providerMetadata: Promise.resolve({
+							openai: {
+								reasoningTokens: 20,
+							},
+						}),
+					})
+
+					const stream = handler.createMessage(systemPrompt, messages)
+					const chunks: any[] = []
+					for await (const chunk of stream) {
+						chunks.push(chunk)
+					}
+
+					const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
+					expect(usageChunks).toHaveLength(1)
+					expect(usageChunks[0].reasoningTokens).toBe(20)
+				})
+			})
+		})
 	})
 })

+ 2 - 0
src/api/providers/__tests__/requesty.spec.ts

@@ -178,6 +178,8 @@ describe("RequestyHandler", () => {
 				cacheReadTokens: 2,
 				reasoningTokens: undefined,
 				totalCost: expect.any(Number),
+				totalInputTokens: 10,
+				totalOutputTokens: 20,
 			})
 		})


+ 67 - 2
src/api/providers/__tests__/roo.spec.ts

@@ -105,7 +105,12 @@ function createMockStreamResult(options?: {
 	toolCallParts?: Array<{ type: string; id?: string; toolName?: string; delta?: string }>
 	inputTokens?: number
 	outputTokens?: number
-	providerMetadata?: Record<string, any>
+	providerMetadata?: Record<string, unknown>
+	usage?: {
+		cachedInputTokens?: number
+		inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+		details?: { cachedInputTokens?: number }
+	}
 }) {
 	const {
 		textChunks = ["Test response"],
@@ -114,6 +119,7 @@ function createMockStreamResult(options?: {
 		inputTokens = 10,
 		outputTokens = 5,
 		providerMetadata = undefined,
+		usage = undefined,
 	} = options ?? {}

 	const fullStream = (async function* () {
@@ -130,7 +136,7 @@ function createMockStreamResult(options?: {

 	return {
 		fullStream,
-		usage: Promise.resolve({ inputTokens, outputTokens }),
+		usage: Promise.resolve({ inputTokens, outputTokens, ...usage }),
 		providerMetadata: Promise.resolve(providerMetadata),
 	}
 }
@@ -767,6 +773,65 @@ describe("RooHandler", () => {
 			expect(usageChunk).toBeDefined()
 			expect(usageChunk.cacheWriteTokens).toBe(20)
 			expect(usageChunk.cacheReadTokens).toBe(30)
+			expect(usageChunk.totalInputTokens).toBe(100)
+		})
+
+		it("should fall back to anthropic metadata when roo metadata is missing", async () => {
+			mockStreamText.mockReturnValue(
+				createMockStreamResult({
+					inputTokens: 120,
+					outputTokens: 40,
+					providerMetadata: {
+						anthropic: {
+							cacheCreationInputTokens: 25,
+							usage: {
+								cache_read_input_tokens: 35,
+							},
+						},
+					},
+				}),
+			)
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const usageChunk = chunks.find((c) => c.type === "usage")
+			expect(usageChunk).toBeDefined()
+			expect(usageChunk.inputTokens).toBe(120)
+			expect(usageChunk.cacheWriteTokens).toBe(25)
+			expect(usageChunk.cacheReadTokens).toBe(35)
+			expect(usageChunk.totalInputTokens).toBe(120)
+		})
+
+		it("should fall back to AI SDK usage cache fields when provider metadata is missing", async () => {
+			mockStreamText.mockReturnValue(
+				createMockStreamResult({
+					inputTokens: 140,
+					outputTokens: 30,
+					usage: {
+						cachedInputTokens: 22,
+						inputTokenDetails: {
+							cacheWriteTokens: 11,
+						},
+					},
+				}),
+			)
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const usageChunk = chunks.find((c) => c.type === "usage")
+			expect(usageChunk).toBeDefined()
+			expect(usageChunk.inputTokens).toBe(140)
+			expect(usageChunk.cacheWriteTokens).toBe(11)
+			expect(usageChunk.cacheReadTokens).toBe(22)
+			expect(usageChunk.totalInputTokens).toBe(140)
 		})
 	})


+ 4 - 0
src/api/providers/__tests__/vercel-ai-gateway.spec.ts

@@ -191,6 +191,8 @@ describe("VercelAiGatewayHandler", () => {
 				cacheWriteTokens: 2,
 				cacheReadTokens: 3,
 				totalCost: 0.005,
+				totalInputTokens: 10,
+				totalOutputTokens: 5,
 			})
 		})

@@ -281,6 +283,8 @@ describe("VercelAiGatewayHandler", () => {
 				cacheWriteTokens: 2,
 				cacheReadTokens: 3,
 				totalCost: 0.005,
+				totalInputTokens: 10,
+				totalOutputTokens: 5,
 			})
 		})


+ 3 - 0
src/api/providers/anthropic-vertex.ts

@@ -221,6 +221,9 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
 			cacheWriteTokens: cacheWriteTokens > 0 ? cacheWriteTokens : undefined,
 			cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined,
 			totalCost,
+			// Anthropic: inputTokens is non-cached only; total = input + cache write + cache read
+			totalInputTokens: inputTokens + (cacheWriteTokens ?? 0) + (cacheReadTokens ?? 0),
+			totalOutputTokens: outputTokens,
 		}
 	}


+ 3 - 0
src/api/providers/anthropic.ts

@@ -209,6 +209,9 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 			cacheWriteTokens: cacheWriteTokens > 0 ? cacheWriteTokens : undefined,
 			cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined,
 			totalCost,
+			// Anthropic: inputTokens is non-cached only; total = input + cache write + cache read
+			totalInputTokens: inputTokens + (cacheWriteTokens ?? 0) + (cacheReadTokens ?? 0),
+			totalOutputTokens: outputTokens,
 		}
 	}


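The comment added to both Anthropic handlers (and to minimax.ts further down) captures why these providers cannot simply copy inputTokens into totalInputTokens: the Anthropic SDK reports inputTokens without cache activity. A small worked example with made-up numbers, matching the expression in the diff:

	// Hypothetical Anthropic usage for one request (illustrative numbers only).
	const inputTokens = 12 // non-cached prompt tokens
	const cacheWriteTokens = 900 // tokens written to the prompt cache
	const cacheReadTokens = 4000 // tokens served from the prompt cache

	// Same expression as anthropic.ts / anthropic-vertex.ts above:
	const totalInputTokens = inputTokens + (cacheWriteTokens ?? 0) + (cacheReadTokens ?? 0) // 4912
	const totalOutputTokens = 250 // outputTokens passes through unchanged
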
+ 21 - 6
src/api/providers/azure.ts

@@ -90,6 +90,12 @@ export class AzureHandler extends BaseProvider implements SingleCompletionHandle
 		usage: {
 			inputTokens?: number
 			outputTokens?: number
+			totalInputTokens?: number
+			totalOutputTokens?: number
+			cachedInputTokens?: number
+			reasoningTokens?: number
+			inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+			outputTokenDetails?: { reasoningTokens?: number }
 			details?: {
 				cachedInputTokens?: number
 				reasoningTokens?: number
@@ -102,19 +108,28 @@ export class AzureHandler extends BaseProvider implements SingleCompletionHandle
 			}
 		},
 	): ApiStreamUsageChunk {
-		// Extract cache metrics from Azure's providerMetadata if available
-		const cacheReadTokens = providerMetadata?.azure?.promptCacheHitTokens ?? usage.details?.cachedInputTokens
+		// Extract cache metrics from Azure's providerMetadata, then v6 fields, then legacy
+		const cacheReadTokens =
+			providerMetadata?.azure?.promptCacheHitTokens ??
+			usage.cachedInputTokens ??
+			usage.inputTokenDetails?.cacheReadTokens ??
+			usage.details?.cachedInputTokens
 		// Azure uses OpenAI-compatible caching which does not report cache write tokens separately;
 		// promptCacheMissTokens represents tokens NOT found in cache (processed from scratch), not tokens written to cache.
-		const cacheWriteTokens = undefined
+		const cacheWriteTokens = usage.inputTokenDetails?.cacheWriteTokens

+		const inputTokens = usage.inputTokens || 0
+		const outputTokens = usage.outputTokens || 0
 		return {
 			type: "usage",
-			inputTokens: usage.inputTokens || 0,
-			outputTokens: usage.outputTokens || 0,
+			inputTokens,
+			outputTokens,
 			cacheReadTokens,
 			cacheWriteTokens,
-			reasoningTokens: usage.details?.reasoningTokens,
+			reasoningTokens:
+				usage.reasoningTokens ?? usage.outputTokenDetails?.reasoningTokens ?? usage.details?.reasoningTokens,
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}


+ 16 - 3
src/api/providers/baseten.ts

@@ -71,16 +71,29 @@ export class BasetenHandler extends BaseProvider implements SingleCompletionHand
 	protected processUsageMetrics(usage: {
 		inputTokens?: number
 		outputTokens?: number
+		totalInputTokens?: number
+		totalOutputTokens?: number
+		cachedInputTokens?: number
+		reasoningTokens?: number
+		inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+		outputTokenDetails?: { reasoningTokens?: number }
 		details?: {
 			cachedInputTokens?: number
 			reasoningTokens?: number
 		}
 	}): ApiStreamUsageChunk {
+		const inputTokens = usage.inputTokens || 0
+		const outputTokens = usage.outputTokens || 0
 		return {
 			type: "usage",
-			inputTokens: usage.inputTokens || 0,
-			outputTokens: usage.outputTokens || 0,
-			reasoningTokens: usage.details?.reasoningTokens,
+			inputTokens,
+			outputTokens,
+			cacheReadTokens:
+				usage.cachedInputTokens ?? usage.inputTokenDetails?.cacheReadTokens ?? usage.details?.cachedInputTokens,
+			reasoningTokens:
+				usage.reasoningTokens ?? usage.outputTokenDetails?.reasoningTokens ?? usage.details?.reasoningTokens,
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}


+ 21 - 7
src/api/providers/bedrock.ts

@@ -351,7 +351,20 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 	 * Process usage metrics from the AI SDK response.
 	 */
 	private processUsageMetrics(
-		usage: { inputTokens?: number; outputTokens?: number },
+		usage: {
+			inputTokens?: number
+			outputTokens?: number
+			totalInputTokens?: number
+			totalOutputTokens?: number
+			cachedInputTokens?: number
+			reasoningTokens?: number
+			inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+			outputTokenDetails?: { reasoningTokens?: number }
+			details?: {
+				cachedInputTokens?: number
+				reasoningTokens?: number
+			}
+		},
 		info: ModelInfo,
 		providerMetadata?: Record<string, Record<string, unknown>>,
 	): ApiStreamUsageChunk {
@@ -360,8 +373,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH

 		// The AI SDK exposes reasoningTokens as a top-level field on usage, and also
 		// under outputTokenDetails.reasoningTokens — there is no .details property.
-		const reasoningTokens =
-			(usage as any).reasoningTokens ?? (usage as any).outputTokenDetails?.reasoningTokens ?? 0
+		const reasoningTokens = usage.reasoningTokens ?? usage.outputTokenDetails?.reasoningTokens ?? 0

 		// Extract cache metrics primarily from usage (AI SDK standard locations),
 		// falling back to providerMetadata.bedrock.usage for provider-specific fields.
@@ -369,12 +381,11 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 			| { cacheReadInputTokens?: number; cacheWriteInputTokens?: number }
 			| undefined
 		const cacheReadTokens =
-			(usage as any).inputTokenDetails?.cacheReadTokens ??
-			(usage as any).cachedInputTokens ??
+			usage.cachedInputTokens ??
+			usage.inputTokenDetails?.cacheReadTokens ??
 			bedrockUsage?.cacheReadInputTokens ??
 			bedrockUsage?.cacheReadInputTokens ??
 			0
 			0
-		const cacheWriteTokens =
-			(usage as any).inputTokenDetails?.cacheWriteTokens ?? bedrockUsage?.cacheWriteInputTokens ?? 0
+		const cacheWriteTokens = usage.inputTokenDetails?.cacheWriteTokens ?? bedrockUsage?.cacheWriteInputTokens ?? 0

 		// For prompt routers, the AI SDK surfaces the invoked model ID in
 		// providerMetadata.bedrock.trace.promptRouter.invokedModelId.
@@ -417,6 +428,9 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 				reasoningTokens,
 				info: costInfo,
 			}),
+			// AI SDK normalizes inputTokens to total (OpenAI convention) for Bedrock
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}


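Bedrock is the one handler here that also reconciles provider-specific metadata, and the tests in bedrock.spec.ts above pin the order. A minimal sketch of just the cache-token resolution (the resolveBedrockCacheTokens helper and its parameter types are illustrative, not code from the diff):

	type BedrockUsage = {
		cachedInputTokens?: number
		inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
	}
	type BedrockMetadata = {
		bedrock?: { usage?: { cacheReadInputTokens?: number; cacheWriteInputTokens?: number } }
	}

	// v6 usage fields win; providerMetadata.bedrock.usage is the fallback.
	function resolveBedrockCacheTokens(usage: BedrockUsage, meta?: BedrockMetadata) {
		const bedrockUsage = meta?.bedrock?.usage
		return {
			cacheReadTokens:
				usage.cachedInputTokens ?? usage.inputTokenDetails?.cacheReadTokens ?? bedrockUsage?.cacheReadInputTokens ?? 0,
			cacheWriteTokens: usage.inputTokenDetails?.cacheWriteTokens ?? bedrockUsage?.cacheWriteInputTokens ?? 0,
		}
	}

	// resolveBedrockCacheTokens({ cachedInputTokens: 30 }, { bedrock: { usage: { cacheReadInputTokens: 20 } } })
	// => { cacheReadTokens: 30, cacheWriteTokens: 0 }, matching the "prefer v6 top-level" test above.
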
+ 23 - 7
src/api/providers/deepseek.ts

@@ -72,6 +72,12 @@ export class DeepSeekHandler extends BaseProvider implements SingleCompletionHan
 		usage: {
 			inputTokens?: number
 			outputTokens?: number
+			totalInputTokens?: number
+			totalOutputTokens?: number
+			cachedInputTokens?: number
+			reasoningTokens?: number
+			inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+			outputTokenDetails?: { reasoningTokens?: number }
 			details?: {
 				cachedInputTokens?: number
 				reasoningTokens?: number
@@ -84,17 +90,27 @@ export class DeepSeekHandler extends BaseProvider implements SingleCompletionHan
 			}
 		},
 	): ApiStreamUsageChunk {
-		// Extract cache metrics from DeepSeek's providerMetadata
-		const cacheReadTokens = providerMetadata?.deepseek?.promptCacheHitTokens ?? usage.details?.cachedInputTokens
-		const cacheWriteTokens = providerMetadata?.deepseek?.promptCacheMissTokens
-
+		// Extract cache metrics from DeepSeek's providerMetadata, then v6 fields, then legacy
+		const cacheReadTokens =
+			providerMetadata?.deepseek?.promptCacheHitTokens ??
+			usage.cachedInputTokens ??
+			usage.inputTokenDetails?.cacheReadTokens ??
+			usage.details?.cachedInputTokens
+		const cacheWriteTokens =
+			providerMetadata?.deepseek?.promptCacheMissTokens ?? usage.inputTokenDetails?.cacheWriteTokens
+
+		const inputTokens = usage.inputTokens || 0
+		const outputTokens = usage.outputTokens || 0
 		return {
 			type: "usage",
-			inputTokens: usage.inputTokens || 0,
-			outputTokens: usage.outputTokens || 0,
+			inputTokens,
+			outputTokens,
 			cacheReadTokens,
 			cacheWriteTokens,
-			reasoningTokens: usage.details?.reasoningTokens,
+			reasoningTokens:
+				usage.reasoningTokens ?? usage.outputTokenDetails?.reasoningTokens ?? usage.details?.reasoningTokens,
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}


+ 23 - 7
src/api/providers/fireworks.ts

@@ -73,6 +73,12 @@ export class FireworksHandler extends BaseProvider implements SingleCompletionHa
 		usage: {
 			inputTokens?: number
 			outputTokens?: number
+			totalInputTokens?: number
+			totalOutputTokens?: number
+			cachedInputTokens?: number
+			reasoningTokens?: number
+			inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+			outputTokenDetails?: { reasoningTokens?: number }
 			details?: {
 				cachedInputTokens?: number
 				reasoningTokens?: number
@@ -85,17 +91,27 @@ export class FireworksHandler extends BaseProvider implements SingleCompletionHa
 			}
 		},
 	): ApiStreamUsageChunk {
-		// Extract cache metrics from Fireworks' providerMetadata if available
-		const cacheReadTokens = providerMetadata?.fireworks?.promptCacheHitTokens ?? usage.details?.cachedInputTokens
-		const cacheWriteTokens = providerMetadata?.fireworks?.promptCacheMissTokens
-
+		// Extract cache metrics from Fireworks' providerMetadata, then v6 fields, then legacy
+		const cacheReadTokens =
+			providerMetadata?.fireworks?.promptCacheHitTokens ??
+			usage.cachedInputTokens ??
+			usage.inputTokenDetails?.cacheReadTokens ??
+			usage.details?.cachedInputTokens
+		const cacheWriteTokens =
+			providerMetadata?.fireworks?.promptCacheMissTokens ?? usage.inputTokenDetails?.cacheWriteTokens
+
+		const inputTokens = usage.inputTokens || 0
+		const outputTokens = usage.outputTokens || 0
 		return {
 			type: "usage",
-			inputTokens: usage.inputTokens || 0,
-			outputTokens: usage.outputTokens || 0,
+			inputTokens,
+			outputTokens,
 			cacheReadTokens,
 			cacheWriteTokens,
-			reasoningTokens: usage.details?.reasoningTokens,
+			reasoningTokens:
+				usage.reasoningTokens ?? usage.outputTokenDetails?.reasoningTokens ?? usage.details?.reasoningTokens,
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}


+ 13 - 2
src/api/providers/gemini.ts

@@ -248,6 +248,12 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 		usage: {
 			inputTokens?: number
 			outputTokens?: number
+			totalInputTokens?: number
+			totalOutputTokens?: number
+			cachedInputTokens?: number
+			reasoningTokens?: number
+			inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+			outputTokenDetails?: { reasoningTokens?: number }
 			details?: {
 				cachedInputTokens?: number
 				reasoningTokens?: number
@@ -258,8 +264,10 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 	): ApiStreamUsageChunk {
 		const inputTokens = usage.inputTokens || 0
 		const outputTokens = usage.outputTokens || 0
-		const cacheReadTokens = usage.details?.cachedInputTokens
-		const reasoningTokens = usage.details?.reasoningTokens
+		const cacheReadTokens =
+			usage.cachedInputTokens ?? usage.inputTokenDetails?.cacheReadTokens ?? usage.details?.cachedInputTokens
+		const reasoningTokens =
+			usage.reasoningTokens ?? usage.outputTokenDetails?.reasoningTokens ?? usage.details?.reasoningTokens

 		return {
 			type: "usage",
@@ -274,6 +282,9 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 				cacheReadTokens,
 				reasoningTokens,
 			}),
+			// Gemini: inputTokens is already total
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}


+ 16 - 4
src/api/providers/lite-llm.ts

@@ -96,18 +96,30 @@ export class LiteLLMHandler extends OpenAICompatibleHandler implements SingleCom
 	protected override processUsageMetrics(usage: {
 		inputTokens?: number
 		outputTokens?: number
+		totalInputTokens?: number
+		totalOutputTokens?: number
+		cachedInputTokens?: number
+		reasoningTokens?: number
+		inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+		outputTokenDetails?: { reasoningTokens?: number }
 		details?: {
 			cachedInputTokens?: number
 			reasoningTokens?: number
 		}
 		raw?: Record<string, unknown>
 	}): ApiStreamUsageChunk {
+		const inputTokens = usage.inputTokens || 0
+		const outputTokens = usage.outputTokens || 0
 		return {
 			type: "usage",
-			inputTokens: usage.inputTokens || 0,
-			outputTokens: usage.outputTokens || 0,
-			cacheReadTokens: usage.details?.cachedInputTokens,
-			reasoningTokens: usage.details?.reasoningTokens,
+			inputTokens,
+			outputTokens,
+			cacheReadTokens:
+				usage.cachedInputTokens ?? usage.inputTokenDetails?.cacheReadTokens ?? usage.details?.cachedInputTokens,
+			reasoningTokens:
+				usage.reasoningTokens ?? usage.outputTokenDetails?.reasoningTokens ?? usage.details?.reasoningTokens,
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}
 }

+ 3 - 0
src/api/providers/minimax.ts

@@ -167,6 +167,9 @@ export class MiniMaxHandler extends BaseProvider implements SingleCompletionHand
 			cacheWriteTokens: cacheWriteTokens > 0 ? cacheWriteTokens : undefined,
 			cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined,
 			totalCost,
+			// MiniMax uses Anthropic SDK: inputTokens is non-cached only
+			totalInputTokens: inputTokens + (cacheWriteTokens ?? 0) + (cacheReadTokens ?? 0),
+			totalOutputTokens: outputTokens,
 		}
 	}


+ 16 - 4
src/api/providers/mistral.ts

@@ -78,17 +78,29 @@ export class MistralHandler extends BaseProvider implements SingleCompletionHand
 	protected processUsageMetrics(usage: {
 		inputTokens?: number
 		outputTokens?: number
+		totalInputTokens?: number
+		totalOutputTokens?: number
+		cachedInputTokens?: number
+		reasoningTokens?: number
+		inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+		outputTokenDetails?: { reasoningTokens?: number }
 		details?: {
 			cachedInputTokens?: number
 			reasoningTokens?: number
 		}
 	}): ApiStreamUsageChunk {
+		const inputTokens = usage.inputTokens || 0
+		const outputTokens = usage.outputTokens || 0
 		return {
 			type: "usage",
-			inputTokens: usage.inputTokens || 0,
-			outputTokens: usage.outputTokens || 0,
-			cacheReadTokens: usage.details?.cachedInputTokens,
-			reasoningTokens: usage.details?.reasoningTokens,
+			inputTokens,
+			outputTokens,
+			cacheReadTokens:
+				usage.cachedInputTokens ?? usage.inputTokenDetails?.cacheReadTokens ?? usage.details?.cachedInputTokens,
+			reasoningTokens:
+				usage.reasoningTokens ?? usage.outputTokenDetails?.reasoningTokens ?? usage.details?.reasoningTokens,
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}


+ 18 - 4
src/api/providers/moonshot.ts

@@ -46,6 +46,12 @@ export class MoonshotHandler extends OpenAICompatibleHandler {
 	protected override processUsageMetrics(usage: {
 		inputTokens?: number
 		outputTokens?: number
+		totalInputTokens?: number
+		totalOutputTokens?: number
+		cachedInputTokens?: number
+		reasoningTokens?: number
+		inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+		outputTokenDetails?: { reasoningTokens?: number }
 		details?: {
 			cachedInputTokens?: number
 			reasoningTokens?: number
@@ -55,12 +61,20 @@ export class MoonshotHandler extends OpenAICompatibleHandler {
 		// Moonshot uses cached_tokens at the top level of raw usage data
 		const rawUsage = usage.raw as { cached_tokens?: number } | undefined

+		const inputTokens = usage.inputTokens || 0
+		const outputTokens = usage.outputTokens || 0
 		return {
 			type: "usage",
-			inputTokens: usage.inputTokens || 0,
-			outputTokens: usage.outputTokens || 0,
-			cacheWriteTokens: 0,
-			cacheReadTokens: rawUsage?.cached_tokens ?? usage.details?.cachedInputTokens,
+			inputTokens,
+			outputTokens,
+			cacheWriteTokens: usage.inputTokenDetails?.cacheWriteTokens ?? 0,
+			cacheReadTokens:
+				rawUsage?.cached_tokens ??
+				usage.cachedInputTokens ??
+				usage.inputTokenDetails?.cacheReadTokens ??
+				usage.details?.cachedInputTokens,
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}


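Moonshot is the inverse of Bedrock: its provider-specific cached_tokens field stays first, and the shared v6/legacy chain only fills in behind it. A one-expression sketch of that ordering (the MoonshotUsage type and cacheReadTokens helper are illustrative only):

	type MoonshotUsage = {
		raw?: { cached_tokens?: number }
		cachedInputTokens?: number
		inputTokenDetails?: { cacheReadTokens?: number }
		details?: { cachedInputTokens?: number }
	}

	// raw cached_tokens first, then the same v6-then-legacy fallback used elsewhere.
	const cacheReadTokens = (usage: MoonshotUsage) =>
		usage.raw?.cached_tokens ??
		usage.cachedInputTokens ??
		usage.inputTokenDetails?.cacheReadTokens ??
		usage.details?.cachedInputTokens
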
+ 6 - 2
src/api/providers/native-ollama.ts

@@ -126,10 +126,14 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio

 			const usage = await result.usage
 			if (usage) {
+				const inputTokens = usage.inputTokens || 0
+				const outputTokens = usage.outputTokens || 0
 				yield {
 					type: "usage",
-					inputTokens: usage.inputTokens || 0,
-					outputTokens: usage.outputTokens || 0,
+					inputTokens,
+					outputTokens,
+					totalInputTokens: inputTokens,
+					totalOutputTokens: outputTokens,
 				}
 			}


+ 21 - 6
src/api/providers/openai-codex.ts

@@ -252,14 +252,27 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 					if (usage) {
 						const inputTokens = usage.inputTokens || 0
 						const outputTokens = usage.outputTokens || 0
-						const details = (usage as any).details as
-							| { cachedInputTokens?: number; reasoningTokens?: number }
-							| undefined
-						const cacheReadTokens = details?.cachedInputTokens ?? 0
+						const typedUsage = usage as {
+							inputTokens?: number
+							outputTokens?: number
+							cachedInputTokens?: number
+							reasoningTokens?: number
+							inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+							outputTokenDetails?: { reasoningTokens?: number }
+							details?: { cachedInputTokens?: number; reasoningTokens?: number }
+						}
+						const cacheReadTokens =
+							typedUsage.cachedInputTokens ??
+							typedUsage.inputTokenDetails?.cacheReadTokens ??
+							typedUsage.details?.cachedInputTokens ??
+							0
 						// The OpenAI Responses API does not report cache write tokens separately;
 						// only cached (read) tokens are available via usage.details.cachedInputTokens.
-						const cacheWriteTokens = 0
-						const reasoningTokens = details?.reasoningTokens
+						const cacheWriteTokens = typedUsage.inputTokenDetails?.cacheWriteTokens ?? 0
+						const reasoningTokens =
+							typedUsage.reasoningTokens ??
+							typedUsage.outputTokenDetails?.reasoningTokens ??
+							typedUsage.details?.reasoningTokens

 						yield {
 							type: "usage",
@@ -269,6 +282,8 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 							cacheReadTokens: cacheReadTokens || undefined,
 							...(typeof reasoningTokens === "number" ? { reasoningTokens } : {}),
 							totalCost: 0, // Subscription-based pricing
+							totalInputTokens: inputTokens,
+							totalOutputTokens: outputTokens,
 						}
 					}
 				} catch (usageError) {

+ 17 - 6
src/api/providers/openai-compatible.ts

@@ -97,6 +97,10 @@ export abstract class OpenAICompatibleHandler extends BaseProvider implements Si
 	protected processUsageMetrics(usage: {
 		inputTokens?: number
 		outputTokens?: number
+		totalInputTokens?: number
+		totalOutputTokens?: number
+		cachedInputTokens?: number
+		reasoningTokens?: number
 		inputTokenDetails?: {
 			cacheReadTokens?: number
 			cacheWriteTokens?: number
@@ -111,15 +115,22 @@ export abstract class OpenAICompatibleHandler extends BaseProvider implements Si
 		}
 		raw?: Record<string, unknown>
 	}): ApiStreamUsageChunk {
+		const inputTokens = usage.inputTokens || 0
+		const outputTokens = usage.outputTokens || 0
 		return {
 			type: "usage",
-			inputTokens: usage.inputTokens || 0,
-			outputTokens: usage.outputTokens || 0,
-			// P1: AI SDK v6 standard (LanguageModelInputTokenDetails)
-			// P2: Legacy AI SDK standard (usage.details)
-			cacheReadTokens: usage.inputTokenDetails?.cacheReadTokens ?? usage.details?.cachedInputTokens,
+			inputTokens,
+			outputTokens,
+			// P1: AI SDK v6 top-level
+			// P2: AI SDK v6 structured (LanguageModelInputTokenDetails)
+			// P3: Legacy AI SDK standard (usage.details)
+			cacheReadTokens:
+				usage.cachedInputTokens ?? usage.inputTokenDetails?.cacheReadTokens ?? usage.details?.cachedInputTokens,
 			cacheWriteTokens: usage.inputTokenDetails?.cacheWriteTokens,
-			reasoningTokens: usage.outputTokenDetails?.reasoningTokens ?? usage.details?.reasoningTokens,
+			reasoningTokens:
+				usage.reasoningTokens ?? usage.outputTokenDetails?.reasoningTokens ?? usage.details?.reasoningTokens,
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}
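The cache and reasoning fallback introduced here recurs in most handlers below. A minimal standalone sketch of the resolution order, assuming nothing beyond the field names shown in the diff (the sample payloads are invented):

```ts
// Resolution order used throughout this patch:
// P1: AI SDK v6 top-level fields (cachedInputTokens, reasoningTokens)
// P2: AI SDK v6 structured details (inputTokenDetails / outputTokenDetails)
// P3: legacy usage.details
interface UsageLike {
	inputTokens?: number
	outputTokens?: number
	cachedInputTokens?: number
	reasoningTokens?: number
	inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
	outputTokenDetails?: { reasoningTokens?: number }
	details?: { cachedInputTokens?: number; reasoningTokens?: number }
}

function resolveCacheRead(usage: UsageLike): number | undefined {
	return usage.cachedInputTokens ?? usage.inputTokenDetails?.cacheReadTokens ?? usage.details?.cachedInputTokens
}

function resolveReasoning(usage: UsageLike): number | undefined {
	return usage.reasoningTokens ?? usage.outputTokenDetails?.reasoningTokens ?? usage.details?.reasoningTokens
}

// v6 top-level wins even when legacy details are also present (invented sample):
resolveCacheRead({ cachedInputTokens: 30, details: { cachedInputTokens: 99 } }) // => 30
// legacy-only payloads still resolve:
resolveReasoning({ details: { reasoningTokens: 12 } }) // => 12
```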
 
 

+ 14 - 3
src/api/providers/openai-native.ts

@@ -345,6 +345,12 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		usage: {
 			inputTokens?: number
 			outputTokens?: number
+			totalInputTokens?: number
+			totalOutputTokens?: number
+			cachedInputTokens?: number
+			reasoningTokens?: number
+			inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+			outputTokenDetails?: { reasoningTokens?: number }
 			details?: {
 				cachedInputTokens?: number
 				reasoningTokens?: number
@@ -356,11 +362,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		const inputTokens = usage.inputTokens || 0
 		const outputTokens = usage.outputTokens || 0
 
 
-		const cacheReadTokens = usage.details?.cachedInputTokens ?? 0
+		const cacheReadTokens =
+			usage.cachedInputTokens ?? usage.inputTokenDetails?.cacheReadTokens ?? usage.details?.cachedInputTokens ?? 0
 		// The OpenAI Responses API does not report cache write tokens separately;
 		// only cached (read) tokens are available via usage.details.cachedInputTokens.
-		const cacheWriteTokens = 0
-		const reasoningTokens = usage.details?.reasoningTokens
+		const cacheWriteTokens = usage.inputTokenDetails?.cacheWriteTokens ?? 0
+		const reasoningTokens =
+			usage.reasoningTokens ?? usage.outputTokenDetails?.reasoningTokens ?? usage.details?.reasoningTokens
 
 
 		const effectiveTier =
 			this.lastServiceTier || (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
@@ -382,6 +390,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			cacheReadTokens: cacheReadTokens || undefined,
 			...(typeof reasoningTokens === "number" ? { reasoningTokens } : {}),
 			totalCost,
+			// OpenAI: inputTokens is already total
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}
 
 

+ 24 - 6
src/api/providers/openai.ts

@@ -295,6 +295,12 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		usage: {
 			inputTokens?: number
 			outputTokens?: number
+			totalInputTokens?: number
+			totalOutputTokens?: number
+			cachedInputTokens?: number
+			reasoningTokens?: number
+			inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+			outputTokenDetails?: { reasoningTokens?: number }
 			details?: {
 				cachedInputTokens?: number
 				reasoningTokens?: number
@@ -309,16 +315,28 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		},
 	): ApiStreamUsageChunk {
 		// Extract cache and reasoning metrics from OpenAI's providerMetadata when available,
-		// falling back to usage.details for standard AI SDK fields.
-		const cacheReadTokens = providerMetadata?.openai?.cachedPromptTokens ?? usage.details?.cachedInputTokens
-		const reasoningTokens = providerMetadata?.openai?.reasoningTokens ?? usage.details?.reasoningTokens
-
+		// then v6 fields, then legacy usage.details.
+		const cacheReadTokens =
+			providerMetadata?.openai?.cachedPromptTokens ??
+			usage.cachedInputTokens ??
+			usage.inputTokenDetails?.cacheReadTokens ??
+			usage.details?.cachedInputTokens
+		const reasoningTokens =
+			providerMetadata?.openai?.reasoningTokens ??
+			usage.reasoningTokens ??
+			usage.outputTokenDetails?.reasoningTokens ??
+			usage.details?.reasoningTokens
+
+		const inputTokens = usage.inputTokens || 0
+		const outputTokens = usage.outputTokens || 0
 		return {
 			type: "usage",
-			inputTokens: usage.inputTokens || 0,
-			outputTokens: usage.outputTokens || 0,
+			inputTokens,
+			outputTokens,
 			cacheReadTokens,
 			reasoningTokens,
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}
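openai.ts (like sambanova.ts and xai.ts further down) still puts provider-reported metadata ahead of anything in the usage object. A compact sketch of that precedence; the metadata shape is assumed from the field names in the diff:

```ts
type OpenAiMeta = { openai?: { cachedPromptTokens?: number; reasoningTokens?: number } }
type UsageSketch = {
	cachedInputTokens?: number
	inputTokenDetails?: { cacheReadTokens?: number }
	details?: { cachedInputTokens?: number }
}

// providerMetadata -> v6 top-level -> v6 structured -> legacy details
const cacheReadFrom = (meta: OpenAiMeta | undefined, usage: UsageSketch) =>
	meta?.openai?.cachedPromptTokens ??
	usage.cachedInputTokens ??
	usage.inputTokenDetails?.cacheReadTokens ??
	usage.details?.cachedInputTokens

cacheReadFrom({ openai: { cachedPromptTokens: 40 } }, { cachedInputTokens: 30 }) // => 40 (metadata wins)
cacheReadFrom(undefined, { inputTokenDetails: { cacheReadTokens: 25 } }) // => 25 (v6 structured fallback)
```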
 
 

+ 3 - 0
src/api/providers/openrouter.ts

@@ -127,6 +127,9 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 			...(cacheReadTokens > 0 ? { cacheReadTokens } : {}),
 			...(typeof reasoningTokens === "number" && reasoningTokens > 0 ? { reasoningTokens } : {}),
 			totalCost,
+			// OpenRouter uses OpenAI convention: inputTokens is already total
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}
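The comment above is the crux of the new totalInputTokens/totalOutputTokens fields: for OpenAI-convention providers the reported inputTokens already include cached tokens, so the total is a pass-through. For Anthropic-convention providers (not shown in this excerpt) the raw inputTokens exclude cache traffic, so a comparable total would have to add it back. A hedged sketch of that distinction, not taken verbatim from the PR:

```ts
type Usage = { inputTokens: number; cacheReadTokens?: number; cacheWriteTokens?: number }

// OpenAI convention (openai, openrouter, vertex, ...): cached tokens are already counted.
const totalInOpenAiStyle = (u: Usage) => u.inputTokens

// Anthropic convention (assumption: how the anthropic/bedrock handlers in this PR derive
// their totals; those files are outside this excerpt): cache reads/writes are reported
// separately and must be added back to get a comparable total.
const totalInAnthropicStyle = (u: Usage) =>
	u.inputTokens + (u.cacheReadTokens ?? 0) + (u.cacheWriteTokens ?? 0)

totalInOpenAiStyle({ inputTokens: 130, cacheReadTokens: 30 }) // => 130
totalInAnthropicStyle({ inputTokens: 100, cacheReadTokens: 30 }) // => 130 (same logical total)
```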
 
 

+ 18 - 3
src/api/providers/requesty.ts

@@ -142,6 +142,12 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
 		usage: {
 			inputTokens?: number
 			outputTokens?: number
+			totalInputTokens?: number
+			totalOutputTokens?: number
+			cachedInputTokens?: number
+			reasoningTokens?: number
+			inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+			outputTokenDetails?: { reasoningTokens?: number }
 			details?: {
 				cachedInputTokens?: number
 				reasoningTokens?: number
@@ -152,8 +158,14 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
 	): ApiStreamUsageChunk {
 		const inputTokens = usage.inputTokens || 0
 		const outputTokens = usage.outputTokens || 0
-		const cacheWriteTokens = providerMetadata?.requesty?.usage?.cachingTokens ?? 0
-		const cacheReadTokens = providerMetadata?.requesty?.usage?.cachedTokens ?? usage.details?.cachedInputTokens ?? 0
+		const cacheWriteTokens =
+			providerMetadata?.requesty?.usage?.cachingTokens ?? usage.inputTokenDetails?.cacheWriteTokens ?? 0
+		const cacheReadTokens =
+			providerMetadata?.requesty?.usage?.cachedTokens ??
+			usage.cachedInputTokens ??
+			usage.inputTokenDetails?.cacheReadTokens ??
+			usage.details?.cachedInputTokens ??
+			0
 
 
 		const { totalCost } = modelInfo
 			? calculateApiCostOpenAI(modelInfo, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
@@ -165,8 +177,11 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
 			outputTokens,
 			cacheWriteTokens,
 			cacheReadTokens,
-			reasoningTokens: usage.details?.reasoningTokens,
+			reasoningTokens:
+				usage.reasoningTokens ?? usage.outputTokenDetails?.reasoningTokens ?? usage.details?.reasoningTokens,
 			totalCost,
 			totalCost,
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}
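The Requesty handler prices the request client-side from the resolved token counts via calculateApiCostOpenAI. The helper below only illustrates that style of accounting; it is not the real shared/cost implementation — the pricing fields and rates are invented, and the cached-token treatment is one plausible reading of the OpenAI convention:

```ts
// Illustrative only — not calculateApiCostOpenAI from shared/cost. Prices are per million tokens.
interface PricingSketch {
	inputPrice: number
	outputPrice: number
	cacheWritePrice: number
	cacheReadPrice: number
}

function sketchCost(p: PricingSketch, inTok: number, outTok: number, cacheWrite: number, cacheRead: number): number {
	const perTok = (price: number) => price / 1_000_000
	// OpenAI convention: inTok already includes cached tokens, so bill the cached portion
	// at the cache-read rate and only the remainder at the normal input rate.
	const uncached = Math.max(0, inTok - cacheRead)
	return (
		uncached * perTok(p.inputPrice) +
		cacheRead * perTok(p.cacheReadPrice) +
		cacheWrite * perTok(p.cacheWritePrice) +
		outTok * perTok(p.outputPrice)
	)
}

// Example with made-up pricing: 1000 input tokens of which 400 were cache reads.
sketchCost({ inputPrice: 3, outputPrice: 15, cacheWritePrice: 3.75, cacheReadPrice: 0.3 }, 1000, 200, 0, 400)
```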
 
 

+ 69 - 8
src/api/providers/roo.ts

@@ -29,6 +29,40 @@ import { t } from "../../i18n"
 import type { RooMessage } from "../../core/task-persistence/rooMessage"
 import { sanitizeMessagesForProvider } from "../transform/sanitize-messages"
 
 
+type RooProviderMetadata = {
+	cost?: number
+	cache_creation_input_tokens?: number
+	cache_read_input_tokens?: number
+	cached_tokens?: number
+}
+
+type AnthropicProviderMetadata = {
+	cacheCreationInputTokens?: number
+	cacheReadInputTokens?: number
+	usage?: {
+		cache_read_input_tokens?: number
+	}
+}
+
+type GatewayProviderMetadata = {
+	cost?: number
+	cache_creation_input_tokens?: number
+	cached_tokens?: number
+}
+
+type UsageWithCache = {
+	inputTokens?: number
+	outputTokens?: number
+	cachedInputTokens?: number
+	inputTokenDetails?: {
+		cacheReadTokens?: number
+		cacheWriteTokens?: number
+	}
+	details?: {
+		cachedInputTokens?: number
+	}
+}
+
 function getSessionToken(): string {
 	const token = CloudService.hasInstance() ? CloudService.instance.authService?.getSessionToken() : undefined
 	return token ?? "unauthenticated"
@@ -95,6 +129,8 @@ export class RooHandler extends BaseProvider implements SingleCompletionHandler
 		messages: RooMessage[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
+		const firstNumber = (...values: Array<number | undefined>) => values.find((value) => typeof value === "number")
+
 		const model = this.getModel()
 		const { id: modelId, info } = model
 
 
@@ -149,18 +185,40 @@ export class RooHandler extends BaseProvider implements SingleCompletionHandler
 			}

 			// Check provider metadata for usage details
-			const providerMetadata =
-				(await result.providerMetadata) ?? (await (result as any).experimental_providerMetadata)
-			const rooMeta = providerMetadata?.roo as Record<string, any> | undefined
+			const providerMetadata = (await result.providerMetadata) ?? undefined
+			const experimentalProviderMetadata = await (
+				result as { experimental_providerMetadata?: Promise<Record<string, unknown> | undefined> }
+			).experimental_providerMetadata
+			const metadataWithFallback = providerMetadata ?? experimentalProviderMetadata
+			const rooMeta = metadataWithFallback?.roo as RooProviderMetadata | undefined
+			const anthropicMeta = metadataWithFallback?.anthropic as AnthropicProviderMetadata | undefined
+			const gatewayMeta = metadataWithFallback?.gateway as GatewayProviderMetadata | undefined
 
 
 			// Process usage with protocol-aware normalization
-			const usage = await result.usage
+			const usage = (await result.usage) as UsageWithCache
 			const promptTokens = usage.inputTokens ?? 0
 			const completionTokens = usage.outputTokens ?? 0
 
 
-			// Extract cache tokens from provider metadata
-			const cacheCreation = (rooMeta?.cache_creation_input_tokens as number) ?? 0
-			const cacheRead = (rooMeta?.cache_read_input_tokens as number) ?? (rooMeta?.cached_tokens as number) ?? 0
+			// Extract cache tokens with priority chain (no double counting):
+			// Roo metadata -> Anthropic metadata -> Gateway metadata -> AI SDK usage -> legacy usage.details -> 0
+			const cacheCreation =
+				firstNumber(
+					rooMeta?.cache_creation_input_tokens,
+					anthropicMeta?.cacheCreationInputTokens,
+					gatewayMeta?.cache_creation_input_tokens,
+					usage.inputTokenDetails?.cacheWriteTokens,
+				) ?? 0
+			const cacheRead =
+				firstNumber(
+					rooMeta?.cache_read_input_tokens,
+					rooMeta?.cached_tokens,
+					anthropicMeta?.cacheReadInputTokens,
+					anthropicMeta?.usage?.cache_read_input_tokens,
+					gatewayMeta?.cached_tokens,
+					usage.cachedInputTokens,
+					usage.inputTokenDetails?.cacheReadTokens,
+					usage.details?.cachedInputTokens,
+				) ?? 0
 
 
 			// Protocol-aware token normalization:
 			// - OpenAI protocol expects TOTAL input tokens (cached + non-cached)
@@ -171,7 +229,7 @@ export class RooHandler extends BaseProvider implements SingleCompletionHandler
 
 
 			// Cost: prefer server-side cost, fall back to client-side calculation
 			const isFreeModel = info.isFree === true
-			const serverCost = rooMeta?.cost as number | undefined
+			const serverCost = firstNumber(rooMeta?.cost, gatewayMeta?.cost)
 			const { totalCost: calculatedCost } = calculateApiCostOpenAI(
 				info,
 				promptTokens,
@@ -188,6 +246,9 @@ export class RooHandler extends BaseProvider implements SingleCompletionHandler
 				cacheWriteTokens: cacheCreation,
 				cacheReadTokens: cacheRead,
 				totalCost,
+				// Roo: promptTokens is always the server-reported total regardless of protocol normalization
+				totalInputTokens: promptTokens,
+				totalOutputTokens: completionTokens,
 			}

 			yield* yieldResponseMessage(result)
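The firstNumber helper keeps the Roo metadata chain auditable: the first defined number wins and nothing is summed, so a figure echoed by both Roo and the gateway is counted once. A standalone sketch with invented payloads:

```ts
const firstNumber = (...values: Array<number | undefined>) => values.find((value) => typeof value === "number")

// Invented payloads: Roo metadata present, gateway metadata echoing the same cache read.
const rooMeta = { cache_read_input_tokens: 120, cost: 0.0042 }
const gatewayMeta = { cached_tokens: 120, cost: 0.005 }

const cacheRead = firstNumber(rooMeta.cache_read_input_tokens, gatewayMeta.cached_tokens) ?? 0 // 120, not 240
const serverCost = firstNumber(rooMeta.cost, gatewayMeta.cost) // 0.0042 — Roo's figure wins

// A zero reported by a higher-priority source is still a defined number and short-circuits:
firstNumber(0, 55) // => 0
```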

+ 23 - 7
src/api/providers/sambanova.ts

@@ -74,6 +74,12 @@ export class SambaNovaHandler extends BaseProvider implements SingleCompletionHa
 		usage: {
 			inputTokens?: number
 			outputTokens?: number
+			totalInputTokens?: number
+			totalOutputTokens?: number
+			cachedInputTokens?: number
+			reasoningTokens?: number
+			inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+			outputTokenDetails?: { reasoningTokens?: number }
 			details?: {
 				cachedInputTokens?: number
 				reasoningTokens?: number
@@ -86,17 +92,27 @@ export class SambaNovaHandler extends BaseProvider implements SingleCompletionHa
 			}
 		},
 	): ApiStreamUsageChunk {
-		// Extract cache metrics from SambaNova's providerMetadata if available
-		const cacheReadTokens = providerMetadata?.sambanova?.promptCacheHitTokens ?? usage.details?.cachedInputTokens
-		const cacheWriteTokens = providerMetadata?.sambanova?.promptCacheMissTokens
-
+		// Extract cache metrics from SambaNova's providerMetadata, then v6 fields, then legacy
+		const cacheReadTokens =
+			providerMetadata?.sambanova?.promptCacheHitTokens ??
+			usage.cachedInputTokens ??
+			usage.inputTokenDetails?.cacheReadTokens ??
+			usage.details?.cachedInputTokens
+		const cacheWriteTokens =
+			providerMetadata?.sambanova?.promptCacheMissTokens ?? usage.inputTokenDetails?.cacheWriteTokens
+
+		const inputTokens = usage.inputTokens || 0
+		const outputTokens = usage.outputTokens || 0
 		return {
 		return {
 			type: "usage",
-			outputTokens: usage.outputTokens || 0,
+			inputTokens,
+			outputTokens,
 			cacheReadTokens,
 			cacheReadTokens,
 			cacheWriteTokens,
+			reasoningTokens:
+				usage.reasoningTokens ?? usage.outputTokenDetails?.reasoningTokens ?? usage.details?.reasoningTokens,
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}
 
 

+ 22 - 4
src/api/providers/vercel-ai-gateway.ts

@@ -87,6 +87,12 @@ export class VercelAiGatewayHandler extends BaseProvider implements SingleComple
 		usage: {
 			inputTokens?: number
 			outputTokens?: number
+			totalInputTokens?: number
+			totalOutputTokens?: number
+			cachedInputTokens?: number
+			reasoningTokens?: number
+			inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+			outputTokenDetails?: { reasoningTokens?: number }
 			details?: {
 				cachedInputTokens?: number
 				reasoningTokens?: number
@@ -96,17 +102,29 @@ export class VercelAiGatewayHandler extends BaseProvider implements SingleComple
 	): ApiStreamUsageChunk {
 		const gatewayMeta = providerMetadata?.gateway as Record<string, unknown> | undefined
 
 
-		const cacheWriteTokens = (gatewayMeta?.cache_creation_input_tokens as number) ?? undefined
-		const cacheReadTokens = usage.details?.cachedInputTokens ?? (gatewayMeta?.cached_tokens as number) ?? undefined
+		const cacheWriteTokens =
+			(gatewayMeta?.cache_creation_input_tokens as number) ??
+			usage.inputTokenDetails?.cacheWriteTokens ??
+			undefined
+		const cacheReadTokens =
+			usage.cachedInputTokens ??
+			usage.inputTokenDetails?.cacheReadTokens ??
+			usage.details?.cachedInputTokens ??
+			(gatewayMeta?.cached_tokens as number) ??
+			undefined
 		const totalCost = (gatewayMeta?.cost as number) ?? 0
 
 
+		const inputTokens = usage.inputTokens || 0
+		const outputTokens = usage.outputTokens || 0
 		return {
 			type: "usage",
-			inputTokens: usage.inputTokens || 0,
-			outputTokens: usage.outputTokens || 0,
+			inputTokens,
+			outputTokens,
 			cacheWriteTokens,
 			cacheReadTokens,
 			totalCost,
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}
 
 

+ 13 - 2
src/api/providers/vertex.ts

@@ -229,6 +229,12 @@ export class VertexHandler extends BaseProvider implements SingleCompletionHandl
 		usage: {
 			inputTokens?: number
 			outputTokens?: number
+			totalInputTokens?: number
+			totalOutputTokens?: number
+			cachedInputTokens?: number
+			reasoningTokens?: number
+			inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+			outputTokenDetails?: { reasoningTokens?: number }
 			details?: {
 				cachedInputTokens?: number
 				reasoningTokens?: number
@@ -239,8 +245,10 @@ export class VertexHandler extends BaseProvider implements SingleCompletionHandl
 	): ApiStreamUsageChunk {
 		const inputTokens = usage.inputTokens || 0
 		const outputTokens = usage.outputTokens || 0
-		const cacheReadTokens = usage.details?.cachedInputTokens
-		const reasoningTokens = usage.details?.reasoningTokens
+		const cacheReadTokens =
+			usage.cachedInputTokens ?? usage.inputTokenDetails?.cacheReadTokens ?? usage.details?.cachedInputTokens
+		const reasoningTokens =
+			usage.reasoningTokens ?? usage.outputTokenDetails?.reasoningTokens ?? usage.details?.reasoningTokens
 
 
 		return {
 			type: "usage",
@@ -255,6 +263,9 @@ export class VertexHandler extends BaseProvider implements SingleCompletionHandl
 				cacheReadTokens,
 				reasoningTokens,
 			}),
+			// Vertex: inputTokens is already total
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}
 
 

+ 2 - 0
src/api/providers/vscode-lm.ts

@@ -475,6 +475,8 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 				type: "usage",
 				inputTokens: totalInputTokens,
 				outputTokens: totalOutputTokens,
+				totalInputTokens,
+				totalOutputTokens,
 			}
 		} catch (error: unknown) {
 			this.ensureCleanState()

+ 22 - 8
src/api/providers/xai.ts

@@ -82,6 +82,12 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler
 		usage: {
 			inputTokens?: number
 			outputTokens?: number
+			totalInputTokens?: number
+			totalOutputTokens?: number
+			cachedInputTokens?: number
+			reasoningTokens?: number
+			inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number }
+			outputTokenDetails?: { reasoningTokens?: number }
 			details?: {
 				cachedInputTokens?: number
 				reasoningTokens?: number
@@ -93,17 +99,25 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler
 			}
 		},
 	): ApiStreamUsageChunk {
-		// Extract cache metrics from xAI's providerMetadata if available
-		// xAI supports prompt caching through prompt_tokens_details.cached_tokens
-		const cacheReadTokens = providerMetadata?.xai?.cachedPromptTokens ?? usage.details?.cachedInputTokens
-
+		// Extract cache metrics from xAI's providerMetadata, then v6 fields, then legacy
+		const cacheReadTokens =
+			providerMetadata?.xai?.cachedPromptTokens ??
+			usage.cachedInputTokens ??
+			usage.inputTokenDetails?.cacheReadTokens ??
+			usage.details?.cachedInputTokens
+
+		const inputTokens = usage.inputTokens || 0
+		const outputTokens = usage.outputTokens || 0
 		return {
 			type: "usage",
-			inputTokens: usage.inputTokens || 0,
-			outputTokens: usage.outputTokens || 0,
+			inputTokens,
+			outputTokens,
 			cacheReadTokens,
-			cacheWriteTokens: undefined, // xAI doesn't report cache write tokens separately
-			reasoningTokens: usage.details?.reasoningTokens,
+			cacheWriteTokens: usage.inputTokenDetails?.cacheWriteTokens, // xAI doesn't report cache write tokens itself; use the v6 detail field when present
+			reasoningTokens:
+				usage.reasoningTokens ?? usage.outputTokenDetails?.reasoningTokens ?? usage.details?.reasoningTokens,
+			totalInputTokens: inputTokens,
+			totalOutputTokens: outputTokens,
 		}
 	}
 
 

+ 4 - 0
src/api/transform/stream.ts

@@ -66,6 +66,10 @@ export interface ApiStreamUsageChunk {
 	cacheReadTokens?: number
 	reasoningTokens?: number
 	totalCost?: number
+	/** Total input tokens including cache read/write tokens. Each provider computes this directly. */
+	totalInputTokens?: number
+	/** Total output tokens. Each provider computes this directly. */
+	totalOutputTokens?: number
 }

 export interface ApiStreamGroundingChunk {

+ 35 - 60
src/core/task/Task.ts

@@ -89,7 +89,6 @@ import { TerminalRegistry } from "../../integrations/terminal/TerminalRegistry"
 import { OutputInterceptor } from "../../integrations/terminal/OutputInterceptor"

 // utils
-import { calculateApiCostAnthropic, calculateApiCostOpenAI } from "../../shared/cost"
 import { getWorkspacePath } from "../../utils/path"
 import { sanitizeToolUseId } from "../../utils/tool-id"
 import { getTaskDirectoryPath } from "../../utils/storage"
@@ -2894,6 +2893,8 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				let inputTokens = 0
 				let outputTokens = 0
 				let totalCost: number | undefined
+				let totalInputTokensAccum = 0
+				let totalOutputTokensAccum = 0
 
 
 				// We can't use `api_req_finished` anymore since it's a unique case
 				// where it could come after a streaming message (i.e. in the middle
@@ -2909,38 +2910,14 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 
 
 					const existingData = JSON.parse(this.clineMessages[lastApiReqIndex].text || "{}")
 
 
-					// Calculate total tokens and cost using provider-aware function
-					const modelId = getModelId(this.apiConfiguration)
-					const apiProvider = this.apiConfiguration.apiProvider
-					const apiProtocol = getApiProtocol(
-						apiProvider && !isRetiredProvider(apiProvider) ? apiProvider : undefined,
-						modelId,
-					)
-
-					const costResult =
-						apiProtocol === "anthropic"
-							? calculateApiCostAnthropic(
-									streamModelInfo,
-									inputTokens,
-									outputTokens,
-									cacheWriteTokens,
-									cacheReadTokens,
-								)
-							: calculateApiCostOpenAI(
-									streamModelInfo,
-									inputTokens,
-									outputTokens,
-									cacheWriteTokens,
-									cacheReadTokens,
-								)
-
+					// Use provider-computed totals when available, falling back to raw token counts
 					this.clineMessages[lastApiReqIndex].text = JSON.stringify({
 						...existingData,
-						tokensIn: costResult.totalInputTokens,
-						tokensOut: costResult.totalOutputTokens,
+						tokensIn: totalInputTokensAccum || inputTokens,
+						tokensOut: totalOutputTokensAccum || outputTokens,
 						cacheWrites: cacheWriteTokens,
 						cacheReads: cacheReadTokens,
-						cost: totalCost ?? costResult.totalCost,
+						cost: totalCost ?? existingData.cost,
 						cancelReason,
 						streamingFailedMessage,
 					} satisfies ClineApiReqInfo)
@@ -3069,7 +3046,9 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 								outputTokens += chunk.outputTokens
 								cacheWriteTokens += chunk.cacheWriteTokens ?? 0
 								cacheReadTokens += chunk.cacheReadTokens ?? 0
-								totalCost = chunk.totalCost
+								totalCost = chunk.totalCost ?? totalCost
+								totalInputTokensAccum += chunk.totalInputTokens ?? 0
+								totalOutputTokensAccum += chunk.totalOutputTokens ?? 0
 								break
 							case "grounding":
 								// Handle grounding sources separately from regular content
@@ -3202,6 +3181,8 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 						cacheWrite: cacheWriteTokens,
 						cacheRead: cacheReadTokens,
 						total: totalCost,
+						totalIn: totalInputTokensAccum,
+						totalOut: totalOutputTokensAccum,
 					}

 					const drainStreamInBackgroundToFindAllUsage = async (apiReqIndex: number) => {
@@ -3215,6 +3196,8 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 						let bgCacheWriteTokens = currentTokens.cacheWrite
 						let bgCacheReadTokens = currentTokens.cacheRead
 						let bgTotalCost = currentTokens.total
+						let bgTotalInputTokens = currentTokens.totalIn
+						let bgTotalOutputTokens = currentTokens.totalOut
 
 
 						// Helper function to capture telemetry and update messages
 						const captureUsageData = async (
@@ -3224,6 +3207,8 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 								cacheWrite: number
 								cacheRead: number
 								total?: number
+								totalIn: number
+								totalOut: number
 							},
 							messageIndex: number = apiReqIndex,
 						) => {
@@ -3238,7 +3223,9 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 								outputTokens = tokens.output
 								cacheWriteTokens = tokens.cacheWrite
 								cacheReadTokens = tokens.cacheRead
-								totalCost = tokens.total
+								totalCost = tokens.total ?? totalCost
+								totalInputTokensAccum = tokens.totalIn
+								totalOutputTokensAccum = tokens.totalOut
 
 
 								// Update the API request message with the latest usage data
 								updateApiReqMsg()
@@ -3250,38 +3237,20 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 									await this.updateClineMessage(apiReqMessage)
 								}
 
 
-								// Capture telemetry with provider-aware cost calculation
-								const modelId = getModelId(this.apiConfiguration)
-								const apiProvider = this.apiConfiguration.apiProvider
-								const apiProtocol = getApiProtocol(
-									apiProvider && !isRetiredProvider(apiProvider) ? apiProvider : undefined,
-									modelId,
-								)
-
-								// Use the appropriate cost function based on the API protocol
-								const costResult =
-									apiProtocol === "anthropic"
-										? calculateApiCostAnthropic(
-												streamModelInfo,
-												tokens.input,
-												tokens.output,
-												tokens.cacheWrite,
-												tokens.cacheRead,
-											)
-										: calculateApiCostOpenAI(
-												streamModelInfo,
-												tokens.input,
-												tokens.output,
-												tokens.cacheWrite,
-												tokens.cacheRead,
-											)
+								const messageData = JSON.parse(
+									this.clineMessages[messageIndex]?.text || "{}",
+								) as ClineApiReqInfo
+								const telemetryCost =
+									tokens.total ??
+									(typeof messageData.cost === "number" ? messageData.cost : undefined)
 
 
+								// Use provider-computed totals for telemetry, falling back to raw counts
 								TelemetryService.instance.captureLlmCompletion(this.taskId, {
-									inputTokens: costResult.totalInputTokens,
-									outputTokens: costResult.totalOutputTokens,
+									inputTokens: tokens.totalIn || tokens.input,
+									outputTokens: tokens.totalOut || tokens.output,
 									cacheWriteTokens: tokens.cacheWrite,
 									cacheReadTokens: tokens.cacheRead,
-									cost: tokens.total ?? costResult.totalCost,
+									cost: telemetryCost,
 								})
 							}
 						}
@@ -3315,7 +3284,9 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 									bgOutputTokens += chunk.outputTokens
 									bgCacheWriteTokens += chunk.cacheWriteTokens ?? 0
 									bgCacheReadTokens += chunk.cacheReadTokens ?? 0
-									bgTotalCost = chunk.totalCost
+									bgTotalCost = chunk.totalCost ?? bgTotalCost
+									bgTotalInputTokens += chunk.totalInputTokens ?? 0
+									bgTotalOutputTokens += chunk.totalOutputTokens ?? 0
 								}
 							}
 
 
@@ -3334,6 +3305,8 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 										cacheWrite: bgCacheWriteTokens,
 										cacheRead: bgCacheReadTokens,
 										total: bgTotalCost,
+										totalIn: bgTotalInputTokens,
+										totalOut: bgTotalOutputTokens,
 									},
 									lastApiReqIndex,
 								)
@@ -3358,6 +3331,8 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 										cacheWrite: bgCacheWriteTokens,
 										cacheRead: bgCacheReadTokens,
 										total: bgTotalCost,
+										totalIn: bgTotalInputTokens,
+										totalOut: bgTotalOutputTokens,
 									},
 									lastApiReqIndex,
 								)
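Taken together, the Task.ts changes replace per-protocol recomputation with straight accumulation of provider-reported figures. A condensed sketch of the folding behaviour, using a minimal stand-in for ApiStreamUsageChunk:

```ts
type UsageChunk = {
	type: "usage"
	inputTokens: number
	outputTokens: number
	cacheWriteTokens?: number
	cacheReadTokens?: number
	totalCost?: number
	totalInputTokens?: number
	totalOutputTokens?: number
}

function foldUsage(chunks: UsageChunk[]) {
	let inputTokens = 0
	let outputTokens = 0
	let cacheWriteTokens = 0
	let cacheReadTokens = 0
	let totalCost: number | undefined
	let totalInputTokensAccum = 0
	let totalOutputTokensAccum = 0

	for (const chunk of chunks) {
		inputTokens += chunk.inputTokens
		outputTokens += chunk.outputTokens
		cacheWriteTokens += chunk.cacheWriteTokens ?? 0
		cacheReadTokens += chunk.cacheReadTokens ?? 0
		// A later chunk without a cost no longer wipes out an earlier one.
		totalCost = chunk.totalCost ?? totalCost
		totalInputTokensAccum += chunk.totalInputTokens ?? 0
		totalOutputTokensAccum += chunk.totalOutputTokens ?? 0
	}

	return {
		// Providers that don't emit the new totals yet fall back to the raw counts.
		tokensIn: totalInputTokensAccum || inputTokens,
		tokensOut: totalOutputTokensAccum || outputTokens,
		cacheWrites: cacheWriteTokens,
		cacheReads: cacheReadTokens,
		cost: totalCost,
	}
}
```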