Przeglądaj źródła

Use upstream_inference_cost for OpenRouter BYOK cost calculation and show cached token count (#5145)

Improve OpenRouter cache calculation and show cached tokens
Christiaan Arnoldus 9 miesięcy temu
rodzic
commit
1a9aa13bd2

+ 5 - 9
src/api/providers/openrouter.ts

@@ -48,13 +48,11 @@ interface CompletionUsage {
 	}
 	total_tokens?: number
 	cost?: number
-	is_byok?: boolean
+	cost_details?: {
+		upstream_inference_cost?: number
+	}
 }
 
-// with bring your own key, OpenRouter charges 5% of what it normally would: https://openrouter.ai/docs/use-cases/byok
-// so we multiply the cost reported by OpenRouter to get an estimate of what the request actually cost
-const BYOK_COST_MULTIPLIER = 20
-
 export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: OpenAI
@@ -168,11 +166,9 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 				type: "usage",
 				inputTokens: lastUsage.prompt_tokens || 0,
 				outputTokens: lastUsage.completion_tokens || 0,
-				// Waiting on OpenRouter to figure out what this represents in the Gemini case
-				// and how to best support it.
-				// cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens,
+				cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens,
 				reasoningTokens: lastUsage.completion_tokens_details?.reasoning_tokens,
-				totalCost: (lastUsage.is_byok ? BYOK_COST_MULTIPLIER : 1) * (lastUsage.cost || 0),
+				totalCost: (lastUsage.cost_details?.upstream_inference_cost || 0) + (lastUsage.cost || 0),
 			}
 		}
 	}

+ 0 - 1
webview-ui/src/__tests__/ContextWindowProgress.spec.tsx

@@ -51,7 +51,6 @@ describe("ContextWindowProgress", () => {
 			task: { ts: Date.now(), type: "say" as const, say: "text" as const, text: "Test task" },
 			tokensIn: 100,
 			tokensOut: 50,
-			doesModelSupportPromptCache: true,
 			totalCost: 0.001,
 			contextTokens: 1000,
 			onClose: vi.fn(),

+ 0 - 1
webview-ui/src/components/chat/ChatView.tsx

@@ -1371,7 +1371,6 @@ const ChatViewComponent: React.ForwardRefRenderFunction<ChatViewRef, ChatViewPro
 						task={task}
 						tokensIn={apiMetrics.totalTokensIn}
 						tokensOut={apiMetrics.totalTokensOut}
-						doesModelSupportPromptCache={model?.supportsPromptCache ?? false}
 						cacheWrites={apiMetrics.totalCacheWrites}
 						cacheReads={apiMetrics.totalCacheReads}
 						totalCost={apiMetrics.totalCost}

+ 18 - 21
webview-ui/src/components/chat/TaskHeader.tsx

@@ -25,7 +25,6 @@ export interface TaskHeaderProps {
 	task: ClineMessage
 	tokensIn: number
 	tokensOut: number
-	doesModelSupportPromptCache: boolean
 	cacheWrites?: number
 	cacheReads?: number
 	totalCost: number
@@ -39,7 +38,6 @@ const TaskHeader = ({
 	task,
 	tokensIn,
 	tokensOut,
-	doesModelSupportPromptCache,
 	cacheWrites,
 	cacheReads,
 	totalCost,
@@ -186,25 +184,24 @@ const TaskHeader = ({
 								{!totalCost && <TaskActions item={currentTaskItem} buttonsDisabled={buttonsDisabled} />}
 							</div>
 
-							{doesModelSupportPromptCache &&
-								((typeof cacheReads === "number" && cacheReads > 0) ||
-									(typeof cacheWrites === "number" && cacheWrites > 0)) && (
-									<div className="flex items-center gap-1 flex-wrap h-[20px]">
-										<span className="font-bold">{t("chat:task.cache")}</span>
-										{typeof cacheWrites === "number" && cacheWrites > 0 && (
-											<span className="flex items-center gap-0.5">
-												<CloudUpload size={16} />
-												{formatLargeNumber(cacheWrites)}
-											</span>
-										)}
-										{typeof cacheReads === "number" && cacheReads > 0 && (
-											<span className="flex items-center gap-0.5">
-												<CloudDownload size={16} />
-												{formatLargeNumber(cacheReads)}
-											</span>
-										)}
-									</div>
-								)}
+							{((typeof cacheReads === "number" && cacheReads > 0) ||
+								(typeof cacheWrites === "number" && cacheWrites > 0)) && (
+								<div className="flex items-center gap-1 flex-wrap h-[20px]">
+									<span className="font-bold">{t("chat:task.cache")}</span>
+									{typeof cacheWrites === "number" && cacheWrites > 0 && (
+										<span className="flex items-center gap-0.5">
+											<CloudUpload size={16} />
+											{formatLargeNumber(cacheWrites)}
+										</span>
+									)}
+									{typeof cacheReads === "number" && cacheReads > 0 && (
+										<span className="flex items-center gap-0.5">
+											<CloudDownload size={16} />
+											{formatLargeNumber(cacheReads)}
+										</span>
+									)}
+								</div>
+							)}
 
 							{!!totalCost && (
 								<div className="flex justify-between items-center h-[20px]">

+ 0 - 1
webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx

@@ -49,7 +49,6 @@ describe("TaskHeader", () => {
 		task: { type: "say", ts: Date.now(), text: "Test task", images: [] },
 		tokensIn: 100,
 		tokensOut: 50,
-		doesModelSupportPromptCache: true,
 		totalCost: 0.05,
 		contextTokens: 200,
 		buttonsDisabled: false,