
fix: record cached tokens from chat completions usage (#889)

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <[email protected]>
ding113 1 month ago
commit 1bb1d49a16
2 changed files with 44 additions and 3 deletions
  1. src/app/v1/_lib/proxy/response-handler.ts (+12 -3)
  2. tests/unit/proxy/extract-usage-metrics.test.ts (+32 -0)
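
For context, the two OpenAI usage shapes involved: before this commit, extractUsageMetrics only understood the Response API's nested input_tokens_details.cached_tokens, so Chat Completions responses, which report the same value under prompt_tokens_details.cached_tokens, lost their cached-token count. A minimal illustration of both payload shapes (field names follow the public OpenAI APIs; the numbers are made up):

```ts
// Chat Completions: cached tokens are nested under prompt_tokens_details.
const chatCompletionsUsage = {
  prompt_tokens: 1000,
  completion_tokens: 500,
  prompt_tokens_details: { cached_tokens: 200 },
};

// Response API: the equivalent count lives under input_tokens_details.
const responseApiUsage = {
  input_tokens: 1000,
  output_tokens: 500,
  input_tokens_details: { cached_tokens: 200 },
};
```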

+ 12 - 3
src/app/v1/_lib/proxy/response-handler.ts

@@ -2528,9 +2528,7 @@ export function extractUsageMetrics(value: unknown): UsageMetrics | null {
     hasAny = true;
   }
 
-  // OpenAI Response API format: input_tokens_details.cached_tokens (nested structure)
-  // Only use when the top-level field is absent (avoids double counting)
-  if (!result.cache_read_input_tokens) {
+  if (result.cache_read_input_tokens === undefined) {
     const inputTokensDetails = usage.input_tokens_details as Record<string, unknown> | undefined;
     if (inputTokensDetails && typeof inputTokensDetails.cached_tokens === "number") {
       result.cache_read_input_tokens = inputTokensDetails.cached_tokens;
@@ -2541,6 +2539,17 @@ export function extractUsageMetrics(value: unknown): UsageMetrics | null {
     }
   }
 
+  if (result.cache_read_input_tokens === undefined) {
+    const promptTokensDetails = usage.prompt_tokens_details as Record<string, unknown> | undefined;
+    if (promptTokensDetails && typeof promptTokensDetails.cached_tokens === "number") {
+      result.cache_read_input_tokens = promptTokensDetails.cached_tokens;
+      hasAny = true;
+      logger.debug("[ResponseHandler] Parsed cached tokens from OpenAI Chat Completions format", {
+        cachedTokens: promptTokensDetails.cached_tokens,
+      });
+    }
+  }
+
   return hasAny ? result : null;
 }
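
Read together with the top-level extraction earlier in extractUsageMetrics (outside this hunk), the function now tries three sources in order, and the switch from `!result.cache_read_input_tokens` to `=== undefined` means an explicit 0 no longer falls through to the nested fields. A minimal sketch of that precedence, assuming the top-level read happens first as the new tests imply; readCachedTokens is a hypothetical stand-in, not this repo's API:

```ts
function readCachedTokens(usage: Record<string, unknown>): number | undefined {
  // 1. A top-level cache_read_input_tokens field always wins.
  if (typeof usage.cache_read_input_tokens === "number") {
    return usage.cache_read_input_tokens;
  }
  // 2. OpenAI Response API: usage.input_tokens_details.cached_tokens.
  const inputDetails = usage.input_tokens_details as Record<string, unknown> | undefined;
  if (inputDetails && typeof inputDetails.cached_tokens === "number") {
    return inputDetails.cached_tokens;
  }
  // 3. OpenAI Chat Completions: usage.prompt_tokens_details.cached_tokens (this fix).
  const promptDetails = usage.prompt_tokens_details as Record<string, unknown> | undefined;
  if (promptDetails && typeof promptDetails.cached_tokens === "number") {
    return promptDetails.cached_tokens;
  }
  // Type checks, not truthiness: a cached_tokens of 0 is still recorded.
  return undefined;
}
```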
 

+ 32 - 0
tests/unit/proxy/extract-usage-metrics.test.ts

@@ -540,6 +540,38 @@ describe("extractUsageMetrics", () => {
      // top-level takes precedence
       expect(result.usageMetrics?.cache_read_input_tokens).toBe(300);
     });
+
+    it("should extract cache reads from Chat Completions prompt_tokens_details.cached_tokens", () => {
+      const response = JSON.stringify({
+        usage: {
+          prompt_tokens: 1000,
+          completion_tokens: 500,
+          prompt_tokens_details: {
+            cached_tokens: 200,
+          },
+        },
+      });
+
+      const result = parseUsageFromResponseText(response, "openai");
+
+      expect(result.usageMetrics?.cache_read_input_tokens).toBe(200);
+    });
+
+    it("should prefer top-level cache_read_input_tokens over the Chat Completions nested format", () => {
+      const response = JSON.stringify({
+        usage: {
+          prompt_tokens: 1000,
+          cache_read_input_tokens: 300,
+          prompt_tokens_details: {
+            cached_tokens: 200,
+          },
+        },
+      });
+
+      const result = parseUsageFromResponseText(response, "openai");
+
+      expect(result.usageMetrics?.cache_read_input_tokens).toBe(300);
+    });
   });
 
  describe("SSE streaming response parsing", () => {