فهرست منبع

Zen: adjust cache tokens

Frank 3 هفته پیش
والد
کامیت
a03a2b6eab

+ 7 - 2
packages/console/app/src/routes/zen/util/handler.ts

@@ -461,12 +461,17 @@ export async function handler(
       ...modelProvider,
       ...zenData.providers[modelProvider.id],
       ...(() => {
-        const format = zenData.providers[modelProvider.id].format
+        const providerProps = zenData.providers[modelProvider.id]
+        const format = providerProps.format
         const providerModel = modelProvider.model
         if (format === "anthropic") return anthropicHelper({ reqModel, providerModel })
         if (format === "google") return googleHelper({ reqModel, providerModel })
         if (format === "openai") return openaiHelper({ reqModel, providerModel })
-        return oaCompatHelper({ reqModel, providerModel })
+        return oaCompatHelper({
+          reqModel,
+          providerModel,
+          adjustCacheUsage: providerProps.adjustCacheUsage,
+        })
       })(),
     }
   }

+ 8 - 3
packages/console/app/src/routes/zen/util/provider/openai-compatible.ts

@@ -21,7 +21,7 @@ type Usage = {
   }
 }
 
-export const oaCompatHelper: ProviderHelper = () => ({
+export const oaCompatHelper: ProviderHelper = ({ adjustCacheUsage }) => ({
   format: "oa-compat",
   modifyUrl: (providerApi: string) => providerApi + "/chat/completions",
   modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => {
@@ -57,10 +57,15 @@ export const oaCompatHelper: ProviderHelper = () => ({
     }
   },
   normalizeUsage: (usage: Usage) => {
-    const inputTokens = usage.prompt_tokens ?? 0
+    let inputTokens = usage.prompt_tokens ?? 0
     const outputTokens = usage.completion_tokens ?? 0
     const reasoningTokens = usage.completion_tokens_details?.reasoning_tokens ?? undefined
-    const cacheReadTokens = usage.cached_tokens ?? usage.prompt_tokens_details?.cached_tokens ?? undefined
+    let cacheReadTokens = usage.cached_tokens ?? usage.prompt_tokens_details?.cached_tokens ?? undefined
+
+    if (adjustCacheUsage && !cacheReadTokens) {
+      cacheReadTokens = Math.floor(inputTokens * 0.9)
+    }
+
     return {
       inputTokens: inputTokens - (cacheReadTokens ?? 0),
       outputTokens,

+ 1 - 1
packages/console/app/src/routes/zen/util/provider/provider.ts

@@ -33,7 +33,7 @@ export type UsageInfo = {
   cacheWrite1hTokens?: number
 }
 
-export type ProviderHelper = (input: { reqModel: string; providerModel: string }) => {
+export type ProviderHelper = (input: { reqModel: string; providerModel: string; adjustCacheUsage?: boolean }) => {
   format: ZenData.Format
   modifyUrl: (providerApi: string, isStream?: boolean) => string
   modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => void

+ 1 - 0
packages/console/core/src/model.ts

@@ -48,6 +48,7 @@ export namespace ZenData {
     headerMappings: z.record(z.string(), z.string()).optional(),
     payloadModifier: z.record(z.string(), z.any()).optional(),
     payloadMappings: z.record(z.string(), z.string()).optional(),
+    adjustCacheUsage: z.boolean().optional(),
   })
 
   const ModelsSchema = z.object({