Chris Estreich 8 months ago
Parent
Commit
a08461a655

+ 2 - 1
src/api/index.ts

@@ -28,7 +28,8 @@ export interface SingleCompletionHandler {
 }
 
 export interface ApiHandler {
-	createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream
+	createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[], cacheKey?: string): ApiStream
+
 	getModel(): { id: string; info: ModelInfo }
 
 	/**
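
Since the new cacheKey parameter is optional, existing providers keep compiling unchanged; only handlers that implement prompt caching need to read it. A minimal sketch of a conforming handler, assuming a hypothetical ExampleHandler that has no caching support:

import type { Anthropic } from "@anthropic-ai/sdk"

// Hypothetical provider: the third parameter can simply be ignored
// when the backend has no prompt-caching support.
class ExampleHandler {
	async *createMessage(
		systemPrompt: string,
		messages: Anthropic.Messages.MessageParam[],
		cacheKey?: string,
	): AsyncGenerator<{ type: "text"; text: string }> {
		void cacheKey // unused here; the Gemini handler threads it into its content cache
		yield { type: "text", text: `${systemPrompt} (${messages.length} messages)` }
	}
}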

+ 91 - 9
src/api/providers/__tests__/gemini.test.ts

@@ -3,7 +3,7 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 
 import { GeminiHandler } from "../gemini"
-import { geminiDefaultModelId } from "../../../shared/api"
+import { geminiDefaultModelId, type ModelInfo } from "../../../shared/api"
 
 const GEMINI_20_FLASH_THINKING_NAME = "gemini-2.0-flash-thinking-exp-1219"
 
@@ -72,18 +72,15 @@ describe("GeminiHandler", () => {
 
 			// Should have 3 chunks: 'Hello', ' world!', and usage info
 			expect(chunks.length).toBe(3)
-			expect(chunks[0]).toEqual({
-				type: "text",
-				text: "Hello",
-			})
-			expect(chunks[1]).toEqual({
-				type: "text",
-				text: " world!",
-			})
+			expect(chunks[0]).toEqual({ type: "text", text: "Hello" })
+			expect(chunks[1]).toEqual({ type: "text", text: " world!" })
 			expect(chunks[2]).toEqual({
 				type: "usage",
 				inputTokens: 10,
 				outputTokens: 5,
+				cacheReadTokens: undefined,
+				cacheWriteTokens: undefined,
+				thinkingTokens: undefined,
 			})
 
 			// Verify the call to generateContentStream
@@ -171,4 +168,89 @@ describe("GeminiHandler", () => {
 			expect(modelInfo.id).toBe(geminiDefaultModelId) // Default model
 		})
 	})
+
+	describe("calculateCost", () => {
+		// Mock ModelInfo based on gemini-1.5-flash-latest pricing (per 1M tokens)
+		// 'id' and 'name' are omitted because they are not part of the ModelInfo type.
+		const mockInfo: ModelInfo = {
+			inputPrice: 0.125, // $/1M tokens
+			outputPrice: 0.375, // $/1M tokens
+			cacheWritesPrice: 0.125, // Assume same as input for test
+			cacheReadsPrice: 0.125 * 0.25, // Assume 0.25x input for test
+			contextWindow: 1_000_000,
+			maxTokens: 8192,
+			supportsPromptCache: true, // Enable cache calculations for tests
+		}
+
+		it("should calculate cost correctly based on input and output tokens", () => {
+			const inputTokens = 10000 // Use larger numbers for per-million pricing
+			const outputTokens = 20000
+			// Non-null assertions (!) are safe because mockInfo always defines these prices.
+			const expectedCost =
+				(inputTokens / 1_000_000) * mockInfo.inputPrice! + (outputTokens / 1_000_000) * mockInfo.outputPrice!
+
+			const cost = handler.calculateCost({ info: mockInfo, inputTokens, outputTokens })
+			expect(cost).toBeCloseTo(expectedCost)
+		})
+
+		it("should return 0 if token counts are zero", () => {
+			// Note: The method expects numbers, not undefined. Passing undefined would be a type error.
+			// The calculateCost method itself returns undefined if prices are missing, but 0 if tokens are 0 and prices exist.
+			expect(handler.calculateCost({ info: mockInfo, inputTokens: 0, outputTokens: 0 })).toBe(0)
+		})
+
+		it("should handle only input tokens", () => {
+			const inputTokens = 5000
+			// Non-null assertion (!) is safe: mockInfo defines inputPrice.
+			const expectedCost = (inputTokens / 1_000_000) * mockInfo.inputPrice!
+			expect(handler.calculateCost({ info: mockInfo, inputTokens, outputTokens: 0 })).toBeCloseTo(expectedCost)
+		})
+
+		it("should handle only output tokens", () => {
+			const outputTokens = 15000
+			// Non-null assertion (!) is safe: mockInfo defines outputPrice.
+			const expectedCost = (outputTokens / 1_000_000) * mockInfo.outputPrice!
+			expect(handler.calculateCost({ info: mockInfo, inputTokens: 0, outputTokens })).toBeCloseTo(expectedCost)
+		})
+
+		it("should calculate cost with cache write tokens", () => {
+			const inputTokens = 10000
+			const outputTokens = 20000
+			const cacheWriteTokens = 5000
+			const CACHE_TTL = 5 // Match the constant in gemini.ts
+
+			// Non-null assertions (!) are safe: mockInfo defines all prices.
+			const expectedInputCost = (inputTokens / 1_000_000) * mockInfo.inputPrice!
+			const expectedOutputCost = (outputTokens / 1_000_000) * mockInfo.outputPrice!
+			const expectedCacheWriteCost =
+				mockInfo.cacheWritesPrice! * (cacheWriteTokens / 1_000_000) * (CACHE_TTL / 60)
+			const expectedCost = expectedInputCost + expectedOutputCost + expectedCacheWriteCost
+
+			const cost = handler.calculateCost({ info: mockInfo, inputTokens, outputTokens, cacheWriteTokens })
+			expect(cost).toBeCloseTo(expectedCost)
+		})
+
+		it("should calculate cost with cache read tokens", () => {
+			const inputTokens = 10000 // Total logical input
+			const outputTokens = 20000
+			const cacheReadTokens = 8000 // Part of inputTokens read from cache
+
+			const uncachedReadTokens = inputTokens - cacheReadTokens
+			// Non-null assertions (!) are safe: mockInfo defines all prices.
+			const expectedInputCost = (uncachedReadTokens / 1_000_000) * mockInfo.inputPrice!
+			const expectedOutputCost = (outputTokens / 1_000_000) * mockInfo.outputPrice!
+			const expectedCacheReadCost = mockInfo.cacheReadsPrice! * (cacheReadTokens / 1_000_000)
+			const expectedCost = expectedInputCost + expectedOutputCost + expectedCacheReadCost
+
+			const cost = handler.calculateCost({ info: mockInfo, inputTokens, outputTokens, cacheReadTokens })
+			expect(cost).toBeCloseTo(expectedCost)
+		})
+
+		it("should return undefined if pricing info is missing", () => {
+			// Create a copy and explicitly set a price to undefined
+			const incompleteInfo: ModelInfo = { ...mockInfo, outputPrice: undefined }
+			const cost = handler.calculateCost({ info: incompleteInfo, inputTokens: 1000, outputTokens: 1000 })
+			expect(cost).toBeUndefined()
+		})
+	})
 })

+ 9 - 10
src/api/providers/anthropic.ts

@@ -42,8 +42,14 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 			case "claude-3-opus-20240229":
 			case "claude-3-haiku-20240307": {
 				/**
-				 * The latest message will be the new user message, one before will
-				 * be the assistant message from a previous request, and the user message before that will be a previously cached user message. So we need to mark the latest user message as ephemeral to cache it for the next request, and mark the second to last user message as ephemeral to let the server know the last message to retrieve from the cache for the current request..
+				 * The latest message will be the new user message, one before
+				 * will be the assistant message from a previous request, and
+				 * the user message before that will be a previously cached user
+				 * message. So we need to mark the latest user message as
+				 * ephemeral to cache it for the next request, and mark the
+				 * second to last user message as ephemeral to let the server
+				 * know the last message to retrieve from the cache for the
+				 * current request.
 				 */
 				const userMsgIndices = messages.reduce(
 					(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
@@ -77,9 +83,6 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 							}
 							return message
 						}),
-						// tools, // cache breakpoints go from tools > system > messages, and since tools dont change, we can just set the breakpoint at the end of system (this avoids having to set a breakpoint at the end of tools which by itself does not meet min requirements for haiku caching)
-						// tool_choice: { type: "auto" },
-						// tools: tools,
 						stream: true,
 					},
 					(() => {
@@ -102,9 +105,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 							case "claude-3-opus-20240229":
 							case "claude-3-haiku-20240307":
 								betas.push("prompt-caching-2024-07-31")
-								return {
-									headers: { "anthropic-beta": betas.join(",") },
-								}
+								return { headers: { "anthropic-beta": betas.join(",") } }
 							default:
 								return undefined
 						}
@@ -119,8 +120,6 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 					temperature,
 					system: [{ text: systemPrompt, type: "text" }],
 					messages,
-					// tools,
-					// tool_choice: { type: "auto" },
 					stream: true,
 				})) as any
 				break
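
The reflowed comment describes the caching strategy used with the prompt-caching beta: the newest user message is marked ephemeral so it gets cached for the next request, and the second-to-last user message is marked ephemeral so the server knows where the existing cache ends. A condensed sketch of that marking step, simplified from the surrounding code rather than copied from it:

import type { Anthropic } from "@anthropic-ai/sdk"

// Flag the last two user messages as ephemeral cache breakpoints (simplified sketch).
function markCacheBreakpoints(messages: Anthropic.Messages.MessageParam[]) {
	const userMsgIndices = messages.reduce<number[]>(
		(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
		[],
	)
	const breakpoints = new Set(userMsgIndices.slice(-2)) // newest two user messages

	return messages.map((message, index) => {
		const content = message.content
		if (!breakpoints.has(index) || typeof content === "string") {
			return message
		}
		// Attach cache_control to the final content block of the message.
		return {
			...message,
			content: content.map((block, i) =>
				i === content.length - 1 ? { ...block, cache_control: { type: "ephemeral" as const } } : block,
			),
		}
	})
}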

+ 126 - 27
src/api/providers/gemini.ts

@@ -1,9 +1,9 @@
 import type { Anthropic } from "@anthropic-ai/sdk"
 import {
 	GoogleGenAI,
-	ThinkingConfig,
 	type GenerateContentResponseUsageMetadata,
 	type GenerateContentParameters,
+	type Content,
 } from "@google/genai"
 
 import { SingleCompletionHandler } from "../"
@@ -13,30 +13,64 @@ import { convertAnthropicContentToGemini, convertAnthropicMessageToGemini } from
 import type { ApiStream } from "../transform/stream"
 import { BaseProvider } from "./base-provider"
 
+const CACHE_TTL = 5
+
 export class GeminiHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: GoogleGenAI
+	private contentCaches: Map<string, { key: string; count: number }>
 
 	constructor(options: ApiHandlerOptions) {
 		super()
 		this.options = options
 		this.client = new GoogleGenAI({ apiKey: options.geminiApiKey ?? "not-provided" })
+		this.contentCaches = new Map()
 	}
 
-	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
-		const { id: model, thinkingConfig, maxOutputTokens } = this.getModel()
+	async *createMessage(
+		systemInstruction: string,
+		messages: Anthropic.Messages.MessageParam[],
+		cacheKey?: string,
+	): ApiStream {
+		const { id: model, thinkingConfig, maxOutputTokens, info } = this.getModel()
+
+		const contents = messages.map(convertAnthropicMessageToGemini)
+		let uncachedContent: Content[] | undefined = undefined
+		let cachedContent: string | undefined = undefined
+		let cacheWriteTokens: number | undefined = undefined
+
+		// https://ai.google.dev/gemini-api/docs/caching?lang=node
+		if (info.supportsPromptCache && cacheKey) {
+			const cacheEntry = this.contentCaches.get(cacheKey)
+
+			if (cacheEntry) {
+				uncachedContent = contents.slice(cacheEntry.count, contents.length)
+				cachedContent = cacheEntry.key
+			}
+
+			const newCacheEntry = await this.client.caches.create({
+				model,
+				config: { contents, systemInstruction, ttl: `${CACHE_TTL * 60}s` },
+			})
+
+			if (newCacheEntry.name) {
+				this.contentCaches.set(cacheKey, { key: newCacheEntry.name, count: contents.length })
+				cacheWriteTokens = newCacheEntry.usageMetadata?.totalTokenCount ?? 0
+			}
+		}
 
 		const params: GenerateContentParameters = {
 			model,
-			contents: messages.map(convertAnthropicMessageToGemini),
+			contents: uncachedContent ?? contents,
 			config: {
+				cachedContent,
+				systemInstruction: cachedContent ? undefined : systemInstruction,
 				httpOptions: this.options.googleGeminiBaseUrl
 					? { baseUrl: this.options.googleGeminiBaseUrl }
 					: undefined,
 				thinkingConfig,
 				maxOutputTokens,
 				temperature: this.options.modelTemperature ?? 0,
-				systemInstruction: systemPrompt,
 			},
 		}
 
@@ -55,46 +89,58 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 		}
 
 		if (lastUsageMetadata) {
+			const inputTokens = lastUsageMetadata.promptTokenCount ?? 0
+			const outputTokens = lastUsageMetadata.candidatesTokenCount ?? 0
+			const cacheReadTokens = lastUsageMetadata.cachedContentTokenCount
+			const reasoningTokens = lastUsageMetadata.thoughtsTokenCount
+
+			const totalCost = this.calculateCost({
+				info,
+				inputTokens,
+				outputTokens,
+				cacheWriteTokens,
+				cacheReadTokens,
+			})
+
 			yield {
 				type: "usage",
-				inputTokens: lastUsageMetadata.promptTokenCount ?? 0,
-				outputTokens: lastUsageMetadata.candidatesTokenCount ?? 0,
+				inputTokens,
+				outputTokens,
+				cacheWriteTokens,
+				cacheReadTokens,
+				reasoningTokens,
+				totalCost,
 			}
 		}
 	}
 
-	override getModel(): {
-		id: GeminiModelId
-		info: ModelInfo
-		thinkingConfig?: ThinkingConfig
-		maxOutputTokens?: number
-	} {
+	override getModel() {
 		let id = this.options.apiModelId ? (this.options.apiModelId as GeminiModelId) : geminiDefaultModelId
 		let info: ModelInfo = geminiModels[id]
-		let thinkingConfig: ThinkingConfig | undefined = undefined
-		let maxOutputTokens: number | undefined = undefined
 
-		const thinkingSuffix = ":thinking"
+		if (id?.endsWith(":thinking")) {
+			id = id.slice(0, -":thinking".length) as GeminiModelId
 
-		if (id?.endsWith(thinkingSuffix)) {
-			id = id.slice(0, -thinkingSuffix.length) as GeminiModelId
-			info = geminiModels[id]
+			if (geminiModels[id]) {
+				info = geminiModels[id]
 
-			thinkingConfig = this.options.modelMaxThinkingTokens
-				? { thinkingBudget: this.options.modelMaxThinkingTokens }
-				: undefined
-
-			maxOutputTokens = this.options.modelMaxTokens ?? info.maxTokens ?? undefined
+				return {
+					id,
+					info,
+					thinkingConfig: this.options.modelMaxThinkingTokens
+						? { thinkingBudget: this.options.modelMaxThinkingTokens }
+						: undefined,
+					maxOutputTokens: this.options.modelMaxTokens ?? info.maxTokens ?? undefined,
+				}
+			}
 		}
 
 		if (!info) {
 			id = geminiDefaultModelId
 			info = geminiModels[geminiDefaultModelId]
-			thinkingConfig = undefined
-			maxOutputTokens = undefined
 		}
 
-		return { id, info, thinkingConfig, maxOutputTokens }
+		return { id, info }
 	}
 
 	async completePrompt(prompt: string): Promise<string> {
@@ -142,4 +188,57 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 			return super.countTokens(content)
 		}
 	}
+
+	public calculateCost({
+		info,
+		inputTokens,
+		outputTokens,
+		cacheWriteTokens,
+		cacheReadTokens,
+	}: {
+		info: ModelInfo
+		inputTokens: number
+		outputTokens: number
+		cacheWriteTokens?: number
+		cacheReadTokens?: number
+	}) {
+		if (!info.inputPrice || !info.outputPrice || !info.cacheWritesPrice || !info.cacheReadsPrice) {
+			return undefined
+		}
+
+		let inputPrice = info.inputPrice
+		let outputPrice = info.outputPrice
+		let cacheWritesPrice = info.cacheWritesPrice
+		let cacheReadsPrice = info.cacheReadsPrice
+
+		// If there's tiered pricing, adjust the input and output token prices
+		// based on the input tokens used.
+		if (info.tiers) {
+			const tier = info.tiers.find((tier) => inputTokens <= tier.contextWindow)
+
+			if (tier) {
+				inputPrice = tier.inputPrice ?? inputPrice
+				outputPrice = tier.outputPrice ?? outputPrice
+				cacheWritesPrice = tier.cacheWritesPrice ?? cacheWritesPrice
+				cacheReadsPrice = tier.cacheReadsPrice ?? cacheReadsPrice
+			}
+		}
+
+		let inputTokensCost = inputPrice * (inputTokens / 1_000_000)
+		let outputTokensCost = outputPrice * (outputTokens / 1_000_000)
+		let cacheWriteCost = 0
+		let cacheReadCost = 0
+
+		if (cacheWriteTokens) {
+			cacheWriteCost = cacheWritesPrice * (cacheWriteTokens / 1_000_000) * (CACHE_TTL / 60)
+		}
+
+		if (cacheReadTokens) {
+			const uncachedReadTokens = inputTokens - cacheReadTokens
+			cacheReadCost = cacheReadsPrice * (cacheReadTokens / 1_000_000)
+			inputTokensCost = inputPrice * (uncachedReadTokens / 1_000_000)
+		}
+
+		return inputTokensCost + outputTokensCost + cacheWriteCost + cacheReadCost
+	}
 }
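
To see how the tier lookup and the cache-read adjustment combine, here is a standalone re-run of calculateCost's arithmetic using the gemini-1.5-flash tier prices added in shared/api.ts below (illustrative token counts; not an excerpt from the handler):

// 50k input tokens fall in the <=128k tier; 30k of them were served from the cache.
const tier = { inputPrice: 0.075, outputPrice: 0.3, cacheReadsPrice: 0.01875 }
const inputTokens = 50_000
const outputTokens = 10_000
const cacheReadTokens = 30_000

const inputCost = tier.inputPrice * ((inputTokens - cacheReadTokens) / 1_000_000) // 0.0015
const outputCost = tier.outputPrice * (outputTokens / 1_000_000) // 0.003
const cacheReadCost = tier.cacheReadsPrice * (cacheReadTokens / 1_000_000) // 0.0005625
console.log(inputCost + outputCost + cacheReadCost) // ~ $0.0051 for the request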

+ 1 - 1
src/api/providers/vscode-lm.ts

@@ -442,7 +442,7 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 			yield {
 				type: "usage",
 				inputTokens: totalInputTokens,
-				outputTokens: totalOutputTokens
+				outputTokens: totalOutputTokens,
 			}
 		} catch (error: unknown) {
 			this.ensureCleanState()

+ 18 - 4
src/core/Cline.ts

@@ -136,32 +136,39 @@ export class Cline extends EventEmitter<ClineEvents> {
 	readonly rootTask: Cline | undefined = undefined
 	readonly parentTask: Cline | undefined = undefined
 	readonly taskNumber: number
+
 	isPaused: boolean = false
 	pausedModeSlug: string = defaultModeSlug
 	private pauseInterval: NodeJS.Timeout | undefined
 
 	readonly apiConfiguration: ApiConfiguration
 	api: ApiHandler
+	private promptCacheKey: string
+
+	rooIgnoreController?: RooIgnoreController
 	private fileContextTracker: FileContextTracker
 	private urlContentFetcher: UrlContentFetcher
 	browserSession: BrowserSession
 	didEditFile: boolean = false
 	customInstructions?: string
+
 	diffStrategy?: DiffStrategy
 	diffEnabled: boolean = false
 	fuzzyMatchThreshold: number
 
 	apiConversationHistory: (Anthropic.MessageParam & { ts?: number })[] = []
 	clineMessages: ClineMessage[] = []
-	rooIgnoreController?: RooIgnoreController
+
 	private askResponse?: ClineAskResponse
 	private askResponseText?: string
 	private askResponseImages?: string[]
 	private lastMessageTs?: number
+
 	// Not private since it needs to be accessible by tools.
 	consecutiveMistakeCount: number = 0
 	consecutiveMistakeLimit: number
 	consecutiveMistakeCountForApplyDiff: Map<string, number> = new Map()
+
 	// Not private since it needs to be accessible by tools.
 	providerRef: WeakRef<ClineProvider>
 	private abort: boolean = false
@@ -203,7 +210,6 @@ export class Cline extends EventEmitter<ClineEvents> {
 		task,
 		images,
 		historyItem,
-		experiments,
 		startTask = true,
 		rootTask,
 		parentTask,
@@ -222,11 +228,15 @@ export class Cline extends EventEmitter<ClineEvents> {
 
 		this.rooIgnoreController = new RooIgnoreController(this.cwd)
 		this.fileContextTracker = new FileContextTracker(provider, this.taskId)
+
 		this.rooIgnoreController.initialize().catch((error) => {
 			console.error("Failed to initialize RooIgnoreController:", error)
 		})
+
 		this.apiConfiguration = apiConfiguration
 		this.api = buildApiHandler(apiConfiguration)
+		this.promptCacheKey = crypto.randomUUID()
+
 		this.urlContentFetcher = new UrlContentFetcher(provider.context)
 		this.browserSession = new BrowserSession(provider.context)
 		this.customInstructions = customInstructions
@@ -353,6 +363,8 @@ export class Cline extends EventEmitter<ClineEvents> {
 	}
 
 	public async overwriteClineMessages(newMessages: ClineMessage[]) {
+		// Reset the prompt cache key since we've altered the conversation history.
+		this.promptCacheKey = crypto.randomUUID()
 		this.clineMessages = newMessages
 		await this.saveClineMessages()
 	}
@@ -652,6 +664,7 @@ export class Cline extends EventEmitter<ClineEvents> {
 			modifiedClineMessages,
 			(m) => !(m.ask === "resume_task" || m.ask === "resume_completed_task"),
 		)
+
 		if (lastRelevantMessageIndex !== -1) {
 			modifiedClineMessages.splice(lastRelevantMessageIndex + 1)
 		}
@@ -661,6 +674,7 @@ export class Cline extends EventEmitter<ClineEvents> {
 			modifiedClineMessages,
 			(m) => m.type === "say" && m.say === "api_req_started",
 		)
+
 		if (lastApiReqStartedIndex !== -1) {
 			const lastApiReqStarted = modifiedClineMessages[lastApiReqStartedIndex]
 			const { cost, cancelReason }: ClineApiReqInfo = JSON.parse(lastApiReqStarted.text || "{}")
@@ -853,7 +867,7 @@ export class Cline extends EventEmitter<ClineEvents> {
 		}
 
 		const wasRecent = lastClineMessage?.ts && Date.now() - lastClineMessage.ts < 30_000
-		
+
 		newUserContent.push({
 			type: "text",
 			text:
@@ -1092,7 +1106,7 @@ export class Cline extends EventEmitter<ClineEvents> {
 			return { role, content }
 		})
 
-		const stream = this.api.createMessage(systemPrompt, cleanConversationHistory)
+		const stream = this.api.createMessage(systemPrompt, cleanConversationHistory, this.promptCacheKey)
 		const iterator = stream[Symbol.asyncIterator]()
 
 		try {

+ 45 - 0
src/exports/roo-code.d.ts

@@ -46,6 +46,15 @@ type ProviderSettings = {
 				minTokensPerCachePoint?: number | undefined
 				maxCachePoints?: number | undefined
 				cachableFields?: string[] | undefined
+				tiers?:
+					| {
+							contextWindow: number
+							inputPrice?: number | undefined
+							outputPrice?: number | undefined
+							cacheWritesPrice?: number | undefined
+							cacheReadsPrice?: number | undefined
+					  }[]
+					| undefined
 		  } | null)
 		| undefined
 	glamaApiKey?: string | undefined
@@ -69,6 +78,15 @@ type ProviderSettings = {
 				minTokensPerCachePoint?: number | undefined
 				maxCachePoints?: number | undefined
 				cachableFields?: string[] | undefined
+				tiers?:
+					| {
+							contextWindow: number
+							inputPrice?: number | undefined
+							outputPrice?: number | undefined
+							cacheWritesPrice?: number | undefined
+							cacheReadsPrice?: number | undefined
+					  }[]
+					| undefined
 		  } | null)
 		| undefined
 	openRouterBaseUrl?: string | undefined
@@ -112,6 +130,15 @@ type ProviderSettings = {
 				minTokensPerCachePoint?: number | undefined
 				maxCachePoints?: number | undefined
 				cachableFields?: string[] | undefined
+				tiers?:
+					| {
+							contextWindow: number
+							inputPrice?: number | undefined
+							outputPrice?: number | undefined
+							cacheWritesPrice?: number | undefined
+							cacheReadsPrice?: number | undefined
+					  }[]
+					| undefined
 		  } | null)
 		| undefined
 	openAiUseAzure?: boolean | undefined
@@ -158,6 +185,15 @@ type ProviderSettings = {
 				minTokensPerCachePoint?: number | undefined
 				maxCachePoints?: number | undefined
 				cachableFields?: string[] | undefined
+				tiers?:
+					| {
+							contextWindow: number
+							inputPrice?: number | undefined
+							outputPrice?: number | undefined
+							cacheWritesPrice?: number | undefined
+							cacheReadsPrice?: number | undefined
+					  }[]
+					| undefined
 		  } | null)
 		| undefined
 	requestyApiKey?: string | undefined
@@ -180,6 +216,15 @@ type ProviderSettings = {
 				minTokensPerCachePoint?: number | undefined
 				maxCachePoints?: number | undefined
 				cachableFields?: string[] | undefined
+				tiers?:
+					| {
+							contextWindow: number
+							inputPrice?: number | undefined
+							outputPrice?: number | undefined
+							cacheWritesPrice?: number | undefined
+							cacheReadsPrice?: number | undefined
+					  }[]
+					| undefined
 		  } | null)
 		| undefined
 	xaiApiKey?: string | undefined

+ 45 - 0
src/exports/types.ts

@@ -47,6 +47,15 @@ type ProviderSettings = {
 				minTokensPerCachePoint?: number | undefined
 				maxCachePoints?: number | undefined
 				cachableFields?: string[] | undefined
+				tiers?:
+					| {
+							contextWindow: number
+							inputPrice?: number | undefined
+							outputPrice?: number | undefined
+							cacheWritesPrice?: number | undefined
+							cacheReadsPrice?: number | undefined
+					  }[]
+					| undefined
 		  } | null)
 		| undefined
 	glamaApiKey?: string | undefined
@@ -70,6 +79,15 @@ type ProviderSettings = {
 				minTokensPerCachePoint?: number | undefined
 				maxCachePoints?: number | undefined
 				cachableFields?: string[] | undefined
+				tiers?:
+					| {
+							contextWindow: number
+							inputPrice?: number | undefined
+							outputPrice?: number | undefined
+							cacheWritesPrice?: number | undefined
+							cacheReadsPrice?: number | undefined
+					  }[]
+					| undefined
 		  } | null)
 		| undefined
 	openRouterBaseUrl?: string | undefined
@@ -113,6 +131,15 @@ type ProviderSettings = {
 				minTokensPerCachePoint?: number | undefined
 				maxCachePoints?: number | undefined
 				cachableFields?: string[] | undefined
+				tiers?:
+					| {
+							contextWindow: number
+							inputPrice?: number | undefined
+							outputPrice?: number | undefined
+							cacheWritesPrice?: number | undefined
+							cacheReadsPrice?: number | undefined
+					  }[]
+					| undefined
 		  } | null)
 		| undefined
 	openAiUseAzure?: boolean | undefined
@@ -159,6 +186,15 @@ type ProviderSettings = {
 				minTokensPerCachePoint?: number | undefined
 				maxCachePoints?: number | undefined
 				cachableFields?: string[] | undefined
+				tiers?:
+					| {
+							contextWindow: number
+							inputPrice?: number | undefined
+							outputPrice?: number | undefined
+							cacheWritesPrice?: number | undefined
+							cacheReadsPrice?: number | undefined
+					  }[]
+					| undefined
 		  } | null)
 		| undefined
 	requestyApiKey?: string | undefined
@@ -181,6 +217,15 @@ type ProviderSettings = {
 				minTokensPerCachePoint?: number | undefined
 				maxCachePoints?: number | undefined
 				cachableFields?: string[] | undefined
+				tiers?:
+					| {
+							contextWindow: number
+							inputPrice?: number | undefined
+							outputPrice?: number | undefined
+							cacheWritesPrice?: number | undefined
+							cacheReadsPrice?: number | undefined
+					  }[]
+					| undefined
 		  } | null)
 		| undefined
 	xaiApiKey?: string | undefined

+ 11 - 0
src/schemas/index.ts

@@ -114,6 +114,17 @@ export const modelInfoSchema = z.object({
 	minTokensPerCachePoint: z.number().optional(),
 	maxCachePoints: z.number().optional(),
 	cachableFields: z.array(z.string()).optional(),
+	tiers: z
+		.array(
+			z.object({
+				contextWindow: z.number(),
+				inputPrice: z.number().optional(),
+				outputPrice: z.number().optional(),
+				cacheWritesPrice: z.number().optional(),
+				cacheReadsPrice: z.number().optional(),
+			}),
+		)
+		.optional(),
 })
 
 export type ModelInfo = z.infer<typeof modelInfoSchema>
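
The new tiers field is optional and each tier's prices are optional too, so only contextWindow is required per tier; the open-ended top tier in shared/api.ts is expressed with Infinity, which z.number() accepts by default. A quick check against just this field (assuming modelInfoSchema is imported from this module):

// Validate only the new `tiers` field of the schema defined above.
const tiers = modelInfoSchema.shape.tiers.parse([
	{ contextWindow: 128_000, inputPrice: 0.075, cacheReadsPrice: 0.01875 },
	{ contextWindow: Infinity, inputPrice: 0.15, cacheReadsPrice: 0.0375 },
])
console.log(tiers?.length) // 2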

+ 42 - 8
src/shared/api.ts

@@ -682,17 +682,35 @@ export const geminiModels = {
 		maxTokens: 65_535,
 		contextWindow: 1_048_576,
 		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 2.5,
+		supportsPromptCache: true,
+		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
 		outputPrice: 15,
+		cacheReadsPrice: 0.625,
+		cacheWritesPrice: 4.5,
+		tiers: [
+			{
+				contextWindow: 200_000,
+				inputPrice: 1.25,
+				outputPrice: 10,
+				cacheReadsPrice: 0.31,
+			},
+			{
+				contextWindow: Infinity,
+				inputPrice: 2.5,
+				outputPrice: 15,
+				cacheReadsPrice: 0.625,
+			},
+		],
 	},
 	"gemini-2.0-flash-001": {
 		maxTokens: 8192,
 		contextWindow: 1_048_576,
 		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
+		supportsPromptCache: true,
+		inputPrice: 0.1,
+		outputPrice: 0.4,
+		cacheReadsPrice: 0.025,
+		cacheWritesPrice: 1.0,
 	},
 	"gemini-2.0-flash-lite-preview-02-05": {
 		maxTokens: 8192,
@@ -738,9 +756,25 @@ export const geminiModels = {
 		maxTokens: 8192,
 		contextWindow: 1_048_576,
 		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
+		supportsPromptCache: true,
+		inputPrice: 0.15, // This is the pricing for prompts above 128k tokens.
+		outputPrice: 0.6,
+		cacheReadsPrice: 0.0375,
+		cacheWritesPrice: 1.0,
+		tiers: [
+			{
+				contextWindow: 128_000,
+				inputPrice: 0.075,
+				outputPrice: 0.3,
+				cacheReadsPrice: 0.01875,
+			},
+			{
+				contextWindow: Infinity,
+				inputPrice: 0.15,
+				outputPrice: 0.6,
+				cacheReadsPrice: 0.0375,
+			},
+		],
 	},
 	"gemini-1.5-flash-exp-0827": {
 		maxTokens: 8192,

+ 1 - 0
webview-ui/src/components/settings/ApiOptions.tsx

@@ -1693,6 +1693,7 @@ const ApiOptions = ({
 					)}
 
 					<ModelInfoView
+						apiProvider={selectedProvider}
 						selectedModelId={selectedModelId}
 						modelInfo={selectedModelInfo}
 						isDescriptionExpanded={isDescriptionExpanded}

+ 9 - 11
webview-ui/src/components/settings/ModelInfoView.tsx

@@ -1,15 +1,15 @@
-import { useMemo } from "react"
 import { VSCodeLink } from "@vscode/webview-ui-toolkit/react"
 import { useAppTranslation } from "@/i18n/TranslationContext"
 
 import { formatPrice } from "@/utils/formatPrice"
 import { cn } from "@/lib/utils"
 
-import { ModelInfo, geminiModels } from "@roo/shared/api"
+import { ModelInfo } from "@roo/shared/api"
 
 import { ModelDescriptionMarkdown } from "./ModelDescriptionMarkdown"
 
 type ModelInfoViewProps = {
+	apiProvider?: string
 	selectedModelId: string
 	modelInfo: ModelInfo
 	isDescriptionExpanded: boolean
@@ -17,13 +17,13 @@ type ModelInfoViewProps = {
 }
 
 export const ModelInfoView = ({
+	apiProvider,
 	selectedModelId,
 	modelInfo,
 	isDescriptionExpanded,
 	setIsDescriptionExpanded,
 }: ModelInfoViewProps) => {
 	const { t } = useAppTranslation()
-	const isGemini = useMemo(() => Object.keys(geminiModels).includes(selectedModelId), [selectedModelId])
 
 	const infoItems = [
 		<ModelInfoSupportsItem
@@ -36,13 +36,11 @@ export const ModelInfoView = ({
 			supportsLabel={t("settings:modelInfo.supportsComputerUse")}
 			doesNotSupportLabel={t("settings:modelInfo.noComputerUse")}
 		/>,
-		!isGemini && (
-			<ModelInfoSupportsItem
-				isSupported={modelInfo.supportsPromptCache}
-				supportsLabel={t("settings:modelInfo.supportsPromptCache")}
-				doesNotSupportLabel={t("settings:modelInfo.noPromptCache")}
-			/>
-		),
+		<ModelInfoSupportsItem
+			isSupported={modelInfo.supportsPromptCache}
+			supportsLabel={t("settings:modelInfo.supportsPromptCache")}
+			doesNotSupportLabel={t("settings:modelInfo.noPromptCache")}
+		/>,
 		typeof modelInfo.maxTokens === "number" && modelInfo.maxTokens > 0 && (
 			<>
 				<span className="font-medium">{t("settings:modelInfo.maxOutput")}:</span>{" "}
@@ -73,7 +71,7 @@ export const ModelInfoView = ({
 				{formatPrice(modelInfo.cacheWritesPrice || 0)} / 1M tokens
 			</>
 		),
-		isGemini && (
+		apiProvider === "gemini" && (
 			<span className="italic">
 				{selectedModelId === "gemini-2.5-pro-preview-03-25"
 					? t("settings:modelInfo.gemini.billingEstimate")

+ 1 - 0
webview-ui/src/components/settings/ModelPicker.tsx

@@ -186,6 +186,7 @@ export const ModelPicker = ({
 			</div>
 			{selectedModelId && selectedModelInfo && (
 				<ModelInfoView
+					apiProvider={apiConfiguration.apiProvider}
 					selectedModelId={selectedModelId}
 					modelInfo={selectedModelInfo}
 					isDescriptionExpanded={isDescriptionExpanded}