@@ -1,9 +1,9 @@
 import type { Anthropic } from "@anthropic-ai/sdk"
 import {
 	GoogleGenAI,
-	ThinkingConfig,
 	type GenerateContentResponseUsageMetadata,
 	type GenerateContentParameters,
+	type Content,
 } from "@google/genai"

 import { SingleCompletionHandler } from "../"
@@ -13,30 +13,64 @@ import { convertAnthropicContentToGemini, convertAnthropicMessageToGemini } from
 import type { ApiStream } from "../transform/stream"
 import { BaseProvider } from "./base-provider"

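+// Cache TTL in minutes: converted to seconds for the caches.create() call and to hours for the storage cost in calculateCost().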
+const CACHE_TTL = 5
+
 export class GeminiHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: GoogleGenAI
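+	// Maps a conversation's cacheKey to the server-side cache name and the number of messages already cached.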
+	private contentCaches: Map<string, { key: string; count: number }>

 	constructor(options: ApiHandlerOptions) {
 		super()
 		this.options = options
 		this.client = new GoogleGenAI({ apiKey: options.geminiApiKey ?? "not-provided" })
+		this.contentCaches = new Map()
 	}

-	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
-		const { id: model, thinkingConfig, maxOutputTokens } = this.getModel()
+	async *createMessage(
+		systemInstruction: string,
+		messages: Anthropic.Messages.MessageParam[],
+		cacheKey?: string,
+	): ApiStream {
+		const { id: model, thinkingConfig, maxOutputTokens, info } = this.getModel()
+
+		const contents = messages.map(convertAnthropicMessageToGemini)
+		let uncachedContent: Content[] | undefined = undefined
+		let cachedContent: string | undefined = undefined
+		let cacheWriteTokens: number | undefined = undefined
+
+		// https://ai.google.dev/gemini-api/docs/caching?lang=node
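+		// Reuse any existing cache for this conversation and send only the messages added since it was written; a fresh cache covering the full history is created on every turn.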
+		if (info.supportsPromptCache && cacheKey) {
+			const cacheEntry = this.contentCaches.get(cacheKey)
+
+			if (cacheEntry) {
+				uncachedContent = contents.slice(cacheEntry.count, contents.length)
+				cachedContent = cacheEntry.key
+			}
+
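+			// Write a new cache entry containing the full conversation and system instruction.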
+			const newCacheEntry = await this.client.caches.create({
+				model,
+				config: { contents, systemInstruction, ttl: `${CACHE_TTL * 60}s` },
+			})
+
+			if (newCacheEntry.name) {
+				this.contentCaches.set(cacheKey, { key: newCacheEntry.name, count: contents.length })
+				cacheWriteTokens = newCacheEntry.usageMetadata?.totalTokenCount ?? 0
+			}
+		}

 		const params: GenerateContentParameters = {
 			model,
-			contents: messages.map(convertAnthropicMessageToGemini),
+			contents: uncachedContent ?? contents,
 			config: {
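+				// The system instruction is baked into the cache at write time, so it is only sent when no cache is in use.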
+				cachedContent,
+				systemInstruction: cachedContent ? undefined : systemInstruction,
 				httpOptions: this.options.googleGeminiBaseUrl
 					? { baseUrl: this.options.googleGeminiBaseUrl }
 					: undefined,
 				thinkingConfig,
 				maxOutputTokens,
 				temperature: this.options.modelTemperature ?? 0,
-				systemInstruction: systemPrompt,
 			},
 		}

@@ -55,46 +89,58 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandler {
 		}

 		if (lastUsageMetadata) {
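+			// promptTokenCount includes cached tokens; cachedContentTokenCount reports how many of them were read from the cache.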
+			const inputTokens = lastUsageMetadata.promptTokenCount ?? 0
+			const outputTokens = lastUsageMetadata.candidatesTokenCount ?? 0
+			const cacheReadTokens = lastUsageMetadata.cachedContentTokenCount
+			const reasoningTokens = lastUsageMetadata.thoughtsTokenCount
+
+			const totalCost = this.calculateCost({
+				info,
+				inputTokens,
+				outputTokens,
+				cacheWriteTokens,
+				cacheReadTokens,
+			})
+
 			yield {
 				type: "usage",
-				inputTokens: lastUsageMetadata.promptTokenCount ?? 0,
-				outputTokens: lastUsageMetadata.candidatesTokenCount ?? 0,
+				inputTokens,
+				outputTokens,
+				cacheWriteTokens,
+				cacheReadTokens,
+				reasoningTokens,
+				totalCost,
 			}
 		}
 	}

-	override getModel(): {
-		id: GeminiModelId
-		info: ModelInfo
-		thinkingConfig?: ThinkingConfig
-		maxOutputTokens?: number
-	} {
+	override getModel() {
 		let id = this.options.apiModelId ? (this.options.apiModelId as GeminiModelId) : geminiDefaultModelId
 		let info: ModelInfo = geminiModels[id]
-		let thinkingConfig: ThinkingConfig | undefined = undefined
-		let maxOutputTokens: number | undefined = undefined

-		const thinkingSuffix = ":thinking"
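+		// A ":thinking" model id selects the thinking-enabled variant; strip the suffix to look up the base model.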
+		if (id?.endsWith(":thinking")) {
+			id = id.slice(0, -":thinking".length) as GeminiModelId

-		if (id?.endsWith(thinkingSuffix)) {
-			id = id.slice(0, -thinkingSuffix.length) as GeminiModelId
-			info = geminiModels[id]
+			if (geminiModels[id]) {
+				info = geminiModels[id]

-			thinkingConfig = this.options.modelMaxThinkingTokens
-				? { thinkingBudget: this.options.modelMaxThinkingTokens }
-				: undefined
-
-			maxOutputTokens = this.options.modelMaxTokens ?? info.maxTokens ?? undefined
+				return {
+					id,
+					info,
+					thinkingConfig: this.options.modelMaxThinkingTokens
+						? { thinkingBudget: this.options.modelMaxThinkingTokens }
+						: undefined,
+					maxOutputTokens: this.options.modelMaxTokens ?? info.maxTokens ?? undefined,
+				}
+			}
 		}

 		if (!info) {
 			id = geminiDefaultModelId
 			info = geminiModels[geminiDefaultModelId]
-			thinkingConfig = undefined
-			maxOutputTokens = undefined
 		}

-		return { id, info, thinkingConfig, maxOutputTokens }
+		return { id, info }
 	}

 	async completePrompt(prompt: string): Promise<string> {
@@ -142,4 +188,57 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandler {
 			return super.countTokens(content)
 		}
 	}
+
+	public calculateCost({
+		info,
+		inputTokens,
+		outputTokens,
+		cacheWriteTokens,
+		cacheReadTokens,
+	}: {
+		info: ModelInfo
+		inputTokens: number
+		outputTokens: number
+		cacheWriteTokens?: number
+		cacheReadTokens?: number
+	}) {
+		if (!info.inputPrice || !info.outputPrice || !info.cacheWritesPrice || !info.cacheReadsPrice) {
+			return undefined
+		}
+
+		let inputPrice = info.inputPrice
+		let outputPrice = info.outputPrice
+		let cacheWritesPrice = info.cacheWritesPrice
+		let cacheReadsPrice = info.cacheReadsPrice
+
+		// If there's tiered pricing, adjust the input, output, and cache token prices
+		// based on the number of input tokens used.
+		if (info.tiers) {
+			const tier = info.tiers.find((tier) => inputTokens <= tier.contextWindow)
+
+			if (tier) {
+				inputPrice = tier.inputPrice ?? inputPrice
+				outputPrice = tier.outputPrice ?? outputPrice
+				cacheWritesPrice = tier.cacheWritesPrice ?? cacheWritesPrice
+				cacheReadsPrice = tier.cacheReadsPrice ?? cacheReadsPrice
+			}
+		}
+
+		let inputTokensCost = inputPrice * (inputTokens / 1_000_000)
+		let outputTokensCost = outputPrice * (outputTokens / 1_000_000)
+		let cacheWriteCost = 0
+		let cacheReadCost = 0
+
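+		// Cache storage is billed for the TTL duration; CACHE_TTL is in minutes, so divide by 60 to get hours.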
+		if (cacheWriteTokens) {
+			cacheWriteCost = cacheWritesPrice * (cacheWriteTokens / 1_000_000) * (CACHE_TTL / 60)
+		}
+
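+		// Tokens read from the cache are billed at the cache-read rate; only the uncached remainder pays the full input price.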
+		if (cacheReadTokens) {
+			const uncachedReadTokens = inputTokens - cacheReadTokens
+			cacheReadCost = cacheReadsPrice * (cacheReadTokens / 1_000_000)
+			inputTokensCost = inputPrice * (uncachedReadTokens / 1_000_000)
+		}
+
+		return inputTokensCost + outputTokensCost + cacheWriteCost + cacheReadCost
+	}
 }