Browse Source

Add an option to enable prompt caching (#2924)

Chris Estreich 8 months ago
parent
commit
ad4782b766

+ 8 - 2
src/exports/roo-code.d.ts

@@ -36,6 +36,7 @@ type ProviderSettings = {
 				supportsImages?: boolean | undefined
 				supportsComputerUse?: boolean | undefined
 				supportsPromptCache: boolean
+				isPromptCacheOptional?: boolean | undefined
 				inputPrice?: number | undefined
 				outputPrice?: number | undefined
 				cacheWritesPrice?: number | undefined
@@ -68,6 +69,7 @@ type ProviderSettings = {
 				supportsImages?: boolean | undefined
 				supportsComputerUse?: boolean | undefined
 				supportsPromptCache: boolean
+				isPromptCacheOptional?: boolean | undefined
 				inputPrice?: number | undefined
 				outputPrice?: number | undefined
 				cacheWritesPrice?: number | undefined
@@ -120,6 +122,7 @@ type ProviderSettings = {
 				supportsImages?: boolean | undefined
 				supportsComputerUse?: boolean | undefined
 				supportsPromptCache: boolean
+				isPromptCacheOptional?: boolean | undefined
 				inputPrice?: number | undefined
 				outputPrice?: number | undefined
 				cacheWritesPrice?: number | undefined
@@ -175,6 +178,7 @@ type ProviderSettings = {
 				supportsImages?: boolean | undefined
 				supportsComputerUse?: boolean | undefined
 				supportsPromptCache: boolean
+				isPromptCacheOptional?: boolean | undefined
 				inputPrice?: number | undefined
 				outputPrice?: number | undefined
 				cacheWritesPrice?: number | undefined
@@ -206,6 +210,7 @@ type ProviderSettings = {
 				supportsImages?: boolean | undefined
 				supportsComputerUse?: boolean | undefined
 				supportsPromptCache: boolean
+				isPromptCacheOptional?: boolean | undefined
 				inputPrice?: number | undefined
 				outputPrice?: number | undefined
 				cacheWritesPrice?: number | undefined
@@ -231,11 +236,12 @@ type ProviderSettings = {
 	modelMaxTokens?: number | undefined
 	modelMaxThinkingTokens?: number | undefined
 	includeMaxTokens?: boolean | undefined
-	modelTemperature?: (number | null) | undefined
 	reasoningEffort?: ("low" | "medium" | "high") | undefined
-	rateLimitSeconds?: number | undefined
+	promptCachingEnabled?: boolean | undefined
 	diffEnabled?: boolean | undefined
 	fuzzyMatchThreshold?: number | undefined
+	modelTemperature?: (number | null) | undefined
+	rateLimitSeconds?: number | undefined
 	fakeAi?: unknown | undefined
 }
 

+ 8 - 2
src/exports/types.ts

@@ -37,6 +37,7 @@ type ProviderSettings = {
 				supportsImages?: boolean | undefined
 				supportsComputerUse?: boolean | undefined
 				supportsPromptCache: boolean
+				isPromptCacheOptional?: boolean | undefined
 				inputPrice?: number | undefined
 				outputPrice?: number | undefined
 				cacheWritesPrice?: number | undefined
@@ -69,6 +70,7 @@ type ProviderSettings = {
 				supportsImages?: boolean | undefined
 				supportsComputerUse?: boolean | undefined
 				supportsPromptCache: boolean
+				isPromptCacheOptional?: boolean | undefined
 				inputPrice?: number | undefined
 				outputPrice?: number | undefined
 				cacheWritesPrice?: number | undefined
@@ -121,6 +123,7 @@ type ProviderSettings = {
 				supportsImages?: boolean | undefined
 				supportsComputerUse?: boolean | undefined
 				supportsPromptCache: boolean
+				isPromptCacheOptional?: boolean | undefined
 				inputPrice?: number | undefined
 				outputPrice?: number | undefined
 				cacheWritesPrice?: number | undefined
@@ -176,6 +179,7 @@ type ProviderSettings = {
 				supportsImages?: boolean | undefined
 				supportsComputerUse?: boolean | undefined
 				supportsPromptCache: boolean
+				isPromptCacheOptional?: boolean | undefined
 				inputPrice?: number | undefined
 				outputPrice?: number | undefined
 				cacheWritesPrice?: number | undefined
@@ -207,6 +211,7 @@ type ProviderSettings = {
 				supportsImages?: boolean | undefined
 				supportsComputerUse?: boolean | undefined
 				supportsPromptCache: boolean
+				isPromptCacheOptional?: boolean | undefined
 				inputPrice?: number | undefined
 				outputPrice?: number | undefined
 				cacheWritesPrice?: number | undefined
@@ -232,11 +237,12 @@ type ProviderSettings = {
 	modelMaxTokens?: number | undefined
 	modelMaxThinkingTokens?: number | undefined
 	includeMaxTokens?: boolean | undefined
-	modelTemperature?: (number | null) | undefined
 	reasoningEffort?: ("low" | "medium" | "high") | undefined
-	rateLimitSeconds?: number | undefined
+	promptCachingEnabled?: boolean | undefined
 	diffEnabled?: boolean | undefined
 	fuzzyMatchThreshold?: number | undefined
+	modelTemperature?: (number | null) | undefined
+	rateLimitSeconds?: number | undefined
 	fakeAi?: unknown | undefined
 }
 

+ 7 - 4
src/schemas/index.ts

@@ -104,6 +104,7 @@ export const modelInfoSchema = z.object({
 	supportsImages: z.boolean().optional(),
 	supportsComputerUse: z.boolean().optional(),
 	supportsPromptCache: z.boolean(),
+	isPromptCacheOptional: z.boolean().optional(),
 	inputPrice: z.number().optional(),
 	outputPrice: z.number().optional(),
 	cacheWritesPrice: z.number().optional(),
@@ -398,11 +399,12 @@ export const providerSettingsSchema = z.object({
 	modelMaxThinkingTokens: z.number().optional(),
 	// Generic
 	includeMaxTokens: z.boolean().optional(),
-	modelTemperature: z.number().nullish(),
 	reasoningEffort: reasoningEffortsSchema.optional(),
-	rateLimitSeconds: z.number().optional(),
+	promptCachingEnabled: z.boolean().optional(),
 	diffEnabled: z.boolean().optional(),
 	fuzzyMatchThreshold: z.number().optional(),
+	modelTemperature: z.number().nullish(),
+	rateLimitSeconds: z.number().optional(),
 	// Fake AI
 	fakeAi: z.unknown().optional(),
 })
@@ -489,11 +491,12 @@ const providerSettingsRecord: ProviderSettingsRecord = {
 	modelMaxThinkingTokens: undefined,
 	// Generic
 	includeMaxTokens: undefined,
-	modelTemperature: undefined,
 	reasoningEffort: undefined,
-	rateLimitSeconds: undefined,
+	promptCachingEnabled: undefined,
 	diffEnabled: undefined,
 	fuzzyMatchThreshold: undefined,
+	modelTemperature: undefined,
+	rateLimitSeconds: undefined,
 	// Fake AI
 	fakeAi: undefined,
 	// X.AI (Grok)

+ 3 - 0
src/shared/api.ts

@@ -683,6 +683,7 @@ export const geminiModels = {
 		contextWindow: 1_048_576,
 		supportsImages: true,
 		supportsPromptCache: false,
+		isPromptCacheOptional: true,
 		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
 		outputPrice: 15,
 		cacheReadsPrice: 0.625,
@@ -707,6 +708,7 @@ export const geminiModels = {
 		contextWindow: 1_048_576,
 		supportsImages: true,
 		supportsPromptCache: false,
+		isPromptCacheOptional: true,
 		inputPrice: 0.1,
 		outputPrice: 0.4,
 		cacheReadsPrice: 0.025,
@@ -757,6 +759,7 @@ export const geminiModels = {
 		contextWindow: 1_048_576,
 		supportsImages: true,
 		supportsPromptCache: false,
+		isPromptCacheOptional: true,
 		inputPrice: 0.15, // This is the pricing for prompts above 128k tokens.
 		outputPrice: 0.6,
 		cacheReadsPrice: 0.0375,

+ 12 - 4
webview-ui/src/components/settings/ApiOptions.tsx

@@ -41,15 +41,16 @@ import { VSCodeButtonLink } from "../common/VSCodeButtonLink"
 import { MODELS_BY_PROVIDER, PROVIDERS, VERTEX_REGIONS, REASONING_MODELS } from "./constants"
 import { ModelInfoView } from "./ModelInfoView"
 import { ModelPicker } from "./ModelPicker"
-import { TemperatureControl } from "./TemperatureControl"
-import { RateLimitSecondsControl } from "./RateLimitSecondsControl"
-import { DiffSettingsControl } from "./DiffSettingsControl"
 import { ApiErrorMessage } from "./ApiErrorMessage"
 import { ThinkingBudget } from "./ThinkingBudget"
 import { R1FormatSetting } from "./R1FormatSetting"
 import { OpenRouterBalanceDisplay } from "./OpenRouterBalanceDisplay"
 import { RequestyBalanceDisplay } from "./RequestyBalanceDisplay"
 import { ReasoningEffort } from "./ReasoningEffort"
+import { PromptCachingControl } from "./PromptCachingControl"
+import { DiffSettingsControl } from "./DiffSettingsControl"
+import { TemperatureControl } from "./TemperatureControl"
+import { RateLimitSecondsControl } from "./RateLimitSecondsControl"
 
 interface ApiOptionsProps {
 	uriScheme: string | undefined
@@ -1742,6 +1743,13 @@ const ApiOptions = ({
 				/>
 			)}
 
+			{selectedModelInfo.supportsPromptCache && selectedModelInfo.isPromptCacheOptional && (
+				<PromptCachingControl
+					apiConfiguration={apiConfiguration}
+					setApiConfigurationField={setApiConfigurationField}
+				/>
+			)}
+
 			{!fromWelcomeView && (
 				<>
 					<DiffSettingsControl
@@ -1750,7 +1758,7 @@ const ApiOptions = ({
 						onChange={(field, value) => setApiConfigurationField(field, value)}
 					/>
 					<TemperatureControl
-						value={apiConfiguration?.modelTemperature}
+						value={apiConfiguration.modelTemperature}
 						onChange={handleInputChange("modelTemperature", noTransform)}
 						maxValue={2}
 					/>

+ 29 - 0
webview-ui/src/components/settings/PromptCachingControl.tsx

@@ -0,0 +1,29 @@
+import { VSCodeCheckbox } from "@vscode/webview-ui-toolkit/react"
+
+import { ApiConfiguration } from "@roo/shared/api"
+
+import { useAppTranslation } from "@src/i18n/TranslationContext"
+
+/** Props for {@link PromptCachingControl}: the current API configuration plus a typed single-field setter. */
+interface PromptCachingControlProps {
+	apiConfiguration: ApiConfiguration
+	setApiConfigurationField: <K extends keyof ApiConfiguration>(field: K, value: ApiConfiguration[K]) => void
+}
+
+/** Checkbox bound to `promptCachingEnabled`, rendered with a translated label and description. */
+export const PromptCachingControl = ({ apiConfiguration, setApiConfigurationField }: PromptCachingControlProps) => {
+	const { t } = useAppTranslation()
+	// Write the checkbox's new checked state back into the API configuration.
+	const handleToggle = (e: any) => setApiConfigurationField("promptCachingEnabled", e.target.checked)
+
+	return (
+		<div>
+			<VSCodeCheckbox checked={apiConfiguration.promptCachingEnabled} onChange={handleToggle}>
+				<label className="block font-medium mb-1">{t("settings:promptCaching.label")}</label>
+			</VSCodeCheckbox>
+			<div className="text-sm text-vscode-descriptionForeground mt-1">
+				{t("settings:promptCaching.description")}
+			</div>
+		</div>
+	)
+}

+ 4 - 0
webview-ui/src/i18n/locales/ca/settings.json

@@ -386,6 +386,10 @@
 			"description": "Quan està activat, Roo utilitzarà l'eina diff de blocs múltiples. Això intentarà actualitzar múltiples blocs de codi a l'arxiu en una sola petició."
 		}
 	},
+	"promptCaching": {
+		"label": "Habilitar l'emmagatzematge en memòria cau de prompts",
+		"description": "Quan està habilitat, Roo utilitzarà aquest model amb la memòria cau de prompts activada per reduir costos."
+	},
 	"temperature": {
 		"useCustom": "Utilitzar temperatura personalitzada",
 		"description": "Controla l'aleatorietat en les respostes del model.",

+ 4 - 0
webview-ui/src/i18n/locales/de/settings.json

@@ -386,6 +386,10 @@
 			"description": "Wenn aktiviert, verwendet Roo das Multi-Block-Diff-Werkzeug. Dies versucht, mehrere Codeblöcke in der Datei in einer Anfrage zu aktualisieren."
 		}
 	},
+	"promptCaching": {
+		"label": "Prompt-Caching aktivieren",
+		"description": "Wenn aktiviert, wird Roo dieses Modell mit aktiviertem Prompt-Caching verwenden, um Kosten zu reduzieren."
+	},
 	"temperature": {
 		"useCustom": "Benutzerdefinierte Temperatur verwenden",
 		"description": "Steuert die Zufälligkeit in den Antworten des Modells.",

+ 4 - 0
webview-ui/src/i18n/locales/en/settings.json

@@ -386,6 +386,10 @@
 			"description": "When enabled, Roo will use multi block diff tool. This will try to update multiple code blocks in the file in one request."
 		}
 	},
+	"promptCaching": {
+		"label": "Enable prompt caching",
+		"description": "When enabled, Roo will use this model with prompt caching turned on in order to reduce costs."
+	},
 	"temperature": {
 		"useCustom": "Use custom temperature",
 		"description": "Controls randomness in the model's responses.",

+ 4 - 0
webview-ui/src/i18n/locales/es/settings.json

@@ -386,6 +386,10 @@
 			"description": "Cuando está habilitado, Roo usará la herramienta de diff de bloques múltiples. Esto intentará actualizar múltiples bloques de código en el archivo en una sola solicitud."
 		}
 	},
+	"promptCaching": {
+		"label": "Habilitar caché de prompts",
+		"description": "Cuando está habilitado, Roo usará este modelo con el caché de prompts activado para reducir costos."
+	},
 	"temperature": {
 		"useCustom": "Usar temperatura personalizada",
 		"description": "Controla la aleatoriedad en las respuestas del modelo.",

+ 4 - 0
webview-ui/src/i18n/locales/fr/settings.json

@@ -386,6 +386,10 @@
 			"description": "Lorsqu'il est activé, Roo utilisera l'outil diff multi-blocs. Cela tentera de mettre à jour plusieurs blocs de code dans le fichier en une seule requête."
 		}
 	},
+	"promptCaching": {
+		"label": "Activer la mise en cache des prompts",
+		"description": "Lorsque cette option est activée, Roo utilisera ce modèle avec la mise en cache des prompts activée afin de réduire les coûts."
+	},
 	"temperature": {
 		"useCustom": "Utiliser une température personnalisée",
 		"description": "Contrôle l'aléatoire dans les réponses du modèle.",

+ 4 - 0
webview-ui/src/i18n/locales/hi/settings.json

@@ -386,6 +386,10 @@
 			"description": "जब सक्षम किया जाता है, तो Roo मल्टी ब्लॉक diff उपकरण का उपयोग करेगा। यह एक अनुरोध में फ़ाइल में कई कोड ब्लॉक अपडेट करने का प्रयास करेगा।"
 		}
 	},
+	"promptCaching": {
+		"label": "प्रॉम्प्ट कैशिंग सक्षम करें",
+		"description": "जब सक्षम किया जाता है, तो Roo लागत को कम करने के लिए प्रॉम्प्ट कैशिंग चालू के साथ इस मॉडल का उपयोग करेगा।"
+	},
 	"temperature": {
 		"useCustom": "कस्टम तापमान का उपयोग करें",
 		"description": "मॉडल की प्रतिक्रियाओं में यादृच्छिकता को नियंत्रित करता है।",

+ 4 - 0
webview-ui/src/i18n/locales/it/settings.json

@@ -386,6 +386,10 @@
 			"description": "Quando abilitato, Roo utilizzerà lo strumento diff multi-blocco. Questo tenterà di aggiornare più blocchi di codice nel file in una singola richiesta."
 		}
 	},
+	"promptCaching": {
+		"label": "Abilita cache dei prompt",
+		"description": "Quando abilitato, Roo utilizzerà questo modello con la cache dei prompt attivata per ridurre i costi."
+	},
 	"temperature": {
 		"useCustom": "Usa temperatura personalizzata",
 		"description": "Controlla la casualità nelle risposte del modello.",

+ 4 - 0
webview-ui/src/i18n/locales/ja/settings.json

@@ -386,6 +386,10 @@
 			"description": "有効にすると、Rooはマルチブロックdiffツールを使用します。これにより、1つのリクエストでファイル内の複数のコードブロックを更新しようとします。"
 		}
 	},
+	"promptCaching": {
+		"label": "プロンプトキャッシングを有効化",
+		"description": "有効にすると、Rooはコスト削減のためにプロンプトキャッシングを有効にしてこのモデルを使用します。"
+	},
 	"temperature": {
 		"useCustom": "カスタム温度を使用",
 		"description": "モデルの応答のランダム性を制御します。",

+ 4 - 0
webview-ui/src/i18n/locales/ko/settings.json

@@ -386,6 +386,10 @@
 			"description": "활성화하면 Roo가 다중 블록 diff 도구를 사용합니다. 이것은 하나의 요청에서 파일의 여러 코드 블록을 업데이트하려고 시도합니다."
 		}
 	},
+	"promptCaching": {
+		"label": "프롬프트 캐싱 활성화",
+		"description": "활성화하면 Roo는 비용 절감을 위해 프롬프트 캐싱을 켠 상태로 이 모델을 사용합니다."
+	},
 	"temperature": {
 		"useCustom": "사용자 정의 온도 사용",
 		"description": "모델 응답의 무작위성을 제어합니다.",

+ 4 - 0
webview-ui/src/i18n/locales/pl/settings.json

@@ -386,6 +386,10 @@
 			"description": "Po włączeniu, Roo użyje narzędzia diff wieloblokowego. Spróbuje to zaktualizować wiele bloków kodu w pliku w jednym żądaniu."
 		}
 	},
+	"promptCaching": {
+		"label": "Włącz buforowanie promptów",
+		"description": "Po włączeniu, Roo będzie używać tego modelu z włączonym buforowaniem promptów w celu zmniejszenia kosztów."
+	},
 	"temperature": {
 		"useCustom": "Użyj niestandardowej temperatury",
 		"description": "Kontroluje losowość w odpowiedziach modelu.",

+ 4 - 0
webview-ui/src/i18n/locales/pt-BR/settings.json

@@ -386,6 +386,10 @@
 			"description": "Quando ativado, o Roo usará a ferramenta diff de múltiplos blocos. Isso tentará atualizar vários blocos de código no arquivo em uma única solicitação."
 		}
 	},
+	"promptCaching": {
+		"label": "Ativar cache de prompts",
+		"description": "Quando ativado, o Roo usará este modelo com o cache de prompts ativado para reduzir custos."
+	},
 	"temperature": {
 		"useCustom": "Usar temperatura personalizada",
 		"description": "Controla a aleatoriedade nas respostas do modelo.",

+ 4 - 0
webview-ui/src/i18n/locales/tr/settings.json

@@ -386,6 +386,10 @@
 			"description": "Etkinleştirildiğinde, Roo çoklu blok diff aracını kullanacaktır. Bu, tek bir istekte dosyadaki birden fazla kod bloğunu güncellemeye çalışacaktır."
 		}
 	},
+	"promptCaching": {
+		"label": "Prompt önbelleğini etkinleştir",
+		"description": "Etkinleştirildiğinde, Roo maliyetleri azaltmak için prompt önbelleği açık olan bu modeli kullanacaktır."
+	},
 	"temperature": {
 		"useCustom": "Özel sıcaklık kullan",
 		"description": "Model yanıtlarındaki rastgeleliği kontrol eder.",

+ 4 - 0
webview-ui/src/i18n/locales/vi/settings.json

@@ -386,6 +386,10 @@
 			"description": "Khi được bật, Roo sẽ sử dụng công cụ diff đa khối. Điều này sẽ cố gắng cập nhật nhiều khối mã trong tệp trong một yêu cầu."
 		}
 	},
+	"promptCaching": {
+		"label": "Bật bộ nhớ đệm prompt",
+		"description": "Khi được bật, Roo sẽ sử dụng mô hình này với bộ nhớ đệm prompt được bật để giảm chi phí."
+	},
 	"temperature": {
 		"useCustom": "Sử dụng nhiệt độ tùy chỉnh",
 		"description": "Kiểm soát tính ngẫu nhiên trong phản hồi của mô hình.",

+ 4 - 0
webview-ui/src/i18n/locales/zh-CN/settings.json

@@ -386,6 +386,10 @@
 			"description": "启用后,Roo 将尝试在一个请求中进行批量搜索和替换。"
 		}
 	},
+	"promptCaching": {
+		"label": "启用提示词缓存",
+		"description": "启用后 Roo 将使用提示词缓存功能以降低成本。"
+	},
 	"temperature": {
 		"useCustom": "使用自定义温度",
 		"description": "控制模型响应的随机性",

+ 4 - 0
webview-ui/src/i18n/locales/zh-TW/settings.json

@@ -386,6 +386,10 @@
 			"description": "啟用後,Roo 將使用多區塊差異比對工具,嘗試在單一請求中更新檔案內的多個程式碼區塊。"
 		}
 	},
+	"promptCaching": {
+		"label": "啟用提示快取",
+		"description": "啟用後,Roo 將使用提示快取功能以降低成本。"
+	},
 	"temperature": {
 		"useCustom": "使用自訂溫度",
 		"description": "控制模型回應的隨機性",