Przeglądaj źródła

Update announcement/settings with details about prompt caching

Saoud Rizwan 1 rok temu
rodzic
commit
6989779dd6

+ 4 - 0
CHANGELOG.md

@@ -4,6 +4,10 @@ All notable changes to the "claude-dev" extension will be documented in this fil
 
 <!-- Check [Keep a Changelog](http://keepachangelog.com/) for recommendations on how to structure this file. -->
 
+## [1.2.0]
+
+- Adds support for Prompt Caching to significantly reduce costs and response times (currently only available through the Anthropic API for Claude 3.5 Sonnet and Claude 3.0 Haiku)
+
 ## [1.1.1]
 
 - Adds option to choose other Claude models (+ GPT-4o, DeepSeek, and Mistral if you use OpenRouter)

+ 1 - 1
package.json

@@ -2,7 +2,7 @@
   "name": "claude-dev",
   "displayName": "Claude Dev",
   "description": "Autonomous software engineer right in your IDE, capable of creating/editing files, executing commands, and more with your permission every step of the way.",
-  "version": "1.1.15",
+  "version": "1.2.0",
   "icon": "icon.png",
   "engines": {
     "vscode": "^1.84.0"

+ 2 - 2
src/ClaudeDev.ts

@@ -417,12 +417,12 @@ export class ClaudeDev {
 		cacheCreationInputTokens?: number,
 		cacheReadInputTokens?: number
 	): number {
-		const modelCacheWritesPrice = this.api.getModel().info.cacheWrites
+		const modelCacheWritesPrice = this.api.getModel().info.cacheWritesPrice
 		let cacheWritesCost = 0
 		if (cacheCreationInputTokens && modelCacheWritesPrice) {
 			cacheWritesCost = (modelCacheWritesPrice / 1_000_000) * cacheCreationInputTokens
 		}
-		const modelCacheReadsPrice = this.api.getModel().info.cacheReads
+		const modelCacheReadsPrice = this.api.getModel().info.cacheReadsPrice
 		let cacheReadsCost = 0
 		if (cacheReadInputTokens && modelCacheReadsPrice) {
 			cacheReadsCost = (modelCacheReadsPrice / 1_000_000) * cacheReadInputTokens

+ 1 - 1
src/providers/ClaudeDevProvider.ts

@@ -25,7 +25,7 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
 	private disposables: vscode.Disposable[] = []
 	private view?: vscode.WebviewView | vscode.WebviewPanel
 	private claudeDev?: ClaudeDev
-	private latestAnnouncementId = "aug-11-2024" // update to some unique identifier when we add a new announcement
+	private latestAnnouncementId = "aug-15-2024" // update to some unique identifier when we add a new announcement
 
 	constructor(
 		private readonly context: vscode.ExtensionContext,

+ 8 - 8
src/shared/api.ts

@@ -21,8 +21,8 @@ export interface ModelInfo {
 	supportsPromptCache: boolean
 	inputPrice: number
 	outputPrice: number
-	cacheWrites?: number
-	cacheReads?: number
+	cacheWritesPrice?: number
+	cacheReadsPrice?: number
 }
 
 export type ApiModelId = AnthropicModelId | OpenRouterModelId | BedrockModelId
@@ -38,8 +38,8 @@ export const anthropicModels = {
 		supportsPromptCache: true,
 		inputPrice: 3.0, // $3 per million input tokens
 		outputPrice: 15.0, // $15 per million output tokens
-		cacheWrites: 3.75, // $3.75 per million tokens
-		cacheReads: 0.3, // $0.30 per million tokens
+		cacheWritesPrice: 3.75, // $3.75 per million tokens
+		cacheReadsPrice: 0.3, // $0.30 per million tokens
 	},
 	"claude-3-opus-20240229": {
 		maxTokens: 4096,
@@ -47,8 +47,8 @@ export const anthropicModels = {
 		supportsPromptCache: false,
 		inputPrice: 15.0,
 		outputPrice: 75.0,
-		cacheWrites: 18.75,
-		cacheReads: 1.5,
+		cacheWritesPrice: 18.75,
+		cacheReadsPrice: 1.5,
 	},
 	"claude-3-sonnet-20240229": {
 		maxTokens: 4096,
@@ -63,8 +63,8 @@ export const anthropicModels = {
 		supportsPromptCache: true,
 		inputPrice: 0.25,
 		outputPrice: 1.25,
-		cacheWrites: 0.3,
-		cacheReads: 0.03,
+		cacheWritesPrice: 0.3,
+		cacheReadsPrice: 0.03,
 	},
 } as const satisfies Record<string, ModelInfo> // as const assertion makes the object deeply readonly
 

+ 9 - 2
webview-ui/src/components/Announcement.tsx

@@ -27,18 +27,25 @@ const Announcement = ({ version, hideAnnouncement }: AnnouncementProps) => {
 				🎉{"  "}New in v{version}
 			</h3>
 			<ul style={{ margin: "0 0 8px", paddingLeft: "20px" }}>
+				<li>
+					Adds support for{" "}
+					<VSCodeLink href="https://www.anthropic.com/news/prompt-caching" style={{ display: "inline" }}>
+						Prompt Caching
+					</VSCodeLink>{" "}
+					to reduce costs by up to 90% and latency by up to 85% (currently only available through Anthropic
+					API for Claude 3.5 Sonnet and Claude 3.0 Haiku)
+				</li>
 				<li>
 					Paste images in chat and turn mockups into fully functional applications or fix bugs with
 					screenshots
 				</li>
 				<li>
-					Added option to choose other Claude models (+ GPT-4o, DeepSeek, and Mistral if you use OpenRouter)
+					Adds option to choose other Claude models (+ GPT-4o, DeepSeek, and Mistral if you use OpenRouter)
 				</li>
 				<li>
 					You can now add custom instructions to the end of the system prompt (e.g. "Always use Python",
 					"Speak in Spanish")
 				</li>
-				<li>Improved support for running interactive terminal commands and servers</li>
 			</ul>
 			<p style={{ margin: "0" }}>
 				Follow me for more updates!{" "}

+ 23 - 4
webview-ui/src/components/ApiOptions.tsx

@@ -215,13 +215,19 @@ const ModelInfoView = ({ modelInfo }: { modelInfo: ModelInfo }) => {
 		}).format(price)
 	}
 
+	const showPromptCachingPrices =
+		modelInfo.supportsPromptCache && modelInfo.cacheWritesPrice && modelInfo.cacheReadsPrice
+
 	return (
 		<p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
 			<ModelInfoSupportsItem
 				isSupported={modelInfo.supportsPromptCache}
-				supportsLabel="Supports prompt cache"
-				doesNotSupportLabel="Does not support prompt cache"
-			/>
+				supportsLabel="Supports prompt caching"
+				doesNotSupportLabel="Does not support prompt caching"
+			/>{" "}
+			<VSCodeLink href="https://www.anthropic.com/news/prompt-caching" style={{ display: "inline" }}>
+				(what is this?)
+			</VSCodeLink>
 			<br />
 			<ModelInfoSupportsItem
 				isSupported={modelInfo.supportsImages}
@@ -231,7 +237,20 @@ const ModelInfoView = ({ modelInfo }: { modelInfo: ModelInfo }) => {
 			<br />
 			<span style={{ fontWeight: 500 }}>Max output:</span> {modelInfo.maxTokens.toLocaleString()} tokens
 			<br />
-			<span style={{ fontWeight: 500 }}>Input price:</span> {formatPrice(modelInfo.inputPrice)} per million tokens
+			<span style={{ fontWeight: 500 }}>
+				{showPromptCachingPrices ? "Base input price:" : "Input price:"}
+			</span>{" "}
+			{formatPrice(modelInfo.inputPrice)} per million tokens
+			{showPromptCachingPrices && (
+				<>
+					<br />
+					<span style={{ fontWeight: 500 }}>Prompt caching write price:</span>{" "}
+					{formatPrice(modelInfo.cacheWritesPrice || 0)} per million tokens
+					<br />
+					<span style={{ fontWeight: 500 }}>Prompt caching read price:</span>{" "}
+					{formatPrice(modelInfo.cacheReadsPrice || 0)} per million tokens
+				</>
+			)}
 			<br />
 			<span style={{ fontWeight: 500 }}>Output price:</span> {formatPrice(modelInfo.outputPrice)} per million
 			tokens