
Merge pull request #675 from nissa-seru/truncation-updates

Enable separate config for truncation for models without context caching
Matt Rubens, 10 months ago
Parent
Commit 4a42feca72
3 changed files with 111 additions and 42 deletions
  1. src/core/Cline.ts (+20 -13)
  2. src/core/sliding-window/index.ts (+88 -17)
  3. webview-ui/tsconfig.json (+3 -12)

src/core/Cline.ts (+20 -13)

@@ -53,7 +53,7 @@ import { AssistantMessageContent, parseAssistantMessage, ToolParamName, ToolUseN
 import { formatResponse } from "./prompts/responses"
 import { SYSTEM_PROMPT } from "./prompts/system"
 import { modes, defaultModeSlug, getModeBySlug } from "../shared/modes"
-import { truncateHalfConversation } from "./sliding-window"
+import { truncateConversationIfNeeded } from "./sliding-window"
 import { ClineProvider, GlobalFileNames } from "./webview/ClineProvider"
 import { detectCodeOmission } from "../integrations/editor/detect-omission"
 import { BrowserSession } from "../services/browser/BrowserSession"
@@ -876,18 +876,25 @@ export class Cline {
 
 		// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
 		if (previousApiReqIndex >= 0) {
-			const previousRequest = this.clineMessages[previousApiReqIndex]
-			if (previousRequest && previousRequest.text) {
-				const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(
-					previousRequest.text,
-				)
-				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-				const contextWindow = this.api.getModel().info.contextWindow || 128_000
-				const maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8)
-				if (totalTokens >= maxAllowedSize) {
-					const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
-					await this.overwriteApiConversationHistory(truncatedMessages)
-				}
+			const previousRequest = this.clineMessages[previousApiReqIndex]?.text
+			if (!previousRequest) return
+
+			const {
+				tokensIn = 0,
+				tokensOut = 0,
+				cacheWrites = 0,
+				cacheReads = 0,
+			}: ClineApiReqInfo = JSON.parse(previousRequest)
+			const totalTokens = tokensIn + tokensOut + cacheWrites + cacheReads
+
+			const trimmedMessages = truncateConversationIfNeeded(
+				this.apiConversationHistory,
+				totalTokens,
+				this.api.getModel().info,
+			)
+
+			if (trimmedMessages !== this.apiConversationHistory) {
+				await this.overwriteApiConversationHistory(trimmedMessages)
 			}
 		}
 

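For context, a minimal sketch of the new call-site pattern above (the helper name maybeTruncateHistory and the overwriteHistory callback are hypothetical stand-ins for the surrounding Cline class; the field names mirror ClineApiReqInfo from the diff). The key point is that truncateConversationIfNeeded returns the original array instance when no truncation is needed, so a reference comparison is enough to decide whether history must be rewritten:

import { Anthropic } from "@anthropic-ai/sdk"
import { ModelInfo } from "../../shared/api"
import { truncateConversationIfNeeded } from "./sliding-window"

// Hypothetical helper illustrating the call-site logic above; not part of the PR.
async function maybeTruncateHistory(
	history: Anthropic.Messages.MessageParam[],
	previousRequestText: string | undefined,
	modelInfo: ModelInfo,
	overwriteHistory: (msgs: Anthropic.Messages.MessageParam[]) => Promise<void>,
): Promise<void> {
	if (!previousRequestText) return

	// Token counts default to 0 when a field is missing, mirroring the destructuring in Cline.ts.
	const { tokensIn = 0, tokensOut = 0, cacheWrites = 0, cacheReads = 0 } = JSON.parse(previousRequestText)
	const totalTokens = tokensIn + tokensOut + cacheWrites + cacheReads

	const trimmed = truncateConversationIfNeeded(history, totalTokens, modelInfo)

	// A different array instance is returned only when truncation actually happened.
	if (trimmed !== history) {
		await overwriteHistory(trimmed)
	}
}
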
src/core/sliding-window/index.ts (+88 -17)

@@ -1,26 +1,97 @@
 import { Anthropic } from "@anthropic-ai/sdk"
+import { ModelInfo } from "../../shared/api"
 
-/*
-We can't implement a dynamically updating sliding window as it would break prompt cache
-every time. To maintain the benefits of caching, we need to keep conversation history
-static. This operation should be performed as infrequently as possible. If a user reaches
-a 200k context, we can assume that the first half is likely irrelevant to their current task.
-Therefore, this function should only be called when absolutely necessary to fit within
-context limits, not as a continuous process.
-*/
-export function truncateHalfConversation(
+/**
+ * Truncates a conversation by removing a fraction of the messages.
+ *
+ * The first message is always retained, and a specified fraction (rounded to an even number)
+ * of messages from the beginning (excluding the first) is removed.
+ *
+ * @param {Anthropic.Messages.MessageParam[]} messages - The conversation messages.
+ * @param {number} fracToRemove - The fraction (between 0 and 1) of messages (excluding the first) to remove.
+ * @returns {Anthropic.Messages.MessageParam[]} The truncated conversation messages.
+ */
+export function truncateConversation(
 	messages: Anthropic.Messages.MessageParam[],
+	fracToRemove: number,
 ): Anthropic.Messages.MessageParam[] {
-	// API expects messages to be in user-assistant order, and tool use messages must be followed by tool results. We need to maintain this structure while truncating.
-
-	// Always keep the first Task message (this includes the project's file structure in environment_details)
 	const truncatedMessages = [messages[0]]
-
-	// Remove half of user-assistant pairs
-	const messagesToRemove = Math.floor(messages.length / 4) * 2 // has to be even number
-
-	const remainingMessages = messages.slice(messagesToRemove + 1) // has to start with assistant message since tool result cannot follow assistant message with no tool use
+	const rawMessagesToRemove = Math.floor((messages.length - 1) * fracToRemove)
+	const messagesToRemove = rawMessagesToRemove - (rawMessagesToRemove % 2)
+	const remainingMessages = messages.slice(messagesToRemove + 1)
 	truncatedMessages.push(...remainingMessages)
 
 	return truncatedMessages
 }
+
+/**
+ * Conditionally truncates the conversation messages if the total token count exceeds the model's limit.
+ *
+ * Depending on whether the model supports prompt caching, different maximum token thresholds
+ * and truncation fractions are used. If the current total tokens exceed the threshold,
+ * the conversation is truncated using the appropriate fraction.
+ *
+ * @param {Anthropic.Messages.MessageParam[]} messages - The conversation messages.
+ * @param {number} totalTokens - The total number of tokens in the conversation.
+ * @param {ModelInfo} modelInfo - Model metadata including context window size and prompt cache support.
+ * @returns {Anthropic.Messages.MessageParam[]} The original or truncated conversation messages.
+ */
+export function truncateConversationIfNeeded(
+	messages: Anthropic.Messages.MessageParam[],
+	totalTokens: number,
+	modelInfo: ModelInfo,
+): Anthropic.Messages.MessageParam[] {
+	if (modelInfo.supportsPromptCache) {
+		return totalTokens < getMaxTokensForPromptCachingModels(modelInfo)
+			? messages
+			: truncateConversation(messages, getTruncFractionForPromptCachingModels(modelInfo))
+	} else {
+		return totalTokens < getMaxTokensForNonPromptCachingModels(modelInfo)
+			? messages
+			: truncateConversation(messages, getTruncFractionForNonPromptCachingModels(modelInfo))
+	}
+}
+
+/**
+ * Calculates the maximum allowed tokens for models that support prompt caching.
+ *
+ * The maximum is computed as the greater of (contextWindow - 40000) and 80% of the contextWindow.
+ *
+ * @param {ModelInfo} modelInfo - The model information containing the context window size.
+ * @returns {number} The maximum number of tokens allowed for prompt caching models.
+ */
+function getMaxTokensForPromptCachingModels(modelInfo: ModelInfo): number {
+	return Math.max(modelInfo.contextWindow - 40_000, modelInfo.contextWindow * 0.8)
+}
+
+/**
+ * Provides the fraction of messages to remove for models that support prompt caching.
+ *
+ * @param {ModelInfo} modelInfo - The model information (unused in current implementation).
+ * @returns {number} The truncation fraction for prompt caching models (fixed at 0.5).
+ */
+function getTruncFractionForPromptCachingModels(modelInfo: ModelInfo): number {
+	return 0.5
+}
+
+/**
+ * Calculates the maximum allowed tokens for models that do not support prompt caching.
+ *
+ * The maximum is computed as the greater of (contextWindow - 40000) and 80% of the contextWindow.
+ *
+ * @param {ModelInfo} modelInfo - The model information containing the context window size.
+ * @returns {number} The maximum number of tokens allowed for non-prompt caching models.
+ */
+function getMaxTokensForNonPromptCachingModels(modelInfo: ModelInfo): number {
+	return Math.max(modelInfo.contextWindow - 40_000, modelInfo.contextWindow * 0.8)
+}
+
+/**
+ * Provides the fraction of messages to remove for models that do not support prompt caching.
+ *
+ * @param {ModelInfo} modelInfo - The model information.
+ * @returns {number} The truncation fraction for non-prompt caching models, computed as min(40000 / contextWindow, 0.2).
+ */
+function getTruncFractionForNonPromptCachingModels(modelInfo: ModelInfo): number {
+	return Math.min(40_000 / modelInfo.contextWindow, 0.2)
+}

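For both model classes the token threshold is max(contextWindow - 40000, 0.8 * contextWindow); what differs is the fraction removed once that threshold is crossed: 0.5 for prompt-caching models versus min(40000 / contextWindow, 0.2) for the rest. Below is a minimal sketch of truncateConversation itself (message contents are placeholders): with eleven messages and fracToRemove = 0.5, the raw count floor(10 * 0.5) = 5 is rounded down to the even number 4, so the first task message plus the last six messages survive, and the retained tail still starts with an assistant turn:

import { Anthropic } from "@anthropic-ai/sdk"
import { truncateConversation } from "./index"

// One initial task message followed by five assistant/user pairs (placeholder content).
const messages: Anthropic.Messages.MessageParam[] = [
	{ role: "user", content: "task + environment_details" },
	...Array.from({ length: 10 }, (_, i) => ({
		role: (i % 2 === 0 ? "assistant" : "user") as "assistant" | "user",
		content: `message ${i + 1}`,
	})),
]

// rawMessagesToRemove = floor(10 * 0.5) = 5; rounded down to 4 so an even number of
// messages is dropped and user/assistant alternation after the first message is kept.
const truncated = truncateConversation(messages, 0.5)
console.log(truncated.length) // 7 (the first message plus the last six)
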
webview-ui/tsconfig.json (+3 -12)

@@ -1,11 +1,7 @@
 {
 	"compilerOptions": {
 		"target": "es5",
-		"lib": [
-			"dom",
-			"dom.iterable",
-			"esnext"
-		],
+		"lib": ["dom", "dom.iterable", "esnext"],
 		"allowJs": true,
 		"skipLibCheck": true,
 		"esModuleInterop": true,
@@ -21,13 +17,8 @@
 		"jsx": "react-jsx",
 		"baseUrl": ".",
 		"paths": {
-			"@/*": [
-				"./src/*"
-			]
+			"@/*": ["./src/*"]
 		}
 	},
-	"include": [
-		"src",
-		"../src/shared",
-	]
+	"include": ["src", "../src/shared"]
 }