
Add a dynamic token buffer

Matt Rubens 10 months ago
parent commit a0684454a2

+ 5 - 0
.changeset/swift-lamps-decide.md

@@ -0,0 +1,5 @@
+---
+"roo-cline": patch
+---
+
+Add a dynamic token buffer
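
For context, this commit replaces the fixed 5,000-token TOKEN_BUFFER with a buffer sized at 10% of the model's context window. A minimal sketch of the effect on buffer size (the window sizes are taken from the tests below; the loop itself is illustrative, not project code):

const TOKEN_BUFFER = 5000 // old fixed buffer, removed by this commit
const TOKEN_BUFFER_PERCENTAGE = 0.1 // new dynamic buffer

for (const contextWindow of [100_000, 200_000]) {
	const dynamicBuffer = contextWindow * TOKEN_BUFFER_PERCENTAGE
	console.log(`window=${contextWindow}: fixed=${TOKEN_BUFFER}, dynamic=${dynamicBuffer}`)
}
// window=100000: fixed=5000, dynamic=10000
// window=200000: fixed=5000, dynamic=20000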

+ 19 - 8
src/core/sliding-window/__tests__/sliding-window.test.ts

@@ -3,7 +3,12 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 
 import { ModelInfo } from "../../../shared/api"
-import { TOKEN_BUFFER, estimateTokenCount, truncateConversation, truncateConversationIfNeeded } from "../index"
+import {
+	TOKEN_BUFFER_PERCENTAGE,
+	estimateTokenCount,
+	truncateConversation,
+	truncateConversationIfNeeded,
+} from "../index"
 
 /**
  * Tests for the truncateConversation function
@@ -121,10 +126,11 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
+		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
 		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 44999, // Well below threshold + buffer
+			totalTokens: 39999, // Well below threshold + dynamic buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -148,10 +154,11 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
+		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
 		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 74999, // Well below threshold + buffer
+			totalTokens: 69999, // Well below threshold + dynamic buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -202,10 +209,11 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
+		// Account for the dynamic buffer which is 10% of context window (20,000 tokens for this test)
 		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 164999, // Well below threshold + buffer
+			totalTokens: 149999, // Well below threshold + dynamic buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -244,7 +252,8 @@ describe("truncateConversationIfNeeded", () => {
 	it("should not truncate if tokens are below max tokens threshold", () => {
 		const modelInfo = createModelInfo(100000, true, 30000)
 		const maxTokens = 100000 - 30000 // 70000
-		const totalTokens = 64999 // Well below threshold + buffer
+		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10000
+		const totalTokens = 70000 - dynamicBuffer - 1 // Just below threshold - buffer
 
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
@@ -338,7 +347,8 @@ describe("truncateConversationIfNeeded", () => {
 		]
 
 		// Set base tokens so total is well below threshold + buffer even with small content added
-		const baseTokensForSmall = availableTokens - smallContentTokens - TOKEN_BUFFER - 10
+		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE
+		const baseTokensForSmall = availableTokens - smallContentTokens - dynamicBuffer - 10
 		const resultWithSmall = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
 			totalTokens: baseTokensForSmall,
@@ -389,10 +399,11 @@ describe("truncateConversationIfNeeded", () => {
 		expect(resultWithVeryLarge).not.toEqual(messagesWithVeryLargeContent) // Should truncate
 	})
 
-	it("should truncate if tokens are within TOKEN_BUFFER of the threshold", () => {
+	it("should truncate if tokens are within TOKEN_BUFFER_PERCENTAGE of the threshold", () => {
 		const modelInfo = createModelInfo(100000, true, 30000)
 		const maxTokens = 100000 - 30000 // 70000
-		const totalTokens = 66000 // Within 5000 of threshold (70000)
+		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10% of 100000 = 10000
+		const totalTokens = 70000 - dynamicBuffer + 1 // Just within the dynamic buffer of threshold (70000)
 
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

+ 7 - 4
src/core/sliding-window/index.ts

@@ -4,7 +4,10 @@ import { Tiktoken } from "js-tiktoken/lite"
 import o200kBase from "js-tiktoken/ranks/o200k_base"
 
 export const TOKEN_FUDGE_FACTOR = 1.5
-export const TOKEN_BUFFER = 5000
+/**
+ * Default percentage of the context window to use as a buffer when deciding when to truncate
+ */
+export const TOKEN_BUFFER_PERCENTAGE = 0.1
 
 /**
  * Counts tokens for user content using tiktoken for text
@@ -108,9 +111,9 @@ export function truncateConversationIfNeeded({
 	const effectiveTokens = totalTokens + lastMessageTokens
 
 	// Calculate available tokens for conversation history
-	const allowedTokens = contextWindow - reservedTokens
+	// Truncate if we're within TOKEN_BUFFER_PERCENTAGE of the context window
+	const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
 
 	// Determine if truncation is needed and apply if necessary
-	// Truncate if we're within TOKEN_BUFFER of the limit
-	return effectiveTokens > allowedTokens - TOKEN_BUFFER ? truncateConversation(messages, 0.5) : messages
+	return effectiveTokens > allowedTokens ? truncateConversation(messages, 0.5) : messages
 }
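
With the percentage buffer folded into allowedTokens, the truncation check reduces to a single comparison. A self-contained sketch of that decision, where needsTruncation is a hypothetical helper for illustration (not part of the module) and effectiveTokens/reservedTokens stand in for the values computed earlier in the function:

const TOKEN_BUFFER_PERCENTAGE = 0.1

// Hypothetical helper mirroring the check above, not the module's API
function needsTruncation(effectiveTokens: number, contextWindow: number, reservedTokens: number): boolean {
	const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
	return effectiveTokens > allowedTokens
}

console.log(needsTruncation(59_999, 100_000, 30_000)) // false: stays under the dynamic buffer
console.log(needsTruncation(60_001, 100_000, 30_000)) // true: within 10% of the window, truncate

Scaling the buffer with the context window keeps the safety margin proportional: for a 200,000-token model the margin grows to 20,000 tokens, where the old fixed 5,000 would have been comparatively thin.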