
Add a dynamic token buffer

Matt Rubens 10 months ago
parent commit a0684454a2

+ 5 - 0
.changeset/swift-lamps-decide.md

@@ -0,0 +1,5 @@
+---
+"roo-cline": patch
+---
+
+Add a dynamic token buffer
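
For context, this commit replaces the fixed 5,000-token TOKEN_BUFFER with a buffer sized at 10% of the model's context window. A minimal sketch of the effect on buffer size (the window sizes are taken from the tests below; the loop itself is illustrative, not project code):

const TOKEN_BUFFER = 5000 // old fixed buffer, removed by this commit
const TOKEN_BUFFER_PERCENTAGE = 0.1 // new dynamic buffer

for (const contextWindow of [100_000, 200_000]) {
	const dynamicBuffer = contextWindow * TOKEN_BUFFER_PERCENTAGE
	console.log(`window=${contextWindow}: fixed=${TOKEN_BUFFER}, dynamic=${dynamicBuffer}`)
}
// window=100000: fixed=5000, dynamic=10000
// window=200000: fixed=5000, dynamic=20000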

+ 19 - 8
src/core/sliding-window/__tests__/sliding-window.test.ts

@@ -3,7 +3,12 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 
 import { ModelInfo } from "../../../shared/api"
-import { TOKEN_BUFFER, estimateTokenCount, truncateConversation, truncateConversationIfNeeded } from "../index"
+import {
+	TOKEN_BUFFER_PERCENTAGE,
+	estimateTokenCount,
+	truncateConversation,
+	truncateConversationIfNeeded,
+} from "../index"
 
 /**
  * Tests for the truncateConversation function
@@ -121,10 +126,11 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
+		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
 		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 44999, // Well below threshold + buffer
+			totalTokens: 39999, // Well below threshold + dynamic buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -148,10 +154,11 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
+		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
 		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 74999, // Well below threshold + buffer
+			totalTokens: 69999, // Well below threshold + dynamic buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -202,10 +209,11 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
+		// Account for the dynamic buffer which is 10% of context window (20,000 tokens for this test)
 		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 164999, // Well below threshold + buffer
+			totalTokens: 149999, // Well below threshold + dynamic buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -244,7 +252,8 @@ describe("truncateConversationIfNeeded", () => {
 	it("should not truncate if tokens are below max tokens threshold", () => {
 		const modelInfo = createModelInfo(100000, true, 30000)
 		const maxTokens = 100000 - 30000 // 70000
-		const totalTokens = 64999 // Well below threshold + buffer
+		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10000
+		const totalTokens = 70000 - dynamicBuffer - 1 // Just below threshold - buffer
 
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
@@ -338,7 +347,8 @@ describe("truncateConversationIfNeeded", () => {
 		]
 
 		// Set base tokens so total is well below threshold + buffer even with small content added
-		const baseTokensForSmall = availableTokens - smallContentTokens - TOKEN_BUFFER - 10
+		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE
+		const baseTokensForSmall = availableTokens - smallContentTokens - dynamicBuffer - 10
 		const resultWithSmall = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
 			totalTokens: baseTokensForSmall,
@@ -389,10 +399,11 @@ describe("truncateConversationIfNeeded", () => {
 		expect(resultWithVeryLarge).not.toEqual(messagesWithVeryLargeContent) // Should truncate
 	})
 
-	it("should truncate if tokens are within TOKEN_BUFFER of the threshold", () => {
+	it("should truncate if tokens are within TOKEN_BUFFER_PERCENTAGE of the threshold", () => {
 		const modelInfo = createModelInfo(100000, true, 30000)
 		const maxTokens = 100000 - 30000 // 70000
-		const totalTokens = 66000 // Within 5000 of threshold (70000)
+		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10% of 100000 = 10000
+		const totalTokens = 70000 - dynamicBuffer + 1 // Just within the dynamic buffer of threshold (70000)
 
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

+ 7 - 4
src/core/sliding-window/index.ts

@@ -4,7 +4,10 @@ import { Tiktoken } from "js-tiktoken/lite"
 import o200kBase from "js-tiktoken/ranks/o200k_base"
 
 export const TOKEN_FUDGE_FACTOR = 1.5
-export const TOKEN_BUFFER = 5000
+/**
+ * Default percentage of the context window to use as a buffer when deciding when to truncate
+ */
+export const TOKEN_BUFFER_PERCENTAGE = 0.1
 
 /**
  * Counts tokens for user content using tiktoken for text
@@ -108,9 +111,9 @@ export function truncateConversationIfNeeded({
 	const effectiveTokens = totalTokens + lastMessageTokens
 
 	// Calculate available tokens for conversation history
-	const allowedTokens = contextWindow - reservedTokens
+	// Truncate if we're within TOKEN_BUFFER_PERCENTAGE of the context window
+	const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
 
 	// Determine if truncation is needed and apply if necessary
-	// Truncate if we're within TOKEN_BUFFER of the limit
-	return effectiveTokens > allowedTokens - TOKEN_BUFFER ? truncateConversation(messages, 0.5) : messages
+	return effectiveTokens > allowedTokens ? truncateConversation(messages, 0.5) : messages
 }
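
With the percentage buffer folded into allowedTokens, the truncation check reduces to a single comparison. A self-contained sketch of that decision, where needsTruncation is a hypothetical helper for illustration (not part of the module) and effectiveTokens/reservedTokens stand in for the values computed earlier in the function:

const TOKEN_BUFFER_PERCENTAGE = 0.1

// Hypothetical helper mirroring the check above, not the module's API
function needsTruncation(effectiveTokens: number, contextWindow: number, reservedTokens: number): boolean {
	const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
	return effectiveTokens > allowedTokens
}

console.log(needsTruncation(59_999, 100_000, 30_000)) // false: stays under the dynamic buffer
console.log(needsTruncation(60_001, 100_000, 30_000)) // true: within 10% of the window, truncate

Scaling the buffer with the context window keeps the safety margin proportional: for a 200,000-token model the margin grows to 20,000 tokens, where the old fixed 5,000 would have been comparatively thin.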