@@ -3,7 +3,12 @@
 import { Anthropic } from "@anthropic-ai/sdk"

 import { ModelInfo } from "../../../shared/api"
-import { estimateTokenCount, truncateConversation, truncateConversationIfNeeded } from "../index"
+import {
+	TOKEN_BUFFER_PERCENTAGE,
+	estimateTokenCount,
+	truncateConversation,
+	truncateConversationIfNeeded,
+} from "../index"

 /**
  * Tests for the truncateConversation function
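The new `TOKEN_BUFFER_PERCENTAGE` import drives the behavior change under test: the code now reserves a slice of the context window as a safety buffer on top of the space reserved for the model's reply. Below is a minimal sketch of the presumed check, assuming a 0.1 constant and the option values the tests pass; the helper name is hypothetical, not part of `../index`:

```ts
// Sketch only -- not the real implementation in ../index.
const TOKEN_BUFFER_PERCENTAGE = 0.1 // assumed: 10% of the context window

// Decide whether a conversation must be truncated before the next request.
function shouldTruncateSketch(totalTokens: number, contextWindow: number, maxTokens?: number | null): boolean {
	// Space reserved for the model's output (the tests pass modelInfo.maxTokens).
	const reservedForOutput = maxTokens ?? 0
	// Dynamic buffer scales with the window: 10,000 tokens for a 100k window.
	const dynamicBuffer = contextWindow * TOKEN_BUFFER_PERCENTAGE
	// Truncate once usage reaches the buffered threshold.
	return totalTokens >= contextWindow - reservedForOutput - dynamicBuffer
}
```

This matches every boundary exercised below: values just under `threshold - buffer` leave the conversation intact, while values inside the buffer or over the threshold trigger truncation.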
@@ -121,10 +126,11 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

-		// Below max tokens - no truncation
+		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
+		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 49999,
+			totalTokens: 39999, // Well below threshold + dynamic buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -133,7 +139,7 @@ describe("getMaxTokens", () => {
 		// Above max tokens - truncate
 		const result2 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 50001,
+			totalTokens: 50001, // Above threshold
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
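Concretely, the two cases in this first pair work out as follows; note that the 100,000-token window and the 50,000-token reply reservation are inferred from the buffer comment and the old 49,999/50,001 boundary, not stated in the hunk:

```ts
// Inferred values for this test pair (assumptions, see above):
const contextWindow = 100_000
const reservedForOutput = 50_000 // implied by the old threshold of 50,000
const dynamicBuffer = contextWindow * 0.1 // 10,000

// result1: 39,999 <  50,000 - 10,000 = 40,000 -> kept as-is
// result2: 50,001 >= 40,000 (above even the unbuffered threshold) -> truncated
```

The remaining `getMaxTokens` pairs repeat the same arithmetic with 80,000, 40,000, and 170,000 thresholds.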
@@ -148,10 +154,11 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

-		// Below max tokens - no truncation
+		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
+		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 79999,
+			totalTokens: 69999, // Well below threshold + dynamic buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -160,7 +167,7 @@ describe("getMaxTokens", () => {
 		// Above max tokens - truncate
 		const result2 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 80001,
+			totalTokens: 80001, // Above threshold
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -175,10 +182,10 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

-		// Below max tokens - no truncation
+		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 39999,
+			totalTokens: 34999, // Well below threshold + buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -187,7 +194,7 @@ describe("getMaxTokens", () => {
 		// Above max tokens - truncate
 		const result2 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 40001,
+			totalTokens: 40001, // Above threshold
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -202,10 +209,11 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

-		// Below max tokens - no truncation
+		// Account for the dynamic buffer which is 10% of context window (20,000 tokens for this test)
+		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 169999,
+			totalTokens: 149999, // Well below threshold + dynamic buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -214,7 +222,7 @@ describe("getMaxTokens", () => {
 		// Above max tokens - truncate
 		const result2 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 170001,
+			totalTokens: 170001, // Above threshold
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -244,7 +252,8 @@ describe("truncateConversationIfNeeded", () => {
 	it("should not truncate if tokens are below max tokens threshold", () => {
 		const modelInfo = createModelInfo(100000, true, 30000)
 		const maxTokens = 100000 - 30000 // 70000
-		const totalTokens = 69999 // Below threshold
+		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10000
+		const totalTokens = 70000 - dynamicBuffer - 1 // Just below threshold - buffer

 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
@@ -337,8 +346,9 @@ describe("truncateConversationIfNeeded", () => {
 			{ role: messages[messages.length - 1].role, content: smallContent },
 		]

-		// Set base tokens so total is below threshold even with small content added
-		const baseTokensForSmall = availableTokens - smallContentTokens - 10
+		// Set base tokens so total is well below threshold + buffer even with small content added
+		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE
+		const baseTokensForSmall = availableTokens - smallContentTokens - dynamicBuffer - 10
 		const resultWithSmall = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
 			totalTokens: baseTokensForSmall,
@@ -388,7 +398,30 @@ describe("truncateConversationIfNeeded", () => {
 		})
 		expect(resultWithVeryLarge).not.toEqual(messagesWithVeryLargeContent) // Should truncate
 	})
+
+	it("should truncate if tokens are within TOKEN_BUFFER_PERCENTAGE of the threshold", () => {
+		const modelInfo = createModelInfo(100000, true, 30000)
+		const maxTokens = 100000 - 30000 // 70000
+		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10% of 100000 = 10000
+		const totalTokens = 70000 - dynamicBuffer + 1 // Just within the dynamic buffer of the threshold (70000)
+
+		// Create messages with very small content in the last one to avoid token overflow
+		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
+
+		// When truncating, always uses 0.5 fraction
+		// With 4 messages after the first, 0.5 fraction means remove 2 messages
+		const expectedResult = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
+
+		const result = truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens,
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+		})
+		expect(result).toEqual(expectedResult)
+	})
 })
+
 /**
  * Tests for the estimateTokenCount function
  */
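The `expectedResult` in the new test follows from the halving rule the comments describe: the first message is always kept and half of the remaining messages are dropped from the front. A sketch of that behavior, where the even-rounding step is an assumption (it is invisible here because floor(4 * 0.5) = 2 is already even) rather than something the diff shows:

```ts
// Sketch of the 0.5-fraction truncation the new test asserts.
function truncateConversationSketch<T>(messages: T[], fracToRemove: number): T[] {
	const rawCount = Math.floor((messages.length - 1) * fracToRemove)
	const removeCount = rawCount - (rawCount % 2) // assumed: round down to even so user/assistant pairs stay intact
	// Keep the first message, drop `removeCount` messages immediately after it.
	return [messages[0], ...messages.slice(1 + removeCount)]
}

// Five messages, fracToRemove = 0.5: remove 2, keeping indices [0, 3, 4] --
// exactly messagesWithSmallContent[0], [3], and [4] as in expectedResult.
```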