@@ -1,6 +1,16 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { ModelInfo } from "../../shared/api"

+/**
+ * Truncates a conversation by removing a fraction of the messages.
+ *
+ * The first message is always retained; of the remaining messages, the given fraction
+ * (rounded to an even count of messages) is removed from the beginning.
+ *
+ * @param {Anthropic.Messages.MessageParam[]} messages - The conversation messages.
+ * @param {number} fracToRemove - The fraction (between 0 and 1) of messages (excluding the first) to remove.
+ * @returns {Anthropic.Messages.MessageParam[]} The truncated conversation messages.
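+ *
+ * @example
+ * // Hypothetical usage (assumes the removal count is rounded down to an even number):
+ * // with 11 messages and fracToRemove = 0.5, 10 * 0.5 = 5 rounds down to 4 removed,
+ * // keeping messages[0] plus the 6 most recent messages.
+ * const truncated = truncateConversation(messages, 0.5)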
+ */
export function truncateConversation(
	messages: Anthropic.Messages.MessageParam[],
	fracToRemove: number,
@@ -14,6 +24,18 @@ export function truncateConversation(
	return truncatedMessages
}

+/**
+ * Truncates the conversation messages if the total token count exceeds the model's limit;
+ * otherwise returns them unchanged.
+ *
+ * Depending on whether the model supports prompt caching, different maximum token thresholds
+ * and truncation fractions are used. If the current total tokens exceed the threshold,
+ * the conversation is truncated using the appropriate fraction.
+ *
+ * @param {Anthropic.Messages.MessageParam[]} messages - The conversation messages.
+ * @param {number} totalTokens - The total number of tokens in the conversation.
+ * @param {ModelInfo} modelInfo - Model metadata including context window size and prompt cache support.
+ * @returns {Anthropic.Messages.MessageParam[]} The original or truncated conversation messages.
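+ *
+ * @example
+ * // Hypothetical values: a prompt-caching model with a 200,000-token context window
+ * // allows up to max(200_000 - 40_000, 200_000 * 0.8) = 160_000 tokens, so a
+ * // conversation totaling 170,000 tokens is truncated by the 0.5 fraction.
+ * const kept = truncateConversationIfNeeded(messages, 170_000, modelInfo)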
+ */
export function truncateConversationIfNeeded(
	messages: Anthropic.Messages.MessageParam[],
	totalTokens: number,
@@ -24,25 +46,52 @@ export function truncateConversationIfNeeded(
			? messages
			: truncateConversation(messages, getTruncFractionForPromptCachingModels(modelInfo))
	} else {
-		const thresh = getMaxTokensForNonPromptCachingModels(modelInfo)
-		return totalTokens < thresh
+		return totalTokens < getMaxTokensForNonPromptCachingModels(modelInfo)
			? messages
			: truncateConversation(messages, getTruncFractionForNonPromptCachingModels(modelInfo))
	}
}

+/**
+ * Calculates the maximum allowed tokens for models that support prompt caching.
+ *
+ * The maximum is computed as the greater of (contextWindow - 40000) and 80% of the contextWindow.
+ *
+ * @param {ModelInfo} modelInfo - The model information containing the context window size.
+ * @returns {number} The maximum number of tokens allowed for prompt caching models.
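+ *
+ * @example
+ * // Worked examples (hypothetical window sizes):
+ * // contextWindow = 200_000 -> max(160_000, 160_000) = 160_000
+ * // contextWindow = 100_000 -> max(60_000, 80_000) = 80_000 (the 80% floor wins)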
+ */
function getMaxTokensForPromptCachingModels(modelInfo: ModelInfo): number {
	return Math.max(modelInfo.contextWindow - 40_000, modelInfo.contextWindow * 0.8)
}

+/**
+ * Provides the fraction of messages to remove for models that support prompt caching.
+ *
+ * @param {ModelInfo} modelInfo - The model information (unused in the current implementation).
+ * @returns {number} The truncation fraction for prompt caching models (fixed at 0.5).
+ */
function getTruncFractionForPromptCachingModels(modelInfo: ModelInfo): number {
-	return Math.min(80_000, modelInfo.contextWindow * 0.4)
+	return 0.5
}

+/**
+ * Calculates the maximum allowed tokens for models that do not support prompt caching.
+ *
+ * The maximum is computed as the greater of (contextWindow - 40000) and 80% of the contextWindow.
+ *
+ * @param {ModelInfo} modelInfo - The model information containing the context window size.
+ * @returns {number} The maximum number of tokens allowed for non-prompt caching models.
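+ *
+ * @example
+ * // Worked example (hypothetical window size): for small windows the 80% term dominates,
+ * // e.g. contextWindow = 64_000 -> max(24_000, 51_200) = 51_200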
+ */
function getMaxTokensForNonPromptCachingModels(modelInfo: ModelInfo): number {
	return Math.max(modelInfo.contextWindow - 40_000, modelInfo.contextWindow * 0.8)
}

+/**
+ * Provides the fraction of messages to remove for models that do not support prompt caching.
+ *
+ * @param {ModelInfo} modelInfo - The model information (unused in the current implementation).
+ * @returns {number} The truncation fraction for non-prompt caching models (fixed at 0.1).
+ */
function getTruncFractionForNonPromptCachingModels(modelInfo: ModelInfo): number {
-	return Math.min(80_000, modelInfo.contextWindow * 0.4)
+	return 0.1
}