@@ -3,7 +3,12 @@
 import { Anthropic } from "@anthropic-ai/sdk"

 import { ModelInfo } from "../../../shared/api"
-import { estimateTokenCount, truncateConversation, truncateConversationIfNeeded } from "../index"
+import {
+	TOKEN_BUFFER_PERCENTAGE,
+	estimateTokenCount,
+	truncateConversation,
+	truncateConversationIfNeeded,
+} from "../index"

 /**
  * Tests for the truncateConversation function
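The new `TOKEN_BUFFER_PERCENTAGE` import drives the behavior change under test: the code now reserves a slice of the context window as a safety buffer on top of the space reserved for the model's reply. Below is a minimal sketch of the presumed check, assuming a 0.1 constant and the option values the tests pass; the helper name is hypothetical, not part of `../index`:

```ts
// Sketch only -- not the real implementation in ../index.
const TOKEN_BUFFER_PERCENTAGE = 0.1 // assumed: 10% of the context window

// Decide whether a conversation must be truncated before the next request.
function shouldTruncateSketch(totalTokens: number, contextWindow: number, maxTokens?: number | null): boolean {
	// Space reserved for the model's output (the tests pass modelInfo.maxTokens).
	const reservedForOutput = maxTokens ?? 0
	// Dynamic buffer scales with the window: 10,000 tokens for a 100k window.
	const dynamicBuffer = contextWindow * TOKEN_BUFFER_PERCENTAGE
	// Truncate once usage reaches the buffered threshold.
	return totalTokens >= contextWindow - reservedForOutput - dynamicBuffer
}
```

This matches every boundary exercised below: values just under `threshold - buffer` leave the conversation intact, while values inside the buffer or over the threshold trigger truncation.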
@@ -121,10 +126,11 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

-		// Below max tokens - no truncation
+		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
+		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 49999,
+			totalTokens: 39999, // Well below threshold + dynamic buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -133,7 +139,7 @@ describe("getMaxTokens", () => {
 		// Above max tokens - truncate
 		const result2 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 50001,
+			totalTokens: 50001, // Above threshold
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
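Concretely, the two cases in this first pair work out as follows; note that the 100,000-token window and the 50,000-token reply reservation are inferred from the buffer comment and the old 49,999/50,001 boundary, not stated in the hunk:

```ts
// Inferred values for this test pair (assumptions, see above):
const contextWindow = 100_000
const reservedForOutput = 50_000 // implied by the old threshold of 50,000
const dynamicBuffer = contextWindow * 0.1 // 10,000

// result1: 39,999 <  50,000 - 10,000 = 40,000 -> kept as-is
// result2: 50,001 >= 40,000 (above even the unbuffered threshold) -> truncated
```

The remaining `getMaxTokens` pairs repeat the same arithmetic with 80,000, 40,000, and 170,000 thresholds.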
@@ -148,10 +154,11 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

-		// Below max tokens - no truncation
+		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
+		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 79999,
+			totalTokens: 69999, // Well below threshold + dynamic buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -160,7 +167,7 @@ describe("getMaxTokens", () => {
 		// Above max tokens - truncate
 		const result2 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 80001,
+			totalTokens: 80001, // Above threshold
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -175,10 +182,10 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

-		// Below max tokens - no truncation
+		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 39999,
+			totalTokens: 34999, // Well below threshold + buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -187,7 +194,7 @@ describe("getMaxTokens", () => {
 		// Above max tokens - truncate
 		const result2 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 40001,
+			totalTokens: 40001, // Above threshold
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -202,10 +209,11 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

-		// Below max tokens - no truncation
+		// Account for the dynamic buffer which is 10% of context window (20,000 tokens for this test)
+		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 169999,
+			totalTokens: 149999, // Well below threshold + dynamic buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -214,7 +222,7 @@ describe("getMaxTokens", () => {
 		// Above max tokens - truncate
 		const result2 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 170001,
+			totalTokens: 170001, // Above threshold
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -244,7 +252,8 @@ describe("truncateConversationIfNeeded", () => {
 	it("should not truncate if tokens are below max tokens threshold", () => {
 		const modelInfo = createModelInfo(100000, true, 30000)
 		const maxTokens = 100000 - 30000 // 70000
-		const totalTokens = 69999 // Below threshold
+		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10000
+		const totalTokens = 70000 - dynamicBuffer - 1 // Just below threshold - buffer

 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
@@ -337,8 +346,9 @@ describe("truncateConversationIfNeeded", () => {
 			{ role: messages[messages.length - 1].role, content: smallContent },
 		]

-		// Set base tokens so total is below threshold even with small content added
-		const baseTokensForSmall = availableTokens - smallContentTokens - 10
+		// Set base tokens so total is well below threshold + buffer even with small content added
+		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE
+		const baseTokensForSmall = availableTokens - smallContentTokens - dynamicBuffer - 10
 		const resultWithSmall = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
 			totalTokens: baseTokensForSmall,
@@ -388,7 +398,30 @@ describe("truncateConversationIfNeeded", () => {
 		})
 		expect(resultWithVeryLarge).not.toEqual(messagesWithVeryLargeContent) // Should truncate
 	})
+
+	it("should truncate if tokens are within TOKEN_BUFFER_PERCENTAGE of the threshold", () => {
+		const modelInfo = createModelInfo(100000, true, 30000)
+		const maxTokens = 100000 - 30000 // 70000
+		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10% of 100000 = 10000
+		const totalTokens = 70000 - dynamicBuffer + 1 // Just within the dynamic buffer of the threshold (70000)
+
+		// Create messages with very small content in the last one to avoid token overflow
+		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
+
+		// When truncating, always uses 0.5 fraction
+		// With 4 messages after the first, 0.5 fraction means remove 2 messages
+		const expectedResult = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
+
+		const result = truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens,
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+		})
+		expect(result).toEqual(expectedResult)
+	})
 })
+
 /**
  * Tests for the estimateTokenCount function
  */
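The `expectedResult` in the new test follows from the halving rule the comments describe: the first message is always kept and half of the remaining messages are dropped from the front. A sketch of that behavior, where the even-rounding step is an assumption (it is invisible here because floor(4 * 0.5) = 2 is already even) rather than something the diff shows:

```ts
// Sketch of the 0.5-fraction truncation the new test asserts.
function truncateConversationSketch<T>(messages: T[], fracToRemove: number): T[] {
	const rawCount = Math.floor((messages.length - 1) * fracToRemove)
	const removeCount = rawCount - (rawCount % 2) // assumed: round down to even so user/assistant pairs stay intact
	// Keep the first message, drop `removeCount` messages immediately after it.
	return [messages[0], ...messages.slice(1 + removeCount)]
}

// Five messages, fracToRemove = 0.5: remove 2, keeping indices [0, 3, 4] --
// exactly messagesWithSmallContent[0], [3], and [4] as in expectedResult.
```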