@@ -3,12 +3,35 @@
import { Anthropic } from "@anthropic-ai/sdk"

import { ModelInfo } from "../../../shared/api"
-import {
-	TOKEN_BUFFER_PERCENTAGE,
-	estimateTokenCount,
-	truncateConversation,
-	truncateConversationIfNeeded,
-} from "../index"
+import { ApiHandler } from "../../../api"
+import { BaseProvider } from "../../../api/providers/base-provider"
+import { TOKEN_BUFFER_PERCENTAGE } from "../index"
+import { estimateTokenCount, truncateConversation, truncateConversationIfNeeded } from "../index"
+
+// Create a mock ApiHandler for testing
+class MockApiHandler extends BaseProvider {
+	createMessage(): any {
+		throw new Error("Method not implemented.")
+	}
+
+	getModel(): { id: string; info: ModelInfo } {
+		return {
+			id: "test-model",
+			info: {
+				contextWindow: 100000,
+				maxTokens: 50000,
+				supportsPromptCache: true,
+				supportsImages: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description: "Test model",
+			},
+		}
+	}
+}
+
+// Create a singleton instance for tests
+const mockApiHandler = new MockApiHandler()

/**
 * Tests for the truncateConversation function
@@ -100,134 +123,91 @@ describe("truncateConversation", () => {
})

/**
- * Tests for the getMaxTokens function (private but tested through truncateConversationIfNeeded)
+ * Tests for the estimateTokenCount function
 */
-describe("getMaxTokens", () => {
|
|
|
- // We'll test this indirectly through truncateConversationIfNeeded
|
|
|
- const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
|
|
|
- contextWindow,
|
|
|
- supportsPromptCache: true, // Not relevant for getMaxTokens
|
|
|
- maxTokens,
|
|
|
+describe("estimateTokenCount", () => {
|
|
|
+ it("should return 0 for empty or undefined content", async () => {
|
|
|
+ expect(await estimateTokenCount([], mockApiHandler)).toBe(0)
|
|
|
+ // @ts-ignore - Testing with undefined
|
|
|
+ expect(await estimateTokenCount(undefined, mockApiHandler)).toBe(0)
|
|
|
})
|
|
|
|
|
|
-	// Reuse across tests for consistency
-	const messages: Anthropic.Messages.MessageParam[] = [
-		{ role: "user", content: "First message" },
-		{ role: "assistant", content: "Second message" },
-		{ role: "user", content: "Third message" },
-		{ role: "assistant", content: "Fourth message" },
-		{ role: "user", content: "Fifth message" },
-	]
-
-	it("should use maxTokens as buffer when specified", () => {
-		const modelInfo = createModelInfo(100000, 50000)
-		// Max tokens = 100000 - 50000 = 50000
-
-		// Create messages with very small content in the last one to avoid token overflow
-		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
+	it("should estimate tokens for text blocks", async () => {
+		const content: Array<Anthropic.Messages.ContentBlockParam> = [
+			{ type: "text", text: "This is a text block with 36 characters" },
+		]

-		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
-		// Below max tokens and buffer - no truncation
-		const result1 = truncateConversationIfNeeded({
-			messages: messagesWithSmallContent,
-			totalTokens: 39999, // Well below threshold + dynamic buffer
-			contextWindow: modelInfo.contextWindow,
-			maxTokens: modelInfo.maxTokens,
-		})
-		expect(result1).toEqual(messagesWithSmallContent)
+		// With tiktoken, the exact token count may differ from character-based estimation
+		// Instead of expecting an exact number, we verify it's a reasonable positive number
+		const result = await estimateTokenCount(content, mockApiHandler)
+		expect(result).toBeGreaterThan(0)

-		// Above max tokens - truncate
-		const result2 = truncateConversationIfNeeded({
-			messages: messagesWithSmallContent,
-			totalTokens: 50001, // Above threshold
-			contextWindow: modelInfo.contextWindow,
-			maxTokens: modelInfo.maxTokens,
-		})
-		expect(result2).not.toEqual(messagesWithSmallContent)
-		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
+		// We can also verify that longer text results in more tokens
+		const longerContent: Array<Anthropic.Messages.ContentBlockParam> = [
+			{
+				type: "text",
+				text: "This is a longer text block with significantly more characters to encode into tokens",
+			},
+		]
+		const longerResult = await estimateTokenCount(longerContent, mockApiHandler)
+		expect(longerResult).toBeGreaterThan(result)
	})

-	it("should use 20% of context window as buffer when maxTokens is undefined", () => {
-		const modelInfo = createModelInfo(100000, undefined)
-		// Max tokens = 100000 - (100000 * 0.2) = 80000
+	it("should estimate tokens for image blocks based on data size", async () => {
+		// Small image
+		const smallImage: Array<Anthropic.Messages.ContentBlockParam> = [
+			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "small_dummy_data" } },
+		]
+		// Larger image with more data
+		const largerImage: Array<Anthropic.Messages.ContentBlockParam> = [
+			{ type: "image", source: { type: "base64", media_type: "image/png", data: "X".repeat(1000) } },
+		]

-		// Create messages with very small content in the last one to avoid token overflow
-		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
+		// Verify the token count scales with the size of the image data
+		const smallImageTokens = await estimateTokenCount(smallImage, mockApiHandler)
+		const largerImageTokens = await estimateTokenCount(largerImage, mockApiHandler)

-		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
-		// Below max tokens and buffer - no truncation
-		const result1 = truncateConversationIfNeeded({
-			messages: messagesWithSmallContent,
-			totalTokens: 69999, // Well below threshold + dynamic buffer
-			contextWindow: modelInfo.contextWindow,
-			maxTokens: modelInfo.maxTokens,
-		})
-		expect(result1).toEqual(messagesWithSmallContent)
+		// Small image should have some tokens
+		expect(smallImageTokens).toBeGreaterThan(0)

-		// Above max tokens - truncate
-		const result2 = truncateConversationIfNeeded({
-			messages: messagesWithSmallContent,
-			totalTokens: 80001, // Above threshold
-			contextWindow: modelInfo.contextWindow,
-			maxTokens: modelInfo.maxTokens,
-		})
-		expect(result2).not.toEqual(messagesWithSmallContent)
-		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
+		// Larger image should have proportionally more tokens
+		expect(largerImageTokens).toBeGreaterThan(smallImageTokens)
+
+		// Verify the larger image calculation matches our formula including the 50% fudge factor
+		expect(largerImageTokens).toBe(48)
	})

-	it("should handle small context windows appropriately", () => {
-		const modelInfo = createModelInfo(50000, 10000)
-		// Max tokens = 50000 - 10000 = 40000
+	it("should estimate tokens for mixed content blocks", async () => {
+		const content: Array<Anthropic.Messages.ContentBlockParam> = [
+			{ type: "text", text: "A text block with 30 characters" },
+			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "dummy_data" } },
+			{ type: "text", text: "Another text with 24 chars" },
+		]

-		// Create messages with very small content in the last one to avoid token overflow
-		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
+		// We know image tokens calculation should be consistent
+		const imageTokens = Math.ceil(Math.sqrt("dummy_data".length)) * 1.5

-		// Below max tokens and buffer - no truncation
-		const result1 = truncateConversationIfNeeded({
-			messages: messagesWithSmallContent,
-			totalTokens: 34999, // Well below threshold + buffer
-			contextWindow: modelInfo.contextWindow,
-			maxTokens: modelInfo.maxTokens,
-		})
-		expect(result1).toEqual(messagesWithSmallContent)
+		// With tiktoken, we can't predict exact text token counts,
+		// but we can verify the total is greater than just the image tokens
+		const result = await estimateTokenCount(content, mockApiHandler)
+		expect(result).toBeGreaterThan(imageTokens)

-		// Above max tokens - truncate
-		const result2 = truncateConversationIfNeeded({
-			messages: messagesWithSmallContent,
-			totalTokens: 40001, // Above threshold
-			contextWindow: modelInfo.contextWindow,
-			maxTokens: modelInfo.maxTokens,
-		})
-		expect(result2).not.toEqual(messagesWithSmallContent)
-		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
+		// Also test against a version with only the image to verify text adds tokens
+		const imageOnlyContent: Array<Anthropic.Messages.ContentBlockParam> = [
+			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "dummy_data" } },
+		]
+		const imageOnlyResult = await estimateTokenCount(imageOnlyContent, mockApiHandler)
+		expect(result).toBeGreaterThan(imageOnlyResult)
	})

-	it("should handle large context windows appropriately", () => {
-		const modelInfo = createModelInfo(200000, 30000)
-		// Max tokens = 200000 - 30000 = 170000
-
-		// Create messages with very small content in the last one to avoid token overflow
-		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
-
-		// Account for the dynamic buffer which is 10% of context window (20,000 tokens for this test)
-		// Below max tokens and buffer - no truncation
-		const result1 = truncateConversationIfNeeded({
-			messages: messagesWithSmallContent,
-			totalTokens: 149999, // Well below threshold + dynamic buffer
-			contextWindow: modelInfo.contextWindow,
-			maxTokens: modelInfo.maxTokens,
-		})
-		expect(result1).toEqual(messagesWithSmallContent)
+	it("should handle empty text blocks", async () => {
+		const content: Array<Anthropic.Messages.ContentBlockParam> = [{ type: "text", text: "" }]
+		expect(await estimateTokenCount(content, mockApiHandler)).toBe(0)
+	})

-		// Above max tokens - truncate
-		const result2 = truncateConversationIfNeeded({
-			messages: messagesWithSmallContent,
-			totalTokens: 170001, // Above threshold
-			contextWindow: modelInfo.contextWindow,
-			maxTokens: modelInfo.maxTokens,
-		})
-		expect(result2).not.toEqual(messagesWithSmallContent)
-		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
+	it("should handle plain string messages", async () => {
+		const content = "This is a plain text message"
+		expect(await estimateTokenCount([{ type: "text", text: content }], mockApiHandler)).toBeGreaterThan(0)
	})
})

@@ -235,9 +215,9 @@ describe("getMaxTokens", () => {
 * Tests for the truncateConversationIfNeeded function
 */
describe("truncateConversationIfNeeded", () => {
-	const createModelInfo = (contextWindow: number, supportsPromptCache: boolean, maxTokens?: number): ModelInfo => ({
+	const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
		contextWindow,
-		supportsPromptCache,
+		supportsPromptCache: true,
		maxTokens,
	})

@@ -249,8 +229,8 @@ describe("truncateConversationIfNeeded", () => {
		{ role: "user", content: "Fifth message" },
	]

-	it("should not truncate if tokens are below max tokens threshold", () => {
-		const modelInfo = createModelInfo(100000, true, 30000)
+	it("should not truncate if tokens are below max tokens threshold", async () => {
+		const modelInfo = createModelInfo(100000, 30000)
		const maxTokens = 100000 - 30000 // 70000
		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10000
		const totalTokens = 70000 - dynamicBuffer - 1 // Just below threshold - buffer
@@ -258,17 +238,18 @@ describe("truncateConversationIfNeeded", () => {
		// Create messages with very small content in the last one to avoid token overflow
		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

-		const result = truncateConversationIfNeeded({
+		const result = await truncateConversationIfNeeded({
			messages: messagesWithSmallContent,
			totalTokens,
			contextWindow: modelInfo.contextWindow,
			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
		})
		expect(result).toEqual(messagesWithSmallContent) // No truncation occurs
	})

-	it("should truncate if tokens are above max tokens threshold", () => {
-		const modelInfo = createModelInfo(100000, true, 30000)
+	it("should truncate if tokens are above max tokens threshold", async () => {
+		const modelInfo = createModelInfo(100000, 30000)
		const maxTokens = 100000 - 30000 // 70000
		const totalTokens = 70001 // Above threshold

@@ -279,68 +260,73 @@ describe("truncateConversationIfNeeded", () => {
		// With 4 messages after the first, 0.5 fraction means remove 2 messages
		const expectedResult = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]

-		const result = truncateConversationIfNeeded({
+		const result = await truncateConversationIfNeeded({
			messages: messagesWithSmallContent,
			totalTokens,
			contextWindow: modelInfo.contextWindow,
			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
		})
		expect(result).toEqual(expectedResult)
	})

-	it("should work with non-prompt caching models the same as prompt caching models", () => {
+	it("should work with non-prompt caching models the same as prompt caching models", async () => {
		// The implementation no longer differentiates between prompt caching and non-prompt caching models
-		const modelInfo1 = createModelInfo(100000, true, 30000)
-		const modelInfo2 = createModelInfo(100000, false, 30000)
+		const modelInfo1 = createModelInfo(100000, 30000)
+		const modelInfo2 = createModelInfo(100000, 30000)

		// Create messages with very small content in the last one to avoid token overflow
		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

		// Test below threshold
		const belowThreshold = 69999
-		expect(
-			truncateConversationIfNeeded({
-				messages: messagesWithSmallContent,
-				totalTokens: belowThreshold,
-				contextWindow: modelInfo1.contextWindow,
-				maxTokens: modelInfo1.maxTokens,
-			}),
-		).toEqual(
-			truncateConversationIfNeeded({
-				messages: messagesWithSmallContent,
-				totalTokens: belowThreshold,
-				contextWindow: modelInfo2.contextWindow,
-				maxTokens: modelInfo2.maxTokens,
-			}),
-		)
+		const result1 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: belowThreshold,
+			contextWindow: modelInfo1.contextWindow,
+			maxTokens: modelInfo1.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+
+		const result2 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: belowThreshold,
+			contextWindow: modelInfo2.contextWindow,
+			maxTokens: modelInfo2.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+
+		expect(result1).toEqual(result2)

		// Test above threshold
		const aboveThreshold = 70001
-		expect(
-			truncateConversationIfNeeded({
-				messages: messagesWithSmallContent,
-				totalTokens: aboveThreshold,
-				contextWindow: modelInfo1.contextWindow,
-				maxTokens: modelInfo1.maxTokens,
-			}),
-		).toEqual(
-			truncateConversationIfNeeded({
-				messages: messagesWithSmallContent,
-				totalTokens: aboveThreshold,
-				contextWindow: modelInfo2.contextWindow,
-				maxTokens: modelInfo2.maxTokens,
-			}),
-		)
+		const result3 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: aboveThreshold,
+			contextWindow: modelInfo1.contextWindow,
+			maxTokens: modelInfo1.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+
+		const result4 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: aboveThreshold,
+			contextWindow: modelInfo2.contextWindow,
+			maxTokens: modelInfo2.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+
+		expect(result3).toEqual(result4)
	})

-	it("should consider incoming content when deciding to truncate", () => {
-		const modelInfo = createModelInfo(100000, true, 30000)
+	it("should consider incoming content when deciding to truncate", async () => {
+		const modelInfo = createModelInfo(100000, 30000)
		const maxTokens = 30000
		const availableTokens = modelInfo.contextWindow - maxTokens

		// Test case 1: Small content that won't push us over the threshold
		const smallContent = [{ type: "text" as const, text: "Small content" }]
-		const smallContentTokens = estimateTokenCount(smallContent)
+		const smallContentTokens = await estimateTokenCount(smallContent, mockApiHandler)
		const messagesWithSmallContent: Anthropic.Messages.MessageParam[] = [
			...messages.slice(0, -1),
			{ role: messages[messages.length - 1].role, content: smallContent },
@@ -349,11 +335,12 @@ describe("truncateConversationIfNeeded", () => {
		// Set base tokens so total is well below threshold + buffer even with small content added
		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE
		const baseTokensForSmall = availableTokens - smallContentTokens - dynamicBuffer - 10
-		const resultWithSmall = truncateConversationIfNeeded({
+		const resultWithSmall = await truncateConversationIfNeeded({
			messages: messagesWithSmallContent,
			totalTokens: baseTokensForSmall,
			contextWindow: modelInfo.contextWindow,
			maxTokens,
+			apiHandler: mockApiHandler,
		})
		expect(resultWithSmall).toEqual(messagesWithSmallContent) // No truncation

@@ -364,7 +351,7 @@ describe("truncateConversationIfNeeded", () => {
			text: "A very large incoming message that would consume a significant number of tokens and push us over the threshold",
		},
		]
-		const largeContentTokens = estimateTokenCount(largeContent)
+		const largeContentTokens = await estimateTokenCount(largeContent, mockApiHandler)
		const messagesWithLargeContent: Anthropic.Messages.MessageParam[] = [
			...messages.slice(0, -1),
			{ role: messages[messages.length - 1].role, content: largeContent },
@@ -372,17 +359,18 @@ describe("truncateConversationIfNeeded", () => {

		// Set base tokens so we're just below threshold without content, but over with content
		const baseTokensForLarge = availableTokens - Math.floor(largeContentTokens / 2)
-		const resultWithLarge = truncateConversationIfNeeded({
+		const resultWithLarge = await truncateConversationIfNeeded({
			messages: messagesWithLargeContent,
			totalTokens: baseTokensForLarge,
			contextWindow: modelInfo.contextWindow,
			maxTokens,
+			apiHandler: mockApiHandler,
		})
		expect(resultWithLarge).not.toEqual(messagesWithLargeContent) // Should truncate

		// Test case 3: Very large content that will definitely exceed threshold
		const veryLargeContent = [{ type: "text" as const, text: "X".repeat(1000) }]
-		const veryLargeContentTokens = estimateTokenCount(veryLargeContent)
+		const veryLargeContentTokens = await estimateTokenCount(veryLargeContent, mockApiHandler)
		const messagesWithVeryLargeContent: Anthropic.Messages.MessageParam[] = [
			...messages.slice(0, -1),
			{ role: messages[messages.length - 1].role, content: veryLargeContent },
@@ -390,17 +378,18 @@ describe("truncateConversationIfNeeded", () => {

		// Set base tokens so we're just below threshold without content
		const baseTokensForVeryLarge = availableTokens - Math.floor(veryLargeContentTokens / 2)
-		const resultWithVeryLarge = truncateConversationIfNeeded({
+		const resultWithVeryLarge = await truncateConversationIfNeeded({
			messages: messagesWithVeryLargeContent,
			totalTokens: baseTokensForVeryLarge,
			contextWindow: modelInfo.contextWindow,
			maxTokens,
+			apiHandler: mockApiHandler,
		})
		expect(resultWithVeryLarge).not.toEqual(messagesWithVeryLargeContent) // Should truncate
	})

-	it("should truncate if tokens are within TOKEN_BUFFER_PERCENTAGE of the threshold", () => {
-		const modelInfo = createModelInfo(100000, true, 30000)
+	it("should truncate if tokens are within TOKEN_BUFFER_PERCENTAGE of the threshold", async () => {
+		const modelInfo = createModelInfo(100000, 30000)
		const maxTokens = 100000 - 30000 // 70000
		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10% of 100000 = 10000
		const totalTokens = 70000 - dynamicBuffer + 1 // Just within the dynamic buffer of threshold (70000)
@@ -412,101 +401,153 @@ describe("truncateConversationIfNeeded", () => {
		// With 4 messages after the first, 0.5 fraction means remove 2 messages
		const expectedResult = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]

-		const result = truncateConversationIfNeeded({
+		const result = await truncateConversationIfNeeded({
			messages: messagesWithSmallContent,
			totalTokens,
			contextWindow: modelInfo.contextWindow,
			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
		})
		expect(result).toEqual(expectedResult)
	})
})

/**
- * Tests for the estimateTokenCount function
+ * Tests for the getMaxTokens function (private but tested through truncateConversationIfNeeded)
 */
-describe("estimateTokenCount", () => {
-	it("should return 0 for empty or undefined content", () => {
-		expect(estimateTokenCount([])).toBe(0)
-		// @ts-ignore - Testing with undefined
-		expect(estimateTokenCount(undefined)).toBe(0)
+describe("getMaxTokens", () => {
+	// We'll test this indirectly through truncateConversationIfNeeded
+	const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
+		contextWindow,
+		supportsPromptCache: true, // Not relevant for getMaxTokens
+		maxTokens,
	})

-	it("should estimate tokens for text blocks", () => {
-		const content: Array<Anthropic.Messages.ContentBlockParam> = [
-			{ type: "text", text: "This is a text block with 36 characters" },
-		]
+	// Reuse across tests for consistency
+	const messages: Anthropic.Messages.MessageParam[] = [
+		{ role: "user", content: "First message" },
+		{ role: "assistant", content: "Second message" },
+		{ role: "user", content: "Third message" },
+		{ role: "assistant", content: "Fourth message" },
+		{ role: "user", content: "Fifth message" },
+	]

-		// With tiktoken, the exact token count may differ from character-based estimation
-		// Instead of expecting an exact number, we verify it's a reasonable positive number
-		const result = estimateTokenCount(content)
-		expect(result).toBeGreaterThan(0)
+	it("should use maxTokens as buffer when specified", async () => {
+		const modelInfo = createModelInfo(100000, 50000)
+		// Max tokens = 100000 - 50000 = 50000

-		// We can also verify that longer text results in more tokens
-		const longerContent: Array<Anthropic.Messages.ContentBlockParam> = [
-			{
-				type: "text",
-				text: "This is a longer text block with significantly more characters to encode into tokens",
-			},
-		]
-		const longerResult = estimateTokenCount(longerContent)
-		expect(longerResult).toBeGreaterThan(result)
-	})
+		// Create messages with very small content in the last one to avoid token overflow
+		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

-	it("should estimate tokens for image blocks based on data size", () => {
-		// Small image
-		const smallImage: Array<Anthropic.Messages.ContentBlockParam> = [
-			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "small_dummy_data" } },
-		]
-		// Larger image with more data
-		const largerImage: Array<Anthropic.Messages.ContentBlockParam> = [
-			{ type: "image", source: { type: "base64", media_type: "image/png", data: "X".repeat(1000) } },
-		]
+		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
+		// Below max tokens and buffer - no truncation
+		const result1 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 39999, // Well below threshold + dynamic buffer
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result1).toEqual(messagesWithSmallContent)

-		// Verify the token count scales with the size of the image data
-		const smallImageTokens = estimateTokenCount(smallImage)
-		const largerImageTokens = estimateTokenCount(largerImage)
+		// Above max tokens - truncate
+		const result2 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 50001, // Above threshold
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result2).not.toEqual(messagesWithSmallContent)
+		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
+	})

-		// Small image should have some tokens
-		expect(smallImageTokens).toBeGreaterThan(0)
+	it("should use 20% of context window as buffer when maxTokens is undefined", async () => {
+		const modelInfo = createModelInfo(100000, undefined)
+		// Max tokens = 100000 - (100000 * 0.2) = 80000

-		// Larger image should have proportionally more tokens
-		expect(largerImageTokens).toBeGreaterThan(smallImageTokens)
+		// Create messages with very small content in the last one to avoid token overflow
+		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

-		// Verify the larger image calculation matches our formula including the 50% fudge factor
-		expect(largerImageTokens).toBe(48)
+		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
+		// Below max tokens and buffer - no truncation
+		const result1 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 69999, // Well below threshold + dynamic buffer
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result1).toEqual(messagesWithSmallContent)
+
+		// Above max tokens - truncate
+		const result2 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 80001, // Above threshold
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result2).not.toEqual(messagesWithSmallContent)
+		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
	})

-	it("should estimate tokens for mixed content blocks", () => {
-		const content: Array<Anthropic.Messages.ContentBlockParam> = [
-			{ type: "text", text: "A text block with 30 characters" },
-			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "dummy_data" } },
-			{ type: "text", text: "Another text with 24 chars" },
-		]
+	it("should handle small context windows appropriately", async () => {
+		const modelInfo = createModelInfo(50000, 10000)
+		// Max tokens = 50000 - 10000 = 40000

-		// We know image tokens calculation should be consistent
-		const imageTokens = Math.ceil(Math.sqrt("dummy_data".length)) * 1.5
+		// Create messages with very small content in the last one to avoid token overflow
+		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

-		// With tiktoken, we can't predict exact text token counts,
-		// but we can verify the total is greater than just the image tokens
-		const result = estimateTokenCount(content)
-		expect(result).toBeGreaterThan(imageTokens)
+		// Below max tokens and buffer - no truncation
+		const result1 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 34999, // Well below threshold + buffer
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result1).toEqual(messagesWithSmallContent)

-		// Also test against a version with only the image to verify text adds tokens
-		const imageOnlyContent: Array<Anthropic.Messages.ContentBlockParam> = [
-			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "dummy_data" } },
-		]
-		const imageOnlyResult = estimateTokenCount(imageOnlyContent)
-		expect(result).toBeGreaterThan(imageOnlyResult)
+		// Above max tokens - truncate
+		const result2 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 40001, // Above threshold
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result2).not.toEqual(messagesWithSmallContent)
+		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
	})

-	it("should handle empty text blocks", () => {
-		const content: Array<Anthropic.Messages.ContentBlockParam> = [{ type: "text", text: "" }]
-		expect(estimateTokenCount(content)).toBe(0)
-	})
+	it("should handle large context windows appropriately", async () => {
+		const modelInfo = createModelInfo(200000, 30000)
+		// Max tokens = 200000 - 30000 = 170000

-	it("should handle plain string messages", () => {
-		const content = "This is a plain text message"
-		expect(estimateTokenCount([{ type: "text", text: content }])).toBeGreaterThan(0)
+		// Create messages with very small content in the last one to avoid token overflow
+		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
+
+		// Account for the dynamic buffer which is 10% of context window (20,000 tokens for this test)
+		// Below max tokens and buffer - no truncation
+		const result1 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 149999, // Well below threshold + dynamic buffer
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result1).toEqual(messagesWithSmallContent)
+
+		// Above max tokens - truncate
+		const result2 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 170001, // Above threshold
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result2).not.toEqual(messagesWithSmallContent)
+		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
	})
})