Ver código fonte

fix: transform tool blocks to text before condensing (EXT-624) (#10975)

Daniel 1 semana atrás
pai
commit
b4b8cef859
2 arquivos alterados com 409 adições e 2 exclusões
  1. 307 0
      src/core/condense/__tests__/index.spec.ts
  2. 102 2
      src/core/condense/index.ts

+ 307 - 0
src/core/condense/__tests__/index.spec.ts

@@ -15,6 +15,10 @@ import {
 	cleanupAfterTruncation,
 	extractCommandBlocks,
 	injectSyntheticToolResults,
+	toolUseToText,
+	toolResultToText,
+	convertToolBlocksToText,
+	transformMessagesForCondensing,
 } from "../index"
 
 vi.mock("../../../api/transform/image-cleaning", () => ({
@@ -1282,3 +1286,306 @@ describe("summarizeConversation with custom settings", () => {
 		)
 	})
 })
+
+describe("toolUseToText", () => { // covers the text rendering of tool_use blocks for each kind of input
+	it("should convert tool_use block with object input to text", () => {
+		const block: Anthropic.Messages.ToolUseBlockParam = {
+			type: "tool_use",
+			id: "tool-123",
+			name: "read_file",
+			input: { path: "test.ts", encoding: "utf-8" },
+		}
+
+		const result = toolUseToText(block)
+
+		expect(result).toBe("[Tool Use: read_file]\npath: test.ts\nencoding: utf-8") // header, then one key/value line per input entry
+	})
+
+	it("should convert tool_use block with nested object input to text", () => {
+		const block: Anthropic.Messages.ToolUseBlockParam = {
+			type: "tool_use",
+			id: "tool-456",
+			name: "write_file",
+			input: {
+				path: "output.json",
+				content: { key: "value", nested: { a: 1 } },
+			},
+		}
+
+		const result = toolUseToText(block)
+
+		expect(result).toContain("[Tool Use: write_file]")
+		expect(result).toContain("path: output.json")
+		expect(result).toContain("content:")
+		expect(result).toContain('"key"') // object values are JSON-serialized, so nested keys and strings appear quoted
+		expect(result).toContain('"value"')
+	})
+
+	it("should convert tool_use block with string input to text", () => {
+		const block: Anthropic.Messages.ToolUseBlockParam = {
+			type: "tool_use",
+			id: "tool-789",
+			name: "execute_command",
+			input: "ls -la" as unknown as Record<string, unknown>, // cast smuggles a non-object input past the type to exercise that branch
+		}
+
+		const result = toolUseToText(block)
+
+		expect(result).toBe("[Tool Use: execute_command]\nls -la") // non-object input follows the header verbatim
+	})
+
+	it("should handle empty object input", () => {
+		const block: Anthropic.Messages.ToolUseBlockParam = {
+			type: "tool_use",
+			id: "tool-empty",
+			name: "some_tool",
+			input: {},
+		}
+
+		const result = toolUseToText(block)
+
+		expect(result).toBe("[Tool Use: some_tool]\n") // the header's newline is still emitted, with nothing after it
+	})
+})
+
+describe("toolResultToText", () => { // covers the text rendering of tool_result blocks for each content shape
+	it("should convert tool_result with string content to text", () => {
+		const block: Anthropic.Messages.ToolResultBlockParam = {
+			type: "tool_result",
+			tool_use_id: "tool-123",
+			content: "File contents here",
+		}
+
+		const result = toolResultToText(block)
+
+		expect(result).toBe("[Tool Result]\nFile contents here")
+	})
+
+	it("should convert tool_result with error flag to text", () => {
+		const block: Anthropic.Messages.ToolResultBlockParam = {
+			type: "tool_result",
+			tool_use_id: "tool-456",
+			content: "File not found",
+			is_error: true,
+		}
+
+		const result = toolResultToText(block)
+
+		expect(result).toBe("[Tool Result (Error)]\nFile not found") // is_error adds " (Error)" inside the header brackets
+	})
+
+	it("should convert tool_result with array content to text", () => {
+		const block: Anthropic.Messages.ToolResultBlockParam = {
+			type: "tool_result",
+			tool_use_id: "tool-789",
+			content: [
+				{ type: "text", text: "First line" },
+				{ type: "text", text: "Second line" },
+			],
+		}
+
+		const result = toolResultToText(block)
+
+		expect(result).toBe("[Tool Result]\nFirst line\nSecond line") // array items are joined with newlines
+	})
+
+	it("should handle tool_result with image in array content", () => {
+		const block: Anthropic.Messages.ToolResultBlockParam = {
+			type: "tool_result",
+			tool_use_id: "tool-img",
+			content: [
+				{ type: "text", text: "Screenshot:" },
+				{ type: "image", source: { type: "base64", media_type: "image/png", data: "abc123" } },
+			],
+		}
+
+		const result = toolResultToText(block)
+
+		expect(result).toBe("[Tool Result]\nScreenshot:\n[Image]") // image data is dropped in favor of a placeholder
+	})
+
+	it("should handle tool_result with no content", () => {
+		const block: Anthropic.Messages.ToolResultBlockParam = {
+			type: "tool_result",
+			tool_use_id: "tool-empty",
+		}
+
+		const result = toolResultToText(block)
+
+		expect(result).toBe("[Tool Result]") // header only; unlike the other cases there is no trailing newline
+	})
+})
+
+describe("convertToolBlocksToText", () => { // covers block-level conversion of a single message's content
+	it("should return string content unchanged", () => {
+		const content = "Simple text content"
+
+		const result = convertToolBlocksToText(content)
+
+		expect(result).toBe("Simple text content") // string content contains no blocks, so it passes straight through
+	})
+
+	it("should convert tool_use blocks to text blocks", () => {
+		const content: Anthropic.Messages.ContentBlockParam[] = [
+			{
+				type: "tool_use",
+				id: "tool-123",
+				name: "read_file",
+				input: { path: "test.ts" },
+			},
+		]
+
+		const result = convertToolBlocksToText(content)
+
+		expect(Array.isArray(result)).toBe(true)
+		expect((result as Anthropic.Messages.ContentBlockParam[])[0].type).toBe("text")
+		expect((result as Anthropic.Messages.TextBlockParam[])[0].text).toContain("[Tool Use: read_file]")
+	})
+
+	it("should convert tool_result blocks to text blocks", () => {
+		const content: Anthropic.Messages.ContentBlockParam[] = [
+			{
+				type: "tool_result",
+				tool_use_id: "tool-123",
+				content: "File contents",
+			},
+		]
+
+		const result = convertToolBlocksToText(content)
+
+		expect(Array.isArray(result)).toBe(true)
+		expect((result as Anthropic.Messages.ContentBlockParam[])[0].type).toBe("text")
+		expect((result as Anthropic.Messages.TextBlockParam[])[0].text).toContain("[Tool Result]")
+	})
+
+	it("should preserve non-tool blocks unchanged", () => {
+		const content: Anthropic.Messages.ContentBlockParam[] = [
+			{ type: "text", text: "Hello" },
+			{
+				type: "tool_use",
+				id: "tool-123",
+				name: "read_file",
+				input: { path: "test.ts" },
+			},
+			{ type: "text", text: "World" },
+		]
+
+		const result = convertToolBlocksToText(content)
+
+		expect(Array.isArray(result)).toBe(true)
+		const resultArray = result as Anthropic.Messages.ContentBlockParam[]
+		expect(resultArray).toHaveLength(3) // conversion is one-to-one: block count and order are kept
+		expect(resultArray[0]).toEqual({ type: "text", text: "Hello" })
+		expect(resultArray[1].type).toBe("text")
+		expect((resultArray[1] as Anthropic.Messages.TextBlockParam).text).toContain("[Tool Use: read_file]")
+		expect(resultArray[2]).toEqual({ type: "text", text: "World" })
+	})
+
+	it("should handle mixed content with multiple tool blocks", () => {
+		const content: Anthropic.Messages.ContentBlockParam[] = [
+			{
+				type: "tool_use",
+				id: "tool-1",
+				name: "read_file",
+				input: { path: "a.ts" },
+			},
+			{
+				type: "tool_result",
+				tool_use_id: "tool-1",
+				content: "contents of a.ts",
+			},
+		]
+
+		const result = convertToolBlocksToText(content)
+
+		expect(Array.isArray(result)).toBe(true)
+		const resultArray = result as Anthropic.Messages.ContentBlockParam[]
+		expect(resultArray).toHaveLength(2) // each tool block becomes its own text block; they are not merged
+		expect((resultArray[0] as Anthropic.Messages.TextBlockParam).text).toContain("[Tool Use: read_file]")
+		expect((resultArray[1] as Anthropic.Messages.TextBlockParam).text).toContain("[Tool Result]")
+		expect((resultArray[1] as Anthropic.Messages.TextBlockParam).text).toContain("contents of a.ts")
+	})
+})
+
+describe("transformMessagesForCondensing", () => { // covers message-level conversion across a whole conversation
+	it("should transform all messages with tool blocks to text", () => {
+		const messages = [
+			{ role: "user" as const, content: "Hello" },
+			{
+				role: "assistant" as const,
+				content: [
+					{
+						type: "tool_use" as const,
+						id: "tool-1",
+						name: "read_file",
+						input: { path: "test.ts" },
+					},
+				],
+			},
+			{
+				role: "user" as const,
+				content: [
+					{
+						type: "tool_result" as const,
+						tool_use_id: "tool-1",
+						content: "file contents",
+					},
+				],
+			},
+		]
+
+		const result = transformMessagesForCondensing(messages)
+
+		expect(result).toHaveLength(3)
+		expect(result[0].content).toBe("Hello") // string content is left as a string
+		expect(Array.isArray(result[1].content)).toBe(true)
+		expect((result[1].content as any[])[0].type).toBe("text")
+		expect((result[1].content as any[])[0].text).toContain("[Tool Use: read_file]")
+		expect(Array.isArray(result[2].content)).toBe(true)
+		expect((result[2].content as any[])[0].type).toBe("text")
+		expect((result[2].content as any[])[0].text).toContain("[Tool Result]")
+	})
+
+	it("should preserve message role and other properties", () => {
+		const messages = [
+			{
+				role: "assistant" as const,
+				content: [
+					{
+						type: "tool_use" as const,
+						id: "tool-1",
+						name: "execute",
+						input: { cmd: "ls" },
+					},
+				],
+			},
+		]
+
+		const result = transformMessagesForCondensing(messages)
+
+		expect(result[0].role).toBe("assistant") // NOTE(review): only role is asserted; no extra property is checked here
+	})
+
+	it("should handle empty messages array", () => {
+		const result = transformMessagesForCondensing([])
+
+		expect(result).toEqual([])
+	})
+
+	it("should not mutate original messages", () => {
+		const originalContent = [
+			{
+				type: "tool_use" as const,
+				id: "tool-1",
+				name: "read_file",
+				input: { path: "test.ts" },
+			},
+		]
+		const messages = [{ role: "assistant" as const, content: originalContent }]
+
+		transformMessagesForCondensing(messages) // return value deliberately ignored; only side effects on the input matter
+
+		// The input must be untouched: its first block is still a tool_use block
+		expect(messages[0].content[0].type).toBe("tool_use")
+	})
+})

+ 102 - 2
src/core/condense/index.ts

@@ -14,6 +14,100 @@ import { generateFoldedFileContext } from "./foldedFileContext"
 
 export type { FoldedFileContextResult, FoldedFileContextOptions } from "./foldedFileContext"
 
+/**
+ * Renders a tool_use block as text: a `[Tool Use: <name>]` header line followed by the tool input (object input as one `key: value` per line).
+ * This allows the conversation to be summarized without requiring the tools parameter.
+ */
+export function toolUseToText(block: Anthropic.Messages.ToolUseBlockParam): string {
+	let input: string
+	if (typeof block.input === "object" && block.input !== null) { // typeof null is "object", hence the explicit null check
+		input = Object.entries(block.input)
+			.map(([key, value]) => {
+				const formattedValue =
+					typeof value === "object" && value !== null ? JSON.stringify(value, null, 2) : String(value) // objects/arrays as 2-space-indented JSON
+				return `${key}: ${formattedValue}`
+			})
+			.join("\n") // an empty object yields "", so the result ends right after the header's newline
+	} else {
+		input = String(block.input) // non-object input (e.g. a raw string) is stringified as-is
+	}
+	return `[Tool Use: ${block.name}]\n${input}`
+}
+
+/**
+ * Renders a tool_result block as text: a `[Tool Result]` header (`[Tool Result (Error)]` when `is_error` is truthy) followed by its content, if any.
+ * This allows the conversation to be summarized without requiring the tools parameter.
+ */
+export function toolResultToText(block: Anthropic.Messages.ToolResultBlockParam): string {
+	const errorSuffix = block.is_error ? " (Error)" : ""
+	if (typeof block.content === "string") {
+		return `[Tool Result${errorSuffix}]\n${block.content}`
+	} else if (Array.isArray(block.content)) {
+		const contentText = block.content
+			.map((contentBlock) => {
+				if (contentBlock.type === "text") {
+					return contentBlock.text
+				}
+				if (contentBlock.type === "image") {
+					return "[Image]" // the image payload is not included in the text, only this placeholder
+				}
+				// Any other block type is reduced to its type name in square brackets
+				return `[${(contentBlock as { type: string }).type}]`
+			})
+			.join("\n") // one line group per content block
+		return `[Tool Result${errorSuffix}]\n${contentText}`
+	}
+	return `[Tool Result${errorSuffix}]` // content is neither a string nor an array (e.g. omitted): header only, no newline
+}
+
+/**
+ * Replaces every tool_use and tool_result block in a message's content with an equivalent text block.
+ * Providers like Bedrock require the tools parameter whenever tool blocks are present; once the blocks
+ * are plain text, the conversation can be sent for summarization without the tools parameter.
+ *
+ * @param content - The message content (string or array of content blocks); the input array is not modified
+ * @returns The string unchanged, or a new array of the same length in which all other blocks are kept by reference
+ */
+export function convertToolBlocksToText(
+	content: string | Anthropic.Messages.ContentBlockParam[],
+): string | Anthropic.Messages.ContentBlockParam[] {
+	if (typeof content === "string") {
+		return content // plain string content cannot contain tool blocks
+	}
+
+	return content.map((block) => {
+		if (block.type === "tool_use") {
+			return {
+				type: "text" as const,
+				text: toolUseToText(block),
+			}
+		}
+		if (block.type === "tool_result") {
+			return {
+				type: "text" as const,
+				text: toolResultToText(block),
+			}
+		}
+		return block // every other block type passes through untouched (same object, not a copy)
+	})
+}
+
+/**
+ * Produces a copy of the message list in which each message's content has been passed through convertToolBlocksToText.
+ * This ensures the conversation can be sent for summarization without requiring the tools parameter.
+ *
+ * @param messages - The messages to transform; neither the array nor its message objects are mutated
+ * @returns A new array of shallow-copied messages: every property is carried over and only `content` is replaced
+ */
+export function transformMessagesForCondensing<
+	T extends { role: string; content: string | Anthropic.Messages.ContentBlockParam[] },
+>(messages: T[]): T[] {
+	return messages.map((msg) => ({
+		...msg, // spread first so role and any extra properties survive
+		content: convertToolBlocksToText(msg.content), // listed after the spread so it overrides the original content
+	}))
+}
+
 export const MIN_CONDENSE_THRESHOLD = 5 // Minimum percentage of context window to trigger condensing
 export const MAX_CONDENSE_THRESHOLD = 100 // Maximum percentage of context window to trigger condensing
 
@@ -213,10 +307,16 @@ export async function summarizeConversation(options: SummarizeConversationOption
 	// (e.g., when user triggers condense after receiving attempt_completion but before responding)
 	const messagesWithToolResults = injectSyntheticToolResults(messagesToSummarize)
 
-	const requestMessages = maybeRemoveImageBlocks([...messagesWithToolResults, finalRequestMessage], apiHandler).map(
-		({ role, content }) => ({ role, content }),
+	// Transform tool_use and tool_result blocks to text representations.
+	// This is necessary because some providers (like Bedrock via LiteLLM) require the `tools` parameter
+	// when tool blocks are present. By converting them to text, we can send the conversation for
+	// summarization without needing to pass the tools parameter.
+	const messagesWithTextToolBlocks = transformMessagesForCondensing(
+		maybeRemoveImageBlocks([...messagesWithToolResults, finalRequestMessage], apiHandler),
 	)
 
+	const requestMessages = messagesWithTextToolBlocks.map(({ role, content }) => ({ role, content }))
+
 	// Note: this doesn't need to be a stream, consider using something like apiHandler.completePrompt
 	const promptToUse = SUMMARY_PROMPT