Răsfoiți Sursa

Add tests + benchmark for parseAssistantMessage V1 + 2 (#3538)

Chris Estreich 7 luni în urmă
părinte
comite
72358f2742

+ 340 - 0
src/core/assistant-message/__tests__/parseAssistantMessage.test.ts

@@ -0,0 +1,340 @@
+// npx jest src/core/assistant-message/__tests__/parseAssistantMessage.test.ts
+
+import { TextContent, ToolUse } from "../../../shared/tools"
+
+import { AssistantMessageContent, parseAssistantMessage as parseAssistantMessageV1 } from "../parseAssistantMessage"
+import { parseAssistantMessageV2 } from "../parseAssistantMessageV2"
+
+// True only for text blocks whose `content` is the empty string. V1 pushes a
+// text block even when its trimmed content is empty, while V2 drops such
+// blocks, so the tool-use tests filter them out before asserting.
+const isEmptyTextContent = (block: AssistantMessageContent) => {
+	if (block.type !== "text") {
+		return false
+	}
+
+	return (block as TextContent).content === ""
+}
+/**
+ * Runs one shared suite against both parser implementations so that V1 and V2
+ * are held to the same expectations. `index + 1` is only used to label the
+ * suite (`parseAssistantMessageV1` / `parseAssistantMessageV2`).
+ */
+;[parseAssistantMessageV1, parseAssistantMessageV2].forEach((parser, index) => {
+	describe(`parseAssistantMessageV${index + 1}`, () => {
+		describe("text content parsing", () => {
+			it("should parse a simple text message", () => {
+				const message = "This is a simple text message"
+				const result = parser(message)
+
+				expect(result).toHaveLength(1)
+				expect(result[0]).toEqual({
+					type: "text",
+					content: message,
+					partial: true, // Trailing text stays partial; only a following tool tag completes it.
+				})
+			})
+
+			it("should parse a multi-line text message", () => {
+				const message = "This is a multi-line\ntext message\nwith several lines"
+				const result = parser(message)
+
+				expect(result).toHaveLength(1)
+				expect(result[0]).toEqual({
+					type: "text",
+					content: message,
+					partial: true, // Trailing text stays partial; only a following tool tag completes it.
+				})
+			})
+
+			it("should mark text as partial when it's the last content in the message", () => {
+				const message = "This is a partial text"
+				const result = parser(message)
+
+				expect(result).toHaveLength(1)
+				expect(result[0]).toEqual({
+					type: "text",
+					content: message,
+					partial: true,
+				})
+			})
+		})
+
+		describe("tool use parsing", () => {
+			it("should parse a simple tool use", () => {
+				const message = "<read_file><path>src/file.ts</path></read_file>"
+				const result = parser(message).filter((block) => !isEmptyTextContent(block))
+
+				expect(result).toHaveLength(1)
+				const toolUse = result[0] as ToolUse
+				expect(toolUse.type).toBe("tool_use")
+				expect(toolUse.name).toBe("read_file")
+				expect(toolUse.params.path).toBe("src/file.ts")
+				expect(toolUse.partial).toBe(false)
+			})
+
+			it("should parse a tool use with multiple parameters", () => {
+				const message =
+					"<read_file><path>src/file.ts</path><start_line>10</start_line><end_line>20</end_line></read_file>"
+				const result = parser(message).filter((block) => !isEmptyTextContent(block))
+
+				expect(result).toHaveLength(1)
+				const toolUse = result[0] as ToolUse
+				expect(toolUse.type).toBe("tool_use")
+				expect(toolUse.name).toBe("read_file")
+				expect(toolUse.params.path).toBe("src/file.ts")
+				expect(toolUse.params.start_line).toBe("10")
+				expect(toolUse.params.end_line).toBe("20")
+				expect(toolUse.partial).toBe(false)
+			})
+
+			it("should mark tool use as partial when it's not closed", () => {
+				const message = "<read_file><path>src/file.ts</path>"
+				const result = parser(message).filter((block) => !isEmptyTextContent(block))
+
+				expect(result).toHaveLength(1)
+				const toolUse = result[0] as ToolUse
+				expect(toolUse.type).toBe("tool_use")
+				expect(toolUse.name).toBe("read_file")
+				expect(toolUse.params.path).toBe("src/file.ts")
+				expect(toolUse.partial).toBe(true)
+			})
+
+			it("should handle a partial parameter in a tool use", () => {
+				const message = "<read_file><path>src/file.ts"
+				const result = parser(message).filter((block) => !isEmptyTextContent(block))
+
+				expect(result).toHaveLength(1)
+				const toolUse = result[0] as ToolUse
+				expect(toolUse.type).toBe("tool_use")
+				expect(toolUse.name).toBe("read_file")
+				expect(toolUse.params.path).toBe("src/file.ts")
+				expect(toolUse.partial).toBe(true)
+			})
+		})
+
+		describe("mixed content parsing", () => {
+			it("should parse text followed by a tool use", () => {
+				const message = "Here's the file content: <read_file><path>src/file.ts</path></read_file>"
+				const result = parser(message)
+
+				expect(result).toHaveLength(2)
+
+				const textContent = result[0] as TextContent
+				expect(textContent.type).toBe("text")
+				expect(textContent.content).toBe("Here's the file content:")
+				expect(textContent.partial).toBe(false)
+
+				const toolUse = result[1] as ToolUse
+				expect(toolUse.type).toBe("tool_use")
+				expect(toolUse.name).toBe("read_file")
+				expect(toolUse.params.path).toBe("src/file.ts")
+				expect(toolUse.partial).toBe(false)
+			})
+
+			it("should parse a tool use followed by text", () => {
+				const message = "<read_file><path>src/file.ts</path></read_file>Here's what I found in the file."
+				const result = parser(message).filter((block) => !isEmptyTextContent(block))
+
+				expect(result).toHaveLength(2)
+
+				const toolUse = result[0] as ToolUse
+				expect(toolUse.type).toBe("tool_use")
+				expect(toolUse.name).toBe("read_file")
+				expect(toolUse.params.path).toBe("src/file.ts")
+				expect(toolUse.partial).toBe(false)
+
+				const textContent = result[1] as TextContent
+				expect(textContent.type).toBe("text")
+				expect(textContent.content).toBe("Here's what I found in the file.")
+				expect(textContent.partial).toBe(true)
+			})
+
+			it("should parse multiple tool uses separated by text", () => {
+				const message =
+					"First file: <read_file><path>src/file1.ts</path></read_file>Second file: <read_file><path>src/file2.ts</path></read_file>"
+				const result = parser(message)
+
+				expect(result).toHaveLength(4)
+
+				expect(result[0].type).toBe("text")
+				expect((result[0] as TextContent).content).toBe("First file:")
+
+				expect(result[1].type).toBe("tool_use")
+				expect((result[1] as ToolUse).name).toBe("read_file")
+				expect((result[1] as ToolUse).params.path).toBe("src/file1.ts")
+
+				expect(result[2].type).toBe("text")
+				expect((result[2] as TextContent).content).toBe("Second file:")
+
+				expect(result[3].type).toBe("tool_use")
+				expect((result[3] as ToolUse).name).toBe("read_file")
+				expect((result[3] as ToolUse).params.path).toBe("src/file2.ts")
+			})
+		})
+
+		describe("special cases", () => {
+			it("should handle the write_to_file tool with content that contains closing tags", () => {
+				const message = `<write_to_file><path>src/file.ts</path><content>
+	function example() {
+	// This has XML-like content: </content>
+	return true;
+	}
+	</content><line_count>5</line_count></write_to_file>`
+
+				const result = parser(message).filter((block) => !isEmptyTextContent(block))
+
+				expect(result).toHaveLength(1)
+				const toolUse = result[0] as ToolUse
+				expect(toolUse.type).toBe("tool_use")
+				expect(toolUse.name).toBe("write_to_file")
+				expect(toolUse.params.path).toBe("src/file.ts")
+				expect(toolUse.params.line_count).toBe("5")
+				expect(toolUse.params.content).toContain("function example()")
+				expect(toolUse.params.content).toContain("// This has XML-like content: </content>")
+				expect(toolUse.params.content).toContain("return true;")
+				expect(toolUse.partial).toBe(false)
+			})
+
+			it("should handle empty messages", () => {
+				const message = ""
+				const result = parser(message)
+
+				expect(result).toHaveLength(0)
+			})
+
+			it("should handle malformed tool use tags", () => {
+				const message = "This has a <not_a_tool>malformed tag</not_a_tool>"
+				const result = parser(message)
+
+				expect(result).toHaveLength(1)
+				expect(result[0].type).toBe("text")
+				expect((result[0] as TextContent).content).toBe(message)
+			})
+
+			it("should handle tool use with no parameters", () => {
+				const message = "<browser_action></browser_action>"
+				const result = parser(message).filter((block) => !isEmptyTextContent(block))
+
+				expect(result).toHaveLength(1)
+				const toolUse = result[0] as ToolUse
+				expect(toolUse.type).toBe("tool_use")
+				expect(toolUse.name).toBe("browser_action")
+				expect(Object.keys(toolUse.params).length).toBe(0)
+				expect(toolUse.partial).toBe(false)
+			})
+
+			it("should handle nested tool tags that aren't actually nested", () => {
+				const message =
+					"<execute_command><command>echo '<read_file><path>test.txt</path></read_file>'</command></execute_command>"
+
+				const result = parser(message).filter((block) => !isEmptyTextContent(block))
+
+				expect(result).toHaveLength(1)
+				const toolUse = result[0] as ToolUse
+				expect(toolUse.type).toBe("tool_use")
+				expect(toolUse.name).toBe("execute_command")
+				expect(toolUse.params.command).toBe("echo '<read_file><path>test.txt</path></read_file>'")
+				expect(toolUse.partial).toBe(false)
+			})
+
+			it("should handle a tool use with a parameter containing XML-like content", () => {
+				const message = "<search_files><regex><div>.*</div></regex><path>src</path></search_files>"
+				const result = parser(message).filter((block) => !isEmptyTextContent(block))
+
+				expect(result).toHaveLength(1)
+				const toolUse = result[0] as ToolUse
+				expect(toolUse.type).toBe("tool_use")
+				expect(toolUse.name).toBe("search_files")
+				expect(toolUse.params.regex).toBe("<div>.*</div>")
+				expect(toolUse.params.path).toBe("src")
+				expect(toolUse.partial).toBe(false)
+			})
+
+			it("should handle consecutive tool uses without text in between", () => {
+				const message =
+					"<read_file><path>file1.ts</path></read_file><read_file><path>file2.ts</path></read_file>"
+				const result = parser(message).filter((block) => !isEmptyTextContent(block))
+
+				expect(result).toHaveLength(2)
+
+				const toolUse1 = result[0] as ToolUse
+				expect(toolUse1.type).toBe("tool_use")
+				expect(toolUse1.name).toBe("read_file")
+				expect(toolUse1.params.path).toBe("file1.ts")
+				expect(toolUse1.partial).toBe(false)
+
+				const toolUse2 = result[1] as ToolUse
+				expect(toolUse2.type).toBe("tool_use")
+				expect(toolUse2.name).toBe("read_file")
+				expect(toolUse2.params.path).toBe("file2.ts")
+				expect(toolUse2.partial).toBe(false)
+			})
+
+			it("should handle whitespace in parameters", () => {
+				const message = "<read_file><path>  src/file.ts  </path></read_file>"
+				const result = parser(message).filter((block) => !isEmptyTextContent(block))
+
+				expect(result).toHaveLength(1)
+				const toolUse = result[0] as ToolUse
+				expect(toolUse.type).toBe("tool_use")
+				expect(toolUse.name).toBe("read_file")
+				expect(toolUse.params.path).toBe("src/file.ts")
+				expect(toolUse.partial).toBe(false)
+			})
+
+			it("should handle multi-line parameters", () => {
+				const message = `<write_to_file><path>file.ts</path><content>
+	line 1
+	line 2
+	line 3
+	</content><line_count>3</line_count></write_to_file>`
+				const result = parser(message).filter((block) => !isEmptyTextContent(block))
+
+				expect(result).toHaveLength(1)
+				const toolUse = result[0] as ToolUse
+				expect(toolUse.type).toBe("tool_use")
+				expect(toolUse.name).toBe("write_to_file")
+				expect(toolUse.params.path).toBe("file.ts")
+				expect(toolUse.params.content).toContain("line 1")
+				expect(toolUse.params.content).toContain("line 2")
+				expect(toolUse.params.content).toContain("line 3")
+				expect(toolUse.params.line_count).toBe("3")
+				expect(toolUse.partial).toBe(false)
+			})
+
+			it("should handle a complex message with multiple content types", () => {
+				const message = `I'll help you with that task.
+
+	<read_file><path>src/index.ts</path></read_file>
+
+	Now let's modify the file:
+
+	<write_to_file><path>src/index.ts</path><content>
+	// Updated content
+	console.log("Hello world");
+	</content><line_count>2</line_count></write_to_file>
+
+	Let's run the code:
+
+	<execute_command><command>node src/index.ts</command></execute_command>`
+
+				const result = parser(message)
+
+				expect(result).toHaveLength(6)
+
+				// First text block
+				expect(result[0].type).toBe("text")
+				expect((result[0] as TextContent).content).toBe("I'll help you with that task.")
+
+				// First tool use (read_file)
+				expect(result[1].type).toBe("tool_use")
+				expect((result[1] as ToolUse).name).toBe("read_file")
+
+				// Second text block
+				expect(result[2].type).toBe("text")
+				expect((result[2] as TextContent).content).toContain("Now let's modify the file:")
+
+				// Second tool use (write_to_file)
+				expect(result[3].type).toBe("tool_use")
+				expect((result[3] as ToolUse).name).toBe("write_to_file")
+
+				// Third text block
+				expect(result[4].type).toBe("text")
+				expect((result[4] as TextContent).content).toContain("Let's run the code:")
+
+				// Third tool use (execute_command)
+				expect(result[5].type).toBe("tool_use")
+				expect((result[5] as ToolUse).name).toBe("execute_command")
+			})
+		})
+	})
+})

+ 109 - 0
src/core/assistant-message/__tests__/parseAssistantMessageBenchmark.ts

@@ -0,0 +1,109 @@
+// node --expose-gc --import tsx src/core/assistant-message/__tests__/parseAssistantMessageBenchmark.ts
+
+import { performance } from "perf_hooks"
+import { parseAssistantMessage as parseAssistantMessageV1 } from "../parseAssistantMessage"
+import { parseAssistantMessageV2 } from "../parseAssistantMessageV2"
+
+/**
+ * Renders a number with "," inserted between groups of three digits,
+ * e.g. 1234567 -> "1,234,567".
+ */
+const formatNumber = (num: number): string => {
+	const digits = String(num)
+	const groupBoundary = /\B(?=(\d{3})+(?!\d))/g
+
+	return digits.replace(groupBoundary, ",")
+}
+
+/**
+ * Returns the average wall-clock time, in milliseconds, of one `fn(input)`
+ * call. A few untimed warm-up calls run first, then `iterations` timed calls.
+ *
+ * @param fn Parser under test; its return value is discarded.
+ * @param input Message passed to every call.
+ * @param iterations Number of timed calls (default 1000).
+ * @returns Elapsed time of the timed loop divided by `iterations`.
+ */
+const measureExecutionTime = (
+	fn: (input: string) => unknown,
+	input: string,
+	iterations: number = 1000,
+): number => {
+	// Untimed calls made before the measurement starts.
+	const warmupRuns = 10
+
+	for (let i = 0; i < warmupRuns; i++) {
+		fn(input)
+	}
+
+	const start = performance.now()
+
+	for (let i = 0; i < iterations; i++) {
+		fn(input)
+	}
+
+	const end = performance.now()
+	return (end - start) / iterations // Average time per iteration in ms.
+}
+
+/**
+ * Estimates the average heap growth caused by one `fn(input)` call.
+ *
+ * Requests a garbage collection first when `global.gc` is available (node
+ * started with --expose-gc), then compares `process.memoryUsage()` snapshots
+ * taken before and after `iterations` calls.
+ *
+ * @param fn Parser under test; its return value is discarded.
+ * @param input Message passed to every call.
+ * @param iterations Number of calls between the two snapshots (default 100).
+ * @returns The `heapUsed` and `heapTotal` differences between the snapshots,
+ *          each divided by `iterations`. A value is negative when the heap
+ *          shrank between the two snapshots.
+ */
+const measureMemoryUsage = (
+	fn: (input: string) => unknown,
+	input: string,
+	iterations: number = 100,
+): { heapUsed: number; heapTotal: number } => {
+	if (global.gc) {
+		// Force garbage collection if available.
+		global.gc()
+	} else {
+		console.warn("No garbage collection hook! Run with --expose-gc for more accurate memory measurements.")
+	}
+
+	const before = process.memoryUsage()
+
+	for (let i = 0; i < iterations; i++) {
+		fn(input)
+	}
+
+	const after = process.memoryUsage()
+
+	return {
+		heapUsed: (after.heapUsed - before.heapUsed) / iterations,
+		heapTotal: (after.heapTotal - before.heapTotal) / iterations,
+	}
+}
+
+// Benchmark inputs: plain text, single and multiple tool uses, and a large
+// message that repeats a read_file + write_to_file pair 50 times.
+const testCases: { name: string; input: string }[] = [
+	{
+		name: "Simple text message",
+		input: "This is a simple text message without any tool uses.",
+	},
+	{
+		name: "Message with a simple tool use",
+		input: "Let's read a file: <read_file><path>src/file.ts</path></read_file>",
+	},
+	{
+		name: "Message with a complex tool use (write_to_file)",
+		input: "<write_to_file><path>src/file.ts</path><content>\nfunction example() {\n  // This has XML-like content: </content>\n  return true;\n}\n</content><line_count>5</line_count></write_to_file>",
+	},
+	{
+		name: "Message with multiple tool uses",
+		input: "First file: <read_file><path>src/file1.ts</path></read_file>\nSecond file: <read_file><path>src/file2.ts</path></read_file>\nLet's write a new file: <write_to_file><path>src/file3.ts</path><content>\nexport function newFunction() {\n  return 'Hello world';\n}\n</content><line_count>3</line_count></write_to_file>",
+	},
+	{
+		name: "Large message with repeated tool uses",
+		input: Array.from(
+			{ length: 50 },
+			() =>
+				'<read_file><path>src/file.ts</path></read_file>\n<write_to_file><path>output.ts</path><content>console.log("hello");</content><line_count>1</line_count></write_to_file>',
+		).join("\n"),
+	},
+]
+
+/**
+ * Benchmarks both parsers on every entry of `testCases` and prints one table
+ * row per case: average time per call for V1 and V2, the V1/V2 time ratio, and
+ * the average `heapUsed` growth per call for each parser.
+ */
+const runBenchmark = () => {
+	const maxNameLength = testCases.reduce((max, testCase) => Math.max(max, testCase.name.length), 0)
+	const namePadding = maxNameLength + 2
+
+	// Widths shared by the header, the separator and the data rows so that all
+	// three stay aligned.
+	const timeWidth = 12
+	const ratioWidth = 11
+	const heapWidth = 16
+
+	console.log(
+		`| ${"Test Case".padEnd(namePadding)} | ` +
+			`${"V1 Time (ms)".padEnd(timeWidth)} | ` +
+			`${"V2 Time (ms)".padEnd(timeWidth)} | ` +
+			`${"V1/V2 Ratio".padEnd(ratioWidth)} | ` +
+			`${"V1 Heap (bytes)".padEnd(heapWidth)} | ` +
+			`${"V2 Heap (bytes)".padEnd(heapWidth)} |`,
+	)
+	console.log(
+		`| ${"-".repeat(namePadding)} | ` +
+			`${"-".repeat(timeWidth)} | ` +
+			`${"-".repeat(timeWidth)} | ` +
+			`${"-".repeat(ratioWidth)} | ` +
+			`${"-".repeat(heapWidth)} | ` +
+			`${"-".repeat(heapWidth)} |`,
+	)
+
+	for (const testCase of testCases) {
+		const v1Time = measureExecutionTime(parseAssistantMessageV1, testCase.input)
+		const v2Time = measureExecutionTime(parseAssistantMessageV2, testCase.input)
+		const timeRatio = v1Time / v2Time
+
+		const v1Memory = measureMemoryUsage(parseAssistantMessageV1, testCase.input)
+		const v2Memory = measureMemoryUsage(parseAssistantMessageV2, testCase.input)
+
+		console.log(
+			`| ${testCase.name.padEnd(namePadding)} | ` +
+				`${v1Time.toFixed(4).padStart(timeWidth)} | ` +
+				`${v2Time.toFixed(4).padStart(timeWidth)} | ` +
+				`${timeRatio.toFixed(2).padStart(ratioWidth)} | ` +
+				`${formatNumber(Math.round(v1Memory.heapUsed)).padStart(heapWidth)} | ` +
+				`${formatNumber(Math.round(v2Memory.heapUsed)).padStart(heapWidth)} |`,
+		)
+	}
+}
+
+runBenchmark()

+ 36 - 19
src/core/assistant-message/parseAssistantMessage.ts

@@ -3,7 +3,7 @@ import { toolNames, ToolName } from "../../schemas"
 
 export type AssistantMessageContent = TextContent | ToolUse
 
-export function parseAssistantMessage(assistantMessage: string) {
+export function parseAssistantMessage(assistantMessage: string): AssistantMessageContent[] {
 	let contentBlocks: AssistantMessageContent[] = []
 	let currentTextContent: TextContent | undefined = undefined
 	let currentTextContentStartIndex = 0
@@ -17,28 +17,28 @@ export function parseAssistantMessage(assistantMessage: string) {
 		const char = assistantMessage[i]
 		accumulator += char
 
-		// there should not be a param without a tool use
+		// There should not be a param without a tool use.
 		if (currentToolUse && currentParamName) {
 			const currentParamValue = accumulator.slice(currentParamValueStartIndex)
 			const paramClosingTag = `</${currentParamName}>`
 			if (currentParamValue.endsWith(paramClosingTag)) {
-				// end of param value
+				// End of param value.
 				currentToolUse.params[currentParamName] = currentParamValue.slice(0, -paramClosingTag.length).trim()
 				currentParamName = undefined
 				continue
 			} else {
-				// partial param value is accumulating
+				// Partial param value is accumulating.
 				continue
 			}
 		}
 
-		// no currentParamName
+		// No currentParamName.
 
 		if (currentToolUse) {
 			const currentToolValue = accumulator.slice(currentToolUseStartIndex)
 			const toolUseClosingTag = `</${currentToolUse.name}>`
 			if (currentToolValue.endsWith(toolUseClosingTag)) {
-				// end of a tool use
+				// End of a tool use.
 				currentToolUse.partial = false
 				contentBlocks.push(currentToolUse)
 				currentToolUse = undefined
@@ -47,23 +47,29 @@ export function parseAssistantMessage(assistantMessage: string) {
 				const possibleParamOpeningTags = toolParamNames.map((name) => `<${name}>`)
 				for (const paramOpeningTag of possibleParamOpeningTags) {
 					if (accumulator.endsWith(paramOpeningTag)) {
-						// start of a new parameter
+						// Start of a new parameter.
 						currentParamName = paramOpeningTag.slice(1, -1) as ToolParamName
 						currentParamValueStartIndex = accumulator.length
 						break
 					}
 				}
 
-				// there's no current param, and not starting a new param
+				// There's no current param, and not starting a new param.
 
-				// special case for write_to_file where file contents could contain the closing tag, in which case the param would have closed and we end up with the rest of the file contents here. To work around this, we get the string between the starting content tag and the LAST content tag.
+				// Special case for write_to_file where file contents could
+				// contain the closing tag, in which case the param would have
+				// closed and we end up with the rest of the file contents here.
+				// To work around this, we get the string between the starting
+				// content tag and the LAST content tag.
 				const contentParamName: ToolParamName = "content"
+
 				if (currentToolUse.name === "write_to_file" && accumulator.endsWith(`</${contentParamName}>`)) {
 					const toolContent = accumulator.slice(currentToolUseStartIndex)
 					const contentStartTag = `<${contentParamName}>`
 					const contentEndTag = `</${contentParamName}>`
 					const contentStartIndex = toolContent.indexOf(contentStartTag) + contentStartTag.length
 					const contentEndIndex = toolContent.lastIndexOf(contentEndTag)
+
 					if (contentStartIndex !== -1 && contentEndIndex !== -1 && contentEndIndex > contentStartIndex) {
 						currentToolUse.params[contentParamName] = toolContent
 							.slice(contentStartIndex, contentEndIndex)
@@ -71,32 +77,38 @@ export function parseAssistantMessage(assistantMessage: string) {
 					}
 				}
 
-				// partial tool value is accumulating
+				// Partial tool value is accumulating.
 				continue
 			}
 		}
 
-		// no currentToolUse
+		// No currentToolUse.
 
 		let didStartToolUse = false
 		const possibleToolUseOpeningTags = toolNames.map((name) => `<${name}>`)
+
 		for (const toolUseOpeningTag of possibleToolUseOpeningTags) {
 			if (accumulator.endsWith(toolUseOpeningTag)) {
-				// start of a new tool use
+				// Start of a new tool use.
 				currentToolUse = {
 					type: "tool_use",
 					name: toolUseOpeningTag.slice(1, -1) as ToolName,
 					params: {},
 					partial: true,
 				}
+
 				currentToolUseStartIndex = accumulator.length
-				// this also indicates the end of the current text content
+
+				// This also indicates the end of the current text content.
 				if (currentTextContent) {
 					currentTextContent.partial = false
-					// remove the partially accumulated tool use tag from the end of text (<tool)
+
+					// Remove the partially accumulated tool use tag from the
+					// end of text (<tool).
 					currentTextContent.content = currentTextContent.content
 						.slice(0, -toolUseOpeningTag.slice(0, -1).length)
 						.trim()
+
 					contentBlocks.push(currentTextContent)
 					currentTextContent = undefined
 				}
@@ -107,10 +119,12 @@ export function parseAssistantMessage(assistantMessage: string) {
 		}
 
 		if (!didStartToolUse) {
-			// no tool use, so it must be text either at the beginning or between tools
+			// No tool use, so it must be text either at the beginning or
+			// between tools.
 			if (currentTextContent === undefined) {
 				currentTextContentStartIndex = i
 			}
+
 			currentTextContent = {
 				type: "text",
 				content: accumulator.slice(currentTextContentStartIndex).trim(),
@@ -120,17 +134,20 @@ export function parseAssistantMessage(assistantMessage: string) {
 	}
 
 	if (currentToolUse) {
-		// stream did not complete tool call, add it as partial
+		// Stream did not complete tool call, add it as partial.
 		if (currentParamName) {
-			// tool call has a parameter that was not completed
+			// Tool call has a parameter that was not completed.
 			currentToolUse.params[currentParamName] = accumulator.slice(currentParamValueStartIndex).trim()
 		}
+
 		contentBlocks.push(currentToolUse)
 	}
 
-	// Note: it doesnt matter if check for currentToolUse or currentTextContent, only one of them will be defined since only one can be partial at a time
+	// NOTE: It doesn't matter whether we check for currentToolUse or
+	// currentTextContent; only one of them will be defined since only one can
+	// be partial at a time.
 	if (currentTextContent) {
-		// stream did not complete text content, add it as partial
+		// Stream did not complete text content, add it as partial.
 		contentBlocks.push(currentTextContent)
 	}
 

+ 278 - 0
src/core/assistant-message/parseAssistantMessageV2.ts

@@ -0,0 +1,278 @@
+import { TextContent, ToolUse, ToolParamName, toolParamNames } from "../../shared/tools"
+import { toolNames, ToolName } from "../../schemas"
+
+export type AssistantMessageContent = TextContent | ToolUse
+
+/**
+ * Parses an assistant message string potentially containing mixed text and tool
+ * usage blocks marked with XML-like tags into an array of structured content
+ * objects.
+ *
+ * This version aims for efficiency by avoiding the character-by-character
+ * accumulator of V1. It iterates through the string using an index `i`. At each
+ * position, it checks if the substring *ending* at `i` matches any known
+ * opening or closing tags for tools or parameters using `startsWith` with an
+ * offset.
+ *
+ * Opening tags are looked up in two Maps (`toolUseOpenTags`,
+ * `toolParamOpenTags`) that are built at the start of every call.
+ *
+ * State is managed using indices (`currentTextContentStart`,
+ * `currentToolUseStart`, `currentParamValueStart`) pointing to the start of the
+ * current block within the original `assistantMessage` string.
+ *
+ * Slicing is used to extract content only when a block (text, parameter, or
+ * tool use) is completed. Text content and parameter values are trimmed, and
+ * text blocks that are empty after trimming are not emitted.
+ *
+ * Special handling for the `write_to_file` `content` parameter is included,
+ * using `indexOf` and `lastIndexOf` on the tool body so that content which
+ * itself contains `</content>` is captured in full. The equivalent check for
+ * `new_rule` exists only as commented-out code.
+ *
+ * If the input string ends mid-block, the last open block is added and marked
+ * as partial. Trailing text is always reported as partial.
+ *
+ * @param assistantMessage The raw string output from the assistant.
+ * @returns An array of `AssistantMessageContent` objects, which can be
+ *          `TextContent` or `ToolUse`. Blocks that were not fully closed by the
+ *          end of the input string will have their `partial` flag set to
+ *          `true`.
+ */
+
+export function parseAssistantMessageV2(assistantMessage: string): AssistantMessageContent[] {
+	const contentBlocks: AssistantMessageContent[] = []
+
+	let currentTextContentStart = 0 // Index where the current text block started.
+	let currentTextContent: TextContent | undefined = undefined
+	let currentToolUseStart = 0 // Index *after* the opening tag of the current tool use.
+	let currentToolUse: ToolUse | undefined = undefined
+	let currentParamValueStart = 0 // Index *after* the opening tag of the current param.
+	let currentParamName: ToolParamName | undefined = undefined
+
+	// Precompute tags for faster lookups.
+	const toolUseOpenTags = new Map<string, ToolName>()
+	const toolParamOpenTags = new Map<string, ToolParamName>()
+
+	for (const name of toolNames) {
+		toolUseOpenTags.set(`<${name}>`, name)
+	}
+
+	for (const name of toolParamNames) {
+		toolParamOpenTags.set(`<${name}>`, name)
+	}
+
+	const len = assistantMessage.length
+
+	for (let i = 0; i < len; i++) {
+		const currentCharIndex = i
+
+		// Parsing a tool parameter
+		if (currentToolUse && currentParamName) {
+			const closeTag = `</${currentParamName}>`
+			// Check if the string *ending* at index `i` matches the closing tag
+			if (
+				currentCharIndex >= closeTag.length - 1 &&
+				assistantMessage.startsWith(
+					closeTag,
+					currentCharIndex - closeTag.length + 1, // Start checking from potential start of tag.
+				)
+			) {
+				// Found the closing tag for the parameter.
+				const value = assistantMessage
+					.slice(
+						currentParamValueStart, // Start after the opening tag.
+						currentCharIndex - closeTag.length + 1, // End before the closing tag.
+					)
+					.trim()
+				currentToolUse.params[currentParamName] = value
+				currentParamName = undefined // Go back to parsing tool content.
+				// We don't continue loop here, need to check for tool close or other params at index i.
+			} else {
+				continue // Still inside param value, move to next char.
+			}
+		}
+
+		// Parsing a tool use (but not a specific parameter).
+		if (currentToolUse && !currentParamName) {
+			// Ensure we are not inside a parameter already.
+			// Check if starting a new parameter.
+			let startedNewParam = false
+
+			for (const [tag, paramName] of toolParamOpenTags.entries()) {
+				if (
+					currentCharIndex >= tag.length - 1 &&
+					assistantMessage.startsWith(tag, currentCharIndex - tag.length + 1)
+				) {
+					currentParamName = paramName
+					currentParamValueStart = currentCharIndex + 1 // Value starts after the tag.
+					startedNewParam = true
+					break
+				}
+			}
+
+			if (startedNewParam) {
+				continue // Handled start of param, move to next char.
+			}
+
+			// Check if closing the current tool use.
+			const toolCloseTag = `</${currentToolUse.name}>`
+
+			if (
+				currentCharIndex >= toolCloseTag.length - 1 &&
+				assistantMessage.startsWith(toolCloseTag, currentCharIndex - toolCloseTag.length + 1)
+			) {
+				// End of the tool use found.
+				// Special handling for content params *before* finalizing the
+				// tool.
+				const toolContentSlice = assistantMessage.slice(
+					currentToolUseStart, // From after the tool opening tag.
+					currentCharIndex - toolCloseTag.length + 1, // To before the tool closing tag.
+				)
+
+				// Re-extract the `content` parameter for write_to_file (the
+				// new_rule variant is currently commented out). The parameter
+				// parser stops at the first </content>, so file content that
+				// itself contains that tag would be cut short; slicing up to the
+				// LAST </content> in the tool body restores the full value.
+				const contentParamName: ToolParamName = "content"
+				if (
+					currentToolUse.name === "write_to_file" /* || currentToolUse.name === "new_rule" */ &&
+					// !(contentParamName in currentToolUse.params) && // Only if not already parsed.
+					toolContentSlice.includes(`<${contentParamName}>`) // Check if tag exists.
+				) {
+					const contentStartTag = `<${contentParamName}>`
+					const contentEndTag = `</${contentParamName}>`
+					const contentStart = toolContentSlice.indexOf(contentStartTag)
+
+					// Use `lastIndexOf` for robustness against nested tags.
+					const contentEnd = toolContentSlice.lastIndexOf(contentEndTag)
+
+					if (contentStart !== -1 && contentEnd !== -1 && contentEnd > contentStart) {
+						const contentValue = toolContentSlice
+							.slice(contentStart + contentStartTag.length, contentEnd)
+							.trim()
+
+						currentToolUse.params[contentParamName] = contentValue
+					}
+				}
+
+				currentToolUse.partial = false // Mark as complete.
+				contentBlocks.push(currentToolUse)
+				currentToolUse = undefined // Reset state.
+				currentTextContentStart = currentCharIndex + 1 // Potential text starts after this tag.
+				continue // Move to next char.
+			}
+
+			// If not starting a param and not closing the tool, continue
+			// accumulating tool content implicitly.
+			continue
+		}
+
+		// Parsing text / looking for tool start.
+		if (!currentToolUse) {
+			// Check if starting a new tool use.
+			let startedNewTool = false
+
+			for (const [tag, toolName] of toolUseOpenTags.entries()) {
+				if (
+					currentCharIndex >= tag.length - 1 &&
+					assistantMessage.startsWith(tag, currentCharIndex - tag.length + 1)
+				) {
+					// End current text block if one was active.
+					if (currentTextContent) {
+						currentTextContent.content = assistantMessage
+							.slice(
+								currentTextContentStart, // From where text started.
+								currentCharIndex - tag.length + 1, // To before the tool tag starts.
+							)
+							.trim()
+
+						currentTextContent.partial = false // Ended because tool started.
+
+						if (currentTextContent.content.length > 0) {
+							contentBlocks.push(currentTextContent)
+						}
+
+						currentTextContent = undefined
+					} else {
+						// Check for any text between the last block and this tag.
+						const potentialText = assistantMessage
+							.slice(
+								currentTextContentStart, // From where text *might* have started.
+								currentCharIndex - tag.length + 1, // To before the tool tag starts.
+							)
+							.trim()
+
+						if (potentialText.length > 0) {
+							contentBlocks.push({
+								type: "text",
+								content: potentialText,
+								partial: false,
+							})
+						}
+					}
+
+					// Start the new tool use.
+					currentToolUse = {
+						type: "tool_use",
+						name: toolName,
+						params: {},
+						partial: true, // Assume partial until closing tag is found.
+					}
+
+					currentToolUseStart = currentCharIndex + 1 // Tool content starts after the opening tag.
+					startedNewTool = true
+
+					break
+				}
+			}
+
+			if (startedNewTool) {
+				continue // Handled start of tool, move to next char.
+			}
+
+			// If not starting a tool, it must be text content.
+			if (!currentTextContent) {
+				// Start a new text block if we aren't already in one.
+				currentTextContentStart = currentCharIndex // Text starts at the current character.
+
+				// Only the start index is recorded here; the text itself is
+				// sliced out of `assistantMessage` later, either when a tool
+				// tag starts or when the input ends. The placeholder block
+				// below therefore carries an empty `content` until one of
+				// those two points fills it in.
+				currentTextContent = {
+					type: "text",
+					content: "", // Will be determined by slicing at the end or when a tool starts
+					partial: true,
+				}
+			}
+			// Continue accumulating text implicitly; content is extracted later.
+		}
+	}
+
+	// Finalize any open parameter within an open tool use.
+	if (currentToolUse && currentParamName) {
+		currentToolUse.params[currentParamName] = assistantMessage
+			.slice(currentParamValueStart) // From param start to end of string.
+			.trim()
+		// Tool use remains partial.
+	}
+
+	// Finalize any open tool use (which might contain the finalized partial param).
+	if (currentToolUse) {
+		// Tool use is partial because the loop finished before its closing tag.
+		contentBlocks.push(currentToolUse)
+	}
+	// Finalize any trailing text content.
+	// Only possible if a tool use wasn't open at the very end.
+	else if (currentTextContent) {
+		currentTextContent.content = assistantMessage
+			.slice(currentTextContentStart) // From text start to end of string.
+			.trim()
+
+		// Text is partial because the loop finished.
+		if (currentTextContent.content.length > 0) {
+			contentBlocks.push(currentTextContent)
+		}
+	}
+
+	return contentBlocks
+}

+ 1 - 1
src/integrations/editor/DiffViewProvider.ts

@@ -354,7 +354,7 @@ export class DiffViewProvider {
 	// close editor if open?
 	async reset() {
 		// Ensure any diff views opened by this provider are closed to release memory
-		await this.closeAllDiffViews();
+		await this.closeAllDiffViews()
 		this.editType = undefined
 		this.isEditing = false
 		this.originalContent = undefined