فهرست منبع

fix: prevent duplicate diff errors when parallel tool calling is enabled (#8763)

* fix: prevent duplicate diff errors when parallel tool calling is enabled

When a `replace_in_file` diff fails during streaming with parallel tool
calling enabled (GPT-5/Codex models), the same error message was being
added to userMessageContent on each streaming chunk, causing hundreds of
duplicates and context window overflow.

Root cause: The duplicate prevention added in 09276ebf4 used the
`didAlreadyUseTool` flag, but this flag is intentionally not set when
parallel tool calling is enabled (per 00e9d6f52). This was because
`didAlreadyUseTool` is designed to block subsequent tools, which is the
opposite of what parallel tool calling needs.

The fix adds per-call_id tracking via `diffErrorPushedForCallIds` Set:
- Track which specific tool calls have already had their error pushed
- Works for both parallel and non-parallel tool calling
- Different parallel tool calls can each report their own errors
- Same call_id only pushes error once, regardless of streaming chunks

This is compatible with the parallel tool calling design because GPT-5
models (which auto-enable parallel tool calling) use native tool calling
and always have a call_id. This mechanism is separate from didAlreadyUseTool:
that flag controls flow (blocking subsequent tools), whereas the Set only
prevents duplicate error messages for the same call.

Related commits:
- 09276ebf4: fix: prevent duplicate error messages during streamed edit
  tool failures (only worked when parallel tool calling disabled)
- 00e9d6f52: feat: add experimental parallel tool calling support
  (deliberately excluded didAlreadyUseTool from parallel mode)

* test: add unit tests for diffErrorPushedForCallIds duplicate prevention

Tests cover:
- Basic Set behavior (initialization, tracking, clearing)
- Duplicate prevention logic for parallel tool calling
- Edge cases (empty/undefined call_id, rapid streaming chunks)
- Reset behavior between API requests

* Add changeset

* refactor: rename diffErrorPushedForCallIds to errorPushedForCallIds

Generalizes the tracking mechanism per reviewer feedback from @abeatrix.

The more generic name allows the same pattern to be reused for other
tool handlers that may need duplicate error prevention in the future,
not just diff-related errors.

No functional changes - just renaming.
Robin Newhouse 4 هفته پیش
والد
کامیت
00efcc1c2b

+ 5 - 0
.changeset/fine-apes-fetch.md

@@ -0,0 +1,5 @@
+---
+"claude-dev": patch
+---
+
+fix: prevent duplicate diff errors when parallel tool calling is enabled

+ 2 - 0
src/core/task/TaskState.ts

@@ -16,6 +16,8 @@ export class TaskState {
 	userMessageContentReady = false
 	// Map of tool names to their tool_use_id for creating proper ToolResultBlockParam
 	toolUseIdMap: Map<string, string> = new Map()
+	// Track tool calls that have already had errors pushed (prevents duplicates during streaming)
+	errorPushedForCallIds: Set<string> = new Set()
 
 	// Presentation locks
 	presentAssistantMessageLocked = false

+ 194 - 0
src/core/task/__tests__/TaskState.test.ts

@@ -0,0 +1,194 @@
+import { beforeEach, describe, it } from "mocha"
+import "should"
+import { TaskState } from "../TaskState"
+
+describe("TaskState", () => {
+	describe("errorPushedForCallIds", () => {
+		let taskState: TaskState
+
+		beforeEach(() => {
+			taskState = new TaskState()
+		})
+
+		it("should initialize as empty Set", () => {
+			taskState.errorPushedForCallIds.should.be.instanceOf(Set)
+			taskState.errorPushedForCallIds.size.should.equal(0)
+		})
+
+		it("should track call_ids that have had errors pushed", () => {
+			const callId = "call_abc123"
+
+			// Initially not present
+			taskState.errorPushedForCallIds.has(callId).should.be.false()
+
+			// Add call_id
+			taskState.errorPushedForCallIds.add(callId)
+
+			// Now present
+			taskState.errorPushedForCallIds.has(callId).should.be.true()
+		})
+
+		it("should track multiple call_ids independently", () => {
+			const callId1 = "call_abc123"
+			const callId2 = "call_def456"
+			const callId3 = "call_ghi789"
+
+			// Add first two
+			taskState.errorPushedForCallIds.add(callId1)
+			taskState.errorPushedForCallIds.add(callId2)
+
+			// Check tracking
+			taskState.errorPushedForCallIds.has(callId1).should.be.true()
+			taskState.errorPushedForCallIds.has(callId2).should.be.true()
+			taskState.errorPushedForCallIds.has(callId3).should.be.false()
+		})
+
+		it("should clear all tracked call_ids", () => {
+			// Add multiple call_ids
+			taskState.errorPushedForCallIds.add("call_1")
+			taskState.errorPushedForCallIds.add("call_2")
+			taskState.errorPushedForCallIds.add("call_3")
+			taskState.errorPushedForCallIds.size.should.equal(3)
+
+			// Clear
+			taskState.errorPushedForCallIds.clear()
+
+			// Should be empty
+			taskState.errorPushedForCallIds.size.should.equal(0)
+			taskState.errorPushedForCallIds.has("call_1").should.be.false()
+		})
+
+		it("should not add duplicate call_ids (Set behavior)", () => {
+			const callId = "call_abc123"
+
+			taskState.errorPushedForCallIds.add(callId)
+			taskState.errorPushedForCallIds.add(callId)
+			taskState.errorPushedForCallIds.add(callId)
+
+			// Still only one entry
+			taskState.errorPushedForCallIds.size.should.equal(1)
+		})
+	})
+
+	describe("duplicate diff error prevention logic", () => {
+		let taskState: TaskState
+
+		beforeEach(() => {
+			taskState = new TaskState()
+		})
+
+		/**
+		 * Simulates the duplicate check logic from WriteToFileToolHandler.
+		 * This tests the core logic that prevents duplicate error messages
+		 * when parallel tool calling is enabled.
+		 */
+		function shouldSkipDuplicateError(callId: string | undefined): boolean {
+			const id = callId || ""
+			if (id && taskState.errorPushedForCallIds.has(id)) {
+				return true
+			}
+			return false
+		}
+
+		function markErrorPushed(callId: string | undefined): void {
+			const id = callId || ""
+			if (id) {
+				taskState.errorPushedForCallIds.add(id)
+			}
+		}
+
+		it("should not skip first error for a call_id", () => {
+			const callId = "call_abc123"
+
+			shouldSkipDuplicateError(callId).should.be.false()
+		})
+
+		it("should skip subsequent errors for same call_id", () => {
+			const callId = "call_abc123"
+
+			// First call - not skipped
+			shouldSkipDuplicateError(callId).should.be.false()
+			markErrorPushed(callId)
+
+			// Second call - should skip
+			shouldSkipDuplicateError(callId).should.be.true()
+
+			// Third call - still skipped
+			shouldSkipDuplicateError(callId).should.be.true()
+		})
+
+		it("should allow different call_ids to each have their error", () => {
+			const callId1 = "call_tool1"
+			const callId2 = "call_tool2"
+
+			// First tool error
+			shouldSkipDuplicateError(callId1).should.be.false()
+			markErrorPushed(callId1)
+
+			// Second tool error (different call_id) - should NOT skip
+			shouldSkipDuplicateError(callId2).should.be.false()
+			markErrorPushed(callId2)
+
+			// But both should now skip on retry
+			shouldSkipDuplicateError(callId1).should.be.true()
+			shouldSkipDuplicateError(callId2).should.be.true()
+		})
+
+		it("should not skip when call_id is empty (XML tools fallback)", () => {
+			// Empty call_id (typical for XML-based tools)
+			shouldSkipDuplicateError("").should.be.false()
+			markErrorPushed("")
+
+			// Still doesn't skip because empty string is falsy
+			shouldSkipDuplicateError("").should.be.false()
+		})
+
+		it("should not skip when call_id is undefined", () => {
+			shouldSkipDuplicateError(undefined).should.be.false()
+			markErrorPushed(undefined)
+
+			// Still doesn't skip
+			shouldSkipDuplicateError(undefined).should.be.false()
+		})
+
+		it("should reset tracking between API requests (clear)", () => {
+			const callId = "call_abc123"
+
+			// Mark error pushed
+			markErrorPushed(callId)
+			shouldSkipDuplicateError(callId).should.be.true()
+
+			// Simulate reset between API requests
+			taskState.errorPushedForCallIds.clear()
+
+			// Same call_id should not skip after reset
+			shouldSkipDuplicateError(callId).should.be.false()
+		})
+
+		it("should handle rapid streaming chunks (same call_id repeated)", () => {
+			const callId = "call_streaming"
+
+			// Simulate 238 streaming chunks (as seen in the bug)
+			const results: boolean[] = []
+
+			for (let i = 0; i < 238; i++) {
+				const shouldSkip = shouldSkipDuplicateError(callId)
+				results.push(shouldSkip)
+
+				if (!shouldSkip) {
+					markErrorPushed(callId)
+				}
+			}
+
+			// First should not skip, rest should skip
+			results[0].should.be.false()
+			results
+				.slice(1)
+				.every((r) => r)
+				.should.be.true()
+
+			// Should have only added one entry
+			taskState.errorPushedForCallIds.size.should.equal(1)
+		})
+	})
+})

+ 1 - 0
src/core/task/index.ts

@@ -2558,6 +2558,7 @@ export class Task {
 			await this.diffViewProvider.reset()
 			this.streamHandler.reset()
 			this.taskState.toolUseIdMap.clear()
+			this.taskState.errorPushedForCallIds.clear()
 
 			const { toolUseHandler, reasonsHandler } = this.streamHandler.getHandlers()
 			const stream = this.attemptApiRequest(previousApiReqIndex) // yields only if the first chunk is successful, otherwise will allow the user to retry the request (most likely due to rate limit error, which gets thrown on the first chunk)

+ 8 - 3
src/core/task/tools/handlers/WriteToFileToolHandler.ts

@@ -414,9 +414,9 @@ export class WriteToFileToolHandler implements IFullyManagedTool {
 					!block.partial, // Pass the partial flag correctly
 				)
 			} catch (error) {
-				// As we set the didAlreadyUseTool flag when the tool has failed once, we don't want to add the error message to the
-				// userMessages array again on each new streaming chunk received.
-				if (!config.enableParallelToolCalling && config.taskState.didAlreadyUseTool) {
+				// Check if we've already pushed an error for this specific tool call (prevents duplicates during streaming)
+				const callId = block.call_id || ""
+				if (callId && config.taskState.errorPushedForCallIds.has(callId)) {
 					return
 				}
 
@@ -451,6 +451,11 @@ export class WriteToFileToolHandler implements IFullyManagedTool {
 					config.coordinator,
 					config.taskState.toolUseIdMap,
 				)
+
+				// Mark this call as having had its error pushed (prevents duplicates during streaming)
+				if (callId) {
+					config.taskState.errorPushedForCallIds.add(callId)
+				}
 				if (!config.enableParallelToolCalling) {
 					config.taskState.didAlreadyUseTool = true
 				}