Просмотр исходного кода

fix: round-trip Gemini thought signatures for tool calls (#10590)

Hannes Rudolph 2 недели назад
Родитель
Commit
168cfcaba5

+ 10 - 11
src/api/providers/gemini.ts

@@ -90,10 +90,11 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 			? (this.options.modelMaxTokens ?? maxTokens ?? undefined)
 			: (maxTokens ?? undefined)
 
-		// Only forward encrypted reasoning continuations (thoughtSignature) when we are
-		// using reasoning (thinkingConfig is present). Both effort-based (thinkingLevel)
-		// and budget-based (thinkingBudget) models require this for active loops.
-		const includeThoughtSignatures = Boolean(thinkingConfig)
+		// Gemini 3 validates thought signatures for tool/function calling steps.
+		// We must round-trip the signature when tools are in use, even if the user chose
+		// a minimal thinking level (or thinkingConfig is otherwise absent).
+		const usingNativeTools = Boolean(metadata?.tools && metadata.tools.length > 0)
+		const includeThoughtSignatures = Boolean(thinkingConfig) || usingNativeTools
 
 		// The message list can include provider-specific meta entries such as
 		// `{ type: "reasoning", ... }` that are intended only for providers like
@@ -236,9 +237,10 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 						}>) {
 							// Capture thought signatures so they can be persisted into API history.
 							const thoughtSignature = part.thoughtSignature
-							// Persist encrypted reasoning when using reasoning. Both effort-based
-							// and budget-based models require this for active loops.
-							if (thinkingConfig && thoughtSignature) {
+							// Persist thought signatures so they can be round-tripped in the next step.
+							// Gemini 3 requires this during tool calling; other Gemini thinking models
+							// benefit from it for continuity.
+							if (includeThoughtSignatures && thoughtSignature) {
 								this.lastThoughtSignature = thoughtSignature
 							}
 
@@ -455,10 +457,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 	}
 
 	public getThoughtSignature(): string | undefined {
-		// Disabled to prevent "Corrupted thought signature" errors on task resumption.
-		// Gemini thought signatures are session-specific and cannot be reliably reused
-		// across API calls or after task resumption from history.
-		return undefined
+		return this.lastThoughtSignature
 	}
 
 	public getResponseId(): string | undefined {

+ 21 - 0
src/api/transform/__tests__/gemini-format.spec.ts

@@ -140,6 +140,27 @@ describe("convertAnthropicMessageToGemini", () => {
 		])
 	})
 
+	it("should only attach thoughtSignature to the first functionCall in the message", () => {
+		const anthropicMessage: Anthropic.Messages.MessageParam = {
+			role: "assistant",
+			content: [
+				{ type: "thoughtSignature", thoughtSignature: "sig-123" } as any,
+				{ type: "tool_use", id: "call-1", name: "toolA", input: { a: 1 } },
+				{ type: "tool_use", id: "call-2", name: "toolB", input: { b: 2 } },
+			],
+		}
+
+		const result = convertAnthropicMessageToGemini(anthropicMessage)
+		expect(result).toHaveLength(1)
+
+		const parts = result[0]!.parts as any[]
+		const functionCallParts = parts.filter((p) => p.functionCall)
+		expect(functionCallParts).toHaveLength(2)
+
+		expect(functionCallParts[0].thoughtSignature).toBe("sig-123")
+		expect(functionCallParts[1].thoughtSignature).toBeUndefined()
+	})
+
 	it("should convert a message with tool result as string", () => {
 		const toolIdToName = new Map<string, string>()
 		toolIdToName.set("calculator-123", "calculator")

+ 26 - 2
src/api/transform/gemini-format.ts

@@ -70,13 +70,19 @@ export function convertAnthropicContentToGemini(
 
 				return { inlineData: { data: block.source.data, mimeType: block.source.media_type } }
 			case "tool_use":
+				// Gemini 3 validation rules:
+				// - In a parallel function calling response, only the FIRST functionCall part has a signature.
+				// - In sequential steps, each step's first functionCall must include its signature.
+				// When converting from our history, we don't always have enough information to perfectly
+				// recreate the original per-part distribution, but we can and should avoid attaching the
+				// signature to every parallel call in a single assistant message.
 				return {
 					functionCall: {
 						name: block.name,
 						args: block.input as Record<string, unknown>,
 					},
 					// Inject the thoughtSignature into the functionCall part if required.
-					// This is necessary for Gemini 2.5/3+ thinking models to validate the tool call.
+					// This is necessary for Gemini 3+ thinking models to validate the tool call.
 					...(functionCallSignature ? { thoughtSignature: functionCallSignature } : {}),
 				} as Part
 			case "tool_result": {
@@ -136,7 +142,10 @@ export function convertAnthropicContentToGemini(
 		}
 	})
 
-	// Post-processing: Ensure thought signature is attached if required
+	// Post-processing:
+	// 1) Ensure thought signature is attached if required
+	// 2) For multiple function calls in a single message, keep the signature only on the first
+	//    functionCall part to match Gemini 3 parallel-calling behavior.
 	if (includeThoughtSignatures && activeThoughtSignature) {
 		const hasSignature = parts.some((p) => "thoughtSignature" in p)
 
@@ -153,6 +162,21 @@ export function convertAnthropicContentToGemini(
 		}
 	}
 
+	if (includeThoughtSignatures) {
+		let seenFirstFunctionCall = false
+		for (const part of parts) {
+			if (part && typeof part === "object" && "functionCall" in part && (part as any).functionCall) {
+				const partWithSig = part as PartWithThoughtSignature
+				if (!seenFirstFunctionCall) {
+					seenFirstFunctionCall = true
+				} else {
+					// Remove signature from subsequent function calls in this message.
+					delete partWithSig.thoughtSignature
+				}
+			}
+		}
+	}
+
 	return parts
 }
 

+ 12 - 5
src/core/task/Task.ts

@@ -955,6 +955,13 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 			const reasoningSummary = handler.getSummary?.()
 			const reasoningDetails = handler.getReasoningDetails?.()
 
+			// Only Anthropic's API expects/validates the special `thinking` content block signature.
+			// Other providers (notably Gemini 3) use different signature semantics (e.g. `thoughtSignature`)
+			// and require round-tripping the signature in their own format.
+			const modelId = getModelId(this.apiConfiguration)
+			const apiProtocol = getApiProtocol(this.apiConfiguration.apiProvider, modelId)
+			const isAnthropicProtocol = apiProtocol === "anthropic"
+
 			// Start from the original assistant message
 			const messageWithTs: any = {
 				...message,
@@ -969,7 +976,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 
 			// Store reasoning: Anthropic thinking (with signature), plain text (most providers), or encrypted (OpenAI Native)
 			// Skip if reasoning_details already contains the reasoning (to avoid duplication)
-			if (reasoning && thoughtSignature && !reasoningDetails) {
+			if (isAnthropicProtocol && reasoning && thoughtSignature && !reasoningDetails) {
 				// Anthropic provider with extended thinking: Store as proper `thinking` block
 				// This format passes through anthropic-filter.ts and is properly round-tripped
 				// for interleaved thinking with tool use (required by Anthropic API)
@@ -1028,10 +1035,10 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				}
 			}
 
-			// If we have a thought signature WITHOUT reasoning text (edge case),
-			// append it as a dedicated content block for non-Anthropic providers (e.g., Gemini).
-			// Note: For Anthropic, the signature is already included in the thinking block above.
-			if (thoughtSignature && !reasoning) {
+			// For non-Anthropic providers (e.g., Gemini 3), persist the thought signature as its own
+			// content block so converters can attach it back to the correct provider-specific fields.
+			// Note: For Anthropic extended thinking, the signature is already included in the thinking block above.
+			if (thoughtSignature && !isAnthropicProtocol) {
 				const thoughtSignatureBlock = {
 					type: "thoughtSignature",
 					thoughtSignature,