Răsfoiți Sursa

fix: Gemini thought signature validation errors (#10694)

Co-authored-by: Roo Code <[email protected]>
Daniel 6 zile în urmă
părinte
comite
ddac338fdd

+ 25 - 10
src/api/providers/openrouter.ts

@@ -17,7 +17,11 @@ import { NativeToolCallParser } from "../../core/assistant-message/NativeToolCal
 
 import type { ApiHandlerOptions } from "../../shared/api"
 
-import { convertToOpenAiMessages } from "../transform/openai-format"
+import {
+	convertToOpenAiMessages,
+	sanitizeGeminiMessages,
+	consolidateReasoningDetails,
+} from "../transform/openai-format"
 import { normalizeMistralToolCallId } from "../transform/mistral-format"
 import { resolveToolProtocol } from "../../utils/resolveToolProtocol"
 import { TOOL_PROTOCOL } from "@roo-code/types"
@@ -251,14 +255,23 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 		const isNativeProtocol = toolProtocol === TOOL_PROTOCOL.NATIVE
 		const isGemini = modelId.startsWith("google/gemini")
 
-		// For Gemini with native protocol: inject fake reasoning.encrypted block for tool calls
-		// This is required when switching from other models to Gemini to satisfy API validation.
-		// Per OpenRouter documentation (conversation with Toven, Nov 2025):
-		// - Create ONE reasoning_details entry per assistant message with tool calls
-		// - Set `id` to the FIRST tool call's ID from the tool_calls array
-		// - Set `data` to "skip_thought_signature_validator" to bypass signature validation
-		// - Set `index` to 0
+		// For Gemini models with native protocol:
+		// 1. Sanitize messages to handle thought signature validation issues.
+		//    This must happen BEFORE fake encrypted block injection to avoid injecting for
+		//    tool calls that will be dropped due to missing/mismatched reasoning_details.
+		// 2. Inject fake reasoning.encrypted block for tool calls without existing encrypted reasoning.
+		//    This is required when switching from other models to Gemini to satisfy API validation.
+		//    Per OpenRouter documentation (conversation with Toven, Nov 2025):
+		//    - Create ONE reasoning_details entry per assistant message with tool calls
+		//    - Set `id` to the FIRST tool call's ID from the tool_calls array
+		//    - Set `data` to "skip_thought_signature_validator" to bypass signature validation
+		//    - Set `index` to 0
+		// See: https://github.com/cline/cline/issues/8214
 		if (isNativeProtocol && isGemini) {
+			// Step 1: Sanitize messages - filter out tool calls with missing/mismatched reasoning_details
+			openAiMessages = sanitizeGeminiMessages(openAiMessages, modelId)
+
+			// Step 2: Inject fake reasoning.encrypted block for tool calls that survived sanitization
 			openAiMessages = openAiMessages.map((msg) => {
 				if (msg.role === "assistant") {
 					const toolCalls = (msg as any).tool_calls as any[] | undefined
@@ -506,9 +519,11 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 			}
 		}
 
-		// After streaming completes, store ONLY the reasoning_details we received from the API.
+		// After streaming completes, consolidate and store reasoning_details from the API.
+		// This filters out corrupted encrypted blocks (missing `data`) and consolidates by index.
 		if (reasoningDetailsAccumulator.size > 0) {
-			this.currentReasoningDetails = Array.from(reasoningDetailsAccumulator.values())
+			const rawDetails = Array.from(reasoningDetailsAccumulator.values())
+			this.currentReasoningDetails = consolidateReasoningDetails(rawDetails)
 		}
 
 		if (lastUsage) {

+ 341 - 1
src/api/transform/__tests__/openai-format.spec.ts

@@ -3,7 +3,12 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
 
-import { convertToOpenAiMessages } from "../openai-format"
+import {
+	convertToOpenAiMessages,
+	consolidateReasoningDetails,
+	sanitizeGeminiMessages,
+	ReasoningDetail,
+} from "../openai-format"
 import { normalizeMistralToolCallId } from "../mistral-format"
 
 describe("convertToOpenAiMessages", () => {
@@ -963,3 +968,338 @@ describe("convertToOpenAiMessages", () => {
 		})
 	})
 })
+
// Unit tests for consolidateReasoningDetails. The helper groups reasoning_details
// by `index`, drops encrypted blocks that are missing `data`, concatenates
// text/summary fragments, and keeps only the last encrypted payload per index.
describe("consolidateReasoningDetails", () => {
	it("should return empty array for empty input", () => {
		expect(consolidateReasoningDetails([])).toEqual([])
	})

	it("should return empty array for undefined input", () => {
		// Defensive: the accumulator upstream may legitimately be empty/unset.
		expect(consolidateReasoningDetails(undefined as any)).toEqual([])
	})

	it("should filter out corrupted encrypted blocks (missing data field)", () => {
		const details: ReasoningDetail[] = [
			{
				type: "reasoning.encrypted",
				// Missing data field - this should be filtered out
				id: "rs_corrupted",
				format: "google-gemini-v1",
				index: 0,
			},
			{
				type: "reasoning.text",
				text: "Valid reasoning",
				id: "rs_valid",
				format: "google-gemini-v1",
				index: 0,
			},
		]

		const result = consolidateReasoningDetails(details)

		// Should only have the text block, not the corrupted encrypted block
		expect(result).toHaveLength(1)
		expect(result[0].type).toBe("reasoning.text")
		expect(result[0].text).toBe("Valid reasoning")
	})

	it("should concatenate text from multiple entries with same index", () => {
		const details: ReasoningDetail[] = [
			{
				type: "reasoning.text",
				text: "First part. ",
				format: "google-gemini-v1",
				index: 0,
			},
			{
				type: "reasoning.text",
				text: "Second part.",
				format: "google-gemini-v1",
				index: 0,
			},
		]

		const result = consolidateReasoningDetails(details)

		expect(result).toHaveLength(1)
		expect(result[0].text).toBe("First part. Second part.")
	})

	it("should keep only the last encrypted block per index", () => {
		const details: ReasoningDetail[] = [
			{
				type: "reasoning.encrypted",
				data: "first_encrypted_data",
				id: "rs_1",
				format: "google-gemini-v1",
				index: 0,
			},
			{
				type: "reasoning.encrypted",
				data: "second_encrypted_data",
				id: "rs_2",
				format: "google-gemini-v1",
				index: 0,
			},
		]

		const result = consolidateReasoningDetails(details)

		// Should only have one encrypted block - the last one
		expect(result).toHaveLength(1)
		expect(result[0].type).toBe("reasoning.encrypted")
		expect(result[0].data).toBe("second_encrypted_data")
		expect(result[0].id).toBe("rs_2")
	})

	it("should keep last signature and id from multiple entries", () => {
		const details: ReasoningDetail[] = [
			{
				type: "reasoning.text",
				text: "Part 1",
				signature: "sig_1",
				id: "id_1",
				format: "google-gemini-v1",
				index: 0,
			},
			{
				type: "reasoning.text",
				text: "Part 2",
				signature: "sig_2",
				id: "id_2",
				format: "google-gemini-v1",
				index: 0,
			},
		]

		const result = consolidateReasoningDetails(details)

		expect(result).toHaveLength(1)
		// Last-writer-wins for both signature and id.
		expect(result[0].signature).toBe("sig_2")
		expect(result[0].id).toBe("id_2")
	})

	it("should group by index correctly", () => {
		const details: ReasoningDetail[] = [
			{
				type: "reasoning.text",
				text: "Index 0 text",
				format: "google-gemini-v1",
				index: 0,
			},
			{
				type: "reasoning.text",
				text: "Index 1 text",
				format: "google-gemini-v1",
				index: 1,
			},
		]

		const result = consolidateReasoningDetails(details)

		expect(result).toHaveLength(2)
		expect(result.find((r) => r.index === 0)?.text).toBe("Index 0 text")
		expect(result.find((r) => r.index === 1)?.text).toBe("Index 1 text")
	})

	it("should handle summary blocks", () => {
		const details: ReasoningDetail[] = [
			{
				type: "reasoning.summary",
				summary: "Summary part 1",
				format: "google-gemini-v1",
				index: 0,
			},
			{
				type: "reasoning.summary",
				summary: "Summary part 2",
				format: "google-gemini-v1",
				index: 0,
			},
		]

		const result = consolidateReasoningDetails(details)

		// Summary should be concatenated when there's no text
		expect(result).toHaveLength(1)
		expect(result[0].summary).toBe("Summary part 1Summary part 2")
	})
})
+
// Unit tests for sanitizeGeminiMessages. The helper is a no-op for non-Gemini
// model IDs; for Gemini it drops tool calls that lack matching reasoning_details
// (by id), removes the orphaned tool-result messages for dropped calls, and
// keeps id-less (legacy) reasoning_details entries.
describe("sanitizeGeminiMessages", () => {
	it("should return messages unchanged for non-Gemini models", () => {
		const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
			{ role: "system", content: "You are helpful" },
			{ role: "user", content: "Hello" },
		]

		const result = sanitizeGeminiMessages(messages, "anthropic/claude-3-5-sonnet")

		expect(result).toEqual(messages)
	})

	it("should drop tool calls without reasoning_details for Gemini models", () => {
		const messages = [
			{ role: "system", content: "You are helpful" },
			{
				role: "assistant",
				content: "Let me read the file",
				tool_calls: [
					{
						id: "call_123",
						type: "function",
						function: { name: "read_file", arguments: '{"path":"test.ts"}' },
					},
				],
				// No reasoning_details
			},
			{ role: "tool", tool_call_id: "call_123", content: "file contents" },
		] as OpenAI.Chat.ChatCompletionMessageParam[]

		const result = sanitizeGeminiMessages(messages, "google/gemini-3-flash-preview")

		// Should have 2 messages: system and assistant (with content but no tool_calls)
		// Tool message should be dropped
		expect(result).toHaveLength(2)
		expect(result[0].role).toBe("system")
		expect(result[1].role).toBe("assistant")
		expect((result[1] as any).tool_calls).toBeUndefined()
	})

	it("should filter reasoning_details to only include entries matching tool call IDs", () => {
		const messages = [
			{
				role: "assistant",
				content: "",
				tool_calls: [
					{
						id: "call_abc",
						type: "function",
						function: { name: "read_file", arguments: "{}" },
					},
				],
				reasoning_details: [
					{
						type: "reasoning.encrypted",
						data: "valid_data",
						id: "call_abc", // Matches tool call
						format: "google-gemini-v1",
						index: 0,
					},
					{
						type: "reasoning.encrypted",
						data: "mismatched_data",
						id: "call_xyz", // Does NOT match any tool call
						format: "google-gemini-v1",
						index: 1,
					},
				],
			},
		] as any

		const result = sanitizeGeminiMessages(messages, "google/gemini-3-flash-preview")

		expect(result).toHaveLength(1)
		const assistantMsg = result[0] as any
		expect(assistantMsg.tool_calls).toHaveLength(1)
		expect(assistantMsg.reasoning_details).toHaveLength(1)
		expect(assistantMsg.reasoning_details[0].id).toBe("call_abc")
	})

	it("should drop tool calls without matching reasoning_details", () => {
		const messages = [
			{
				role: "assistant",
				content: "Some text",
				tool_calls: [
					{
						id: "call_abc",
						type: "function",
						function: { name: "tool_a", arguments: "{}" },
					},
					{
						id: "call_def",
						type: "function",
						function: { name: "tool_b", arguments: "{}" },
					},
				],
				reasoning_details: [
					{
						type: "reasoning.encrypted",
						data: "data_for_abc",
						id: "call_abc", // Only matches first tool call
						format: "google-gemini-v1",
						index: 0,
					},
				],
			},
			{ role: "tool", tool_call_id: "call_abc", content: "result a" },
			{ role: "tool", tool_call_id: "call_def", content: "result b" },
		] as any

		const result = sanitizeGeminiMessages(messages, "google/gemini-3-flash-preview")

		// Should have: assistant with 1 tool_call, 1 tool message
		expect(result).toHaveLength(2)

		const assistantMsg = result[0] as any
		expect(assistantMsg.tool_calls).toHaveLength(1)
		expect(assistantMsg.tool_calls[0].id).toBe("call_abc")

		// Only the tool result for call_abc should remain
		expect(result[1].role).toBe("tool")
		expect((result[1] as any).tool_call_id).toBe("call_abc")
	})

	it("should include reasoning_details without id (legacy format)", () => {
		const messages = [
			{
				role: "assistant",
				content: "",
				tool_calls: [
					{
						id: "call_abc",
						type: "function",
						function: { name: "read_file", arguments: "{}" },
					},
				],
				reasoning_details: [
					{
						type: "reasoning.text",
						text: "Some reasoning without id",
						format: "google-gemini-v1",
						index: 0,
						// No id field
					},
					{
						type: "reasoning.encrypted",
						data: "encrypted_data",
						id: "call_abc",
						format: "google-gemini-v1",
						index: 0,
					},
				],
			},
		] as any

		const result = sanitizeGeminiMessages(messages, "google/gemini-3-flash-preview")

		expect(result).toHaveLength(1)
		const assistantMsg = result[0] as any
		// Both details should be included (one by matching id, one by having no id)
		// NOTE(review): the comment claims both survive, but the assertion only
		// checks >= 1 — consider tightening to toHaveLength(2) after confirming
		// how consolidation merges same-index text + encrypted entries.
		expect(assistantMsg.reasoning_details.length).toBeGreaterThanOrEqual(1)
	})

	it("should preserve messages without tool_calls", () => {
		const messages = [
			{ role: "system", content: "You are helpful" },
			{ role: "user", content: "Hello" },
			{ role: "assistant", content: "Hi there!" },
		] as OpenAI.Chat.ChatCompletionMessageParam[]

		const result = sanitizeGeminiMessages(messages, "google/gemini-3-flash-preview")

		expect(result).toEqual(messages)
	})
})

+ 252 - 0
src/api/transform/openai-format.ts

@@ -1,6 +1,258 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
 
/**
 * Type for OpenRouter's reasoning detail elements.
 *
 * Typically exactly one of `text`, `summary`, or `data` is populated per entry,
 * depending on `type` — TODO confirm against the OpenRouter streaming payloads.
 * @see https://openrouter.ai/docs/use-cases/reasoning-tokens#streaming-response
 */
export type ReasoningDetail = {
	/**
	 * Type of reasoning detail.
	 * @see https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-detail-types
	 */
	type: string // "reasoning.summary" | "reasoning.encrypted" | "reasoning.text"
	text?: string
	summary?: string
	data?: string // Encrypted reasoning data
	signature?: string | null
	id?: string | null // Unique identifier for the reasoning detail
	/**
	 * Format of the reasoning detail:
	 * - "unknown" - Format is not specified
	 * - "openai-responses-v1" - OpenAI responses format version 1
	 * - "anthropic-claude-v1" - Anthropic Claude format version 1 (default)
	 * - "google-gemini-v1" - Google Gemini format version 1
	 * - "xai-responses-v1" - xAI responses format version 1
	 */
	format?: string
	index?: number // Sequential index of the reasoning detail
}
+
+/**
+ * Consolidates reasoning_details by grouping by index and type.
+ * - Filters out corrupted encrypted blocks (missing `data` field)
+ * - For text blocks: concatenates text, keeps last signature/id/format
+ * - For encrypted blocks: keeps only the last one per index
+ *
+ * @param reasoningDetails - Array of reasoning detail objects
+ * @returns Consolidated array of reasoning details
+ * @see https://github.com/cline/cline/issues/8214
+ */
+export function consolidateReasoningDetails(reasoningDetails: ReasoningDetail[]): ReasoningDetail[] {
+	if (!reasoningDetails || reasoningDetails.length === 0) {
+		return []
+	}
+
+	// Group by index
+	const groupedByIndex = new Map<number, ReasoningDetail[]>()
+
+	for (const detail of reasoningDetails) {
+		// Drop corrupted encrypted reasoning blocks that would otherwise trigger:
+		// "Invalid input: expected string, received undefined" for reasoning_details.*.data
+		// See: https://github.com/cline/cline/issues/8214
+		if (detail.type === "reasoning.encrypted" && !detail.data) {
+			continue
+		}
+
+		const index = detail.index ?? 0
+		if (!groupedByIndex.has(index)) {
+			groupedByIndex.set(index, [])
+		}
+		groupedByIndex.get(index)!.push(detail)
+	}
+
+	// Consolidate each group
+	const consolidated: ReasoningDetail[] = []
+
+	for (const [index, details] of groupedByIndex.entries()) {
+		// Concatenate all text parts
+		let concatenatedText = ""
+		let concatenatedSummary = ""
+		let signature: string | undefined
+		let id: string | undefined
+		let format = "unknown"
+		let type = "reasoning.text"
+
+		for (const detail of details) {
+			if (detail.text) {
+				concatenatedText += detail.text
+			}
+			if (detail.summary) {
+				concatenatedSummary += detail.summary
+			}
+			// Keep the signature from the last item that has one
+			if (detail.signature) {
+				signature = detail.signature
+			}
+			// Keep the id from the last item that has one
+			if (detail.id) {
+				id = detail.id
+			}
+			// Keep format and type from any item (they should all be the same)
+			if (detail.format) {
+				format = detail.format
+			}
+			if (detail.type) {
+				type = detail.type
+			}
+		}
+
+		// Create consolidated entry for text
+		if (concatenatedText) {
+			const consolidatedEntry: ReasoningDetail = {
+				type: type,
+				text: concatenatedText,
+				signature: signature ?? undefined,
+				id: id ?? undefined,
+				format: format,
+				index: index,
+			}
+			consolidated.push(consolidatedEntry)
+		}
+
+		// Create consolidated entry for summary (used by some providers)
+		if (concatenatedSummary && !concatenatedText) {
+			const consolidatedEntry: ReasoningDetail = {
+				type: type,
+				summary: concatenatedSummary,
+				signature: signature ?? undefined,
+				id: id ?? undefined,
+				format: format,
+				index: index,
+			}
+			consolidated.push(consolidatedEntry)
+		}
+
+		// For encrypted chunks (data), only keep the last one
+		let lastDataEntry: ReasoningDetail | undefined
+		for (const detail of details) {
+			if (detail.data) {
+				lastDataEntry = {
+					type: detail.type,
+					data: detail.data,
+					signature: detail.signature ?? undefined,
+					id: detail.id ?? undefined,
+					format: detail.format,
+					index: index,
+				}
+			}
+		}
+		if (lastDataEntry) {
+			consolidated.push(lastDataEntry)
+		}
+	}
+
+	return consolidated
+}
+
+/**
+ * Sanitizes OpenAI messages for Gemini models by filtering reasoning_details
+ * to only include entries that match the tool call IDs.
+ *
+ * Gemini models require thought signatures for tool calls. When switching providers
+ * mid-conversation, historical tool calls may not include Gemini reasoning details,
+ * which can poison the next request. This function:
+ * 1. Filters reasoning_details to only include entries matching tool call IDs
+ * 2. Drops tool_calls that lack any matching reasoning_details
+ * 3. Removes corresponding tool result messages for dropped tool calls
+ *
+ * @param messages - Array of OpenAI chat completion messages
+ * @param modelId - The model ID to check if sanitization is needed
+ * @returns Sanitized array of messages (unchanged if not a Gemini model)
+ * @see https://github.com/cline/cline/issues/8214
+ */
+export function sanitizeGeminiMessages(
+	messages: OpenAI.Chat.ChatCompletionMessageParam[],
+	modelId: string,
+): OpenAI.Chat.ChatCompletionMessageParam[] {
+	// Only sanitize for Gemini models
+	if (!modelId.includes("gemini")) {
+		return messages
+	}
+
+	const droppedToolCallIds = new Set<string>()
+	const sanitized: OpenAI.Chat.ChatCompletionMessageParam[] = []
+
+	for (const msg of messages) {
+		if (msg.role === "assistant") {
+			const anyMsg = msg as any
+			const toolCalls = anyMsg.tool_calls as OpenAI.Chat.ChatCompletionMessageToolCall[] | undefined
+			const reasoningDetails = anyMsg.reasoning_details as ReasoningDetail[] | undefined
+
+			if (Array.isArray(toolCalls) && toolCalls.length > 0) {
+				const hasReasoningDetails = Array.isArray(reasoningDetails) && reasoningDetails.length > 0
+
+				if (!hasReasoningDetails) {
+					// No reasoning_details at all - drop all tool calls
+					for (const tc of toolCalls) {
+						if (tc?.id) {
+							droppedToolCallIds.add(tc.id)
+						}
+					}
+					// Keep any textual content, but drop the tool_calls themselves
+					if (anyMsg.content) {
+						sanitized.push({ role: "assistant", content: anyMsg.content } as any)
+					}
+					continue
+				}
+
+				// Filter reasoning_details to only include entries matching tool call IDs
+				// This prevents mismatched reasoning details from poisoning the request
+				const validToolCalls: OpenAI.Chat.ChatCompletionMessageToolCall[] = []
+				const validReasoningDetails: ReasoningDetail[] = []
+
+				for (const tc of toolCalls) {
+					// Check if there's a reasoning_detail with matching id
+					const matchingDetails = reasoningDetails.filter((d) => d.id === tc.id)
+
+					if (matchingDetails.length > 0) {
+						validToolCalls.push(tc)
+						validReasoningDetails.push(...matchingDetails)
+					} else {
+						// No matching reasoning_detail - drop this tool call
+						if (tc?.id) {
+							droppedToolCallIds.add(tc.id)
+						}
+					}
+				}
+
+				// Also include reasoning_details that don't have an id (legacy format)
+				const detailsWithoutId = reasoningDetails.filter((d) => !d.id)
+				validReasoningDetails.push(...detailsWithoutId)
+
+				// Build the sanitized message
+				const sanitizedMsg: any = {
+					role: "assistant",
+					content: anyMsg.content ?? "",
+				}
+
+				if (validReasoningDetails.length > 0) {
+					sanitizedMsg.reasoning_details = consolidateReasoningDetails(validReasoningDetails)
+				}
+
+				if (validToolCalls.length > 0) {
+					sanitizedMsg.tool_calls = validToolCalls
+				}
+
+				sanitized.push(sanitizedMsg)
+				continue
+			}
+		}
+
+		if (msg.role === "tool") {
+			const anyMsg = msg as any
+			if (anyMsg.tool_call_id && droppedToolCallIds.has(anyMsg.tool_call_id)) {
+				// Skip tool result for dropped tool call
+				continue
+			}
+		}
+
+		sanitized.push(msg)
+	}
+
+	return sanitized
+}
+
 /**
  * Options for converting Anthropic messages to OpenAI format.
  */