Explorar el Código

fix(litellm): inject dummy thought signatures on ALL tool calls for Gemini (#10743)

Daniel hace 2 meses
padre
commit
d7b7e17a21
Se han modificado 2 ficheros con 400 adiciones y 7 borrados
  1. +318 −1	src/api/providers/__tests__/lite-llm.spec.ts
  2. +82 −6	src/api/providers/lite-llm.ts

+ 318 - 1
src/api/providers/__tests__/lite-llm.spec.ts

@@ -3,7 +3,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
 
 import { LiteLLMHandler } from "../lite-llm"
 import { ApiHandlerOptions } from "../../../shared/api"
-import { litellmDefaultModelId, litellmDefaultModelInfo } from "@roo-code/types"
+import { litellmDefaultModelId, litellmDefaultModelInfo, TOOL_PROTOCOL } from "@roo-code/types"
 
 // Mock vscode first to avoid import errors
 vi.mock("vscode", () => ({}))
@@ -40,6 +40,12 @@ vi.mock("../fetchers/modelCache", () => ({
 			"claude-3-opus": { ...litellmDefaultModelInfo, maxTokens: 8192 },
 			"llama-3": { ...litellmDefaultModelInfo, maxTokens: 8192 },
 			"gpt-4-turbo": { ...litellmDefaultModelInfo, maxTokens: 8192 },
+			// Gemini models for thought signature injection tests
+			"gemini-3-pro": { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+			"gemini-3-flash": { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+			"gemini-2.5-pro": { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+			"google/gemini-3-pro": { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+			"vertex_ai/gemini-3-pro": { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
 		})
 	}),
 	getModelsFromCache: vi.fn().mockReturnValue(undefined),
@@ -388,4 +394,315 @@ describe("LiteLLMHandler", () => {
 			expect(createCall.max_completion_tokens).toBeUndefined()
 		})
 	})
+
+	describe("Gemini thought signature injection", () => {
+		describe("isGeminiModel detection", () => {
+			it("should detect Gemini 3 models", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const isGeminiModel = (handler as any).isGeminiModel.bind(handler)
+
+				expect(isGeminiModel("gemini-3-pro")).toBe(true)
+				expect(isGeminiModel("gemini-3-flash")).toBe(true)
+				expect(isGeminiModel("gemini-3-pro-preview")).toBe(true)
+			})
+
+			it("should detect Gemini 2.5 models", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const isGeminiModel = (handler as any).isGeminiModel.bind(handler)
+
+				expect(isGeminiModel("gemini-2.5-pro")).toBe(true)
+				expect(isGeminiModel("gemini-2.5-flash")).toBe(true)
+			})
+
+			it("should detect provider-prefixed Gemini models", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const isGeminiModel = (handler as any).isGeminiModel.bind(handler)
+
+				expect(isGeminiModel("google/gemini-3-pro")).toBe(true)
+				expect(isGeminiModel("vertex_ai/gemini-3-pro")).toBe(true)
+				expect(isGeminiModel("vertex/gemini-2.5-pro")).toBe(true)
+			})
+
+			it("should not detect non-Gemini models", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const isGeminiModel = (handler as any).isGeminiModel.bind(handler)
+
+				expect(isGeminiModel("gpt-4")).toBe(false)
+				expect(isGeminiModel("claude-3-opus")).toBe(false)
+				expect(isGeminiModel("gemini-1.5-pro")).toBe(false)
+				expect(isGeminiModel("gemini-2.0-flash")).toBe(false)
+			})
+		})
+
+		describe("injectThoughtSignatureForGemini", () => {
+			// Base64 encoded "skip_thought_signature_validator"
+			const dummySignature = Buffer.from("skip_thought_signature_validator").toString("base64")
+
+			it("should inject provider_specific_fields.thought_signature for assistant messages with tool_calls", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
+
+				const messages = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: "",
+						tool_calls: [
+							{ id: "call_123", type: "function", function: { name: "test_tool", arguments: "{}" } },
+						],
+					},
+					{ role: "tool", tool_call_id: "call_123", content: "result" },
+				]
+
+				const result = injectThoughtSignature(messages)
+
+				// The first tool call should have provider_specific_fields.thought_signature injected
+				expect(result[1].tool_calls[0].provider_specific_fields).toBeDefined()
+				expect(result[1].tool_calls[0].provider_specific_fields.thought_signature).toBe(dummySignature)
+			})
+
+			it("should not inject if assistant message has no tool_calls", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
+
+				const messages = [
+					{ role: "user", content: "Hello" },
+					{ role: "assistant", content: "Hi there!" },
+				]
+
+				const result = injectThoughtSignature(messages)
+
+				// No changes should be made
+				expect(result[1].tool_calls).toBeUndefined()
+			})
+
+			it("should always overwrite existing thought_signature", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
+
+				const existingSignature = "existing_signature_base64"
+
+				const messages = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: "",
+						tool_calls: [
+							{
+								id: "call_123",
+								type: "function",
+								function: { name: "test_tool", arguments: "{}" },
+								provider_specific_fields: { thought_signature: existingSignature },
+							},
+						],
+					},
+				]
+
+				const result = injectThoughtSignature(messages)
+
+				// Should overwrite with dummy signature (always inject to ensure compatibility)
+				expect(result[1].tool_calls[0].provider_specific_fields.thought_signature).toBe(dummySignature)
+			})
+
+			it("should inject signature into ALL tool calls for parallel calls", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
+
+				const messages = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: "",
+						tool_calls: [
+							{ id: "call_first", type: "function", function: { name: "tool1", arguments: "{}" } },
+							{ id: "call_second", type: "function", function: { name: "tool2", arguments: "{}" } },
+							{ id: "call_third", type: "function", function: { name: "tool3", arguments: "{}" } },
+						],
+					},
+				]
+
+				const result = injectThoughtSignature(messages)
+
+				// ALL tool calls should have the signature
+				expect(result[1].tool_calls[0].provider_specific_fields.thought_signature).toBe(dummySignature)
+				expect(result[1].tool_calls[1].provider_specific_fields.thought_signature).toBe(dummySignature)
+				expect(result[1].tool_calls[2].provider_specific_fields.thought_signature).toBe(dummySignature)
+			})
+
+			it("should preserve existing provider_specific_fields when adding thought_signature", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
+
+				const messages = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: "",
+						tool_calls: [
+							{
+								id: "call_123",
+								type: "function",
+								function: { name: "test_tool", arguments: "{}" },
+								provider_specific_fields: { other_field: "value" },
+							},
+						],
+					},
+				]
+
+				const result = injectThoughtSignature(messages)
+
+				// Should have both existing field and new thought_signature
+				expect(result[1].tool_calls[0].provider_specific_fields.other_field).toBe("value")
+				expect(result[1].tool_calls[0].provider_specific_fields.thought_signature).toBe(dummySignature)
+			})
+		})
+
+		describe("createMessage integration with Gemini models", () => {
+			// Base64 encoded "skip_thought_signature_validator"
+			const dummySignature = Buffer.from("skip_thought_signature_validator").toString("base64")
+
+			it("should inject thought signatures for Gemini 3 models with native tools", async () => {
+				const optionsWithGemini: ApiHandlerOptions = {
+					...mockOptions,
+					litellmModelId: "gemini-3-pro",
+				}
+				handler = new LiteLLMHandler(optionsWithGemini)
+
+				// Mock fetchModel to return a Gemini model with native tool support
+				vi.spyOn(handler as any, "fetchModel").mockResolvedValue({
+					id: "gemini-3-pro",
+					info: { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+				})
+
+				const systemPrompt = "You are a helpful assistant"
+				// Simulate conversation history with a tool call from a previous model (Claude)
+				const messages: Anthropic.Messages.MessageParam[] = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: [
+							{ type: "text", text: "I'll help you with that." },
+							{ type: "tool_use", id: "toolu_123", name: "read_file", input: { path: "test.txt" } },
+						],
+					},
+					{
+						role: "user",
+						content: [{ type: "tool_result", tool_use_id: "toolu_123", content: "file contents" }],
+					},
+					{ role: "user", content: "Thanks!" },
+				]
+
+				// Mock the stream response
+				const mockStream = {
+					async *[Symbol.asyncIterator]() {
+						yield {
+							choices: [{ delta: { content: "You're welcome!" } }],
+							usage: {
+								prompt_tokens: 100,
+								completion_tokens: 20,
+							},
+						}
+					},
+				}
+
+				mockCreate.mockReturnValue({
+					withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+				})
+
+				// Provide tools and native protocol to trigger the injection
+				const metadata = {
+					tools: [
+						{
+							type: "function",
+							function: { name: "read_file", description: "Read a file", parameters: {} },
+						},
+					],
+					toolProtocol: TOOL_PROTOCOL.NATIVE,
+				}
+
+				const generator = handler.createMessage(systemPrompt, messages, metadata as any)
+				for await (const _chunk of generator) {
+					// Consume the generator
+				}
+
+				// Verify that the assistant message with tool_calls has thought_signature injected
+				const createCall = mockCreate.mock.calls[0][0]
+				const assistantMessage = createCall.messages.find(
+					(msg: any) => msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0,
+				)
+
+				expect(assistantMessage).toBeDefined()
+				// First tool call should have the thought signature
+				expect(assistantMessage.tool_calls[0].provider_specific_fields).toBeDefined()
+				expect(assistantMessage.tool_calls[0].provider_specific_fields.thought_signature).toBe(dummySignature)
+			})
+
+			it("should not inject thought signatures for non-Gemini models", async () => {
+				const optionsWithGPT4: ApiHandlerOptions = {
+					...mockOptions,
+					litellmModelId: "gpt-4",
+				}
+				handler = new LiteLLMHandler(optionsWithGPT4)
+
+				vi.spyOn(handler as any, "fetchModel").mockResolvedValue({
+					id: "gpt-4",
+					info: { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+				})
+
+				const systemPrompt = "You are a helpful assistant"
+				const messages: Anthropic.Messages.MessageParam[] = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: [
+							{ type: "text", text: "I'll help you with that." },
+							{ type: "tool_use", id: "toolu_123", name: "read_file", input: { path: "test.txt" } },
+						],
+					},
+					{
+						role: "user",
+						content: [{ type: "tool_result", tool_use_id: "toolu_123", content: "file contents" }],
+					},
+				]
+
+				const mockStream = {
+					async *[Symbol.asyncIterator]() {
+						yield {
+							choices: [{ delta: { content: "Response" } }],
+							usage: { prompt_tokens: 100, completion_tokens: 20 },
+						}
+					},
+				}
+
+				mockCreate.mockReturnValue({
+					withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+				})
+
+				const metadata = {
+					tools: [
+						{
+							type: "function",
+							function: { name: "read_file", description: "Read a file", parameters: {} },
+						},
+					],
+					toolProtocol: TOOL_PROTOCOL.NATIVE,
+				}
+
+				const generator = handler.createMessage(systemPrompt, messages, metadata as any)
+				for await (const _chunk of generator) {
+					// Consume
+				}
+
+				// Verify that thought_signature was NOT injected for non-Gemini model
+				const createCall = mockCreate.mock.calls[0][0]
+				const assistantMessage = createCall.messages.find(
+					(msg: any) => msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0,
+				)
+
+				expect(assistantMessage).toBeDefined()
+				// Tool calls should not have provider_specific_fields added
+				expect(assistantMessage.tool_calls[0].provider_specific_fields).toBeUndefined()
+			})
+		})
+	})
 })

+ 82 - 6
src/api/providers/lite-llm.ts

@@ -9,6 +9,7 @@ import { ApiHandlerOptions } from "../../shared/api"
 
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { convertToOpenAiMessages } from "../transform/openai-format"
+import { resolveToolProtocol } from "../../utils/resolveToolProtocol"
 
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 import { RouterProvider } from "./router-provider"
@@ -38,6 +39,70 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 		return /\bgpt-?5(?!\d)/i.test(modelId)
 	}
 
+	/**
+	 * Detect if the model is a Gemini model that requires thought signature handling.
+	 * Gemini 3 models validate thought signatures for tool/function calling steps.
+	 */
+	private isGeminiModel(modelId: string): boolean {
+		// Match various Gemini model patterns:
+		// - gemini-3-pro, gemini-3-flash, gemini-3-*
+		// - gemini/gemini-3-*, google/gemini-3-*
+		// - vertex_ai/gemini-3-*, vertex/gemini-3-*
+	// Also match Gemini 2.5 models, which use similar validation
+		const lowerModelId = modelId.toLowerCase()
+		return (
+			lowerModelId.includes("gemini-3") ||
+			lowerModelId.includes("gemini-2.5") ||
+			// Also match provider-prefixed versions
+			/\b(gemini|google|vertex_ai|vertex)\/gemini-(3|2\.5)/i.test(modelId)
+		)
+	}
+
+	/**
+	 * Inject thought signatures for Gemini models via provider_specific_fields.
+	 * This is required when switching from other models to Gemini to satisfy API validation
+	 * for function calls that weren't generated by Gemini (and thus lack thought signatures).
+	 *
+	 * Per LiteLLM documentation:
+	 * - Thought signatures are stored in provider_specific_fields.thought_signature of tool calls
+	 * - The dummy signature base64("skip_thought_signature_validator") bypasses validation
+	 *
+	 * We inject the dummy signature on EVERY tool call unconditionally to ensure Gemini
+	 * doesn't complain about missing/corrupted signatures when conversation history
+	 * contains tool calls from other models (like Claude).
+	 */
+	private injectThoughtSignatureForGemini(
+		openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[],
+	): OpenAI.Chat.ChatCompletionMessageParam[] {
+		// Base64 encoded "skip_thought_signature_validator" as per LiteLLM docs
+		const dummySignature = Buffer.from("skip_thought_signature_validator").toString("base64")
+
+		return openAiMessages.map((msg) => {
+			if (msg.role === "assistant") {
+				const toolCalls = (msg as any).tool_calls as any[] | undefined
+
+				// Only process if there are tool calls
+				if (toolCalls && toolCalls.length > 0) {
+					// Inject dummy signature into ALL tool calls' provider_specific_fields
+					// This ensures Gemini doesn't reject tool calls from other models
+					const updatedToolCalls = toolCalls.map((tc) => ({
+						...tc,
+						provider_specific_fields: {
+							...(tc.provider_specific_fields || {}),
+							thought_signature: dummySignature,
+						},
+					}))
+
+					return {
+						...msg,
+						tool_calls: updatedToolCalls,
+					}
+				}
+			}
+			return msg
+		})
+	}
+
 	override async *createMessage(
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
@@ -116,17 +181,28 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 		// Check if this is a GPT-5 model that requires max_completion_tokens instead of max_tokens
 		const isGPT5Model = this.isGpt5(modelId)
 
+		// Resolve tool protocol - use metadata's locked protocol if provided, otherwise resolve from options
+		const toolProtocol = resolveToolProtocol(this.options, info, metadata?.toolProtocol)
+		const isNativeProtocol = toolProtocol === TOOL_PROTOCOL.NATIVE
+
 		// Check if model supports native tools and tools are provided with native protocol
 		const supportsNativeTools = info.supportsNativeTools ?? false
-		const useNativeTools =
-			supportsNativeTools &&
-			metadata?.tools &&
-			metadata.tools.length > 0 &&
-			metadata?.toolProtocol === TOOL_PROTOCOL.NATIVE
+		const useNativeTools = supportsNativeTools && metadata?.tools && metadata.tools.length > 0 && isNativeProtocol
+
+		// For Gemini models with native protocol: inject dummy thought signatures on tool calls
+		// This is required when switching from other models to Gemini to satisfy API validation.
+		// Gemini 3 models validate thought signatures for function calls, and when conversation
+		// history contains tool calls from other models (like Claude), they lack the required
+		// signatures. The "skip_thought_signature_validator" value bypasses this validation.
+		const isGemini = this.isGeminiModel(modelId)
+		let processedMessages = enhancedMessages
+		if (isNativeProtocol && isGemini) {
+			processedMessages = this.injectThoughtSignatureForGemini(enhancedMessages)
+		}
 
 		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
 			model: modelId,
-			messages: [systemMessage, ...enhancedMessages],
+			messages: [systemMessage, ...processedMessages],
 			stream: true,
 			stream_options: {
 				include_usage: true,