fix(deepseek): preserve reasoning_content during tool call sequences (#10141)

Co-authored-by: Roo Code <[email protected]>
Hannes Rudolph 3 weeks ago
parent commit 970deead5b

+ 7 - 1
src/api/providers/deepseek.ts

@@ -54,7 +54,13 @@ export class DeepSeekHandler extends OpenAiHandler {
 
 		// Convert messages to R1 format (merges consecutive same-role messages)
 		// This is required for DeepSeek which does not support successive messages with the same role
-		const convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
+		// For thinking models (deepseek-reasoner), enable mergeToolResultText to preserve reasoning_content
+		// during tool call sequences. Without this, environment_details text after tool_results would
+		// create user messages that cause DeepSeek to drop all previous reasoning_content.
+		// See: https://api-docs.deepseek.com/guides/thinking_mode
+		const convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages], {
+			mergeToolResultText: isThinkingModel,
+		})
 
 		const requestOptions: DeepSeekChatCompletionParams = {
 			model: modelId,
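
For context, a hedged sketch of the converted history this produces for a thinking model after a tool call; only the merged tool message shape follows from the change above, and every concrete value (prompt text, tool name, reasoning text) is invented for the example:

// Sketch only: the R1-converted history for a deepseek-reasoner turn that used a tool.
// All values are illustrative; the shapes mirror the types returned by convertToR1Format
// and the assertions in the tests below.
const exampleConvertedMessages = [
	// The system prompt is passed through as a user turn, as in the handler above.
	{ role: "user", content: "You are Roo Code..." },
	{
		role: "assistant",
		content: "", // assumed empty when the turn is tool calls only
		reasoning_content: "I should read the file before answering...",
		tool_calls: [{ id: "call_123", type: "function", function: { name: "read_file", arguments: "{}" } }],
	},
	{
		role: "tool",
		tool_call_id: "call_123",
		// With mergeToolResultText, environment_details is appended here instead of
		// becoming a user message, so the reasoning_content above is not discarded.
		content: "file contents...\n\n<environment_details>...</environment_details>",
	},
]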

+ 221 - 0
src/api/transform/__tests__/r1-format.spec.ts

@@ -394,5 +394,226 @@ describe("convertToR1Format", () => {
 				content: "Follow up response",
 			})
 		})
+
+		describe("mergeToolResultText option for DeepSeek interleaved thinking", () => {
+			it("should merge text content into last tool message when mergeToolResultText is true", () => {
+				const input: Anthropic.Messages.MessageParam[] = [
+					{
+						role: "user",
+						content: [
+							{
+								type: "tool_result",
+								tool_use_id: "call_123",
+								content: "Tool result content",
+							},
+							{
+								type: "text",
+								text: "<environment_details>\nSome context\n</environment_details>",
+							},
+						],
+					},
+				]
+
+				const result = convertToR1Format(input, { mergeToolResultText: true })
+
+				// Should produce only one tool message with merged content
+				expect(result).toHaveLength(1)
+				expect(result[0]).toEqual({
+					role: "tool",
+					tool_call_id: "call_123",
+					content: "Tool result content\n\n<environment_details>\nSome context\n</environment_details>",
+				})
+			})
+
+			it("should NOT merge text when mergeToolResultText is false (default behavior)", () => {
+				const input: Anthropic.Messages.MessageParam[] = [
+					{
+						role: "user",
+						content: [
+							{
+								type: "tool_result",
+								tool_use_id: "call_123",
+								content: "Tool result content",
+							},
+							{
+								type: "text",
+								text: "Please continue",
+							},
+						],
+					},
+				]
+
+				// Without option (default behavior)
+				const result = convertToR1Format(input)
+
+				// Should produce two messages: tool message + user message
+				expect(result).toHaveLength(2)
+				expect(result[0]).toEqual({
+					role: "tool",
+					tool_call_id: "call_123",
+					content: "Tool result content",
+				})
+				expect(result[1]).toEqual({
+					role: "user",
+					content: "Please continue",
+				})
+			})
+
+			it("should merge text into last tool message when multiple tool results exist", () => {
+				const input: Anthropic.Messages.MessageParam[] = [
+					{
+						role: "user",
+						content: [
+							{
+								type: "tool_result",
+								tool_use_id: "call_1",
+								content: "First result",
+							},
+							{
+								type: "tool_result",
+								tool_use_id: "call_2",
+								content: "Second result",
+							},
+							{
+								type: "text",
+								text: "<environment_details>Context</environment_details>",
+							},
+						],
+					},
+				]
+
+				const result = convertToR1Format(input, { mergeToolResultText: true })
+
+				// Should produce two tool messages, with text merged into the last one
+				expect(result).toHaveLength(2)
+				expect(result[0]).toEqual({
+					role: "tool",
+					tool_call_id: "call_1",
+					content: "First result",
+				})
+				expect(result[1]).toEqual({
+					role: "tool",
+					tool_call_id: "call_2",
+					content: "Second result\n\n<environment_details>Context</environment_details>",
+				})
+			})
+
+			it("should NOT merge when there are images (images need user message)", () => {
+				const input: Anthropic.Messages.MessageParam[] = [
+					{
+						role: "user",
+						content: [
+							{
+								type: "tool_result",
+								tool_use_id: "call_123",
+								content: "Tool result",
+							},
+							{
+								type: "text",
+								text: "Check this image",
+							},
+							{
+								type: "image",
+								source: {
+									type: "base64",
+									media_type: "image/jpeg",
+									data: "imagedata",
+								},
+							},
+						],
+					},
+				]
+
+				const result = convertToR1Format(input, { mergeToolResultText: true })
+
+				// Should produce tool message + user message with image
+				expect(result).toHaveLength(2)
+				expect(result[0]).toEqual({
+					role: "tool",
+					tool_call_id: "call_123",
+					content: "Tool result",
+				})
+				expect(result[1]).toMatchObject({
+					role: "user",
+					content: expect.arrayContaining([
+						{ type: "text", text: "Check this image" },
+						{ type: "image_url", image_url: expect.any(Object) },
+					]),
+				})
+			})
+
+			it("should NOT merge when there are no tool results (text-only should remain user message)", () => {
+				const input: Anthropic.Messages.MessageParam[] = [
+					{
+						role: "user",
+						content: [
+							{
+								type: "text",
+								text: "Just a regular message",
+							},
+						],
+					},
+				]
+
+				const result = convertToR1Format(input, { mergeToolResultText: true })
+
+				// Should produce user message as normal
+				expect(result).toHaveLength(1)
+				expect(result[0]).toEqual({
+					role: "user",
+					content: "Just a regular message",
+				})
+			})
+
+			it("should preserve reasoning_content on assistant messages in same conversation", () => {
+				const input = [
+					{ role: "user" as const, content: "Start" },
+					{
+						role: "assistant" as const,
+						content: [
+							{
+								type: "tool_use" as const,
+								id: "call_123",
+								name: "test_tool",
+								input: {},
+							},
+						],
+						reasoning_content: "Let me think about this...",
+					},
+					{
+						role: "user" as const,
+						content: [
+							{
+								type: "tool_result" as const,
+								tool_use_id: "call_123",
+								content: "Result",
+							},
+							{
+								type: "text" as const,
+								text: "<environment_details>Context</environment_details>",
+							},
+						],
+					},
+				]
+
+				const result = convertToR1Format(input as Anthropic.Messages.MessageParam[], {
+					mergeToolResultText: true,
+				})
+
+				// Should have: user, assistant (with reasoning + tool_calls), tool
+				expect(result).toHaveLength(3)
+				expect(result[0]).toEqual({ role: "user", content: "Start" })
+				expect((result[1] as any).reasoning_content).toBe("Let me think about this...")
+				expect((result[1] as any).tool_calls).toBeDefined()
+				// Tool message should have merged content
+				expect(result[2]).toEqual({
+					role: "tool",
+					tool_call_id: "call_123",
+					content: "Result\n\n<environment_details>Context</environment_details>",
+				})
+				// Most importantly: NO user message after tool message
+				expect(result.filter((m) => m.role === "user")).toHaveLength(1)
+			})
+		})
 	})
 })

+ 52 - 26
src/api/transform/r1-format.ts

@@ -26,11 +26,20 @@ export type DeepSeekAssistantMessage = AssistantMessage & {
  * - Preserves reasoning_content on assistant messages for tool call continuations
  * - Tool result messages are converted to OpenAI tool messages
  * - reasoning_content from previous assistant messages is preserved until a new user turn
+ * - When mergeToolResultText is enabled, text content after tool_results (like environment_details)
+ *   is merged into the last tool message instead of becoming a separate user message, which would
+ *   cause reasoning_content to be dropped
  *
  * @param messages Array of Anthropic messages
+ * @param options Optional configuration for message conversion
+ * @param options.mergeToolResultText If true, merge text content after tool_results into the last
+ *                                     tool message instead of creating a separate user message.
+ *                                     This is critical for DeepSeek's interleaved thinking mode.
  * @returns Array of OpenAI messages where consecutive messages with the same role are combined
  */
-export function convertToR1Format(messages: AnthropicMessage[]): Message[] {
+export function convertToR1Format(
+	messages: AnthropicMessage[],
+	options?: { mergeToolResultText?: boolean },
+): Message[] {
 	const result: Message[] = []
 
 	for (const message of messages) {
@@ -87,37 +96,54 @@ export function convertToR1Format(messages: AnthropicMessage[]): Message[] {
 					result.push(toolMessage)
 				}
 
-				// Then add user message with text/image content if any
+				// Handle text/image content after tool results
 				if (textParts.length > 0 || imageParts.length > 0) {
-					let content: UserMessage["content"]
-					if (imageParts.length > 0) {
-						const parts: (ContentPartText | ContentPartImage)[] = []
-						if (textParts.length > 0) {
-							parts.push({ type: "text", text: textParts.join("\n") })
+					// For DeepSeek interleaved thinking: when mergeToolResultText is enabled and we have
+					// tool results followed by text, merge the text into the last tool message to avoid
+					// creating a user message that would cause reasoning_content to be dropped.
+					// This is critical because DeepSeek drops all reasoning_content when it sees a user message.
+					const shouldMergeIntoToolMessage =
+						options?.mergeToolResultText && toolResults.length > 0 && imageParts.length === 0
+
+					if (shouldMergeIntoToolMessage) {
+						// Merge text content into the last tool message
+						const lastToolMessage = result[result.length - 1] as ToolMessage
+						if (lastToolMessage?.role === "tool") {
+							const additionalText = textParts.join("\n")
+							lastToolMessage.content = `${lastToolMessage.content}\n\n${additionalText}`
 						}
-						parts.push(...imageParts)
-						content = parts
 					} else {
-						content = textParts.join("\n")
-					}
+						// Standard behavior: add user message with text/image content
+						let content: UserMessage["content"]
+						if (imageParts.length > 0) {
+							const parts: (ContentPartText | ContentPartImage)[] = []
+							if (textParts.length > 0) {
+								parts.push({ type: "text", text: textParts.join("\n") })
+							}
+							parts.push(...imageParts)
+							content = parts
+						} else {
+							content = textParts.join("\n")
+						}
 
-					// Check if we can merge with the last message
-					const lastMessage = result[result.length - 1]
-					if (lastMessage?.role === "user") {
-						// Merge with existing user message
-						if (typeof lastMessage.content === "string" && typeof content === "string") {
-							lastMessage.content += `\n${content}`
+						// Check if we can merge with the last message
+						const lastMessage = result[result.length - 1]
+						if (lastMessage?.role === "user") {
+							// Merge with existing user message
+							if (typeof lastMessage.content === "string" && typeof content === "string") {
+								lastMessage.content += `\n${content}`
+							} else {
+								const lastContent = Array.isArray(lastMessage.content)
+									? lastMessage.content
+									: [{ type: "text" as const, text: lastMessage.content || "" }]
+								const newContent = Array.isArray(content)
+									? content
+									: [{ type: "text" as const, text: content }]
+								lastMessage.content = [...lastContent, ...newContent] as UserMessage["content"]
+							}
 						} else {
-							const lastContent = Array.isArray(lastMessage.content)
-								? lastMessage.content
-								: [{ type: "text" as const, text: lastMessage.content || "" }]
-							const newContent = Array.isArray(content)
-								? content
-								: [{ type: "text" as const, text: content }]
-							lastMessage.content = [...lastContent, ...newContent] as UserMessage["content"]
+							result.push({ role: "user", content })
 						}
-					} else {
-						result.push({ role: "user", content })
 					}
 				}
 			} else {
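
As a usage note, a minimal sketch of how a caller exercises the new option; the input history and import paths below are assumptions for illustration and simply mirror the spec above:

// Minimal usage sketch; import paths are assumed for the example.
import { Anthropic } from "@anthropic-ai/sdk"
import { convertToR1Format } from "./r1-format"

const history: Anthropic.Messages.MessageParam[] = [
	{
		role: "user",
		content: [
			{ type: "tool_result", tool_use_id: "call_1", content: "ok" },
			{ type: "text", text: "<environment_details>cwd: /repo</environment_details>" },
		],
	},
]

// Default: produces a tool message plus a trailing user message (pre-existing behavior).
const plain = convertToR1Format(history)

// Thinking models: the trailing text is appended to the last tool message, so no user
// turn is emitted and any earlier reasoning_content survives the continuation.
const merged = convertToR1Format(history, { mergeToolResultText: true })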