Kaynağa Gözat

Merge pull request #606 from Szpadel/extend-r1-support

Extend deepseek-r1 support
Matt Rubens 1 yıl önce
ebeveyn
işleme
f07109b686

+ 22 - 16
src/api/providers/openai.ts

@@ -1,5 +1,6 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI, { AzureOpenAI } from "openai"
+
 import {
 	ApiHandlerOptions,
 	azureOpenAiDefaultApiVersion,
@@ -8,6 +9,7 @@ import {
 } from "../../shared/api"
 import { ApiHandler, SingleCompletionHandler } from "../index"
 import { convertToOpenAiMessages } from "../transform/openai-format"
+import { convertToR1Format } from "../transform/r1-format"
 import { ApiStream } from "../transform/stream"
 
 export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
@@ -16,7 +18,8 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 
 	constructor(options: ApiHandlerOptions) {
 		this.options = options
-		// Azure API shape slightly differs from the core API shape: https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai
+		// Azure API shape slightly differs from the core API shape:
+		// https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai
 		const urlHost = new URL(this.options.openAiBaseUrl ?? "").host
 		if (urlHost === "azure.com" || urlHost.endsWith(".azure.com") || options.openAiUseAzure) {
 			this.client = new AzureOpenAI({
@@ -38,7 +41,7 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 
 		const deepseekReasoner = modelId.includes("deepseek-reasoner")
 
-		if (!deepseekReasoner && (this.options.openAiStreamingEnabled ?? true)) {
+		if (this.options.openAiStreamingEnabled ?? true) {
 			const systemMessage: OpenAI.Chat.ChatCompletionSystemMessageParam = {
 				role: "system",
 				content: systemPrompt,
@@ -46,7 +49,9 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
 				model: modelId,
 				temperature: 0,
-				messages: [systemMessage, ...convertToOpenAiMessages(messages)],
+				messages: deepseekReasoner
+					? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
+					: [systemMessage, ...convertToOpenAiMessages(messages)],
 				stream: true as const,
 				stream_options: { include_usage: true },
 			}
@@ -64,6 +69,12 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 						text: delta.content,
 					}
 				}
+				if ("reasoning_content" in delta && delta.reasoning_content) {
+					yield {
+						type: "reasoning",
+						text: (delta.reasoning_content as string | undefined) || "",
+					}
+				}
 				if (chunk.usage) {
 					yield {
 						type: "usage",
@@ -73,24 +84,19 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 				}
 			}
 		} else {
-			let systemMessage: OpenAI.Chat.ChatCompletionUserMessageParam | OpenAI.Chat.ChatCompletionSystemMessageParam
-
 			// o1 for instance doesn't support streaming, non-1 temp, or system prompt
-			// deepseek reasoner supports system prompt
-			systemMessage = deepseekReasoner
-				? {
-						role: "system",
-						content: systemPrompt,
-					}
-				: {
-						role: "user",
-						content: systemPrompt,
-					}
+			const systemMessage: OpenAI.Chat.ChatCompletionUserMessageParam = {
+				role: "user",
+				content: systemPrompt,
+			}
 
 			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
 				model: modelId,
-				messages: [systemMessage, ...convertToOpenAiMessages(messages)],
+				messages: deepseekReasoner
+					? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
+					: [systemMessage, ...convertToOpenAiMessages(messages)],
 			}
+
 			const response = await this.client.chat.completions.create(requestOptions)
 
 			yield {

+ 5 - 1
src/api/providers/openrouter.ts

@@ -19,6 +19,7 @@ interface OpenRouterApiStreamUsageChunk extends ApiStreamUsageChunk {
 }
 
 import { SingleCompletionHandler } from ".."
+import { convertToR1Format } from "../transform/r1-format"
 
 export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 	private options: ApiHandlerOptions
@@ -41,7 +42,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 		messages: Anthropic.Messages.MessageParam[],
 	): AsyncGenerator<ApiStreamChunk> {
 		// Convert Anthropic messages to OpenAI format
-		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
 			...convertToOpenAiMessages(messages),
 		]
@@ -117,6 +118,9 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 			case "deepseek/deepseek-r1":
 				// Recommended temperature for DeepSeek reasoning models
 				temperature = 0.6
+				// DeepSeek highly recommends using user instead of system role
+				openAiMessages[0].role = "user"
+				openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
 		}
 
 		// https://openrouter.ai/docs/transforms

+ 180 - 0
src/api/transform/__tests__/r1-format.test.ts

@@ -0,0 +1,180 @@
+import { convertToR1Format } from "../r1-format"
+import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
+
+describe("convertToR1Format", () => {
+	it("should convert basic text messages", () => {
+		const input: Anthropic.Messages.MessageParam[] = [
+			{ role: "user", content: "Hello" },
+			{ role: "assistant", content: "Hi there" },
+		]
+
+		const expected: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "user", content: "Hello" },
+			{ role: "assistant", content: "Hi there" },
+		]
+
+		expect(convertToR1Format(input)).toEqual(expected)
+	})
+
+	it("should merge consecutive messages with same role", () => {
+		const input: Anthropic.Messages.MessageParam[] = [
+			{ role: "user", content: "Hello" },
+			{ role: "user", content: "How are you?" },
+			{ role: "assistant", content: "Hi!" },
+			{ role: "assistant", content: "I'm doing well" },
+		]
+
+		const expected: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "user", content: "Hello\nHow are you?" },
+			{ role: "assistant", content: "Hi!\nI'm doing well" },
+		]
+
+		expect(convertToR1Format(input)).toEqual(expected)
+	})
+
+	it("should handle image content", () => {
+		const input: Anthropic.Messages.MessageParam[] = [
+			{
+				role: "user",
+				content: [
+					{
+						type: "image",
+						source: {
+							type: "base64",
+							media_type: "image/jpeg",
+							data: "base64data",
+						},
+					},
+				],
+			},
+		]
+
+		const expected: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{
+				role: "user",
+				content: [
+					{
+						type: "image_url",
+						image_url: {
+							url: "data:image/jpeg;base64,base64data",
+						},
+					},
+				],
+			},
+		]
+
+		expect(convertToR1Format(input)).toEqual(expected)
+	})
+
+	it("should handle mixed text and image content", () => {
+		const input: Anthropic.Messages.MessageParam[] = [
+			{
+				role: "user",
+				content: [
+					{ type: "text", text: "Check this image:" },
+					{
+						type: "image",
+						source: {
+							type: "base64",
+							media_type: "image/jpeg",
+							data: "base64data",
+						},
+					},
+				],
+			},
+		]
+
+		const expected: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{
+				role: "user",
+				content: [
+					{ type: "text", text: "Check this image:" },
+					{
+						type: "image_url",
+						image_url: {
+							url: "data:image/jpeg;base64,base64data",
+						},
+					},
+				],
+			},
+		]
+
+		expect(convertToR1Format(input)).toEqual(expected)
+	})
+
+	it("should merge mixed content messages with same role", () => {
+		const input: Anthropic.Messages.MessageParam[] = [
+			{
+				role: "user",
+				content: [
+					{ type: "text", text: "First image:" },
+					{
+						type: "image",
+						source: {
+							type: "base64",
+							media_type: "image/jpeg",
+							data: "image1",
+						},
+					},
+				],
+			},
+			{
+				role: "user",
+				content: [
+					{ type: "text", text: "Second image:" },
+					{
+						type: "image",
+						source: {
+							type: "base64",
+							media_type: "image/png",
+							data: "image2",
+						},
+					},
+				],
+			},
+		]
+
+		const expected: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{
+				role: "user",
+				content: [
+					{ type: "text", text: "First image:" },
+					{
+						type: "image_url",
+						image_url: {
+							url: "data:image/jpeg;base64,image1",
+						},
+					},
+					{ type: "text", text: "Second image:" },
+					{
+						type: "image_url",
+						image_url: {
+							url: "data:image/png;base64,image2",
+						},
+					},
+				],
+			},
+		]
+
+		expect(convertToR1Format(input)).toEqual(expected)
+	})
+
+	it("should handle empty messages array", () => {
+		expect(convertToR1Format([])).toEqual([])
+	})
+
+	it("should handle messages with empty content", () => {
+		const input: Anthropic.Messages.MessageParam[] = [
+			{ role: "user", content: "" },
+			{ role: "assistant", content: "" },
+		]
+
+		const expected: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "user", content: "" },
+			{ role: "assistant", content: "" },
+		]
+
+		expect(convertToR1Format(input)).toEqual(expected)
+	})
+})

+ 98 - 0
src/api/transform/r1-format.ts

@@ -0,0 +1,98 @@
+import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
+
+type ContentPartText = OpenAI.Chat.ChatCompletionContentPartText
+type ContentPartImage = OpenAI.Chat.ChatCompletionContentPartImage
+type UserMessage = OpenAI.Chat.ChatCompletionUserMessageParam
+type AssistantMessage = OpenAI.Chat.ChatCompletionAssistantMessageParam
+type Message = OpenAI.Chat.ChatCompletionMessageParam
+type AnthropicMessage = Anthropic.Messages.MessageParam
+
+/**
+ * Converts Anthropic messages to OpenAI format while merging consecutive messages with the same role.
+ * This is required for DeepSeek Reasoner which does not support successive messages with the same role.
+ *
+ * @param messages Array of Anthropic messages
+ * @returns Array of OpenAI messages where consecutive messages with the same role are combined
+ */
+export function convertToR1Format(messages: AnthropicMessage[]): Message[] {
+	return messages.reduce<Message[]>((merged, message) => {
+		const lastMessage = merged[merged.length - 1]
+		let messageContent: string | (ContentPartText | ContentPartImage)[] = ""
+		let hasImages = false
+
+		// Convert content to appropriate format
+		if (Array.isArray(message.content)) {
+			const textParts: string[] = []
+			const imageParts: ContentPartImage[] = []
+
+			message.content.forEach((part) => {
+				if (part.type === "text") {
+					textParts.push(part.text)
+				}
+				if (part.type === "image") {
+					hasImages = true
+					imageParts.push({
+						type: "image_url",
+						image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
+					})
+				}
+			})
+
+			if (hasImages) {
+				const parts: (ContentPartText | ContentPartImage)[] = []
+				if (textParts.length > 0) {
+					parts.push({ type: "text", text: textParts.join("\n") })
+				}
+				parts.push(...imageParts)
+				messageContent = parts
+			} else {
+				messageContent = textParts.join("\n")
+			}
+		} else {
+			messageContent = message.content
+		}
+
+		// If last message has same role, merge the content
+		if (lastMessage?.role === message.role) {
+			if (typeof lastMessage.content === "string" && typeof messageContent === "string") {
+				lastMessage.content += `\n${messageContent}`
+			}
+			// If either has image content, convert both to array format
+			else {
+				const lastContent = Array.isArray(lastMessage.content)
+					? lastMessage.content
+					: [{ type: "text" as const, text: lastMessage.content || "" }]
+
+				const newContent = Array.isArray(messageContent)
+					? messageContent
+					: [{ type: "text" as const, text: messageContent }]
+
+				if (message.role === "assistant") {
+					const mergedContent = [...lastContent, ...newContent] as AssistantMessage["content"]
+					lastMessage.content = mergedContent
+				} else {
+					const mergedContent = [...lastContent, ...newContent] as UserMessage["content"]
+					lastMessage.content = mergedContent
+				}
+			}
+		} else {
+			// Add as new message with the correct type based on role
+			if (message.role === "assistant") {
+				const newMessage: AssistantMessage = {
+					role: "assistant",
+					content: messageContent as AssistantMessage["content"],
+				}
+				merged.push(newMessage)
+			} else {
+				const newMessage: UserMessage = {
+					role: "user",
+					content: messageContent as UserMessage["content"],
+				}
+				merged.push(newMessage)
+			}
+		}
+
+		return merged
+	}, [])
+}

+ 4 - 0
src/core/Cline.ts

@@ -2391,6 +2391,10 @@ export class Cline {
 			let reasoningMessage = ""
 			try {
 				for await (const chunk of stream) {
+					if (!chunk) {
+					// Sometimes chunk is undefined, no idea what can cause it, but this workaround seems to fix it
+						continue
+					}
 					switch (chunk.type) {
 						case "reasoning":
 							reasoningMessage += chunk.text