// zai-format.ts
import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"

// Short local aliases for the OpenAI chat-completion param types used throughout
// this module, plus the Anthropic message param type being converted from.
type ContentPartText = OpenAI.Chat.ChatCompletionContentPartText
type ContentPartImage = OpenAI.Chat.ChatCompletionContentPartImage
type UserMessage = OpenAI.Chat.ChatCompletionUserMessageParam
type AssistantMessage = OpenAI.Chat.ChatCompletionAssistantMessageParam
type SystemMessage = OpenAI.Chat.ChatCompletionSystemMessageParam
type ToolMessage = OpenAI.Chat.ChatCompletionToolMessageParam
type Message = OpenAI.Chat.ChatCompletionMessageParam
type AnthropicMessage = Anthropic.Messages.MessageParam

/**
 * Extended assistant message type to support Z.ai's interleaved thinking.
 * Z.ai's API returns reasoning_content alongside content and tool_calls,
 * and requires it to be passed back in subsequent requests for preserved thinking.
 */
export type ZAiAssistantMessage = AssistantMessage & {
	reasoning_content?: string
}
  19. /**
  20. * Converts Anthropic messages to OpenAI format optimized for Z.ai's GLM-4.7 thinking mode.
  21. *
  22. * Key differences from standard OpenAI format:
  23. * - Preserves reasoning_content on assistant messages for interleaved thinking
  24. * - Text content after tool_results (like environment_details) is merged into the last tool message
  25. * to avoid creating user messages that would cause reasoning_content to be dropped
  26. *
  27. * @param messages Array of Anthropic messages
  28. * @param options Optional configuration for message conversion
  29. * @param options.mergeToolResultText If true, merge text content after tool_results into the last
  30. * tool message instead of creating a separate user message.
  31. * This is critical for Z.ai's interleaved thinking mode.
  32. * @returns Array of OpenAI messages optimized for Z.ai's thinking mode
  33. */
  34. export function convertToZAiFormat(
  35. messages: AnthropicMessage[],
  36. options?: { mergeToolResultText?: boolean },
  37. ): Message[] {
  38. const result: Message[] = []
  39. for (const message of messages) {
  40. // Check if the message has reasoning_content (for Z.ai interleaved thinking)
  41. const messageWithReasoning = message as AnthropicMessage & { reasoning_content?: string }
  42. const reasoningContent = messageWithReasoning.reasoning_content
  43. if (message.role === "user") {
  44. // Handle user messages - may contain tool_result blocks
  45. if (Array.isArray(message.content)) {
  46. const textParts: string[] = []
  47. const imageParts: ContentPartImage[] = []
  48. const toolResults: { tool_use_id: string; content: string }[] = []
  49. for (const part of message.content) {
  50. if (part.type === "text") {
  51. textParts.push(part.text)
  52. } else if (part.type === "image") {
  53. imageParts.push({
  54. type: "image_url",
  55. image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
  56. })
  57. } else if (part.type === "tool_result") {
  58. // Convert tool_result to OpenAI tool message format
  59. let content: string
  60. if (typeof part.content === "string") {
  61. content = part.content
  62. } else if (Array.isArray(part.content)) {
  63. content =
  64. part.content
  65. ?.map((c) => {
  66. if (c.type === "text") return c.text
  67. if (c.type === "image") return "(image)"
  68. return ""
  69. })
  70. .join("\n") ?? ""
  71. } else {
  72. content = ""
  73. }
  74. toolResults.push({
  75. tool_use_id: part.tool_use_id,
  76. content,
  77. })
  78. }
  79. }
  80. // Add tool messages first (they must follow assistant tool_use)
  81. for (const toolResult of toolResults) {
  82. const toolMessage: ToolMessage = {
  83. role: "tool",
  84. tool_call_id: toolResult.tool_use_id,
  85. content: toolResult.content,
  86. }
  87. result.push(toolMessage)
  88. }
  89. // Handle text/image content after tool results
  90. if (textParts.length > 0 || imageParts.length > 0) {
  91. // For Z.ai interleaved thinking: when mergeToolResultText is enabled and we have
  92. // tool results followed by text, merge the text into the last tool message to avoid
  93. // creating a user message that would cause reasoning_content to be dropped.
  94. // This is critical because Z.ai drops all reasoning_content when it sees a user message.
  95. const shouldMergeIntoToolMessage =
  96. options?.mergeToolResultText && toolResults.length > 0 && imageParts.length === 0
  97. if (shouldMergeIntoToolMessage) {
  98. // Merge text content into the last tool message
  99. const lastToolMessage = result[result.length - 1] as ToolMessage
  100. if (lastToolMessage?.role === "tool") {
  101. const additionalText = textParts.join("\n")
  102. lastToolMessage.content = `${lastToolMessage.content}\n\n${additionalText}`
  103. }
  104. } else {
  105. // Standard behavior: add user message with text/image content
  106. let content: UserMessage["content"]
  107. if (imageParts.length > 0) {
  108. const parts: (ContentPartText | ContentPartImage)[] = []
  109. if (textParts.length > 0) {
  110. parts.push({ type: "text", text: textParts.join("\n") })
  111. }
  112. parts.push(...imageParts)
  113. content = parts
  114. } else {
  115. content = textParts.join("\n")
  116. }
  117. // Check if we can merge with the last message
  118. const lastMessage = result[result.length - 1]
  119. if (lastMessage?.role === "user") {
  120. // Merge with existing user message
  121. if (typeof lastMessage.content === "string" && typeof content === "string") {
  122. lastMessage.content += `\n${content}`
  123. } else {
  124. const lastContent = Array.isArray(lastMessage.content)
  125. ? lastMessage.content
  126. : [{ type: "text" as const, text: lastMessage.content || "" }]
  127. const newContent = Array.isArray(content)
  128. ? content
  129. : [{ type: "text" as const, text: content }]
  130. lastMessage.content = [...lastContent, ...newContent] as UserMessage["content"]
  131. }
  132. } else {
  133. result.push({ role: "user", content })
  134. }
  135. }
  136. }
  137. } else {
  138. // Simple string content
  139. const lastMessage = result[result.length - 1]
  140. if (lastMessage?.role === "user") {
  141. if (typeof lastMessage.content === "string") {
  142. lastMessage.content += `\n${message.content}`
  143. } else {
  144. ;(lastMessage.content as (ContentPartText | ContentPartImage)[]).push({
  145. type: "text",
  146. text: message.content,
  147. })
  148. }
  149. } else {
  150. result.push({ role: "user", content: message.content })
  151. }
  152. }
  153. } else if (message.role === "assistant") {
  154. // Handle assistant messages - may contain tool_use blocks and reasoning blocks
  155. if (Array.isArray(message.content)) {
  156. const textParts: string[] = []
  157. const toolCalls: OpenAI.Chat.ChatCompletionMessageToolCall[] = []
  158. let extractedReasoning: string | undefined
  159. for (const part of message.content) {
  160. if (part.type === "text") {
  161. textParts.push(part.text)
  162. } else if (part.type === "tool_use") {
  163. toolCalls.push({
  164. id: part.id,
  165. type: "function",
  166. function: {
  167. name: part.name,
  168. arguments: JSON.stringify(part.input),
  169. },
  170. })
  171. } else if ((part as any).type === "reasoning" && (part as any).text) {
  172. // Extract reasoning from content blocks (Task stores it this way)
  173. extractedReasoning = (part as any).text
  174. }
  175. }
  176. // Use reasoning from content blocks if not provided at top level
  177. const finalReasoning = reasoningContent || extractedReasoning
  178. const assistantMessage: ZAiAssistantMessage = {
  179. role: "assistant",
  180. content: textParts.length > 0 ? textParts.join("\n") : null,
  181. ...(toolCalls.length > 0 && { tool_calls: toolCalls }),
  182. // Preserve reasoning_content for Z.ai interleaved thinking
  183. ...(finalReasoning && { reasoning_content: finalReasoning }),
  184. }
  185. // Check if we can merge with the last message (only if no tool calls)
  186. const lastMessage = result[result.length - 1]
  187. if (lastMessage?.role === "assistant" && !toolCalls.length && !(lastMessage as any).tool_calls) {
  188. // Merge text content
  189. if (typeof lastMessage.content === "string" && typeof assistantMessage.content === "string") {
  190. lastMessage.content += `\n${assistantMessage.content}`
  191. } else if (assistantMessage.content) {
  192. const lastContent = lastMessage.content || ""
  193. lastMessage.content = `${lastContent}\n${assistantMessage.content}`
  194. }
  195. // Preserve reasoning_content from the new message if present
  196. if (finalReasoning) {
  197. ;(lastMessage as ZAiAssistantMessage).reasoning_content = finalReasoning
  198. }
  199. } else {
  200. result.push(assistantMessage)
  201. }
  202. } else {
  203. // Simple string content
  204. const lastMessage = result[result.length - 1]
  205. if (lastMessage?.role === "assistant" && !(lastMessage as any).tool_calls) {
  206. if (typeof lastMessage.content === "string") {
  207. lastMessage.content += `\n${message.content}`
  208. } else {
  209. lastMessage.content = message.content
  210. }
  211. // Preserve reasoning_content from the new message if present
  212. if (reasoningContent) {
  213. ;(lastMessage as ZAiAssistantMessage).reasoning_content = reasoningContent
  214. }
  215. } else {
  216. const assistantMessage: ZAiAssistantMessage = {
  217. role: "assistant",
  218. content: message.content,
  219. ...(reasoningContent && { reasoning_content: reasoningContent }),
  220. }
  221. result.push(assistantMessage)
  222. }
  223. }
  224. }
  225. }
  226. return result
  227. }