// openrouter.ts
import { Anthropic } from "@anthropic-ai/sdk"
import axios from "axios"
import OpenAI from "openai"
import { ApiHandler, SingleCompletionHandler } from "../"
import { ApiHandlerOptions, ModelInfo, openRouterDefaultModelId, openRouterDefaultModelInfo } from "../../shared/api"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { ApiStreamChunk, ApiStreamUsageChunk } from "../transform/stream"
import delay from "delay"

// Custom request-params type for OpenRouter-specific fields not in the OpenAI SDK types
type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
	transforms?: string[]
	include_reasoning?: boolean
}

// Custom usage chunk that also carries the full response text
interface OpenRouterApiStreamUsageChunk extends ApiStreamUsageChunk {
	fullResponseText: string
}
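
// Handler for the OpenRouter API (https://openrouter.ai), which exposes many upstream models behind a single OpenAI-compatible endpoint.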
export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
	private options: ApiHandlerOptions
	private client: OpenAI

	constructor(options: ApiHandlerOptions) {
		this.options = options
		this.client = new OpenAI({
			baseURL: "https://openrouter.ai/api/v1",
			apiKey: this.options.openRouterApiKey,
			defaultHeaders: {
				"HTTP-Referer": "https://github.com/RooVetGit/Roo-Cline",
				"X-Title": "Roo Code",
			},
		})
	}
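
	// Streams a chat completion: converts the Anthropic-style history to OpenAI format,
	// applies model-specific tweaks (prompt caching, max_tokens, temperature), and yields
	// reasoning/text/usage chunks.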
	async *createMessage(
		systemPrompt: string,
		messages: Anthropic.Messages.MessageParam[],
	): AsyncGenerator<ApiStreamChunk> {
		// Convert Anthropic messages to OpenAI format
		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
			{ role: "system", content: systemPrompt },
			...convertToOpenAiMessages(messages),
		]

		// Prompt caching: https://openrouter.ai/docs/prompt-caching
		// This is specifically for Claude models (some other models may support prompt caching automatically, without these markers)
		switch (this.getModel().id) {
			case "anthropic/claude-3.5-sonnet":
			case "anthropic/claude-3.5-sonnet:beta":
			case "anthropic/claude-3.5-sonnet-20240620":
			case "anthropic/claude-3.5-sonnet-20240620:beta":
			case "anthropic/claude-3-5-haiku":
			case "anthropic/claude-3-5-haiku:beta":
			case "anthropic/claude-3-5-haiku-20241022":
			case "anthropic/claude-3-5-haiku-20241022:beta":
			case "anthropic/claude-3-haiku":
			case "anthropic/claude-3-haiku:beta":
			case "anthropic/claude-3-opus":
			case "anthropic/claude-3-opus:beta": {
				openAiMessages[0] = {
					role: "system",
					content: [
						{
							type: "text",
							text: systemPrompt,
							// @ts-ignore-next-line
							cache_control: { type: "ephemeral" },
						},
					],
				}
				// Add cache_control to the last two user messages.
				// (Note: this works because we only ever add one user message at a time; if we added multiple, we'd need to mark the user message before the last assistant message instead.)
				const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
				lastTwoUserMessages.forEach((msg) => {
					if (typeof msg.content === "string") {
						msg.content = [{ type: "text", text: msg.content }]
					}
					if (Array.isArray(msg.content)) {
						// NOTE: this is fine since environment details are always appended at the end. But if they weren't, and the user added an image_url part, this would pull a text part from before the image and move it to the end.
						let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
						if (!lastTextPart) {
							lastTextPart = { type: "text", text: "..." }
							msg.content.push(lastTextPart)
						}
						// @ts-ignore-next-line
						lastTextPart["cache_control"] = { type: "ephemeral" }
					}
				})
				break
			}
			default:
				break
		}
		// It's unclear how OpenRouter defaults max_tokens when no value is provided, but the Anthropic API requires this value, and since Anthropic offers both 4096 and 8192 variants we should ensure 8192.
		// (Models usually default to the maximum tokens allowed.)
		let maxTokens: number | undefined
		switch (this.getModel().id) {
			case "anthropic/claude-3.5-sonnet":
			case "anthropic/claude-3.5-sonnet:beta":
			case "anthropic/claude-3.5-sonnet-20240620":
			case "anthropic/claude-3.5-sonnet-20240620:beta":
			case "anthropic/claude-3-5-haiku":
			case "anthropic/claude-3-5-haiku:beta":
			case "anthropic/claude-3-5-haiku-20241022":
			case "anthropic/claude-3-5-haiku-20241022:beta":
				maxTokens = 8_192
				break
		}

		let temperature = 0
		switch (this.getModel().id) {
			case "deepseek/deepseek-r1":
				// Recommended temperature for DeepSeek reasoning models
				temperature = 0.6
				break
		}
		// https://openrouter.ai/docs/transforms
		let fullResponseText = ""
		const stream = await this.client.chat.completions.create({
			model: this.getModel().id,
			max_tokens: maxTokens,
			temperature: temperature,
			messages: openAiMessages,
			stream: true,
			include_reasoning: true,
			// Spread so the transforms field is only included when openRouterUseMiddleOutTransform is true.
			...(this.options.openRouterUseMiddleOutTransform && { transforms: ["middle-out"] }),
		} as OpenRouterChatCompletionParams)

		let genId: string | undefined

		for await (const chunk of stream as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>) {
			// OpenRouter returns an error object in the stream instead of the OpenAI SDK throwing an error
			if ("error" in chunk) {
				const error = chunk.error as { message?: string; code?: number }
				console.error(`OpenRouter API Error: ${error?.code} - ${error?.message}`)
				throw new Error(`OpenRouter API Error ${error?.code}: ${error?.message}`)
			}

			if (!genId && chunk.id) {
				genId = chunk.id
			}

			const delta = chunk.choices[0]?.delta
			// Guard against an undefined delta: the `in` operator throws on undefined
			if (delta && "reasoning" in delta && delta.reasoning) {
				yield {
					type: "reasoning",
					text: delta.reasoning,
				} as ApiStreamChunk
			}
			if (delta?.content) {
				fullResponseText += delta.content
				yield {
					type: "text",
					text: delta.content,
				} as ApiStreamChunk
			}
			// if (chunk.usage) {
			// 	yield {
			// 		type: "usage",
			// 		inputTokens: chunk.usage.prompt_tokens || 0,
			// 		outputTokens: chunk.usage.completion_tokens || 0,
			// 	}
			// }
		}
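
		// Usage is reported via the generation-details endpoint rather than the stream
		// (see the commented-out block above), since it includes OpenRouter's native
		// token counts and the total cost.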
		await delay(500) // FIXME: necessary delay to ensure generation endpoint is ready

		try {
			const response = await axios.get(`https://openrouter.ai/api/v1/generation?id=${genId}`, {
				headers: {
					Authorization: `Bearer ${this.options.openRouterApiKey}`,
				},
				timeout: 5_000, // this request hangs sometimes
			})

			const generation = response.data?.data
			console.log("OpenRouter generation details:", response.data)
			yield {
				type: "usage",
				// cacheWriteTokens: 0,
				// cacheReadTokens: 0,
				// The OpenRouter generation endpoint fails often, so fall back to 0
				inputTokens: generation?.native_tokens_prompt || 0,
				outputTokens: generation?.native_tokens_completion || 0,
				totalCost: generation?.total_cost || 0,
				fullResponseText,
			} as OpenRouterApiStreamUsageChunk
		} catch (error) {
			// Ignore failures here; usage reporting is best-effort
			console.error("Error fetching OpenRouter generation details:", error)
		}
	}
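
	// Returns the user-configured model when both id and info are present; otherwise falls back to the OpenRouter default model.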
	getModel(): { id: string; info: ModelInfo } {
		const modelId = this.options.openRouterModelId
		const modelInfo = this.options.openRouterModelInfo
		if (modelId && modelInfo) {
			return { id: modelId, info: modelInfo }
		}
		return { id: openRouterDefaultModelId, info: openRouterDefaultModelInfo }
	}
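
	// Non-streaming, single-turn completion for SingleCompletionHandler.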
	async completePrompt(prompt: string): Promise<string> {
		try {
			const response = await this.client.chat.completions.create({
				model: this.getModel().id,
				messages: [{ role: "user", content: prompt }],
				temperature: 0,
				stream: false,
			})

			// As in the streaming path, OpenRouter can return an error object in the response body
			if ("error" in response) {
				const error = response.error as { message?: string; code?: number }
				throw new Error(`OpenRouter API Error ${error?.code}: ${error?.message}`)
			}

			const completion = response as OpenAI.Chat.ChatCompletion
			return completion.choices[0]?.message?.content || ""
		} catch (error) {
			if (error instanceof Error) {
				throw new Error(`OpenRouter completion error: ${error.message}`)
			}
			throw error
		}
	}
}
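
/*
Example usage — a minimal sketch, not part of the module. It assumes an
ApiHandlerOptions value with at least `openRouterApiKey` set (the key string
below is a placeholder) and uses only the public surface defined above:

	const handler = new OpenRouterHandler({ openRouterApiKey: "sk-or-..." } as ApiHandlerOptions)

	for await (const chunk of handler.createMessage("You are a helpful assistant.", [
		{ role: "user", content: "Hello!" },
	])) {
		if (chunk.type === "text") {
			process.stdout.write(chunk.text)
		}
	}
*/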