@@ -52,6 +52,8 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 			...convertToOpenAiMessages(messages),
 		]
 
+		const { id: modelId, info: modelInfo } = this.getModel()
+
 		// prompt caching: https://openrouter.ai/docs/prompt-caching
 		// this is specifically for claude models (some models may 'support prompt caching' automatically without this)
 		switch (true) {
@@ -95,10 +97,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 		let topP: number | undefined = undefined
 
 		// Handle models based on deepseek-r1
-		if (
-			this.getModel().id.startsWith("deepseek/deepseek-r1") ||
-			this.getModel().id === "perplexity/sonar-reasoning"
-		) {
+		if (modelId.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning") {
 			// Recommended temperature for DeepSeek reasoning models
 			defaultTemperature = DEEP_SEEK_DEFAULT_TEMPERATURE
 			// DeepSeek highly recommends using user instead of system role
@@ -107,24 +106,34 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 			topP = 0.95
 		}
 
+		let temperature = this.options.modelTemperature ?? defaultTemperature
+
+		// Anthropic "Thinking" models require a temperature of 1.0.
+		if (modelInfo.thinking) {
+			temperature = 1.0
+		}
+
 		// https://openrouter.ai/docs/transforms
 		let fullResponseText = ""
-		const stream = await this.client.chat.completions.create({
-			model: this.getModel().id,
-			max_tokens: this.getModel().info.maxTokens,
-			temperature: this.options.modelTemperature ?? defaultTemperature,
+
+		const completionParams: OpenRouterChatCompletionParams = {
+			model: modelId,
+			max_tokens: modelInfo.maxTokens,
+			temperature,
 			top_p: topP,
 			messages: openAiMessages,
 			stream: true,
 			include_reasoning: true,
 			// This way, the transforms field will only be included in the parameters when openRouterUseMiddleOutTransform is true.
 			...(this.options.openRouterUseMiddleOutTransform && { transforms: ["middle-out"] }),
-		} as OpenRouterChatCompletionParams)
+		}
+
+		const stream = await this.client.chat.completions.create(completionParams)
 
 		let genId: string | undefined
 
 		for await (const chunk of stream as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>) {
-			// openrouter returns an error object instead of the openai sdk throwing an error
+			// OpenRouter returns an error object instead of the OpenAI SDK throwing an error.
 			if ("error" in chunk) {
 				const error = chunk.error as { message?: string; code?: number }
 				console.error(`OpenRouter API Error: ${error?.code} - ${error?.message}`)
@@ -136,12 +145,14 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 			}
 
 			const delta = chunk.choices[0]?.delta
+
 			if ("reasoning" in delta && delta.reasoning) {
 				yield {
 					type: "reasoning",
 					text: delta.reasoning,
 				} as ApiStreamChunk
 			}
+
 			if (delta?.content) {
 				fullResponseText += delta.content
 				yield {
@@ -149,6 +160,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 					text: delta.content,
 				} as ApiStreamChunk
 			}
+
 			// if (chunk.usage) {
 			// 	yield {
 			// 		type: "usage",
@@ -158,10 +170,12 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 			// }
 		}
 
-		// retry fetching generation details
+		// Retry fetching generation details.
 		let attempt = 0
+
 		while (attempt++ < 10) {
 			await delay(200) // FIXME: necessary delay to ensure generation endpoint is ready
+
 			try {
 				const response = await axios.get(`https://openrouter.ai/api/v1/generation?id=${genId}`, {
 					headers: {
@@ -171,7 +185,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 				})
 
 				const generation = response.data?.data
-				console.log("OpenRouter generation details:", response.data)
+
 				yield {
 					type: "usage",
 					// cacheWriteTokens: 0,
@@ -182,6 +196,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 					totalCost: generation?.total_cost || 0,
 					fullResponseText,
 				} as OpenRouterApiStreamUsageChunk
+
 				return
 			} catch (error) {
 				// ignore if fails
@@ -189,13 +204,13 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 			}
 		}
 	}
-	getModel(): { id: string; info: ModelInfo } {
+
+	getModel() {
 		const modelId = this.options.openRouterModelId
 		const modelInfo = this.options.openRouterModelInfo
-		if (modelId && modelInfo) {
-			return { id: modelId, info: modelInfo }
-		}
-		return { id: openRouterDefaultModelId, info: openRouterDefaultModelInfo }
+		return modelId && modelInfo
+			? { id: modelId, info: modelInfo }
+			: { id: openRouterDefaultModelId, info: openRouterDefaultModelInfo }
 	}
 
 	async completePrompt(prompt: string): Promise<string> {
@@ -218,6 +233,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 			if (error instanceof Error) {
 				throw new Error(`OpenRouter completion error: ${error.message}`)
 			}
+
 			throw error
 		}
 	}
@@ -239,6 +255,7 @@ export async function getOpenRouterModels() {
 			inputPrice: parseApiPrice(rawModel.pricing?.prompt),
 			outputPrice: parseApiPrice(rawModel.pricing?.completion),
 			description: rawModel.description,
+			thinking: rawModel.id === "anthropic/claude-3.7-sonnet:thinking",
 		}
 
 		// NOTE: this needs to be synced with api.ts/openrouter default model info.