@@ -8,11 +8,13 @@ import { calculateApiCostOpenAI } from "../../shared/cost"
 
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
+import { getModelParams } from "../transform/model-params"
+import { AnthropicReasoningParams } from "../transform/reasoning"
 
 import { DEFAULT_HEADERS } from "./constants"
 import { getModels } from "./fetchers/modelCache"
 import { BaseProvider } from "./base-provider"
-import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../"
+import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 
 // Requesty usage includes an extra field for Anthropic use cases.
 // Safely cast the prompt token details section to the appropriate structure.
@@ -31,10 +33,7 @@ type RequestyChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
 			mode?: string
 		}
 	}
-	thinking?: {
-		type: string
-		budget_tokens?: number
-	}
+	thinking?: AnthropicReasoningParams
 }
 
 export class RequestyHandler extends BaseProvider implements SingleCompletionHandler {
@@ -44,14 +43,14 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
 
 	constructor(options: ApiHandlerOptions) {
 		super()
-		this.options = options
-
-		const apiKey = this.options.requestyApiKey ?? "not-provided"
-		const baseURL = "https://router.requesty.ai/v1"
 
-		const defaultHeaders = DEFAULT_HEADERS
+		this.options = options
 
-		this.client = new OpenAI({ baseURL, apiKey, defaultHeaders })
+		this.client = new OpenAI({
+			baseURL: "https://router.requesty.ai/v1",
+			apiKey: this.options.requestyApiKey ?? "not-provided",
+			defaultHeaders: DEFAULT_HEADERS,
+		})
 	}
 
 	public async fetchModel() {
@@ -59,10 +58,18 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
 		return this.getModel()
 	}
 
-	override getModel(): { id: string; info: ModelInfo } {
+	override getModel() {
 		const id = this.options.requestyModelId ?? requestyDefaultModelId
 		const info = this.models[id] ?? requestyDefaultModelInfo
-		return { id, info }
+
+		const params = getModelParams({
+			format: "anthropic",
+			modelId: id,
+			model: info,
+			settings: this.options,
+		})
+
+		return { id, info, ...params }
 	}
 
 	protected processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
@@ -90,70 +97,44 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
-		const model = await this.fetchModel()
-
-		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+		const {
+			id: model,
+			info,
+			maxTokens: max_tokens,
+			temperature,
+			reasoningEffort: reasoning_effort,
+			reasoning: thinking,
+		} = await this.fetchModel()
+
+		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
 			...convertToOpenAiMessages(messages),
 		]
 
-		let maxTokens = undefined
-		if (this.options.modelMaxTokens) {
-			maxTokens = this.options.modelMaxTokens
-		} else if (this.options.includeMaxTokens) {
-			maxTokens = model.info.maxTokens
-		}
-
-		let reasoningEffort = undefined
-		if (this.options.reasoningEffort) {
-			reasoningEffort = this.options.reasoningEffort
-		}
-
-		let thinking = undefined
-		if (this.options.modelMaxThinkingTokens) {
-			thinking = {
-				type: "enabled",
-				budget_tokens: this.options.modelMaxThinkingTokens,
-			}
-		}
-
-		const temperature = this.options.modelTemperature
-
 		const completionParams: RequestyChatCompletionParams = {
-			model: model.id,
-			max_tokens: maxTokens,
 			messages: openAiMessages,
-			temperature: temperature,
+			model,
+			max_tokens,
+			temperature,
+			...(reasoning_effort && { reasoning_effort }),
+			...(thinking && { thinking }),
 			stream: true,
 			stream_options: { include_usage: true },
-			reasoning_effort: reasoningEffort,
-			thinking: thinking,
-			requesty: {
-				trace_id: metadata?.taskId,
-				extra: {
-					mode: metadata?.mode,
-				},
-			},
+			requesty: { trace_id: metadata?.taskId, extra: { mode: metadata?.mode } },
 		}
 
 		const stream = await this.client.chat.completions.create(completionParams)
-
 		let lastUsage: any = undefined
 
 		for await (const chunk of stream) {
 			const delta = chunk.choices[0]?.delta
+
 			if (delta?.content) {
-				yield {
-					type: "text",
-					text: delta.content,
-				}
+				yield { type: "text", text: delta.content }
 			}
 
 			if (delta && "reasoning_content" in delta && delta.reasoning_content) {
-				yield {
-					type: "reasoning",
-					text: (delta.reasoning_content as string | undefined) || "",
-				}
+				yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" }
 			}
 
 			if (chunk.usage) {
@@ -162,25 +143,18 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
 		}
 
 		if (lastUsage) {
-			yield this.processUsageMetrics(lastUsage, model.info)
+			yield this.processUsageMetrics(lastUsage, info)
 		}
 	}
 
 	async completePrompt(prompt: string): Promise<string> {
-		const model = await this.fetchModel()
+		const { id: model, maxTokens: max_tokens, temperature } = await this.fetchModel()
 
 		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [{ role: "system", content: prompt }]
 
-		let maxTokens = undefined
-		if (this.options.includeMaxTokens) {
-			maxTokens = model.info.maxTokens
-		}
-
-		const temperature = this.options.modelTemperature
-
 		const completionParams: RequestyChatCompletionParams = {
-			model: model.id,
-			max_tokens: maxTokens,
+			model,
+			max_tokens,
 			messages: openAiMessages,
 			temperature: temperature,
 		}