
Expose reasoning effort option for reasoning models on OpenRouter (#2483)

* Specify reasoning effort for OpenRouter reasoning models

* Add ReasoningEffort type

* Fix ReasoningEffort props

* Remove copypasta

* Set reasoning effort for Grok 3 Mini

* Use translations

* Add translations

* Remove this check
Chris Estreich 8 months ago
Commit
e7a57ea774

+ 5 - 1
src/api/index.ts

@@ -88,21 +88,25 @@ export function getModelParams({
 	model,
 	defaultMaxTokens,
 	defaultTemperature = 0,
+	defaultReasoningEffort,
 }: {
 	options: ApiHandlerOptions
 	model: ModelInfo
 	defaultMaxTokens?: number
 	defaultTemperature?: number
+	defaultReasoningEffort?: "low" | "medium" | "high"
 }) {
 	const {
 		modelMaxTokens: customMaxTokens,
 		modelMaxThinkingTokens: customMaxThinkingTokens,
 		modelTemperature: customTemperature,
+		reasoningEffort: customReasoningEffort,
 	} = options
 
 	let maxTokens = model.maxTokens ?? defaultMaxTokens
 	let thinking: BetaThinkingConfigParam | undefined = undefined
 	let temperature = customTemperature ?? defaultTemperature
+	const reasoningEffort = customReasoningEffort ?? defaultReasoningEffort
 
 	if (model.thinking) {
 		// Only honor `customMaxTokens` for thinking models.
@@ -118,5 +122,5 @@ export function getModelParams({
 		temperature = 1.0
 	}
 
-	return { maxTokens, thinking, temperature }
+	return { maxTokens, thinking, temperature, reasoningEffort }
 }
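
A minimal sketch of the precedence rule this hunk introduces, assuming only what the diff shows: a user-configured `reasoningEffort` overrides the caller-supplied per-model default via nullish coalescing, and both may be absent.

```ts
// Sketch of the resolution logic in getModelParams (names mirror the diff).
type ReasoningEffort = "low" | "medium" | "high"

function resolveReasoningEffort(
	customReasoningEffort?: ReasoningEffort, // from ApiHandlerOptions (user setting)
	defaultReasoningEffort?: ReasoningEffort, // per-model default from the caller
): ReasoningEffort | undefined {
	// An explicit user setting wins; otherwise fall back to the default,
	// which may itself be undefined (in which case no effort is sent at all).
	return customReasoningEffort ?? defaultReasoningEffort
}

console.log(resolveReasoningEffort(undefined, "high")) // "high"
console.log(resolveReasoningEffort("low", "high")) // "low"
console.log(resolveReasoningEffort()) // undefined
```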

+ 15 - 0
src/api/providers/openai.ts

@@ -82,6 +82,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		const urlHost = this._getUrlHost(modelUrl)
 		const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format
 		const ark = modelUrl.includes(".volces.com")
+
 		if (modelId.startsWith("o3-mini")) {
 			yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages)
 			return
@@ -94,6 +95,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 			}
 
 			let convertedMessages
+
 			if (deepseekReasoner) {
 				convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
 			} else if (ark || enabledLegacyFormat) {
@@ -112,16 +114,20 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 						],
 					}
 				}
+
 				convertedMessages = [systemMessage, ...convertToOpenAiMessages(messages)]
+
 				if (modelInfo.supportsPromptCache) {
 					// Note: the following logic is copied from openrouter:
 					// Add cache_control to the last two user messages
 					// (note: this works because we only ever add one user message at a time, but if we added multiple we'd need to mark the user message before the last assistant message)
 					const lastTwoUserMessages = convertedMessages.filter((msg) => msg.role === "user").slice(-2)
+
 					lastTwoUserMessages.forEach((msg) => {
 						if (typeof msg.content === "string") {
 							msg.content = [{ type: "text", text: msg.content }]
 						}
+
 						if (Array.isArray(msg.content)) {
 							// NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
 							let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
@@ -130,6 +136,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 								lastTextPart = { type: "text", text: "..." }
 								msg.content.push(lastTextPart)
 							}
+
 							// @ts-ignore-next-line
 							lastTextPart["cache_control"] = { type: "ephemeral" }
 						}
@@ -145,7 +152,9 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				messages: convertedMessages,
 				stream: true as const,
 				...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
+				reasoning_effort: this.getModel().info.reasoningEffort,
 			}
+
 			if (this.options.includeMaxTokens) {
 				requestOptions.max_tokens = modelInfo.maxTokens
 			}
@@ -185,6 +194,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 					lastUsage = chunk.usage
 				}
 			}
+
 			for (const chunk of matcher.final()) {
 				yield chunk
 			}
@@ -217,6 +227,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				type: "text",
 				text: response.choices[0]?.message.content || "",
 			}
+
 			yield this.processUsageMetrics(response.usage, modelInfo)
 		}
 	}
@@ -241,6 +252,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 	async completePrompt(prompt: string): Promise<string> {
 		try {
 			const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl)
+
 			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
 				model: this.getModel().id,
 				messages: [{ role: "user", content: prompt }],
@@ -250,11 +262,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				requestOptions,
 				isAzureAiInference ? { path: AZURE_AI_INFERENCE_PATH } : {},
 			)
+
 			return response.choices[0]?.message.content || ""
 		} catch (error) {
 			if (error instanceof Error) {
 				throw new Error(`OpenAI completion error: ${error.message}`)
 			}
+
 			throw error
 		}
 	}
@@ -333,6 +347,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 			}
 		}
 	}
+
 	private _getUrlHost(baseUrl?: string): string {
 		try {
 			return new URL(baseUrl ?? "").host
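
For context, a minimal sketch of how `reasoning_effort` rides along in a Chat Completions request (model and prompt are assumptions, not from this commit): it is an OpenAI API parameter honored by reasoning models such as the o-series, and non-reasoning models may reject the field, which is why the UI gates it per model.

```ts
import OpenAI from "openai"

// Sketch, assuming OPENAI_API_KEY is set and "o3-mini" is available.
const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })

async function main() {
	const stream = await client.chat.completions.create({
		model: "o3-mini",
		messages: [{ role: "user", content: "Summarize this change." }],
		stream: true,
		reasoning_effort: "medium", // "low" | "medium" | "high"
	})

	for await (const chunk of stream) {
		process.stdout.write(chunk.choices[0]?.delta?.content ?? "")
	}
}

main()
```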

+ 12 - 3
src/api/providers/openrouter.ts

@@ -1,8 +1,7 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta"
-import axios, { AxiosRequestConfig } from "axios"
+import axios from "axios"
 import OpenAI from "openai"
-import delay from "delay"
 
 import { ApiHandlerOptions, ModelInfo, openRouterDefaultModelId, openRouterDefaultModelInfo } from "../../shared/api"
 import { parseApiPrice } from "../../utils/cost"
@@ -22,6 +21,12 @@ type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
 	transforms?: string[]
 	include_reasoning?: boolean
 	thinking?: BetaThinkingConfigParam
+	// https://openrouter.ai/docs/use-cases/reasoning-tokens
+	reasoning?: {
+		effort?: "high" | "medium" | "low"
+		max_tokens?: number
+		exclude?: boolean
+	}
 }
 
 export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler {
@@ -42,7 +47,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 	): AsyncGenerator<ApiStreamChunk> {
-		let { id: modelId, maxTokens, thinking, temperature, topP } = this.getModel()
+		let { id: modelId, maxTokens, thinking, temperature, topP, reasoningEffort } = this.getModel()
 
 		// Convert Anthropic messages to OpenAI format.
 		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
@@ -70,13 +75,16 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 						},
 					],
 				}
+
 				// Add cache_control to the last two user messages
 				// (note: this works because we only ever add one user message at a time, but if we added multiple we'd need to mark the user message before the last assistant message)
 				const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
+
 				lastTwoUserMessages.forEach((msg) => {
 					if (typeof msg.content === "string") {
 						msg.content = [{ type: "text", text: msg.content }]
 					}
+
 					if (Array.isArray(msg.content)) {
 						// NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
 						let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
@@ -113,6 +121,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 				}),
 			// This way, the transforms field will only be included in the parameters when openRouterUseMiddleOutTransform is true.
 			...((this.options.openRouterUseMiddleOutTransform ?? true) && { transforms: ["middle-out"] }),
+			...(reasoningEffort && { reasoning: { effort: reasoningEffort } }),
 		}
 
 		const stream = await this.client.chat.completions.create(completionParams)
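
The conditional spread at the end of `completionParams` means the `reasoning` field is omitted entirely when no effort is configured, matching the OpenRouter reasoning-tokens docs linked in the hunk above. A small sketch of that behavior:

```ts
// Sketch of the `...(reasoningEffort && { ... })` spread from the diff.
type OpenRouterReasoning = {
	effort?: "high" | "medium" | "low"
	max_tokens?: number
	exclude?: boolean
}

function buildReasoningParam(
	reasoningEffort?: "high" | "medium" | "low",
): { reasoning?: OpenRouterReasoning } {
	// `...(cond && { key })` contributes nothing when `cond` is falsy, so
	// models without a configured effort send no `reasoning` field at all.
	return { ...(reasoningEffort && { reasoning: { effort: reasoningEffort } }) }
}

console.log(buildReasoningParam()) // {}
console.log(buildReasoningParam("low")) // { reasoning: { effort: "low" } }
```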

+ 11 - 1
webview-ui/src/components/settings/ApiOptions.tsx

@@ -46,7 +46,7 @@ import {
 	OPENROUTER_DEFAULT_PROVIDER_NAME,
 } from "@/components/ui/hooks/useOpenRouterModelProviders"
 import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue, SelectSeparator, Button } from "@/components/ui"
-import { MODELS_BY_PROVIDER, PROVIDERS, VERTEX_REGIONS } from "./constants"
+import { MODELS_BY_PROVIDER, PROVIDERS, VERTEX_REGIONS, REASONING_MODELS } from "./constants"
 import { AWS_REGIONS } from "../../../../src/shared/aws_regions"
 import { VSCodeButtonLink } from "../common/VSCodeButtonLink"
 import { ModelInfoView } from "./ModelInfoView"
@@ -59,6 +59,7 @@ import { ThinkingBudget } from "./ThinkingBudget"
 import { R1FormatSetting } from "./R1FormatSetting"
 import { OpenRouterBalanceDisplay } from "./OpenRouterBalanceDisplay"
 import { RequestyBalanceDisplay } from "./RequestyBalanceDisplay"
+import { ReasoningEffort } from "./ReasoningEffort"
 
 interface ApiOptionsProps {
 	uriScheme: string | undefined
@@ -1538,6 +1539,13 @@ const ApiOptions = ({
 					</div>
 				)}
 
+			{selectedProvider === "openrouter" && REASONING_MODELS.has(selectedModelId) && (
+				<ReasoningEffort
+					apiConfiguration={apiConfiguration}
+					setApiConfigurationField={setApiConfigurationField}
+				/>
+			)}
+
 			{selectedProvider === "glama" && (
 				<ModelPicker
 					apiConfiguration={apiConfiguration}
@@ -1665,12 +1673,14 @@ const ApiOptions = ({
 								})()}
 						</>
 					)}
+
 					<ModelInfoView
 						selectedModelId={selectedModelId}
 						modelInfo={selectedModelInfo}
 						isDescriptionExpanded={isDescriptionExpanded}
 						setIsDescriptionExpanded={setIsDescriptionExpanded}
 					/>
+
 					<ThinkingBudget
 						key={`${selectedProvider}-${selectedModelId}`}
 						apiConfiguration={apiConfiguration}

+ 37 - 0
webview-ui/src/components/settings/ReasoningEffort.tsx

@@ -0,0 +1,37 @@
+import { useAppTranslation } from "@/i18n/TranslationContext"
+
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui"
+
+import { ApiConfiguration } from "../../../../src/shared/api"
+import { reasoningEfforts, ReasoningEffort as ReasoningEffortType } from "../../../../src/schemas"
+
+interface ReasoningEffortProps {
+	apiConfiguration: ApiConfiguration
+	setApiConfigurationField: <K extends keyof ApiConfiguration>(field: K, value: ApiConfiguration[K]) => void
+}
+
+export const ReasoningEffort = ({ apiConfiguration, setApiConfigurationField }: ReasoningEffortProps) => {
+	const { t } = useAppTranslation()
+
+	return (
+		<div className="flex flex-col gap-1">
+			<div className="flex justify-between items-center">
+				<label className="block font-medium mb-1">{t("settings:providers.reasoningEffort.label")}</label>
+			</div>
+			<Select
+				value={apiConfiguration.reasoningEffort}
+				onValueChange={(value) => setApiConfigurationField("reasoningEffort", value as ReasoningEffortType)}>
+				<SelectTrigger className="w-full">
+					<SelectValue placeholder={t("settings:common.select")} />
+				</SelectTrigger>
+				<SelectContent>
+					{reasoningEfforts.map((value) => (
+						<SelectItem key={value} value={value}>
+							{t(`settings:providers.reasoningEffort.${value}`)}
+						</SelectItem>
+					))}
+				</SelectContent>
+			</Select>
+		</div>
+	)
+}
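
The component imports `reasoningEfforts` and the `ReasoningEffort` type from `src/schemas`, which this commit does not show. A plausible definition, assumed here, is a const tuple with the union type derived from it, so the dropdown options and the API type stay in sync:

```ts
// Assumed shape (not part of this diff): a readonly tuple of allowed values,
// with the union type derived from it.
export const reasoningEfforts = ["low", "medium", "high"] as const
export type ReasoningEffort = (typeof reasoningEfforts)[number]
```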

+ 2 - 0
webview-ui/src/components/settings/constants.ts

@@ -46,3 +46,5 @@ export const VERTEX_REGIONS = [
 	{ value: "europe-west4", label: "europe-west4" },
 	{ value: "asia-southeast1", label: "asia-southeast1" },
 ]
+
+export const REASONING_MODELS = new Set(["x-ai/grok-3-mini-beta"])
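
Gating on an explicit allowlist keeps the dropdown hidden for models that would reject the parameter; enabling another model is a one-line addition to the set. The check in ApiOptions.tsx reduces to this (sketch, second model ID illustrative):

```ts
// Mirrors the gate in ApiOptions.tsx above: the effort dropdown renders only
// for OpenRouter models known to accept the reasoning parameter.
const REASONING_MODELS = new Set(["x-ai/grok-3-mini-beta"])

function showReasoningEffort(selectedProvider: string, selectedModelId: string): boolean {
	return selectedProvider === "openrouter" && REASONING_MODELS.has(selectedModelId)
}

console.log(showReasoningEffort("openrouter", "x-ai/grok-3-mini-beta")) // true
console.log(showReasoningEffort("openrouter", "openai/gpt-4o")) // false
```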

+ 6 - 0
webview-ui/src/i18n/locales/ca/settings.json

@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "Límit de freqüència",
 			"description": "Temps mínim entre sol·licituds d'API."
+		},
+		"reasoningEffort": {
+			"label": "Esforç de raonament del model",
+			"high": "Alt",
+			"medium": "Mitjà",
+			"low": "Baix"
 		}
 	},
 	"browser": {

+ 6 - 0
webview-ui/src/i18n/locales/de/settings.json

@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "Ratenbegrenzung",
 			"description": "Minimale Zeit zwischen API-Anfragen."
+		},
+		"reasoningEffort": {
+			"label": "Modell-Denkaufwand",
+			"high": "Hoch",
+			"medium": "Mittel",
+			"low": "Niedrig"
 		}
 	},
 	"browser": {

+ 6 - 0
webview-ui/src/i18n/locales/en/settings.json

@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "Rate limit",
 			"description": "Minimum time between API requests."
+		},
+		"reasoningEffort": {
+			"label": "Model Reasoning Effort",
+			"high": "High",
+			"medium": "Medium",
+			"low": "Low"
 		}
 	},
 	"browser": {

+ 6 - 0
webview-ui/src/i18n/locales/es/settings.json

@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "Límite de tasa",
 			"description": "Tiempo mínimo entre solicitudes de API."
+		},
+		"reasoningEffort": {
+			"label": "Esfuerzo de razonamiento del modelo",
+			"high": "Alto",
+			"medium": "Medio",
+			"low": "Bajo"
 		}
 	},
 	"browser": {

+ 6 - 0
webview-ui/src/i18n/locales/fr/settings.json

@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "Limite de débit",
 			"description": "Temps minimum entre les requêtes API."
+		},
+		"reasoningEffort": {
+			"label": "Effort de raisonnement du modèle",
+			"high": "Élevé",
+			"medium": "Moyen",
+			"low": "Faible"
 		}
 	},
 	"browser": {

+ 6 - 0
webview-ui/src/i18n/locales/hi/settings.json

@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "दर सीमा",
 			"description": "API अनुरोधों के बीच न्यूनतम समय।"
+		},
+		"reasoningEffort": {
+			"label": "मॉडल तर्क प्रयास",
+			"high": "उच्च",
+			"medium": "मध्यम",
+			"low": "निम्न"
 		}
 	},
 	"browser": {

+ 6 - 0
webview-ui/src/i18n/locales/it/settings.json

@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "Limite di frequenza",
 			"description": "Tempo minimo tra le richieste API."
+		},
+		"reasoningEffort": {
+			"label": "Sforzo di ragionamento del modello",
+			"high": "Alto",
+			"medium": "Medio",
+			"low": "Basso"
 		}
 	},
 	"browser": {

+ 6 - 0
webview-ui/src/i18n/locales/ja/settings.json

@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "レート制限",
 			"description": "APIリクエスト間の最小時間。"
+		},
+		"reasoningEffort": {
+			"label": "モデル推論の労力",
+			"high": "高",
+			"medium": "中",
+			"low": "低"
 		}
 	},
 	"browser": {

+ 6 - 0
webview-ui/src/i18n/locales/ko/settings.json

@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "속도 제한",
 			"description": "API 요청 간 최소 시간."
+		},
+		"reasoningEffort": {
+			"label": "모델 추론 노력",
+			"high": "높음",
+			"medium": "중간",
+			"low": "낮음"
 		}
 	},
 	"browser": {

+ 6 - 0
webview-ui/src/i18n/locales/pl/settings.json

@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "Limit szybkości",
 			"description": "Minimalny czas między żądaniami API."
+		},
+		"reasoningEffort": {
+			"label": "Wysiłek rozumowania modelu",
+			"high": "Wysoki",
+			"medium": "Średni",
+			"low": "Niski"
 		}
 	},
 	"browser": {

+ 6 - 0
webview-ui/src/i18n/locales/pt-BR/settings.json

@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "Limite de taxa",
 			"description": "Tempo mínimo entre requisições de API."
+		},
+		"reasoningEffort": {
+			"label": "Esforço de raciocínio do modelo",
+			"high": "Alto",
+			"medium": "Médio",
+			"low": "Baixo"
 		}
 	},
 	"browser": {

+ 6 - 0
webview-ui/src/i18n/locales/tr/settings.json

@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "Hız sınırı",
 			"description": "API istekleri arasındaki minimum süre."
+		},
+		"reasoningEffort": {
+			"label": "Model Akıl Yürütme Çabası",
+			"high": "Yüksek",
+			"medium": "Orta",
+			"low": "Düşük"
 		}
 	},
 	"browser": {

+ 6 - 0
webview-ui/src/i18n/locales/vi/settings.json

@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "Giới hạn tốc độ",
 			"description": "Thời gian tối thiểu giữa các yêu cầu API."
+		},
+		"reasoningEffort": {
+			"label": "Nỗ lực suy luận của mô hình",
+			"high": "Cao",
+			"medium": "Trung bình",
+			"low": "Thấp"
 		}
 	},
 	"browser": {

+ 6 - 0
webview-ui/src/i18n/locales/zh-CN/settings.json

@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "请求频率限制",
 			"description": "设置API请求的最小间隔时间"
+		},
+		"reasoningEffort": {
+			"label": "模型推理强度",
+			"high": "高",
+			"medium": "中",
+			"low": "低"
 		}
 	},
 	"browser": {

+ 6 - 0
webview-ui/src/i18n/locales/zh-TW/settings.json

@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "速率限制",
 			"description": "API 請求間的最短時間"
+		},
+		"reasoningEffort": {
+			"label": "模型推理強度",
+			"high": "高",
+			"medium": "中",
+			"low": "低"
 		}
 	},
 	"browser": {