
Requesty provider fixes (#3193)

Co-authored-by: Chris Estreich <[email protected]>
Daniel Trugman 7 months ago
Parent
Commit
ce8fbbdafa

+ 165 - 276
src/api/providers/__tests__/requesty.test.ts

@@ -2,338 +2,227 @@
 
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
-import { ApiHandlerOptions, ModelInfo } from "../../../shared/api"
+
 import { RequestyHandler } from "../requesty"
-import { convertToOpenAiMessages } from "../../transform/openai-format"
-import { convertToR1Format } from "../../transform/r1-format"
+import { ApiHandlerOptions } from "../../../shared/api"
 
-// Mock OpenAI and transform functions
 jest.mock("openai")
-jest.mock("../../transform/openai-format")
-jest.mock("../../transform/r1-format")
+jest.mock("delay", () => jest.fn(() => Promise.resolve()))
 jest.mock("../fetchers/cache", () => ({
-	getModels: jest.fn().mockResolvedValue({
-		"test-model": {
-			maxTokens: 8192,
-			contextWindow: 200_000,
-			supportsImages: true,
-			supportsComputerUse: true,
-			supportsPromptCache: true,
-			inputPrice: 3.0,
-			outputPrice: 15.0,
-			cacheWritesPrice: 3.75,
-			cacheReadsPrice: 0.3,
-			description: "Test model description",
-		},
+	getModels: jest.fn().mockImplementation(() => {
+		return Promise.resolve({
+			"coding/claude-3-7-sonnet": {
+				maxTokens: 8192,
+				contextWindow: 200000,
+				supportsImages: true,
+				supportsPromptCache: true,
+				supportsComputerUse: true,
+				inputPrice: 3,
+				outputPrice: 15,
+				cacheWritesPrice: 3.75,
+				cacheReadsPrice: 0.3,
+				description: "Claude 3.7 Sonnet",
+			},
+		})
 	}),
 }))
 
 describe("RequestyHandler", () => {
-	let handler: RequestyHandler
-	let mockCreate: jest.Mock
-
-	const modelInfo: ModelInfo = {
-		maxTokens: 8192,
-		contextWindow: 200_000,
-		supportsImages: true,
-		supportsComputerUse: true,
-		supportsPromptCache: true,
-		inputPrice: 3.0,
-		outputPrice: 15.0,
-		cacheWritesPrice: 3.75,
-		cacheReadsPrice: 0.3,
-		description:
-			"Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
-	}
-
-	const defaultOptions: ApiHandlerOptions = {
+	const mockOptions: ApiHandlerOptions = {
 		requestyApiKey: "test-key",
-		requestyModelId: "test-model",
-		openAiStreamingEnabled: true,
-		includeMaxTokens: true, // Add this to match the implementation
+		requestyModelId: "coding/claude-3-7-sonnet",
 	}
 
-	beforeEach(() => {
-		// Clear mocks
-		jest.clearAllMocks()
+	beforeEach(() => jest.clearAllMocks())
+
+	it("initializes with correct options", () => {
+		const handler = new RequestyHandler(mockOptions)
+		expect(handler).toBeInstanceOf(RequestyHandler)
+
+		expect(OpenAI).toHaveBeenCalledWith({
+			baseURL: "https://router.requesty.ai/v1",
+			apiKey: mockOptions.requestyApiKey,
+			defaultHeaders: {
+				"HTTP-Referer": "https://github.com/RooVetGit/Roo-Cline",
+				"X-Title": "Roo Code",
+			},
+		})
+	})
+
+	describe("fetchModel", () => {
+		it("returns correct model info when options are provided", async () => {
+			const handler = new RequestyHandler(mockOptions)
+			const result = await handler.fetchModel()
+
+			expect(result).toMatchObject({
+				id: mockOptions.requestyModelId,
+				info: {
+					maxTokens: 8192,
+					contextWindow: 200000,
+					supportsImages: true,
+					supportsPromptCache: true,
+					supportsComputerUse: true,
+					inputPrice: 3,
+					outputPrice: 15,
+					cacheWritesPrice: 3.75,
+					cacheReadsPrice: 0.3,
+					description: "Claude 3.7 Sonnet",
+				},
+			})
+		})
+
+		it("returns default model info when options are not provided", async () => {
+			const handler = new RequestyHandler({})
+			const result = await handler.fetchModel()
+
+			expect(result).toMatchObject({
+				id: mockOptions.requestyModelId,
+				info: {
+					maxTokens: 8192,
+					contextWindow: 200000,
+					supportsImages: true,
+					supportsPromptCache: true,
+					supportsComputerUse: true,
+					inputPrice: 3,
+					outputPrice: 15,
+					cacheWritesPrice: 3.75,
+					cacheReadsPrice: 0.3,
+					description: "Claude 3.7 Sonnet",
+				},
+			})
+		})
+	})
 
-		// Setup mock create function that preserves params
-		mockCreate = jest.fn().mockImplementation((_params) => {
-			return {
-				[Symbol.asyncIterator]: async function* () {
+	describe("createMessage", () => {
+		it("generates correct stream chunks", async () => {
+			const handler = new RequestyHandler(mockOptions)
+
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
 					yield {
-						choices: [{ delta: { content: "Hello" } }],
+						id: mockOptions.requestyModelId,
+						choices: [{ delta: { content: "test response" } }],
 					}
 					yield {
-						choices: [{ delta: { content: " world" } }],
+						id: "test-id",
+						choices: [{ delta: {} }],
 						usage: {
-							prompt_tokens: 30,
-							completion_tokens: 10,
+							prompt_tokens: 10,
+							completion_tokens: 20,
 							prompt_tokens_details: {
-								cached_tokens: 15,
 								caching_tokens: 5,
+								cached_tokens: 2,
 							},
 						},
 					}
 				},
 			}
-		})
 
-		// Mock OpenAI constructor
-		;(OpenAI as jest.MockedClass<typeof OpenAI>).mockImplementation(
-			() =>
-				({
-					chat: {
-						completions: {
-							create: (params: any) => {
-								// Store params for verification
-								const result = mockCreate(params)
-								// Make params available for test assertions
-								;(result as any).params = params
-								return result
-							},
-						},
-					},
-				}) as unknown as OpenAI,
-		)
+			// Mock OpenAI chat.completions.create
+			const mockCreate = jest.fn().mockResolvedValue(mockStream)
 
-		// Mock transform functions
-		;(convertToOpenAiMessages as jest.Mock).mockImplementation((messages) => messages)
-		;(convertToR1Format as jest.Mock).mockImplementation((messages) => messages)
+			;(OpenAI as jest.MockedClass<typeof OpenAI>).prototype.chat = {
+				completions: { create: mockCreate },
+			} as any
 
-		// Create handler instance
-		handler = new RequestyHandler(defaultOptions)
-	})
+			const systemPrompt = "test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user" as const, content: "test message" }]
 
-	describe("constructor", () => {
-		it("should initialize with correct options", () => {
-			expect(OpenAI).toHaveBeenCalledWith({
-				baseURL: "https://router.requesty.ai/v1",
-				apiKey: defaultOptions.requestyApiKey,
-				defaultHeaders: {
-					"HTTP-Referer": "https://github.com/RooVetGit/Roo-Cline",
-					"X-Title": "Roo Code",
-				},
-			})
-		})
-	})
+			const generator = handler.createMessage(systemPrompt, messages)
+			const chunks = []
 
-	describe("createMessage", () => {
-		const systemPrompt = "You are a helpful assistant"
-		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+			for await (const chunk of generator) {
+				chunks.push(chunk)
+			}
 
-		describe("with streaming enabled", () => {
-			beforeEach(() => {
-				const stream = {
-					[Symbol.asyncIterator]: async function* () {
-						yield {
-							choices: [{ delta: { content: "Hello" } }],
-						}
-						yield {
-							choices: [{ delta: { content: " world" } }],
-							usage: {
-								prompt_tokens: 30,
-								completion_tokens: 10,
-								prompt_tokens_details: {
-									cached_tokens: 15,
-									caching_tokens: 5,
-								},
-							},
-						}
-					},
-				}
-				mockCreate.mockResolvedValue(stream)
+			// Verify stream chunks
+			expect(chunks).toHaveLength(2) // One text chunk and one usage chunk
+			expect(chunks[0]).toEqual({ type: "text", text: "test response" })
+			expect(chunks[1]).toEqual({
+				type: "usage",
+				inputTokens: 10,
+				outputTokens: 20,
+				cacheWriteTokens: 5,
+				cacheReadTokens: 2,
+				totalCost: expect.any(Number),
 			})
 
-			it("should handle streaming response correctly", async () => {
-				const stream = handler.createMessage(systemPrompt, messages)
-				const results = []
-
-				for await (const chunk of stream) {
-					results.push(chunk)
-				}
-
-				expect(results).toEqual([
-					{ type: "text", text: "Hello" },
-					{ type: "text", text: " world" },
-					{
-						type: "usage",
-						inputTokens: 30,
-						outputTokens: 10,
-						cacheWriteTokens: 5,
-						cacheReadTokens: 15,
-						totalCost: 0.00020325000000000003, // (10 * 3 / 1,000,000) + (5 * 3.75 / 1,000,000) + (15 * 0.3 / 1,000,000) + (10 * 15 / 1,000,000) (the ...0 is a fp skew)
-					},
-				])
-
-				// Get the actual params that were passed
-				const calls = mockCreate.mock.calls
-				expect(calls.length).toBe(1)
-				const actualParams = calls[0][0]
-
-				expect(actualParams).toEqual({
-					model: defaultOptions.requestyModelId,
-					temperature: 0,
+			// Verify OpenAI client was called with correct parameters
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					max_tokens: undefined,
 					messages: [
 						{
 							role: "system",
-							content: [
-								{
-									cache_control: {
-										type: "ephemeral",
-									},
-									text: systemPrompt,
-									type: "text",
-								},
-							],
+							content: "test system prompt",
 						},
 						{
 							role: "user",
-							content: [
-								{
-									cache_control: {
-										type: "ephemeral",
-									},
-									text: "Hello",
-									type: "text",
-								},
-							],
+							content: "test message",
 						},
 					],
+					model: "coding/claude-3-7-sonnet",
 					stream: true,
 					stream_options: { include_usage: true },
-					max_tokens: modelInfo.maxTokens,
-				})
-			})
-
-			it("should not include max_tokens when includeMaxTokens is false", async () => {
-				handler = new RequestyHandler({
-					...defaultOptions,
-					includeMaxTokens: false,
-				})
-
-				await handler.createMessage(systemPrompt, messages).next()
-
-				expect(mockCreate).toHaveBeenCalledWith(
-					expect.not.objectContaining({
-						max_tokens: expect.any(Number),
-					}),
-				)
-			})
-
-			it("should handle deepseek-reasoner model format", async () => {
-				handler = new RequestyHandler({
-					...defaultOptions,
-					requestyModelId: "deepseek-reasoner",
-				})
-
-				await handler.createMessage(systemPrompt, messages).next()
-
-				expect(convertToR1Format).toHaveBeenCalledWith([{ role: "user", content: systemPrompt }, ...messages])
-			})
+					temperature: undefined,
+				}),
+			)
 		})
 
-		describe("with streaming disabled", () => {
-			beforeEach(() => {
-				handler = new RequestyHandler({
-					...defaultOptions,
-					openAiStreamingEnabled: false,
-				})
-
-				mockCreate.mockResolvedValue({
-					choices: [{ message: { content: "Hello world" } }],
-					usage: {
-						prompt_tokens: 10,
-						completion_tokens: 5,
-					},
-				})
-			})
-
-			it("should handle non-streaming response correctly", async () => {
-				const stream = handler.createMessage(systemPrompt, messages)
-				const results = []
+		it("handles API errors", async () => {
+			const handler = new RequestyHandler(mockOptions)
+			const mockError = new Error("API Error")
+			const mockCreate = jest.fn().mockRejectedValue(mockError)
+			;(OpenAI as jest.MockedClass<typeof OpenAI>).prototype.chat = {
+				completions: { create: mockCreate },
+			} as any
 
-				for await (const chunk of stream) {
-					results.push(chunk)
-				}
-
-				expect(results).toEqual([
-					{ type: "text", text: "Hello world" },
-					{
-						type: "usage",
-						inputTokens: 10,
-						outputTokens: 5,
-						cacheWriteTokens: 0,
-						cacheReadTokens: 0,
-						totalCost: 0.000105, // (10 * 3 / 1,000,000) + (5 * 15 / 1,000,000)
-					},
-				])
-
-				expect(mockCreate).toHaveBeenCalledWith({
-					model: defaultOptions.requestyModelId,
-					messages: [
-						{ role: "user", content: systemPrompt },
-						{
-							role: "user",
-							content: [
-								{
-									cache_control: {
-										type: "ephemeral",
-									},
-									text: "Hello",
-									type: "text",
-								},
-							],
-						},
-					],
-				})
-			})
+			const generator = handler.createMessage("test", [])
+			await expect(generator.next()).rejects.toThrow("API Error")
 		})
 	})
 
-	describe("getModel", () => {
-		it("should return correct model information", () => {
-			const result = handler.getModel()
-			expect(result).toEqual({
-				id: defaultOptions.requestyModelId,
-				info: modelInfo,
-			})
-		})
+	describe("completePrompt", () => {
+		it("returns correct response", async () => {
+			const handler = new RequestyHandler(mockOptions)
+			const mockResponse = { choices: [{ message: { content: "test completion" } }] }
 
-		it("should use sane defaults when no model info provided", () => {
-			handler = new RequestyHandler(defaultOptions)
-			const result = handler.getModel()
+			const mockCreate = jest.fn().mockResolvedValue(mockResponse)
+			;(OpenAI as jest.MockedClass<typeof OpenAI>).prototype.chat = {
+				completions: { create: mockCreate },
+			} as any
 
-			expect(result).toEqual({
-				id: defaultOptions.requestyModelId,
-				info: modelInfo,
-			})
-		})
-	})
+			const result = await handler.completePrompt("test prompt")
 
-	describe("completePrompt", () => {
-		beforeEach(() => {
-			mockCreate.mockResolvedValue({
-				choices: [{ message: { content: "Completed response" } }],
-			})
-		})
+			expect(result).toBe("test completion")
 
-		it("should complete prompt successfully", async () => {
-			const result = await handler.completePrompt("Test prompt")
-			expect(result).toBe("Completed response")
 			expect(mockCreate).toHaveBeenCalledWith({
-				model: defaultOptions.requestyModelId,
-				messages: [{ role: "user", content: "Test prompt" }],
+				model: mockOptions.requestyModelId,
+				max_tokens: undefined,
+				messages: [{ role: "system", content: "test prompt" }],
+				temperature: undefined,
 			})
 		})
 
-		it("should handle errors correctly", async () => {
-			const errorMessage = "API error"
-			mockCreate.mockRejectedValue(new Error(errorMessage))
+		it("handles API errors", async () => {
+			const handler = new RequestyHandler(mockOptions)
+			const mockError = new Error("API Error")
+			const mockCreate = jest.fn().mockRejectedValue(mockError)
+			;(OpenAI as jest.MockedClass<typeof OpenAI>).prototype.chat = {
+				completions: { create: mockCreate },
+			} as any
 
-			await expect(handler.completePrompt("Test prompt")).rejects.toThrow(
-				`OpenAI completion error: ${errorMessage}`,
-			)
+			await expect(handler.completePrompt("test prompt")).rejects.toThrow("API Error")
+		})
+
+		it("handles unexpected errors", async () => {
+			const handler = new RequestyHandler(mockOptions)
+			const mockCreate = jest.fn().mockRejectedValue(new Error("Unexpected error"))
+			;(OpenAI as jest.MockedClass<typeof OpenAI>).prototype.chat = {
+				completions: { create: mockCreate },
+			} as any
+
+			await expect(handler.completePrompt("test prompt")).rejects.toThrow("Unexpected error")
 		})
 	})
 })

+ 11 - 3
src/api/providers/fetchers/cache.ts

@@ -38,9 +38,8 @@ async function readModels(router: RouterName): Promise<ModelRecord | undefined>
  * @param router - The router to fetch models from.
  * @returns The models from the cache or the fetched models.
  */
-export const getModels = async (router: RouterName): Promise<ModelRecord> => {
+export const getModels = async (router: RouterName, apiKey: string | undefined = undefined): Promise<ModelRecord> => {
 	let models = memoryCache.get<ModelRecord>(router)
-
 	if (models) {
 		// console.log(`[getModels] NodeCache hit for ${router} -> ${Object.keys(models).length}`)
 		return models
@@ -51,7 +50,8 @@ export const getModels = async (router: RouterName): Promise<ModelRecord> => {
 			models = await getOpenRouterModels()
 			break
 		case "requesty":
-			models = await getRequestyModels()
+			// Requesty models endpoint requires an API key for per-user custom policies
+			models = await getRequestyModels(apiKey)
 			break
 		case "glama":
 			models = await getGlamaModels()
@@ -80,3 +80,11 @@ export const getModels = async (router: RouterName): Promise<ModelRecord> => {
 
 	return models ?? {}
 }
+
+/**
+ * Flush models memory cache for a specific router
+ * @param router - The router to flush models for.
+ */
+export const flushModels = async (router: RouterName) => {
+	memoryCache.del(router)
+}
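
Taken together, the new optional apiKey parameter on getModels and the new flushModels export allow a caller to force a per-user re-fetch. A minimal sketch (illustrative only, not part of this commit), assuming the same import path used by webviewMessageHandler.ts below:

import { getModels, flushModels } from "../../api/providers/fetchers/cache"

// Hypothetical helper: drop the cached Requesty entry, then re-fetch with the user's key
// so the models endpoint can apply per-user custom policies.
async function refreshRequestyModels(requestyApiKey?: string) {
	await flushModels("requesty")
	return getModels("requesty", requestyApiKey)
}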

+ 98 - 21
src/api/providers/requesty.ts

@@ -1,11 +1,19 @@
 import { Anthropic } from "@anthropic-ai/sdk"
-import OpenAI from "openai"
-
-import { ModelInfo, ModelRecord, requestyDefaultModelId, requestyDefaultModelInfo } from "../../shared/api"
+import {
+	ApiHandlerOptions,
+	ModelInfo,
+	ModelRecord,
+	requestyDefaultModelId,
+	requestyDefaultModelInfo,
+} from "../../shared/api"
+import { convertToOpenAiMessages } from "../transform/openai-format"
 import { calculateApiCostOpenAI } from "../../utils/cost"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
-import { OpenAiHandler, OpenAiHandlerOptions } from "./openai"
+import { SingleCompletionHandler } from "../"
+import { BaseProvider } from "./base-provider"
+import { DEFAULT_HEADERS } from "./constants"
 import { getModels } from "./fetchers/cache"
+import OpenAI from "openai"
 
 // Requesty usage includes an extra field for Anthropic use cases.
 // Safely cast the prompt token details section to the appropriate structure.
@@ -17,25 +25,28 @@ interface RequestyUsage extends OpenAI.CompletionUsage {
 	total_cost?: number
 }
 
-export class RequestyHandler extends OpenAiHandler {
+type RequestyChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {}
+
+export class RequestyHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
 	protected models: ModelRecord = {}
+	private client: OpenAI
 
-	constructor(options: OpenAiHandlerOptions) {
-		if (!options.requestyApiKey) {
-			throw new Error("Requesty API key is required. Please provide it in the settings.")
-		}
+	constructor(options: ApiHandlerOptions) {
+		super()
+		this.options = options
+
+		const apiKey = this.options.requestyApiKey ?? "not-provided"
+		const baseURL = "https://router.requesty.ai/v1"
 
-		super({
-			...options,
-			openAiApiKey: options.requestyApiKey,
-			openAiModelId: options.requestyModelId ?? requestyDefaultModelId,
-			openAiBaseUrl: "https://router.requesty.ai/v1",
-		})
+		const defaultHeaders = DEFAULT_HEADERS
+
+		this.client = new OpenAI({ baseURL, apiKey, defaultHeaders })
 	}
 
-	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+	public async fetchModel() {
 		this.models = await getModels("requesty")
-		yield* super.createMessage(systemPrompt, messages)
+		return this.getModel()
 	}
 
 	override getModel(): { id: string; info: ModelInfo } {
@@ -44,7 +55,7 @@ export class RequestyHandler extends OpenAiHandler {
 		return { id, info }
 	}
 
-	protected override processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
+	protected processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
 		const requestyUsage = usage as RequestyUsage
 		const inputTokens = requestyUsage?.prompt_tokens || 0
 		const outputTokens = requestyUsage?.completion_tokens || 0
@@ -64,8 +75,74 @@ export class RequestyHandler extends OpenAiHandler {
 		}
 	}
 
-	override async completePrompt(prompt: string): Promise<string> {
-		this.models = await getModels("requesty")
-		return super.completePrompt(prompt)
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+		const model = await this.fetchModel()
+
+		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "system", content: systemPrompt },
+			...convertToOpenAiMessages(messages),
+		]
+
+		let maxTokens = undefined
+		if (this.options.includeMaxTokens) {
+			maxTokens = model.info.maxTokens
+		}
+
+		const temperature = this.options.modelTemperature
+
+		const completionParams: RequestyChatCompletionParams = {
+			model: model.id,
+			max_tokens: maxTokens,
+			messages: openAiMessages,
+			temperature: temperature,
+			stream: true,
+			stream_options: { include_usage: true },
+		}
+
+		const stream = await this.client.chat.completions.create(completionParams)
+
+		for await (const chunk of stream) {
+			const delta = chunk.choices[0]?.delta
+			if (delta?.content) {
+				yield {
+					type: "text",
+					text: delta.content,
+				}
+			}
+
+			if (delta && "reasoning_content" in delta && delta.reasoning_content) {
+				yield {
+					type: "reasoning",
+					text: (delta.reasoning_content as string | undefined) || "",
+				}
+			}
+
+			if (chunk.usage) {
+				yield this.processUsageMetrics(chunk.usage, model.info)
+			}
+		}
+	}
+
+	async completePrompt(prompt: string): Promise<string> {
+		const model = await this.fetchModel()
+
+		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [{ role: "system", content: prompt }]
+
+		let maxTokens = undefined
+		if (this.options.includeMaxTokens) {
+			maxTokens = model.info.maxTokens
+		}
+
+		const temperature = this.options.modelTemperature
+
+		const completionParams: RequestyChatCompletionParams = {
+			model: model.id,
+			max_tokens: maxTokens,
+			messages: openAiMessages,
+			temperature: temperature,
+		}
+
+		const response: OpenAI.Chat.ChatCompletion = await this.client.chat.completions.create(completionParams)
+		return response.choices[0]?.message.content || ""
 	}
 }
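
A minimal usage sketch for the reworked handler (illustrative only, not part of this commit), assuming the option fields exercised in the tests above:

import { RequestyHandler } from "./requesty"

async function demo() {
	const handler = new RequestyHandler({
		requestyApiKey: "your-key",
		requestyModelId: "coding/claude-3-7-sonnet",
	})

	// fetchModel() pulls the model list via getModels("requesty") and resolves { id, info }.
	const { id, info } = await handler.fetchModel()
	console.log(id, info.contextWindow)

	// createMessage() streams text chunks, optional reasoning chunks, and a final usage chunk.
	for await (const chunk of handler.createMessage("You are a helpful assistant", [
		{ role: "user", content: "Hello" },
	])) {
		if (chunk.type === "text") process.stdout.write(chunk.text)
		if (chunk.type === "usage") console.log("cost:", chunk.totalCost)
	}
}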

+ 12 - 6
src/core/webview/webviewMessageHandler.ts

@@ -6,7 +6,7 @@ import * as vscode from "vscode"
 import { ClineProvider } from "./ClineProvider"
 import { Language, ApiConfigMeta } from "../../schemas"
 import { changeLanguage, t } from "../../i18n"
-import { ApiConfiguration } from "../../shared/api"
+import { ApiConfiguration, RouterName, toRouterName } from "../../shared/api"
 import { supportPrompt } from "../../shared/support-prompt"
 
 import { checkoutDiffPayloadSchema, checkoutRestorePayloadSchema, WebviewMessage } from "../../shared/WebviewMessage"
@@ -34,7 +34,7 @@ import { TelemetrySetting } from "../../shared/TelemetrySetting"
 import { getWorkspacePath } from "../../utils/path"
 import { Mode, defaultModeSlug } from "../../shared/modes"
 import { GlobalState } from "../../schemas"
-import { getModels } from "../../api/providers/fetchers/cache"
+import { getModels, flushModels } from "../../api/providers/fetchers/cache"
 import { generateSystemPrompt } from "./generateSystemPrompt"
 
 const ALLOWED_VSCODE_SETTINGS = new Set(["terminal.integrated.inheritEnv"])
@@ -282,12 +282,18 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: We
 		case "resetState":
 			await provider.resetState()
 			break
+		case "flushRouterModels":
+			const routerName: RouterName = toRouterName(message.text)
+			await flushModels(routerName)
+			break
 		case "requestRouterModels":
+			const { apiConfiguration } = await provider.getState()
+
 			const [openRouterModels, requestyModels, glamaModels, unboundModels] = await Promise.all([
-				getModels("openrouter"),
-				getModels("requesty"),
-				getModels("glama"),
-				getModels("unbound"),
+				getModels("openrouter", apiConfiguration.openRouterApiKey),
+				getModels("requesty", apiConfiguration.requestyApiKey),
+				getModels("glama", apiConfiguration.glamaApiKey),
+				getModels("unbound", apiConfiguration.unboundApiKey),
 			])
 
 			provider.postMessageToWebview({
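
On the webview side, the new "flushRouterModels" message pairs with "requestRouterModels" to refresh the list (illustrative only, not part of this commit; the message-bridge import path is an assumption):

import { vscode } from "@src/utils/vscode" // assumed path for the webview message bridge

// Clear the extension-side cache for Requesty, then request a fresh model list,
// which is now fetched with the stored API keys.
vscode.postMessage({ type: "flushRouterModels", text: "requesty" })
vscode.postMessage({ type: "requestRouterModels" })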

+ 1 - 0
src/shared/WebviewMessage.ts

@@ -42,6 +42,7 @@ export interface WebviewMessage {
 		| "importSettings"
 		| "exportSettings"
 		| "resetState"
+		| "flushRouterModels"
 		| "requestRouterModels"
 		| "requestOpenAiModels"
 		| "requestOllamaModels"

+ 9 - 2
src/shared/api.ts

@@ -437,7 +437,7 @@ export const glamaDefaultModelInfo: ModelInfo = {
 
 // Requesty
 // https://requesty.ai/router-2
-export const requestyDefaultModelId = "anthropic/claude-3-7-sonnet-latest"
+export const requestyDefaultModelId = "coding/claude-3-7-sonnet"
 export const requestyDefaultModelInfo: ModelInfo = {
 	maxTokens: 8192,
 	contextWindow: 200_000,
@@ -449,7 +449,7 @@ export const requestyDefaultModelInfo: ModelInfo = {
 	cacheWritesPrice: 3.75,
 	cacheReadsPrice: 0.3,
 	description:
-		"Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
+		"The best coding model, optimized by Requesty, and automatically routed to the fastest provider. Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
 }
 
 // OpenRouter
@@ -1701,6 +1701,13 @@ export type RouterName = (typeof routerNames)[number]
 
 export const isRouterName = (value: string): value is RouterName => routerNames.includes(value as RouterName)
 
+export function toRouterName(value?: string): RouterName {
+	if (value && isRouterName(value)) {
+		return value
+	}
+	throw new Error(`Invalid router name: ${value}`)
+}
+
 export type ModelRecord = Record<string, ModelInfo>
 
 export type RouterModels = Record<RouterName, ModelRecord>
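
toRouterName narrows untrusted input (such as message.text arriving from the webview) into a typed RouterName and fails fast on anything else. A small sketch, illustrative only:

import { toRouterName, RouterName } from "../../shared/api" // path as used in webviewMessageHandler.ts

const router: RouterName = toRouterName("requesty") // "requesty"
// toRouterName(undefined) or toRouterName("nope") throws: Invalid router name: ...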

+ 24 - 7
webview-ui/src/components/settings/ApiOptions.tsx

@@ -22,9 +22,9 @@ import {
 	useOpenRouterModelProviders,
 	OPENROUTER_DEFAULT_PROVIDER_NAME,
 } from "@src/components/ui/hooks/useOpenRouterModelProviders"
-import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@src/components/ui"
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue, Button } from "@src/components/ui"
 import { VSCodeButtonLink } from "@src/components/common/VSCodeButtonLink"
-import { getRequestyAuthUrl, getGlamaAuthUrl } from "@src/oauth/urls"
+import { getRequestyApiKeyUrl, getGlamaAuthUrl } from "@src/oauth/urls"
 
 // Providers
 import { Anthropic } from "./providers/Anthropic"
@@ -75,6 +75,8 @@ const ApiOptions = ({
 		return Object.entries(headers)
 	})
 
+	const [requestyShowRefreshHint, setRequestyShowRefreshHint] = useState<boolean>()
+
 	useEffect(() => {
 		const propHeaders = apiConfiguration?.openAiHeaders || {}
 
@@ -138,7 +140,7 @@ const ApiOptions = ({
 		info: selectedModelInfo,
 	} = useSelectedModel(apiConfiguration)
 
-	const { data: routerModels } = useRouterModels()
+	const { data: routerModels, refetch: refetchRouterModels } = useRouterModels()
 
 	// Update apiConfiguration.aiModelId whenever selectedModelId changes.
 	useEffect(() => {
@@ -373,13 +375,28 @@ const ApiOptions = ({
 						{t("settings:providers.apiKeyStorageNotice")}
 					</div>
 					{!apiConfiguration?.requestyApiKey && (
-						<VSCodeButtonLink
-							href={getRequestyAuthUrl(uriScheme)}
-							style={{ width: "100%" }}
-							appearance="primary">
+						<VSCodeButtonLink href={getRequestyApiKeyUrl()} style={{ width: "100%" }} appearance="primary">
 							{t("settings:providers.getRequestyApiKey")}
 						</VSCodeButtonLink>
 					)}
+					<Button
+						variant="outline"
+						title={t("settings:providers.refetchModels")}
+						onClick={() => {
+							vscode.postMessage({ type: "flushRouterModels", text: "requesty" })
+							refetchRouterModels()
+							setRequestyShowRefreshHint(true)
+						}}>
+						<div className="flex items-center gap-2">
+							<span className="codicon codicon-refresh" />
+							{t("settings:providers.flushModelsCache")}
+						</div>
+					</Button>
+					{requestyShowRefreshHint && (
+						<div className="flex items-center text-vscode-errorForeground">
+							{t("settings:providers.flushedModelsCache")}
+						</div>
+					)}
 				</>
 			)}
 

+ 2 - 0
webview-ui/src/i18n/locales/ca/settings.json

@@ -119,6 +119,8 @@
 		"glamaApiKey": "Clau API de Glama",
 		"getGlamaApiKey": "Obtenir clau API de Glama",
 		"requestyApiKey": "Clau API de Requesty",
+		"flushModelsCache": "Netejar memòria cau de models",
+		"flushedModelsCache": "Memòria cau netejada, si us plau torna a obrir la vista de configuració",
 		"getRequestyApiKey": "Obtenir clau API de Requesty",
 		"anthropicApiKey": "Clau API d'Anthropic",
 		"getAnthropicApiKey": "Obtenir clau API d'Anthropic",

+ 2 - 0
webview-ui/src/i18n/locales/de/settings.json

@@ -106,6 +106,8 @@
 		"awsCustomArnUse": "Geben Sie eine gültige Amazon Bedrock ARN für das Modell ein, das Sie verwenden möchten. Formatbeispiele:",
 		"awsCustomArnDesc": "Stellen Sie sicher, dass die Region in der ARN mit Ihrer oben ausgewählten AWS-Region übereinstimmt.",
 		"openRouterApiKey": "OpenRouter API-Schlüssel",
+		"flushModelsCache": "Modell-Cache leeren",
+		"flushedModelsCache": "Cache geleert, bitte öffnen Sie die Einstellungsansicht erneut",
 		"getOpenRouterApiKey": "OpenRouter API-Schlüssel erhalten",
 		"apiKeyStorageNotice": "API-Schlüssel werden sicher im VSCode Secret Storage gespeichert",
 		"glamaApiKey": "Glama API-Schlüssel",

+ 2 - 0
webview-ui/src/i18n/locales/en/settings.json

@@ -118,6 +118,8 @@
 		"headerValue": "Header value",
 		"noCustomHeaders": "No custom headers defined. Click the + button to add one.",
 		"requestyApiKey": "Requesty API Key",
+		"flushModelsCache": "Flush cached models",
+		"flushedModelsCache": "Flushed cache, please reopen the settings view",
 		"getRequestyApiKey": "Get Requesty API Key",
 		"openRouterTransformsText": "Compress prompts and message chains to the context size (<a>OpenRouter Transforms</a>)",
 		"anthropicApiKey": "Anthropic API Key",

+ 2 - 0
webview-ui/src/i18n/locales/es/settings.json

@@ -106,6 +106,8 @@
 		"awsCustomArnUse": "Ingrese un ARN de Amazon Bedrock válido para el modelo que desea utilizar. Ejemplos de formato:",
 		"awsCustomArnDesc": "Asegúrese de que la región en el ARN coincida con la región de AWS seleccionada anteriormente.",
 		"openRouterApiKey": "Clave API de OpenRouter",
+		"flushModelsCache": "Limpiar modelos en caché",
+		"flushedModelsCache": "Caché limpiada, por favor vuelva a abrir la vista de configuración",
 		"getOpenRouterApiKey": "Obtener clave API de OpenRouter",
 		"apiKeyStorageNotice": "Las claves API se almacenan de forma segura en el Almacenamiento Secreto de VSCode",
 		"glamaApiKey": "Clave API de Glama",

+ 2 - 0
webview-ui/src/i18n/locales/fr/settings.json

@@ -106,6 +106,8 @@
 		"awsCustomArnUse": "Entrez un ARN Amazon Bedrock valide pour le modèle que vous souhaitez utiliser. Exemples de format :",
 		"awsCustomArnDesc": "Assurez-vous que la région dans l'ARN correspond à la région AWS sélectionnée ci-dessus.",
 		"openRouterApiKey": "Clé API OpenRouter",
+		"flushModelsCache": "Vider le cache des modèles",
+		"flushedModelsCache": "Cache vidé, veuillez rouvrir la vue des paramètres",
 		"getOpenRouterApiKey": "Obtenir la clé API OpenRouter",
 		"apiKeyStorageNotice": "Les clés API sont stockées en toute sécurité dans le stockage sécurisé de VSCode",
 		"glamaApiKey": "Clé API Glama",

+ 2 - 0
webview-ui/src/i18n/locales/hi/settings.json

@@ -118,6 +118,8 @@
 		"headerValue": "हेडर मूल्य",
 		"noCustomHeaders": "कोई कस्टम हेडर परिभाषित नहीं है। एक जोड़ने के लिए + बटन पर क्लिक करें।",
 		"requestyApiKey": "Requesty API कुंजी",
+		"flushModelsCache": "मॉडल कैश साफ़ करें",
+		"flushedModelsCache": "कैश साफ़ किया गया, कृपया सेटिंग्स व्यू को फिर से खोलें",
 		"getRequestyApiKey": "Requesty API कुंजी प्राप्त करें",
 		"openRouterTransformsText": "संदर्भ आकार के लिए प्रॉम्प्ट और संदेश श्रृंखलाओं को संपीड़ित करें (<a>OpenRouter ट्रांसफॉर्म</a>)",
 		"anthropicApiKey": "Anthropic API कुंजी",

+ 2 - 0
webview-ui/src/i18n/locales/it/settings.json

@@ -118,6 +118,8 @@
 		"headerValue": "Valore intestazione",
 		"noCustomHeaders": "Nessuna intestazione personalizzata definita. Fai clic sul pulsante + per aggiungerne una.",
 		"requestyApiKey": "Chiave API Requesty",
+		"flushModelsCache": "Svuota cache dei modelli",
+		"flushedModelsCache": "Cache svuotata, riapri la vista delle impostazioni",
 		"getRequestyApiKey": "Ottieni chiave API Requesty",
 		"openRouterTransformsText": "Comprimi prompt e catene di messaggi alla dimensione del contesto (<a>Trasformazioni OpenRouter</a>)",
 		"anthropicApiKey": "Chiave API Anthropic",

+ 2 - 0
webview-ui/src/i18n/locales/ja/settings.json

@@ -118,6 +118,8 @@
 		"headerValue": "ヘッダー値",
 		"noCustomHeaders": "カスタムヘッダーが定義されていません。+ ボタンをクリックして追加してください。",
 		"requestyApiKey": "Requesty APIキー",
+		"flushModelsCache": "モデルキャッシュをクリア",
+		"flushedModelsCache": "キャッシュをクリアしました。設定ビューを再開してください",
 		"getRequestyApiKey": "Requesty APIキーを取得",
 		"openRouterTransformsText": "プロンプトとメッセージチェーンをコンテキストサイズに圧縮 (<a>OpenRouter Transforms</a>)",
 		"anthropicApiKey": "Anthropic APIキー",

+ 2 - 0
webview-ui/src/i18n/locales/ko/settings.json

@@ -118,6 +118,8 @@
 		"headerValue": "헤더 값",
 		"noCustomHeaders": "정의된 사용자 정의 헤더가 없습니다. + 버튼을 클릭하여 추가하세요.",
 		"requestyApiKey": "Requesty API 키",
+		"flushModelsCache": "모델 캐시 지우기",
+		"flushedModelsCache": "캐시가 지워졌습니다. 설정 보기를 다시 열어주세요",
 		"getRequestyApiKey": "Requesty API 키 받기",
 		"openRouterTransformsText": "프롬프트와 메시지 체인을 컨텍스트 크기로 압축 (<a>OpenRouter Transforms</a>)",
 		"anthropicApiKey": "Anthropic API 키",

+ 2 - 0
webview-ui/src/i18n/locales/pl/settings.json

@@ -118,6 +118,8 @@
 		"headerValue": "Wartość nagłówka",
 		"noCustomHeaders": "Brak zdefiniowanych niestandardowych nagłówków. Kliknij przycisk +, aby dodać.",
 		"requestyApiKey": "Klucz API Requesty",
+		"flushModelsCache": "Wyczyść pamięć podręczną modeli",
+		"flushedModelsCache": "Pamięć podręczna wyczyszczona, proszę ponownie otworzyć widok ustawień",
 		"getRequestyApiKey": "Uzyskaj klucz API Requesty",
 		"openRouterTransformsText": "Kompresuj podpowiedzi i łańcuchy wiadomości do rozmiaru kontekstu (<a>Transformacje OpenRouter</a>)",
 		"anthropicApiKey": "Klucz API Anthropic",

+ 2 - 0
webview-ui/src/i18n/locales/pt-BR/settings.json

@@ -106,6 +106,8 @@
 		"awsCustomArnUse": "Insira um ARN Amazon Bedrock válido para o modelo que deseja usar. Exemplos de formato:",
 		"awsCustomArnDesc": "Certifique-se de que a região no ARN corresponde à região AWS selecionada acima.",
 		"openRouterApiKey": "Chave de API OpenRouter",
+		"flushModelsCache": "Limpar cache de modelos",
+		"flushedModelsCache": "Cache limpo, por favor reabra a visualização de configurações",
 		"getOpenRouterApiKey": "Obter chave de API OpenRouter",
 		"apiKeyStorageNotice": "As chaves de API são armazenadas com segurança no Armazenamento Secreto do VSCode",
 		"glamaApiKey": "Chave de API Glama",

+ 2 - 0
webview-ui/src/i18n/locales/ru/settings.json

@@ -118,6 +118,8 @@
 		"headerValue": "Значение заголовка",
 		"noCustomHeaders": "Пользовательские заголовки не определены. Нажмите кнопку +, чтобы добавить.",
 		"requestyApiKey": "Requesty API-ключ",
+		"flushModelsCache": "Очистить кэш моделей",
+		"flushedModelsCache": "Кэш очищен, пожалуйста, переоткройте представление настроек",
 		"getRequestyApiKey": "Получить Requesty API-ключ",
 		"openRouterTransformsText": "Сжимать подсказки и цепочки сообщений до размера контекста (<a>OpenRouter Transforms</a>)",
 		"anthropicApiKey": "Anthropic API-ключ",

+ 2 - 0
webview-ui/src/i18n/locales/tr/settings.json

@@ -106,6 +106,8 @@
 		"awsCustomArnUse": "Kullanmak istediğiniz model için geçerli bir Amazon Bedrock ARN'si girin. Format örnekleri:",
 		"awsCustomArnDesc": "ARN içindeki bölgenin yukarıda seçilen AWS Bölgesiyle eşleştiğinden emin olun.",
 		"openRouterApiKey": "OpenRouter API Anahtarı",
+		"flushModelsCache": "Model önbelleğini temizle",
+		"flushedModelsCache": "Önbellek temizlendi, lütfen ayarlar görünümünü yeniden açın",
 		"getOpenRouterApiKey": "OpenRouter API Anahtarı Al",
 		"apiKeyStorageNotice": "API anahtarları VSCode'un Gizli Depolamasında güvenli bir şekilde saklanır",
 		"glamaApiKey": "Glama API Anahtarı",

+ 2 - 0
webview-ui/src/i18n/locales/vi/settings.json

@@ -118,6 +118,8 @@
 		"headerValue": "Giá trị tiêu đề",
 		"noCustomHeaders": "Chưa có tiêu đề tùy chỉnh nào được định nghĩa. Nhấp vào nút + để thêm.",
 		"requestyApiKey": "Khóa API Requesty",
+		"flushModelsCache": "Xóa bộ nhớ đệm mô hình",
+		"flushedModelsCache": "Đã xóa bộ nhớ đệm, vui lòng mở lại chế độ xem cài đặt",
 		"getRequestyApiKey": "Lấy khóa API Requesty",
 		"anthropicApiKey": "Khóa API Anthropic",
 		"getAnthropicApiKey": "Lấy khóa API Anthropic",

+ 2 - 0
webview-ui/src/i18n/locales/zh-CN/settings.json

@@ -118,6 +118,8 @@
 		"glamaApiKey": "Glama API 密钥",
 		"getGlamaApiKey": "获取 Glama API 密钥",
 		"requestyApiKey": "Requesty API 密钥",
+		"flushModelsCache": "清除模型缓存",
+		"flushedModelsCache": "缓存已清除,请重新打开设置视图",
 		"getRequestyApiKey": "获取 Requesty API 密钥",
 		"openRouterTransformsText": "自动压缩提示词和消息链到上下文长度限制内 (<a>OpenRouter转换</a>)",
 		"anthropicApiKey": "Anthropic API 密钥",

+ 2 - 0
webview-ui/src/i18n/locales/zh-TW/settings.json

@@ -118,6 +118,8 @@
 		"headerValue": "標頭值",
 		"noCustomHeaders": "尚未定義自訂標頭。點擊 + 按鈕以新增。",
 		"requestyApiKey": "Requesty API 金鑰",
+		"flushModelsCache": "清除模型快取",
+		"flushedModelsCache": "快取已清除,請重新開啟設定視圖",
 		"getRequestyApiKey": "取得 Requesty API 金鑰",
 		"openRouterTransformsText": "將提示和訊息鏈壓縮到上下文大小 (<a>OpenRouter 轉換</a>)",
 		"anthropicApiKey": "Anthropic API 金鑰",

+ 4 - 0
webview-ui/src/oauth/urls.ts

@@ -14,3 +14,7 @@ export function getOpenRouterAuthUrl(uriScheme?: string) {
 export function getRequestyAuthUrl(uriScheme?: string) {
 	return `https://app.requesty.ai/oauth/authorize?callback_url=${getCallbackUrl("requesty", uriScheme)}`
 }
+
+export function getRequestyApiKeyUrl() {
+	return "https://app.requesty.ai/api-keys"
+}