
Improve OpenRouter model fetching (#2922)

Chris Estreich, 8 months ago
commit b75379bed3

+ 97 - 0
package-lock.json

@@ -93,6 +93,7 @@
 				"knip": "^5.44.4",
 				"lint-staged": "^15.2.11",
 				"mkdirp": "^3.0.1",
+				"nock": "^14.0.4",
 				"npm-run-all": "^4.1.5",
 				"prettier": "^3.4.2",
 				"rimraf": "^6.0.1",
@@ -6673,6 +6674,24 @@
 				"zod": "^3.24.1"
 			}
 		},
+		"node_modules/@mswjs/interceptors": {
+			"version": "0.38.6",
+			"resolved": "https://registry.npmjs.org/@mswjs/interceptors/-/interceptors-0.38.6.tgz",
+			"integrity": "sha512-qFlpmObPqeUs4u3oFYv/OM/xyX+pNa5TRAjqjvMhbGYlyMhzSrE5UfncL2rUcEeVfD9Gebgff73hPwqcOwJQNA==",
+			"dev": true,
+			"license": "MIT",
+			"dependencies": {
+				"@open-draft/deferred-promise": "^2.2.0",
+				"@open-draft/logger": "^0.3.0",
+				"@open-draft/until": "^2.0.0",
+				"is-node-process": "^1.2.0",
+				"outvariant": "^1.4.3",
+				"strict-event-emitter": "^0.5.1"
+			},
+			"engines": {
+				"node": ">=18"
+			}
+		},
 		"node_modules/@noble/ciphers": {
 			"version": "1.2.1",
 			"resolved": "https://registry.npmjs.org/@noble/ciphers/-/ciphers-1.2.1.tgz",
@@ -6750,6 +6769,31 @@
 				"node": ">= 8"
 			}
 		},
+		"node_modules/@open-draft/deferred-promise": {
+			"version": "2.2.0",
+			"resolved": "https://registry.npmjs.org/@open-draft/deferred-promise/-/deferred-promise-2.2.0.tgz",
+			"integrity": "sha512-CecwLWx3rhxVQF6V4bAgPS5t+So2sTbPgAzafKkVizyi7tlwpcFpdFqq+wqF2OwNBmqFuu6tOyouTuxgpMfzmA==",
+			"dev": true,
+			"license": "MIT"
+		},
+		"node_modules/@open-draft/logger": {
+			"version": "0.3.0",
+			"resolved": "https://registry.npmjs.org/@open-draft/logger/-/logger-0.3.0.tgz",
+			"integrity": "sha512-X2g45fzhxH238HKO4xbSr7+wBS8Fvw6ixhTDuvLd5mqh6bJJCFAPwU9mPDxbcrRtfxv4u5IHCEH77BmxvXmmxQ==",
+			"dev": true,
+			"license": "MIT",
+			"dependencies": {
+				"is-node-process": "^1.2.0",
+				"outvariant": "^1.4.0"
+			}
+		},
+		"node_modules/@open-draft/until": {
+			"version": "2.1.0",
+			"resolved": "https://registry.npmjs.org/@open-draft/until/-/until-2.1.0.tgz",
+			"integrity": "sha512-U69T3ItWHvLwGg5eJ0n3I62nWuE6ilHlmz7zM0npLBRvPRd7e6NYmg54vvRtP5mZG7kZqZCFVdsTWo7BPtBujg==",
+			"dev": true,
+			"license": "MIT"
+		},
 		"node_modules/@pkgjs/parseargs": {
 			"version": "0.11.0",
 			"resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
@@ -14204,6 +14248,13 @@
 				"url": "https://github.com/sponsors/ljharb"
 			}
 		},
+		"node_modules/is-node-process": {
+			"version": "1.2.0",
+			"resolved": "https://registry.npmjs.org/is-node-process/-/is-node-process-1.2.0.tgz",
+			"integrity": "sha512-Vg4o6/fqPxIjtxgUH5QLJhwZ7gW5diGCVlXpuUfELC62CuxM1iHcRe51f2W1FDy04Ai4KJkagKjx3XaqyfRKXw==",
+			"dev": true,
+			"license": "MIT"
+		},
 		"node_modules/is-number": {
 			"version": "7.0.0",
 			"resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz",
@@ -15525,6 +15576,13 @@
 			"integrity": "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==",
 			"dev": true
 		},
+		"node_modules/json-stringify-safe": {
+			"version": "5.0.1",
+			"resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
+			"integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==",
+			"dev": true,
+			"license": "ISC"
+		},
 		"node_modules/json5": {
 			"version": "2.2.3",
 			"resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz",
@@ -16763,6 +16821,21 @@
 			"integrity": "sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ==",
 			"dev": true
 		},
+		"node_modules/nock": {
+			"version": "14.0.4",
+			"resolved": "https://registry.npmjs.org/nock/-/nock-14.0.4.tgz",
+			"integrity": "sha512-86fh+gIKH8H02+y0/HKAOZZXn6OwgzXvl6JYwfjvKkoKxUWz54wIIDU/+w24xzMvk/R8pNVXOrvTubyl+Ml6cg==",
+			"dev": true,
+			"license": "MIT",
+			"dependencies": {
+				"@mswjs/interceptors": "^0.38.5",
+				"json-stringify-safe": "^5.0.1",
+				"propagate": "^2.0.0"
+			},
+			"engines": {
+				"node": ">=18.20.0 <20 || >=20.12.1"
+			}
+		},
 		"node_modules/node-abi": {
 			"version": "3.74.0",
 			"resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.74.0.tgz",
@@ -17428,6 +17501,13 @@
 			"integrity": "sha512-/jHxFIzoMXdqPzTaCpFzAAWhpkSjZPF4Vsn6jAfNpmbH/ymsmd7Qc6VE9BGn0L6YMj6uwpQLxCECpus4ukKS9Q==",
 			"dev": true
 		},
+		"node_modules/outvariant": {
+			"version": "1.4.3",
+			"resolved": "https://registry.npmjs.org/outvariant/-/outvariant-1.4.3.tgz",
+			"integrity": "sha512-+Sl2UErvtsoajRDKCE5/dBz4DIvHXQQnAxtQTF04OJxY0+DyZXSo5P5Bb7XYWOh81syohlYL24hbDwxedPUJCA==",
+			"dev": true,
+			"license": "MIT"
+		},
 		"node_modules/p-filter": {
 			"version": "2.1.0",
 			"resolved": "https://registry.npmjs.org/p-filter/-/p-filter-2.1.0.tgz",
@@ -18120,6 +18200,16 @@
 				"node": ">= 6"
 			}
 		},
+		"node_modules/propagate": {
+			"version": "2.0.1",
+			"resolved": "https://registry.npmjs.org/propagate/-/propagate-2.0.1.tgz",
+			"integrity": "sha512-vGrhOavPSTz4QVNuBNdcNXePNdNMaO1xj9yBeH1ScQPjk/rhg9sSlCXPhMkFuaNNW/syTvYqsnbIJxMBfRbbag==",
+			"dev": true,
+			"license": "MIT",
+			"engines": {
+				"node": ">= 8"
+			}
+		},
 		"node_modules/proxy-addr": {
 			"version": "2.0.7",
 			"resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
@@ -19380,6 +19470,13 @@
 				"bare-events": "^2.2.0"
 			}
 		},
+		"node_modules/strict-event-emitter": {
+			"version": "0.5.1",
+			"resolved": "https://registry.npmjs.org/strict-event-emitter/-/strict-event-emitter-0.5.1.tgz",
+			"integrity": "sha512-vMgjE/GGEPEFnhFub6pa4FmJBRBVOLpIII2hvCZ8Kzb7K0hlHo7mQv6xYrBvCL2LtAIBwFUK8wvuJgTVSQ5MFQ==",
+			"dev": true,
+			"license": "MIT"
+		},
 		"node_modules/string_decoder": {
 			"version": "1.1.1",
 			"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz",

+ 1 - 0
package.json

@@ -485,6 +485,7 @@
 		"knip": "^5.44.4",
 		"lint-staged": "^15.2.11",
 		"mkdirp": "^3.0.1",
+		"nock": "^14.0.4",
 		"npm-run-all": "^4.1.5",
 		"prettier": "^3.4.2",
 		"rimraf": "^6.0.1",

+ 0 - 1
src/api/providers/__tests__/openai.test.ts

@@ -1,7 +1,6 @@
 import { OpenAiHandler } from "../openai"
 import { ApiHandlerOptions } from "../../../shared/api"
 import { Anthropic } from "@anthropic-ai/sdk"
-import { DEEP_SEEK_DEFAULT_TEMPERATURE } from "../constants"
 
 // Mock OpenAI client
 const mockCreate = jest.fn()

+ 0 - 1
src/api/providers/__tests__/openrouter.test.ts

@@ -55,7 +55,6 @@ describe("OpenRouterHandler", () => {
 				info: mockOptions.openRouterModelInfo,
 				maxTokens: 1000,
 				reasoning: undefined,
-				supportsPromptCache: false,
 				temperature: 0,
 				thinking: undefined,
 				topP: undefined,

+ 0 - 4
src/api/providers/constants.ts

@@ -6,7 +6,3 @@ export const DEFAULT_HEADERS = {
 export const ANTHROPIC_DEFAULT_MAX_TOKENS = 8192
 
 export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.6
-
-export const AZURE_AI_INFERENCE_PATH = "/models/chat/completions"
-
-export const REASONING_MODELS = new Set(["x-ai/grok-3-mini-beta", "grok-3-mini-beta", "grok-3-mini-fast-beta"])

File diff suppressed because it is too large
+ 8 - 0
src/api/providers/fetchers/__tests__/fixtures/openrouter-models.json


+ 72 - 0
src/api/providers/fetchers/__tests__/openrouter.test.ts

@@ -0,0 +1,72 @@
+// npx jest src/api/providers/fetchers/__tests__/openrouter.test.ts
+
+import path from "path"
+
+import { back as nockBack } from "nock"
+
+import { PROMPT_CACHING_MODELS } from "../../../../shared/api"
+
+import { getOpenRouterModels } from "../openrouter"
+
+nockBack.fixtures = path.join(__dirname, "fixtures")
+nockBack.setMode("dryrun")
+
+describe("OpenRouter API", () => {
+	describe("getOpenRouterModels", () => {
+		it("fetches models and validates schema", async () => {
+			const { nockDone } = await nockBack("openrouter-models.json")
+
+			const models = await getOpenRouterModels()
+
+			expect(
+				Object.entries(models)
+					.filter(([_, model]) => model.supportsPromptCache)
+					.map(([id, _]) => id)
+					.sort(),
+			).toEqual(Array.from(PROMPT_CACHING_MODELS).sort())
+
+			expect(
+				Object.entries(models)
+					.filter(([_, model]) => model.supportsComputerUse)
+					.map(([id, _]) => id)
+					.sort(),
+			).toEqual([
+				"anthropic/claude-3.5-sonnet",
+				"anthropic/claude-3.5-sonnet:beta",
+				"anthropic/claude-3.7-sonnet",
+				"anthropic/claude-3.7-sonnet:beta",
+				"anthropic/claude-3.7-sonnet:thinking",
+			])
+
+			expect(models["anthropic/claude-3.7-sonnet"]).toEqual({
+				maxTokens: 8192,
+				contextWindow: 200000,
+				supportsImages: true,
+				supportsPromptCache: true,
+				inputPrice: 3,
+				outputPrice: 15,
+				cacheWritesPrice: 3.75,
+				cacheReadsPrice: 0.3,
+				description: expect.any(String),
+				thinking: false,
+				supportsComputerUse: true,
+			})
+
+			expect(models["anthropic/claude-3.7-sonnet:thinking"]).toEqual({
+				maxTokens: 128000,
+				contextWindow: 200000,
+				supportsImages: true,
+				supportsPromptCache: true,
+				inputPrice: 3,
+				outputPrice: 15,
+				cacheWritesPrice: 3.75,
+				cacheReadsPrice: 0.3,
+				description: expect.any(String),
+				thinking: true,
+				supportsComputerUse: true,
+			})
+
+			nockDone()
+		})
+	})
+})

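For context: the `dryrun` mode above replays the checked-in `fixtures/openrouter-models.json` recording without touching the network, and it never writes new recordings. A hedged sketch of how such a fixture could be (re)recorded using nock's documented `record` mode; since `record` also plays back an existing fixture, the old fixture file would need to be deleted first to force a fresh capture:

import path from "path"
import { back as nockBack } from "nock"
import { getOpenRouterModels } from "../openrouter"

async function recordOpenRouterFixture() {
	nockBack.fixtures = path.join(__dirname, "fixtures")
	nockBack.setMode("record") // unlike "dryrun", writes new recordings to disk

	const { nockDone } = await nockBack("openrouter-models.json")
	await getOpenRouterModels() // real HTTP request this time, captured by nock
	nockDone() // flushes the recorded interactions to the fixture file
}
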
+ 115 - 0
src/api/providers/fetchers/openrouter.ts

@@ -0,0 +1,115 @@
+import axios from "axios"
+import { z } from "zod"
+
+import { ApiHandlerOptions, ModelInfo } from "../../../shared/api"
+import { parseApiPrice } from "../../../utils/cost"
+
+// https://openrouter.ai/api/v1/models
+export const openRouterModelSchema = z.object({
+	id: z.string(),
+	name: z.string(),
+	description: z.string().optional(),
+	context_length: z.number(),
+	max_completion_tokens: z.number().nullish(),
+	architecture: z
+		.object({
+			modality: z.string().nullish(),
+			tokenizer: z.string().nullish(),
+		})
+		.optional(),
+	pricing: z
+		.object({
+			prompt: z.string().nullish(),
+			completion: z.string().nullish(),
+			input_cache_write: z.string().nullish(),
+			input_cache_read: z.string().nullish(),
+		})
+		.optional(),
+	top_provider: z
+		.object({
+			max_completion_tokens: z.number().nullish(),
+		})
+		.optional(),
+})
+
+export type OpenRouterModel = z.infer<typeof openRouterModelSchema>
+
+const openRouterModelsResponseSchema = z.object({
+	data: z.array(openRouterModelSchema),
+})
+
+type OpenRouterModelsResponse = z.infer<typeof openRouterModelsResponseSchema>
+
+export async function getOpenRouterModels(options?: ApiHandlerOptions) {
+	const models: Record<string, ModelInfo> = {}
+	const baseURL = options?.openRouterBaseUrl || "https://openrouter.ai/api/v1"
+
+	try {
+		const response = await axios.get<OpenRouterModelsResponse>(`${baseURL}/models`)
+		const result = openRouterModelsResponseSchema.safeParse(response.data)
+		const rawModels = result.success ? result.data.data : response.data.data
+
+		if (!result.success) {
+			console.error("OpenRouter models response is invalid", result.error.format())
+		}
+
+		for (const rawModel of rawModels) {
+			const cacheWritesPrice = rawModel.pricing?.input_cache_write
+				? parseApiPrice(rawModel.pricing?.input_cache_write)
+				: undefined
+
+			const cacheReadsPrice = rawModel.pricing?.input_cache_read
+				? parseApiPrice(rawModel.pricing?.input_cache_read)
+				: undefined
+
+			// Disable prompt caching for Gemini models for now.
+			const supportsPromptCache = !!cacheWritesPrice && !!cacheReadsPrice && !rawModel.id.startsWith("google")
+
+			const modelInfo: ModelInfo = {
+				maxTokens: rawModel.top_provider?.max_completion_tokens,
+				contextWindow: rawModel.context_length,
+				supportsImages: rawModel.architecture?.modality?.includes("image"),
+				supportsPromptCache,
+				inputPrice: parseApiPrice(rawModel.pricing?.prompt),
+				outputPrice: parseApiPrice(rawModel.pricing?.completion),
+				cacheWritesPrice,
+				cacheReadsPrice,
+				description: rawModel.description,
+				thinking: rawModel.id === "anthropic/claude-3.7-sonnet:thinking",
+			}
+
+			// The OpenRouter model definition doesn't give us any hints about computer use,
+			// so we need to set that manually.
+			// The ideal `maxTokens` values are model dependent, but we should probably DRY
+			// this up and use the values defined for the Anthropic providers.
+			switch (true) {
+				case rawModel.id.startsWith("anthropic/claude-3.7-sonnet"):
+					modelInfo.supportsComputerUse = true
+					modelInfo.maxTokens = rawModel.id === "anthropic/claude-3.7-sonnet:thinking" ? 128_000 : 8192
+					break
+				case rawModel.id.startsWith("anthropic/claude-3.5-sonnet-20240620"):
+					modelInfo.maxTokens = 8192
+					break
+				case rawModel.id.startsWith("anthropic/claude-3.5-sonnet"):
+					modelInfo.supportsComputerUse = true
+					modelInfo.maxTokens = 8192
+					break
+				case rawModel.id.startsWith("anthropic/claude-3-5-haiku"):
+				case rawModel.id.startsWith("anthropic/claude-3-opus"):
+				case rawModel.id.startsWith("anthropic/claude-3-haiku"):
+					modelInfo.maxTokens = 8192
+					break
+				default:
+					break
+			}
+
+			models[rawModel.id] = modelInfo
+		}
+	} catch (error) {
+		console.error(
+			`Error fetching OpenRouter models: ${JSON.stringify(error, Object.getOwnPropertyNames(error), 2)}`,
+		)
+	}
+
+	return models
+}

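For context, a minimal sketch of how a caller might consume this fetcher. The `webviewMessageHandler` change below is the real call site; the filtering here is purely illustrative, and the per-million-token pricing follows the test expectations above (e.g. `inputPrice: 3` for `anthropic/claude-3.7-sonnet`):

import { getOpenRouterModels } from "./openrouter"

async function logPromptCachingModels() {
	// Defaults to https://openrouter.ai/api/v1; resolves to an empty record
	// on failure, because the fetcher catches and logs its own errors.
	const models = await getOpenRouterModels()

	for (const [id, info] of Object.entries(models)) {
		if (info.supportsPromptCache) {
			console.log(`${id}: $${info.cacheWritesPrice}/M cache writes, $${info.cacheReadsPrice}/M cache reads`)
		}
	}
}
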
+ 3 - 1
src/api/providers/openai.ts

@@ -15,7 +15,9 @@ import { convertToSimpleMessages } from "../transform/simple-format"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { BaseProvider } from "./base-provider"
 import { XmlMatcher } from "../../utils/xml-matcher"
-import { DEEP_SEEK_DEFAULT_TEMPERATURE, DEFAULT_HEADERS, AZURE_AI_INFERENCE_PATH } from "./constants"
+import { DEFAULT_HEADERS, DEEP_SEEK_DEFAULT_TEMPERATURE } from "./constants"
+
+export const AZURE_AI_INFERENCE_PATH = "/models/chat/completions"
 
 export interface OpenAiHandlerOptions extends ApiHandlerOptions {}
 

+ 10 - 109
src/api/providers/openrouter.ts

@@ -1,16 +1,19 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta"
-import axios from "axios"
 import OpenAI from "openai"
 
-import { ApiHandlerOptions, ModelInfo, openRouterDefaultModelId, openRouterDefaultModelInfo } from "../../shared/api"
-import { parseApiPrice } from "../../utils/cost"
+import {
+	ApiHandlerOptions,
+	openRouterDefaultModelId,
+	openRouterDefaultModelInfo,
+	PROMPT_CACHING_MODELS,
+} from "../../shared/api"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { ApiStreamChunk } from "../transform/stream"
 import { convertToR1Format } from "../transform/r1-format"
 
+import { getModelParams, SingleCompletionHandler } from "../index"
 import { DEFAULT_HEADERS, DEEP_SEEK_DEFAULT_TEMPERATURE } from "./constants"
-import { getModelParams, SingleCompletionHandler } from ".."
 import { BaseProvider } from "./base-provider"
 
 const OPENROUTER_DEFAULT_PROVIDER_NAME = "[default]"
@@ -62,15 +65,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 	): AsyncGenerator<ApiStreamChunk> {
-		let {
-			id: modelId,
-			maxTokens,
-			thinking,
-			temperature,
-			supportsPromptCache,
-			topP,
-			reasoningEffort,
-		} = this.getModel()
+		let { id: modelId, maxTokens, thinking, temperature, topP, reasoningEffort, info } = this.getModel()
 
 		// Convert Anthropic messages to OpenAI format.
 		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
@@ -86,8 +81,8 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 		// Prompt caching: https://openrouter.ai/docs/prompt-caching
 		// Now with Gemini support: https://openrouter.ai/docs/features/prompt-caching
 		// Note that we don't check the `ModelInfo` object because it is cached
-		// in the settings for OpenRouter.
-		if (this.isPromptCacheSupported(modelId)) {
+		// in the settings for OpenRouter and the value could be stale.
+		if (PROMPT_CACHING_MODELS.has(modelId)) {
 			openAiMessages[0] = {
 				role: "system",
 				// @ts-ignore-next-line
@@ -191,7 +186,6 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 
 		let id = modelId ?? openRouterDefaultModelId
 		const info = modelInfo ?? openRouterDefaultModelInfo
-		const supportsPromptCache = modelInfo?.supportsPromptCache
 		const isDeepSeekR1 = id.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning"
 		const defaultTemperature = isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0
 		const topP = isDeepSeekR1 ? 0.95 : undefined
@@ -200,7 +194,6 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 			id,
 			info,
 			...getModelParams({ options: this.options, model: info, defaultTemperature }),
-			supportsPromptCache,
 			topP,
 		}
 	}
@@ -227,96 +220,4 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 		const completion = response as OpenAI.Chat.ChatCompletion
 		return completion.choices[0]?.message?.content || ""
 	}
-
-	private isPromptCacheSupported(modelId: string) {
-		return (
-			modelId.startsWith("anthropic/claude-3.7-sonnet") ||
-			modelId.startsWith("anthropic/claude-3.5-sonnet") ||
-			modelId.startsWith("anthropic/claude-3-opus") ||
-			modelId.startsWith("anthropic/claude-3-haiku")
-		)
-	}
-}
-
-export async function getOpenRouterModels(options?: ApiHandlerOptions) {
-	const models: Record<string, ModelInfo> = {}
-
-	const baseURL = options?.openRouterBaseUrl || "https://openrouter.ai/api/v1"
-
-	try {
-		const response = await axios.get(`${baseURL}/models`)
-		const rawModels = response.data.data
-
-		for (const rawModel of rawModels) {
-			const modelInfo: ModelInfo = {
-				maxTokens: rawModel.top_provider?.max_completion_tokens,
-				contextWindow: rawModel.context_length,
-				supportsImages: rawModel.architecture?.modality?.includes("image"),
-				supportsPromptCache: false,
-				inputPrice: parseApiPrice(rawModel.pricing?.prompt),
-				outputPrice: parseApiPrice(rawModel.pricing?.completion),
-				description: rawModel.description,
-				thinking: rawModel.id === "anthropic/claude-3.7-sonnet:thinking",
-			}
-
-			// NOTE: This needs to be synced with api.ts/openrouter default model info.
-			switch (true) {
-				case rawModel.id.startsWith("anthropic/claude-3.7-sonnet"):
-					modelInfo.supportsComputerUse = true
-					modelInfo.supportsPromptCache = true
-					modelInfo.cacheWritesPrice = 3.75
-					modelInfo.cacheReadsPrice = 0.3
-					modelInfo.maxTokens = rawModel.id === "anthropic/claude-3.7-sonnet:thinking" ? 128_000 : 8192
-					break
-				case rawModel.id.startsWith("anthropic/claude-3.5-sonnet-20240620"):
-					modelInfo.supportsPromptCache = true
-					modelInfo.cacheWritesPrice = 3.75
-					modelInfo.cacheReadsPrice = 0.3
-					modelInfo.maxTokens = 8192
-					break
-				case rawModel.id.startsWith("anthropic/claude-3.5-sonnet"):
-					modelInfo.supportsComputerUse = true
-					modelInfo.supportsPromptCache = true
-					modelInfo.cacheWritesPrice = 3.75
-					modelInfo.cacheReadsPrice = 0.3
-					modelInfo.maxTokens = 8192
-					break
-				case rawModel.id.startsWith("anthropic/claude-3-5-haiku"):
-					modelInfo.supportsPromptCache = true
-					modelInfo.cacheWritesPrice = 1.25
-					modelInfo.cacheReadsPrice = 0.1
-					modelInfo.maxTokens = 8192
-					break
-				case rawModel.id.startsWith("anthropic/claude-3-opus"):
-					modelInfo.supportsPromptCache = true
-					modelInfo.cacheWritesPrice = 18.75
-					modelInfo.cacheReadsPrice = 1.5
-					modelInfo.maxTokens = 8192
-					break
-				case rawModel.id.startsWith("anthropic/claude-3-haiku"):
-					modelInfo.supportsPromptCache = true
-					modelInfo.cacheWritesPrice = 0.3
-					modelInfo.cacheReadsPrice = 0.03
-					modelInfo.maxTokens = 8192
-					break
-				/* TODO: uncomment once we confirm it's working
-				case rawModel.id.startsWith("google/gemini-2.5-pro-preview-03-25"):
-				case rawModel.id.startsWith("google/gemini-2.0-flash-001"):
-				case rawModel.id.startsWith("google/gemini-flash-1.5"):
-					modelInfo.supportsPromptCache = true
-					break
-				*/
-				default:
-					break
-			}
-
-			models[rawModel.id] = modelInfo
-		}
-	} catch (error) {
-		console.error(
-			`Error fetching OpenRouter models: ${JSON.stringify(error, Object.getOwnPropertyNames(error), 2)}`,
-		)
-	}
-
-	return models
 }

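For context, the `createMessage` hunk above replaces the removed `isPromptCacheSupported` prefix checks with exact-ID membership in `PROMPT_CACHING_MODELS` (defined in `src/shared/api.ts` below). A minimal sketch of the behavioral difference; the `:experimental` variant is a made-up ID for illustration only:

import { PROMPT_CACHING_MODELS } from "../../shared/api"

// Before: prefix matching, so any future variant under a known prefix was
// silently treated as cacheable.
const oldCheck = (modelId: string) => modelId.startsWith("anthropic/claude-3.7-sonnet")

// After: exact membership in an explicit allow-list.
const newCheck = (modelId: string) => PROMPT_CACHING_MODELS.has(modelId)

oldCheck("anthropic/claude-3.7-sonnet:experimental") // true: prefix matches the hypothetical ID
newCheck("anthropic/claude-3.7-sonnet:experimental") // false: not in the allow-list
newCheck("anthropic/claude-3.7-sonnet:thinking") // true: listed explicitly
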
+ 5 - 3
src/api/providers/xai.ts

@@ -1,11 +1,13 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
-import { ApiHandlerOptions, XAIModelId, ModelInfo, xaiDefaultModelId, xaiModels } from "../../shared/api"
+
+import { ApiHandlerOptions, XAIModelId, xaiDefaultModelId, xaiModels, REASONING_MODELS } from "../../shared/api"
 import { ApiStream } from "../transform/stream"
 import { convertToOpenAiMessages } from "../transform/openai-format"
-import { DEFAULT_HEADERS, REASONING_MODELS } from "./constants"
+
+import { SingleCompletionHandler } from "../index"
+import { DEFAULT_HEADERS } from "./constants"
 import { BaseProvider } from "./base-provider"
-import { SingleCompletionHandler } from ".."
 
 const XAI_DEFAULT_TEMPERATURE = 0
 

+ 4 - 2
src/core/webview/webviewMessageHandler.ts

@@ -12,7 +12,7 @@ import { GlobalFileNames } from "../../shared/globalFileNames"
 
 import { checkoutDiffPayloadSchema, checkoutRestorePayloadSchema, WebviewMessage } from "../../shared/WebviewMessage"
 import { checkExistKey } from "../../shared/checkExistApiConfig"
-import { EXPERIMENT_IDS, experimentDefault, ExperimentId } from "../../shared/experiments"
+import { experimentDefault } from "../../shared/experiments"
 import { Terminal } from "../../integrations/terminal/Terminal"
 import { openFile, openImage } from "../../integrations/misc/open-file"
 import { selectImages } from "../../integrations/misc/process-images"
@@ -25,7 +25,7 @@ import { playTts, setTtsEnabled, setTtsSpeed, stopTts } from "../../utils/tts"
 import { singleCompletionHandler } from "../../utils/single-completion-handler"
 import { searchCommits } from "../../utils/git"
 import { exportSettings, importSettings } from "../config/importExport"
-import { getOpenRouterModels } from "../../api/providers/openrouter"
+import { getOpenRouterModels } from "../../api/providers/fetchers/openrouter"
 import { getGlamaModels } from "../../api/providers/glama"
 import { getUnboundModels } from "../../api/providers/unbound"
 import { getRequestyModels } from "../../api/providers/requesty"
@@ -85,6 +85,7 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: We
 			// if we hadn't retrieved the latest at this point
 			// (see normalizeApiConfiguration > openrouter).
 			const { apiConfiguration: currentApiConfig } = await provider.getState()
+
 			getOpenRouterModels(currentApiConfig).then(async (openRouterModels) => {
 				if (Object.keys(openRouterModels).length > 0) {
 					await provider.writeModelsToCache(GlobalFileNames.openRouterModels, openRouterModels)
@@ -101,6 +102,7 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: We
 							"openRouterModelInfo",
 							openRouterModels[apiConfiguration.openRouterModelId],
 						)
+
 						await provider.postStateToWebview()
 					}
 				}

+ 29 - 3
src/shared/api.ts

@@ -1,7 +1,4 @@
 import { ModelInfo, ProviderName, ProviderSettings } from "../schemas"
-import { REASONING_MODELS } from "../api/providers/constants"
-
-export { REASONING_MODELS }
 
 export type { ModelInfo, ProviderName as ApiProvider }
 
@@ -1399,3 +1396,32 @@ export const vscodeLlmModels = {
 		maxInputTokens: number
 	}
 >
+
+/**
+ * Constants
+ */
+
+export const REASONING_MODELS = new Set(["x-ai/grok-3-mini-beta", "grok-3-mini-beta", "grok-3-mini-fast-beta"])
+
+export const PROMPT_CACHING_MODELS = new Set([
+	"anthropic/claude-3-haiku",
+	"anthropic/claude-3-haiku:beta",
+	"anthropic/claude-3-opus",
+	"anthropic/claude-3-opus:beta",
+	"anthropic/claude-3-sonnet",
+	"anthropic/claude-3-sonnet:beta",
+	"anthropic/claude-3.5-haiku",
+	"anthropic/claude-3.5-haiku-20241022",
+	"anthropic/claude-3.5-haiku-20241022:beta",
+	"anthropic/claude-3.5-haiku:beta",
+	"anthropic/claude-3.5-sonnet",
+	"anthropic/claude-3.5-sonnet-20240620",
+	"anthropic/claude-3.5-sonnet-20240620:beta",
+	"anthropic/claude-3.5-sonnet:beta",
+	"anthropic/claude-3.7-sonnet",
+	"anthropic/claude-3.7-sonnet:beta",
+	"anthropic/claude-3.7-sonnet:thinking",
+	// "google/gemini-2.0-flash-001",
+	// "google/gemini-flash-1.5",
+	// "google/gemini-flash-1.5-8b",
+])

+ 1 - 2
webview-ui/src/components/settings/constants.ts

@@ -9,10 +9,9 @@ import {
 	openAiNativeModels,
 	vertexModels,
 	xaiModels,
-	REASONING_MODELS,
 } from "@roo/shared/api"
 
-export { REASONING_MODELS }
+export { REASONING_MODELS, PROMPT_CACHING_MODELS } from "@roo/shared/api"
 
 export const MODELS_BY_PROVIDER: Partial<Record<ApiProvider, Record<string, ModelInfo>>> = {
 	anthropic: anthropicModels,

Some files were not shown because too many files changed in this diff