
Add gemini pro 06 05 (#4386)

* feat: add gemini-2.5-pro-preview-06-05 to Gemini and Vertex providers and UI, identical to 05-06

* feat: add thinking variant for gemini-2.5-pro-preview-06-05

* feat: add gemini-2.5-pro-preview-06-05 support to OpenRouter

* fix: update gemini-2.5-pro-preview model references in OpenRouter

* fix: tests

* feat: enhance reasoning handling in Gemini and Vertex handlers

* feat: add google/gemini-2.5-pro-preview to required reasoning budget models

* fix: refactor thinkingConfig assignment for consistency in Gemini and Vertex handlers

* Fix Gemini reasoning

* Fix tsc error

* Fix tsc error

* Hack to exclude thinking tokens by default

* feat: add global region to VERTEX_REGIONS

---------

Co-authored-by: Shariq Riaz <[email protected]>
Co-authored-by: cte <[email protected]>
Daniel committed 6 months ago
commit 3b45d86df7

+ 26 - 0
packages/types/src/providers/gemini.ts

@@ -104,6 +104,32 @@ export const geminiModels = {
 			},
 		],
 	},
+	"gemini-2.5-pro-preview-06-05": {
+		maxTokens: 65_535,
+		contextWindow: 1_048_576,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
+		outputPrice: 15,
+		cacheReadsPrice: 0.625,
+		cacheWritesPrice: 4.5,
+		maxThinkingTokens: 32_768,
+		supportsReasoningBudget: true,
+		tiers: [
+			{
+				contextWindow: 200_000,
+				inputPrice: 1.25,
+				outputPrice: 10,
+				cacheReadsPrice: 0.31,
+			},
+			{
+				contextWindow: Infinity,
+				inputPrice: 2.5,
+				outputPrice: 15,
+				cacheReadsPrice: 0.625,
+			},
+		],
+	},
 	"gemini-2.0-flash-001": {
 		maxTokens: 8192,
 		contextWindow: 1_048_576,

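The new `tiers` array makes input pricing depend on prompt size: the top-level `inputPrice` is the above-200k rate, and the first tier whose `contextWindow` fits the prompt wins. A minimal sketch of how a cost estimator could resolve the effective rate from these tiers (the `resolveTierPrice` helper is hypothetical, not part of this commit):

```ts
type PriceTier = {
	contextWindow: number
	inputPrice?: number
	outputPrice?: number
	cacheReadsPrice?: number
}

// Hypothetical helper: pick the first tier whose context window fits the prompt.
function resolveTierPrice(tiers: PriceTier[], promptTokens: number): PriceTier | undefined {
	return tiers.find((tier) => promptTokens <= tier.contextWindow)
}

// A 150k-token prompt falls in the 200k tier ($1.25/M input);
// a 300k-token prompt falls in the Infinity tier ($2.50/M input).
```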
+ 15 - 5
packages/types/src/providers/openrouter.ts

@@ -60,16 +60,26 @@ export const OPEN_ROUTER_COMPUTER_USE_MODELS = new Set([
 	"anthropic/claude-opus-4",
 ])
 
+// When we first launched these models we didn't have support for
+// enabling/disabling the reasoning budget for hybrid models. Now that we
+// do support this we should give users the option to enable/disable it
+// whenever possible. However these particular (virtual) model ids with the
+// `:thinking` suffix always require the reasoning budget to be enabled, so
+// for backwards compatibility we should still require it.
+// We should *not* be adding new models to this set.
+export const OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS = new Set([
+	"anthropic/claude-3.7-sonnet:thinking",
+	"google/gemini-2.5-flash-preview-05-20:thinking",
+])
+
 export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([
 	"anthropic/claude-3.7-sonnet:beta",
-	"anthropic/claude-3.7-sonnet:thinking",
 	"anthropic/claude-opus-4",
 	"anthropic/claude-sonnet-4",
+	"google/gemini-2.5-pro-preview",
 	"google/gemini-2.5-flash-preview-05-20",
-	"google/gemini-2.5-flash-preview-05-20:thinking",
-])
-
-export const OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS = new Set([
+	// Also include the models that require the reasoning budget to be enabled
+	// even though `OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS` takes precedence.
 	"anthropic/claude-3.7-sonnet:thinking",
 	"google/gemini-2.5-flash-preview-05-20:thinking",
 ])

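To make the precedence described in the comment concrete, a caller deciding whether the reasoning-budget toggle is user-controllable would check the "required" set before the general one. A sketch (the helper name is illustrative, not from this commit):

```ts
// Hypothetical helper: required membership wins over optional membership.
function reasoningBudgetMode(modelId: string): "required" | "optional" | "none" {
	if (OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS.has(modelId)) return "required"
	if (OPEN_ROUTER_REASONING_BUDGET_MODELS.has(modelId)) return "optional"
	return "none"
}

reasoningBudgetMode("anthropic/claude-3.7-sonnet:thinking") // "required"
reasoningBudgetMode("google/gemini-2.5-pro-preview") // "optional"
```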
+ 11 - 0
packages/types/src/providers/vertex.ts

@@ -60,6 +60,16 @@ export const vertexModels = {
 		inputPrice: 2.5,
 		outputPrice: 15,
 	},
+	"gemini-2.5-pro-preview-06-05": {
+		maxTokens: 65_535,
+		contextWindow: 1_048_576,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 2.5,
+		outputPrice: 15,
+		maxThinkingTokens: 32_768,
+		supportsReasoningBudget: true,
+	},
 	"gemini-2.5-pro-exp-03-25": {
 		maxTokens: 65_535,
 		contextWindow: 1_048_576,
@@ -217,6 +227,7 @@ export const vertexModels = {
 } as const satisfies Record<string, ModelInfo>
 
 export const VERTEX_REGIONS = [
+	{ value: "global", label: "global" },
 	{ value: "us-east5", label: "us-east5" },
 	{ value: "us-central1", label: "us-central1" },
 	{ value: "europe-west1", label: "europe-west1" },

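The new `global` entry lets users target Google's global endpoint rather than a pinned region. Assuming the handler forwards the selected region as the SDK's `location` (a sketch; the exact wiring isn't shown in this diff, and the project id is a placeholder):

```ts
import { GoogleGenAI } from "@google/genai"

// Sketch: a Vertex client pointed at the global endpoint instead of a region.
const client = new GoogleGenAI({
	vertexai: true,
	project: "my-gcp-project", // placeholder project id
	location: "global",
})
```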
+ 4 - 2
src/api/providers/fetchers/__tests__/openrouter.spec.ts

@@ -185,10 +185,11 @@ describe("OpenRouter API", () => {
 
 			expect(endpoints).toEqual({
 				Google: {
-					maxTokens: 0,
+					maxTokens: 65535,
 					contextWindow: 1048576,
 					supportsImages: true,
 					supportsPromptCache: true,
+					supportsReasoningBudget: true,
 					inputPrice: 1.25,
 					outputPrice: 10,
 					cacheWritesPrice: 1.625,
@@ -198,10 +199,11 @@ describe("OpenRouter API", () => {
 					supportedParameters: undefined,
 				},
 				"Google AI Studio": {
-					maxTokens: 0,
+					maxTokens: 65536,
 					contextWindow: 1048576,
 					supportsImages: true,
 					supportsPromptCache: true,
+					supportsReasoningBudget: true,
 					inputPrice: 1.25,
 					outputPrice: 10,
 					cacheWritesPrice: 1.625,

+ 35 - 29
src/api/providers/gemini.ts

@@ -14,6 +14,7 @@ import { safeJsonParse } from "../../shared/safeJsonParse"
 
 import { convertAnthropicContentToGemini, convertAnthropicMessageToGemini } from "../transform/gemini-format"
 import type { ApiStream } from "../transform/stream"
+import { getModelParams } from "../transform/model-params"
 
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 import { BaseProvider } from "./base-provider"
@@ -62,7 +63,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
-		const { id: model, thinkingConfig, maxOutputTokens, info } = this.getModel()
+		const { id: model, info, reasoning: thinkingConfig, maxTokens } = this.getModel()
 
 		const contents = messages.map(convertAnthropicMessageToGemini)
 
@@ -70,7 +71,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 			systemInstruction,
 			httpOptions: this.options.googleGeminiBaseUrl ? { baseUrl: this.options.googleGeminiBaseUrl } : undefined,
 			thinkingConfig,
-			maxOutputTokens,
+			maxOutputTokens: this.options.modelMaxTokens ?? maxTokens ?? undefined,
 			temperature: this.options.modelTemperature ?? 0,
 		}
 
@@ -81,7 +82,28 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 		let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined
 
 		for await (const chunk of result) {
-			if (chunk.text) {
+			// Process candidates and their parts to separate thoughts from content
+			if (chunk.candidates && chunk.candidates.length > 0) {
+				const candidate = chunk.candidates[0]
+				if (candidate.content && candidate.content.parts) {
+					for (const part of candidate.content.parts) {
+						if (part.thought) {
+							// This is a thinking/reasoning part
+							if (part.text) {
+								yield { type: "reasoning", text: part.text }
+							}
+						} else {
+							// This is regular content
+							if (part.text) {
+								yield { type: "text", text: part.text }
+							}
+						}
+					}
+				}
+			}
+
+			// Fall back to the original `text` property when there is no candidates structure
+			else if (chunk.text) {
 				yield { type: "text", text: chunk.text }
 			}
 
@@ -108,32 +130,16 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 	}
 
 	override getModel() {
-		let id = this.options.apiModelId ?? geminiDefaultModelId
-		let info: ModelInfo = geminiModels[id as GeminiModelId]
-
-		if (id?.endsWith(":thinking")) {
-			id = id.slice(0, -":thinking".length)
-
-			if (geminiModels[id as GeminiModelId]) {
-				info = geminiModels[id as GeminiModelId]
-
-				return {
-					id,
-					info,
-					thinkingConfig: this.options.modelMaxThinkingTokens
-						? { thinkingBudget: this.options.modelMaxThinkingTokens }
-						: undefined,
-					maxOutputTokens: this.options.modelMaxTokens ?? info.maxTokens ?? undefined,
-				}
-			}
-		}
-
-		if (!info) {
-			id = geminiDefaultModelId
-			info = geminiModels[geminiDefaultModelId]
-		}
-
-		return { id, info }
+		const modelId = this.options.apiModelId
+		let id = modelId && modelId in geminiModels ? (modelId as GeminiModelId) : geminiDefaultModelId
+		const info: ModelInfo = geminiModels[id]
+		const params = getModelParams({ format: "gemini", modelId: id, model: info, settings: this.options })
+
+		// The `:thinking` suffix indicates that the model is a "Hybrid"
+		// reasoning model and that reasoning is required to be enabled.
+		// The actual model ID honored by Gemini's API does not have this
+		// suffix.
+		return { id: id.endsWith(":thinking") ? id.replace(":thinking", "") : id, info, ...params }
 	}
 
 	async completePrompt(prompt: string): Promise<string> {

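With the new part-level handling, callers of `createMessage` receive interleaved `reasoning` and `text` chunks instead of a single text stream. A sketch of a consumer that keeps the two apart (the chunk shape follows the `yield`s above; the surrounding function is illustrative):

```ts
// Sketch: collect thought parts and answer parts separately.
// `stream` is the ApiStream returned by handler.createMessage(...).
async function collect(stream: AsyncIterable<{ type: string; text?: string }>) {
	let reasoning = ""
	let answer = ""
	for await (const chunk of stream) {
		if (chunk.type === "reasoning") reasoning += chunk.text ?? ""
		else if (chunk.type === "text") answer += chunk.text ?? ""
	}
	return { reasoning, answer }
}
```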
+ 9 - 0
src/api/providers/openrouter.ts

@@ -74,6 +74,15 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 
 		let { id: modelId, maxTokens, temperature, topP, reasoning } = model
 
+		// OpenRouter sends reasoning tokens by default for Gemini 2.5 Pro
+		// Preview even if you don't request them. This is not the default for
+		// other providers (including Gemini), so we need to explicitly disable
+		// it. We should generalize this using the logic in `getModelParams`, but
+		// this is easier for now.
+		if (modelId === "google/gemini-2.5-pro-preview" && typeof reasoning === "undefined") {
+			reasoning = { exclude: true }
+		}
+
 		// Convert Anthropic messages to OpenAI format.
 		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },

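The `{ exclude: true }` value maps onto OpenRouter's `reasoning` request parameter, which lets the provider reason but keeps the reasoning tokens out of the response. Roughly what the serialized request carries (a sketch of the wire format, not code from this commit):

```ts
// Sketch: the relevant slice of the chat-completions request body.
const body = {
	model: "google/gemini-2.5-pro-preview",
	messages: [{ role: "user", content: "..." }],
	reasoning: { exclude: true }, // keep reasoning tokens out of the streamed output
}
```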
+ 12 - 26
src/api/providers/vertex.ts

@@ -2,6 +2,8 @@ import { type ModelInfo, type VertexModelId, vertexDefaultModelId, vertexModels
 
 import type { ApiHandlerOptions } from "../../shared/api"
 
+import { getModelParams } from "../transform/model-params"
+
 import { GeminiHandler } from "./gemini"
 import { SingleCompletionHandler } from "../index"
 
@@ -11,31 +13,15 @@ export class VertexHandler extends GeminiHandler implements SingleCompletionHand
 	}
 
 	override getModel() {
-		let id = this.options.apiModelId ?? vertexDefaultModelId
-		let info: ModelInfo = vertexModels[id as VertexModelId]
-
-		if (id?.endsWith(":thinking")) {
-			id = id.slice(0, -":thinking".length) as VertexModelId
-
-			if (vertexModels[id as VertexModelId]) {
-				info = vertexModels[id as VertexModelId]
-
-				return {
-					id,
-					info,
-					thinkingConfig: this.options.modelMaxThinkingTokens
-						? { thinkingBudget: this.options.modelMaxThinkingTokens }
-						: undefined,
-					maxOutputTokens: this.options.modelMaxTokens ?? info.maxTokens ?? undefined,
-				}
-			}
-		}
-
-		if (!info) {
-			id = vertexDefaultModelId
-			info = vertexModels[vertexDefaultModelId]
-		}
-
-		return { id, info }
+		const modelId = this.options.apiModelId
+		let id = modelId && modelId in vertexModels ? (modelId as VertexModelId) : vertexDefaultModelId
+		const info: ModelInfo = vertexModels[id]
+		const params = getModelParams({ format: "gemini", modelId: id, model: info, settings: this.options })
+
+		// The `:thinking` suffix indicates that the model is a "Hybrid"
+		// reasoning model and that reasoning is required to be enabled.
+		// The actual model ID honored by Gemini's API does not have this
+		// suffix.
+		return { id: id.endsWith(":thinking") ? id.replace(":thinking", "") : id, info, ...params }
 	}
 }

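Both overrides now normalize the `:thinking` suffix the same way, so the virtual UI id and the id sent on the wire stay distinct. For example (illustrative values):

```ts
// Sketch: a ":thinking" entry resolves to the base model id on the wire,
// while `info` and the reasoning params still come from the ":thinking" variant.
const { id } = handler.getModel() // apiModelId: "gemini-2.5-pro-preview-06-05:thinking"
console.log(id) // "gemini-2.5-pro-preview-06-05"
```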
+ 23 - 7
src/api/transform/model-params.ts

@@ -5,13 +5,17 @@ import { shouldUseReasoningBudget, shouldUseReasoningEffort } from "../../shared
 import {
 	type AnthropicReasoningParams,
 	type OpenAiReasoningParams,
+	type GeminiReasoningParams,
 	type OpenRouterReasoningParams,
 	getAnthropicReasoning,
 	getOpenAiReasoning,
+	getGeminiReasoning,
 	getOpenRouterReasoning,
 } from "./reasoning"
 
-type GetModelParamsOptions<T extends "openai" | "anthropic" | "openrouter"> = {
+type Format = "anthropic" | "openai" | "gemini" | "openrouter"
+
+type GetModelParamsOptions<T extends Format> = {
 	format: T
 	modelId: string
 	model: ModelInfo
@@ -26,14 +30,19 @@ type BaseModelParams = {
 	reasoningBudget: number | undefined
 }
 
+type AnthropicModelParams = {
+	format: "anthropic"
+	reasoning: AnthropicReasoningParams | undefined
+} & BaseModelParams
+
 type OpenAiModelParams = {
 	format: "openai"
 	reasoning: OpenAiReasoningParams | undefined
 } & BaseModelParams
 
-type AnthropicModelParams = {
-	format: "anthropic"
-	reasoning: AnthropicReasoningParams | undefined
+type GeminiModelParams = {
+	format: "gemini"
+	reasoning: GeminiReasoningParams | undefined
 } & BaseModelParams
 
 type OpenRouterModelParams = {
@@ -41,11 +50,12 @@ type OpenRouterModelParams = {
 	reasoning: OpenRouterReasoningParams | undefined
 } & BaseModelParams
 
-export type ModelParams = OpenAiModelParams | AnthropicModelParams | OpenRouterModelParams
+export type ModelParams = AnthropicModelParams | OpenAiModelParams | GeminiModelParams | OpenRouterModelParams
 
 // Function overloads for specific return types
-export function getModelParams(options: GetModelParamsOptions<"openai">): OpenAiModelParams
 export function getModelParams(options: GetModelParamsOptions<"anthropic">): AnthropicModelParams
+export function getModelParams(options: GetModelParamsOptions<"openai">): OpenAiModelParams
+export function getModelParams(options: GetModelParamsOptions<"gemini">): GeminiModelParams
 export function getModelParams(options: GetModelParamsOptions<"openrouter">): OpenRouterModelParams
 export function getModelParams({
 	format,
@@ -53,7 +63,7 @@ export function getModelParams({
 	model,
 	settings,
 	defaultTemperature = 0,
-}: GetModelParamsOptions<"openai" | "anthropic" | "openrouter">): ModelParams {
+}: GetModelParamsOptions<Format>): ModelParams {
 	const {
 		modelMaxTokens: customMaxTokens,
 		modelMaxThinkingTokens: customMaxThinkingTokens,
@@ -121,6 +131,12 @@ export function getModelParams({
 			...params,
 			reasoning: getOpenAiReasoning({ model, reasoningBudget, reasoningEffort, settings }),
 		}
+	} else if (format === "gemini") {
+		return {
+			format,
+			...params,
+			reasoning: getGeminiReasoning({ model, reasoningBudget, reasoningEffort, settings }),
+		}
 	} else {
 		// Special case for o1-pro, which doesn't support temperature.
 		// Note that OpenRouter's `supported_parameters` field includes

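The new overload means a call with `format: "gemini"` returns a type whose `reasoning` field is already narrowed to `GeminiReasoningParams`, so handlers can pass it straight through as `thinkingConfig`. A sketch of the call-site pattern both handlers now use:

```ts
const params = getModelParams({
	format: "gemini",
	modelId: "gemini-2.5-pro-preview-06-05",
	model: geminiModels["gemini-2.5-pro-preview-06-05"],
	settings: options, // the handler's ApiHandlerOptions / provider settings
})

// params.reasoning: GeminiReasoningParams | undefined — usable as thinkingConfig.
```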
+ 12 - 0
src/api/transform/reasoning.ts

@@ -1,5 +1,6 @@
 import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta"
 import OpenAI from "openai"
+import type { GenerateContentConfig } from "@google/genai"
 
 import type { ModelInfo, ProviderSettings } from "@roo-code/types"
 
@@ -17,6 +18,8 @@ export type AnthropicReasoningParams = BetaThinkingConfigParam
 
 export type OpenAiReasoningParams = { reasoning_effort: OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"] }
 
+export type GeminiReasoningParams = GenerateContentConfig["thinkingConfig"]
+
 export type GetModelReasoningOptions = {
 	model: ModelInfo
 	reasoningBudget: number | undefined
@@ -49,3 +52,12 @@ export const getOpenAiReasoning = ({
 	settings,
 }: GetModelReasoningOptions): OpenAiReasoningParams | undefined =>
 	shouldUseReasoningEffort({ model, settings }) ? { reasoning_effort: reasoningEffort } : undefined
+
+export const getGeminiReasoning = ({
+	model,
+	reasoningBudget,
+	settings,
+}: GetModelReasoningOptions): GeminiReasoningParams | undefined =>
+	shouldUseReasoningBudget({ model, settings })
+		? { thinkingBudget: reasoningBudget!, includeThoughts: true }
+		: undefined

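Concretely, `getGeminiReasoning` yields a `thinkingConfig` only when the reasoning budget is enabled for the model/settings pair, and `includeThoughts: true` is what makes the thought parts appear in the stream handled above. For example (illustrative arguments):

```ts
// Budget enabled for this model/settings pair:
getGeminiReasoning({ model: info, reasoningBudget: 8192, reasoningEffort: undefined, settings })
// => { thinkingBudget: 8192, includeThoughts: true }

// Budget disabled: returns undefined, so no thinkingConfig is sent at all.
```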
+ 1 - 2
webview-ui/src/components/settings/ModelInfoView.tsx

@@ -73,8 +73,7 @@ export const ModelInfoView = ({
 		),
 		apiProvider === "gemini" && (
 			<span className="italic">
-				{selectedModelId === "gemini-2.5-pro-preview-03-25" ||
-				selectedModelId === "gemini-2.5-pro-preview-05-06"
+				{selectedModelId.includes("pro-preview")
 					? t("settings:modelInfo.gemini.billingEstimate")
 					: t("settings:modelInfo.gemini.freeRequests", {
 							count: selectedModelId && selectedModelId.includes("flash") ? 15 : 2,