Sfoglia il codice sorgente

Merge branch 'upstream-at-v3.34.7' into roo-v3.34.7

Kevin van Dijk 2 mesi fa
parent
commit
f00dc5a160
100 file modificati con 4745 aggiunte e 1048 eliminazioni
  1. 1 1
      .kilocode/rules-translate/instructions-zh-cn.md
  2. 1 1
      apps/vscode-e2e/package.json
  3. 1 0
      apps/web-evals/package.json
  4. 1 2
      apps/web-evals/src/actions/runs.ts
  5. 0 24
      apps/web-evals/src/app/api/health/route.ts
  6. 178 3
      apps/web-evals/src/app/runs/[id]/run.tsx
  7. 360 103
      apps/web-evals/src/app/runs/new/new-run.tsx
  8. 35 45
      apps/web-evals/src/app/runs/new/settings-diff.tsx
  9. 76 19
      apps/web-evals/src/components/home/run.tsx
  10. 198 14
      apps/web-evals/src/components/home/runs.tsx
  11. 27 0
      apps/web-evals/src/components/ui/checkbox.tsx
  12. 1 0
      apps/web-evals/src/components/ui/index.ts
  13. 22 9
      apps/web-evals/src/components/ui/multi-select.tsx
  14. 37 0
      apps/web-evals/src/hooks/use-fuzzy-model-search.ts
  15. 8 2
      apps/web-evals/src/hooks/use-open-router-models.ts
  16. 66 0
      apps/web-evals/src/hooks/use-roo-code-cloud-models.ts
  17. 10 0
      apps/web-evals/src/lib/formatters.ts
  18. 1 3
      apps/web-evals/src/lib/schemas.ts
  19. 0 1
      apps/web-roo-code/.env.example
  20. 78 42
      apps/web-roo-code/src/app/pricing/page.tsx
  21. 190 0
      apps/web-roo-code/src/app/provider/pricing/components/model-card.tsx
  22. 253 0
      apps/web-roo-code/src/app/provider/pricing/page.tsx
  23. 2 9
      apps/web-roo-code/src/components/providers/posthog-provider.tsx
  24. 1 1
      apps/web-roo-code/src/lib/constants.ts
  25. 22 0
      apps/web-roo-code/src/lib/formatters.ts
  26. 31 0
      apps/web-roo-code/src/lib/types/models.ts
  27. 1 1
      knip.json
  28. 5 4
      package.json
  29. 0 12
      packages/build/src/__tests__/index.test.ts
  30. 10 0
      packages/cloud/src/CloudAPI.ts
  31. 4 3
      packages/cloud/src/CloudService.ts
  32. 2 1
      packages/cloud/src/StaticTokenAuthService.ts
  33. 14 3
      packages/cloud/src/WebAuthService.ts
  34. 96 0
      packages/cloud/src/__tests__/CloudAPI.creditBalance.spec.ts
  35. 12 2
      packages/cloud/src/__tests__/CloudService.test.ts
  36. 48 1
      packages/cloud/src/__tests__/WebAuthService.spec.ts
  37. 87 128
      packages/evals/scripts/setup.sh
  38. 1 1
      packages/evals/src/cli/index.ts
  39. 2 2
      packages/evals/src/cli/runEvals.ts
  40. 39 4
      packages/evals/src/cli/runTask.ts
  41. 1 0
      packages/evals/src/db/migrations/0003_simple_retro_girl.sql
  42. 459 0
      packages/evals/src/db/migrations/meta/0003_snapshot.json
  43. 7 0
      packages/evals/src/db/migrations/meta/_journal.json
  44. 1 0
      packages/evals/src/db/schema.ts
  45. 2 2
      packages/telemetry/src/TelemetryService.ts
  46. 1 1
      packages/types/npm/package.metadata.json
  47. 7 2
      packages/types/src/cloud.ts
  48. 14 1
      packages/types/src/codebase-index.ts
  49. 2 0
      packages/types/src/experiment.ts
  50. 9 0
      packages/types/src/global-settings.ts
  51. 59 0
      packages/types/src/image-generation.ts
  52. 1 0
      packages/types/src/index.ts
  53. 1 0
      packages/types/src/message.ts
  54. 2 0
      packages/types/src/model.ts
  55. 11 1
      packages/types/src/provider-settings.ts
  56. 17 5
      packages/types/src/providers/anthropic.ts
  57. 126 0
      packages/types/src/providers/baseten.ts
  58. 17 1
      packages/types/src/providers/bedrock.ts
  59. 0 30
      packages/types/src/providers/cerebras.ts
  60. 38 0
      packages/types/src/providers/claude-code.ts
  61. 60 243
      packages/types/src/providers/gemini.ts
  62. 4 0
      packages/types/src/providers/index.ts
  63. 7 3
      packages/types/src/providers/minimax.ts
  64. 18 9
      packages/types/src/providers/mistral.ts
  65. 4 0
      packages/types/src/providers/moonshot.ts
  66. 30 0
      packages/types/src/providers/openai.ts
  67. 2 0
      packages/types/src/providers/openrouter.ts
  68. 3 2
      packages/types/src/providers/vertex.ts
  69. 15 0
      packages/types/src/providers/zai.ts
  70. 2 0
      packages/types/src/telemetry.ts
  71. 179 174
      pnpm-lock.yaml
  72. BIN
      releases/3.32.1-release.png
  73. BIN
      releases/3.33.1-release.png
  74. BIN
      releases/3.33.3-release.png
  75. BIN
      releases/3.34.0-release.png
  76. BIN
      releases/3.34.2-release.png
  77. BIN
      releases/3.34.3-release.png
  78. BIN
      releases/3.34.4-release.png
  79. BIN
      releases/3.34.5-release.png
  80. BIN
      releases/3.34.6-release.png
  81. BIN
      releases/3.34.7-release.png
  82. BIN
      releases/v3.33.0-release.png
  83. 2 0
      src/activate/handleUri.ts
  84. 10 0
      src/api/index.ts
  85. 140 0
      src/api/providers/__tests__/anthropic-vertex.spec.ts
  86. 438 0
      src/api/providers/__tests__/anthropic.spec.ts
  87. 1 1
      src/api/providers/__tests__/base-openai-compatible-provider.spec.ts
  88. 41 0
      src/api/providers/__tests__/bedrock-inference-profiles.spec.ts
  89. 6 10
      src/api/providers/__tests__/featherless.spec.ts
  90. 4 6
      src/api/providers/__tests__/fireworks.spec.ts
  91. 9 28
      src/api/providers/__tests__/gemini.spec.ts
  92. 5 3
      src/api/providers/__tests__/groq.spec.ts
  93. 2 2
      src/api/providers/__tests__/io-intelligence.spec.ts
  94. 152 80
      src/api/providers/__tests__/minimax.spec.ts
  95. 220 1
      src/api/providers/__tests__/mistral.spec.ts
  96. 6 0
      src/api/providers/__tests__/openai-native.spec.ts
  97. 341 0
      src/api/providers/__tests__/openai.spec.ts
  98. 0 1
      src/api/providers/__tests__/openrouter.spec.ts
  99. 347 0
      src/api/providers/__tests__/roo.spec.ts
  100. 1 1
      src/api/providers/__tests__/sambanova.spec.ts

+ 1 - 1
.kilocode/rules-translate/instructions-zh-cn.md

@@ -115,7 +115,7 @@
 
     - 保留英文品牌名
     - 技术术语保持一致性
-    - 保留英文专有名词:如"AWS Bedrock ARN"
+    - 保留英文专有名词:如"Amazon Bedrock ARN"
 
 4. **用户操作**
     - 操作动词统一:

+ 1 - 1
apps/vscode-e2e/package.json

@@ -18,7 +18,7 @@
 		"@types/vscode": "^1.95.0",
 		"@vscode/test-cli": "^0.0.11",
 		"@vscode/test-electron": "^2.4.0",
-		"glob": "^11.0.1",
+		"glob": "^11.1.0",
 		"mocha": "^11.1.0",
 		"rimraf": "^6.1.0",
 		"typescript": "5.8.3"

+ 1 - 0
apps/web-evals/package.json

@@ -14,6 +14,7 @@
 	"dependencies": {
 		"@hookform/resolvers": "^5.1.1",
 		"@radix-ui/react-alert-dialog": "^1.1.7",
+		"@radix-ui/react-checkbox": "^1.1.5",
 		"@radix-ui/react-dialog": "^1.1.6",
 		"@radix-ui/react-dropdown-menu": "^2.1.7",
 		"@radix-ui/react-label": "^2.1.2",

+ 1 - 2
apps/web-evals/src/actions/runs.ts

@@ -21,8 +21,7 @@ import { CreateRun } from "@/lib/schemas"
 
 const EVALS_REPO_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals")
 
-// eslint-disable-next-line @typescript-eslint/no-unused-vars
-export async function createRun({ suite, exercises = [], systemPrompt, timeout, ...values }: CreateRun) {
+export async function createRun({ suite, exercises = [], timeout, ...values }: CreateRun) {
 	const run = await _createRun({
 		...values,
 		timeout,

+ 0 - 24
apps/web-evals/src/app/api/health/route.ts

@@ -1,24 +0,0 @@
-import { NextResponse } from "next/server"
-
-export async function GET() {
-	try {
-		return NextResponse.json(
-			{
-				status: "healthy",
-				timestamp: new Date().toISOString(),
-				uptime: process.uptime(),
-				environment: process.env.NODE_ENV || "production",
-			},
-			{ status: 200 },
-		)
-	} catch (error) {
-		return NextResponse.json(
-			{
-				status: "unhealthy",
-				timestamp: new Date().toISOString(),
-				error: error instanceof Error ? error.message : "Unknown error",
-			},
-			{ status: 503 },
-		)
-	}
-}

+ 178 - 3
apps/web-evals/src/app/runs/[id]/run.tsx

@@ -5,15 +5,36 @@ import { LoaderCircle } from "lucide-react"
 
 import type { Run, TaskMetrics as _TaskMetrics } from "@roo-code/evals"
 
-import { formatCurrency, formatDuration, formatTokens } from "@/lib/formatters"
+import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters"
 import { useRunStatus } from "@/hooks/use-run-status"
-import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui"
+import {
+	Table,
+	TableBody,
+	TableCell,
+	TableHead,
+	TableHeader,
+	TableRow,
+	Tooltip,
+	TooltipContent,
+	TooltipTrigger,
+} from "@/components/ui"
 
 import { TaskStatus } from "./task-status"
 import { RunStatus } from "./run-status"
 
 type TaskMetrics = Pick<_TaskMetrics, "tokensIn" | "tokensOut" | "tokensContext" | "duration" | "cost">
 
+type ToolUsageEntry = { attempts: number; failures: number }
+type ToolUsage = Record<string, ToolUsageEntry>
+
+// Build an abbreviation from the uppercased first character of each "_"-separated word of a tool name (e.g., "read_file" -> "RF", "list_code_definition_names" -> "LCDN"); empty segments contribute nothing.
+function getToolAbbreviation(toolName: string): string {
+	return toolName
+		.split("_")
+		.map((word) => word[0]?.toUpperCase() ?? "")
+		.join("")
+}
+
 export function Run({ run }: { run: Run }) {
 	const runStatus = useRunStatus(run)
 	const { tasks, tokenUsage, usageUpdatedAt } = runStatus
@@ -41,16 +62,170 @@ export function Run({ run }: { run: Run }) {
 		// eslint-disable-next-line react-hooks/exhaustive-deps
 	}, [tasks, tokenUsage, usageUpdatedAt])
 
+	// Compute aggregate stats
+	const stats = useMemo(() => {
+		if (!tasks) return null
+
+		const passed = tasks.filter((t) => t.passed === true).length
+		const failed = tasks.filter((t) => t.passed === false).length
+		// Count running tasks using the same rule TaskStatus uses to show its spinner:
+		// - passed is neither true nor false (i.e. null/undefined)
+		// - AND the task has activity (startedAt is set or it has a tokenUsage entry)
+		const running = tasks.filter(
+			(t) => t.passed !== true && t.passed !== false && (t.startedAt || tokenUsage.get(t.id)),
+		).length
+		const pending = tasks.filter(
+			(t) => t.passed !== true && t.passed !== false && !t.startedAt && !tokenUsage.get(t.id),
+		).length
+		const total = tasks.length
+		const completed = passed + failed
+
+		let totalTokensIn = 0
+		let totalTokensOut = 0
+		let totalCost = 0
+		let totalDuration = 0
+
+		// Per-tool attempt/failure totals, aggregated from finished tasks that carry taskMetrics.toolUsage
+		const toolUsage: ToolUsage = {}
+
+		for (const task of tasks) {
+			const metrics = taskMetrics[task.id]
+			if (metrics) {
+				totalTokensIn += metrics.tokensIn
+				totalTokensOut += metrics.tokensOut
+				totalCost += metrics.cost
+				totalDuration += metrics.duration
+			}
+
+			// Aggregate tool usage from finished tasks with taskMetrics
+			if (task.finishedAt && task.taskMetrics?.toolUsage) {
+				for (const [key, usage] of Object.entries(task.taskMetrics.toolUsage)) {
+					const tool = key as keyof ToolUsage
+					if (!toolUsage[tool]) {
+						toolUsage[tool] = { attempts: 0, failures: 0 }
+					}
+					toolUsage[tool].attempts += usage.attempts
+					toolUsage[tool].failures += usage.failures
+				}
+			}
+		}
+
+		return {
+			passed,
+			failed,
+			running,
+			pending,
+			total,
+			completed,
+			passRate: completed > 0 ? ((passed / completed) * 100).toFixed(1) : null,
+			totalTokensIn,
+			totalTokensOut,
+			totalCost,
+			totalDuration,
+			toolUsage,
+		}
+		// eslint-disable-next-line react-hooks/exhaustive-deps
+	}, [tasks, taskMetrics, tokenUsage, usageUpdatedAt])
+
 	return (
 		<>
 			<div>
-				<div className="mb-2">
+				<div className="mb-4">
 					<div>
 						<div className="font-mono">{run.model}</div>
 						{run.description && <div className="text-sm text-muted-foreground">{run.description}</div>}
 					</div>
 					{!run.taskMetricsId && <RunStatus runStatus={runStatus} />}
 				</div>
+
+				{stats && (
+					<div className="mb-4 p-4 border rounded-lg bg-muted/50">
+						{/* Main Stats Row */}
+						<div className="flex flex-wrap items-start justify-between gap-x-6 gap-y-3">
+							{/* Passed/Failed */}
+							<div className="text-center">
+								<div className="text-2xl font-bold whitespace-nowrap">
+									<span className="text-green-600">{stats.passed}</span>
+									<span className="text-muted-foreground mx-1">/</span>
+									<span className="text-red-600">{stats.failed}</span>
+									{stats.running > 0 && (
+										<span className="text-yellow-600 text-sm ml-2">({stats.running})</span>
+									)}
+								</div>
+								<div className="text-xs text-muted-foreground">Passed / Failed</div>
+							</div>
+
+							{/* Pass Rate */}
+							<div className="text-center">
+								<div className="text-2xl font-bold">{stats.passRate ? `${stats.passRate}%` : "-"}</div>
+								<div className="text-xs text-muted-foreground">Pass Rate</div>
+							</div>
+
+							{/* Tokens */}
+							<div className="text-center">
+								<div className="text-xl font-bold font-mono whitespace-nowrap">
+									{formatTokens(stats.totalTokensIn)}
+									<span className="text-muted-foreground mx-1">/</span>
+									{formatTokens(stats.totalTokensOut)}
+								</div>
+								<div className="text-xs text-muted-foreground">Tokens In / Out</div>
+							</div>
+
+							{/* Cost */}
+							<div className="text-center">
+								<div className="text-2xl font-bold font-mono">{formatCurrency(stats.totalCost)}</div>
+								<div className="text-xs text-muted-foreground">Cost</div>
+							</div>
+
+							{/* Duration */}
+							<div className="text-center">
+								<div className="text-2xl font-bold font-mono whitespace-nowrap">
+									{stats.totalDuration > 0 ? formatDuration(stats.totalDuration) : "-"}
+								</div>
+								<div className="text-xs text-muted-foreground">Duration</div>
+							</div>
+
+							{/* Tool Usage - Inline */}
+							{Object.keys(stats.toolUsage).length > 0 && (
+								<div className="flex items-center gap-2 flex-wrap">
+									{Object.entries(stats.toolUsage)
+										.sort(([, a], [, b]) => b.attempts - a.attempts)
+										.map(([toolName, usage]) => {
+											const abbr = getToolAbbreviation(toolName)
+											const successRate =
+												usage.attempts > 0
+													? ((usage.attempts - usage.failures) / usage.attempts) * 100
+													: 100
+											const rateColor =
+												successRate === 100
+													? "text-green-500"
+													: successRate >= 80
+														? "text-yellow-500"
+														: "text-red-500"
+											return (
+												<Tooltip key={toolName}>
+													<TooltipTrigger asChild>
+														<div className="flex items-center gap-1 px-2 py-1 rounded bg-background/50 border border-border/50 hover:border-border transition-colors cursor-default text-xs">
+															<span className="font-medium text-muted-foreground">
+																{abbr}
+															</span>
+															<span className="font-bold tabular-nums">
+																{usage.attempts}
+															</span>
+															<span className={`${rateColor}`}>
+																{formatToolUsageSuccessRate(usage)}
+															</span>
+														</div>
+													</TooltipTrigger>
+													<TooltipContent side="bottom">{toolName}</TooltipContent>
+												</Tooltip>
+											)
+										})}
+								</div>
+							)}
+						</div>
+					</div>
+				)}
 				{!tasks ? (
 					<LoaderCircle className="size-4 animate-spin" />
 				) : (

+ 360 - 103
apps/web-evals/src/app/runs/new/new-run.tsx

@@ -1,23 +1,29 @@
 "use client"
 
-import { useCallback, useRef, useState } from "react"
+import { useCallback, useEffect, useMemo, useState } from "react"
 import { useRouter } from "next/navigation"
 import { z } from "zod"
 import { useQuery } from "@tanstack/react-query"
 import { useForm, FormProvider } from "react-hook-form"
 import { zodResolver } from "@hookform/resolvers/zod"
-import fuzzysort from "fuzzysort"
 import { toast } from "sonner"
-import { X, Rocket, Check, ChevronsUpDown, SlidersHorizontal, CircleCheck } from "lucide-react"
+import { X, Rocket, Check, ChevronsUpDown, SlidersHorizontal } from "lucide-react"
 
-import { globalSettingsSchema, providerSettingsSchema, EVALS_SETTINGS, getModelId } from "@roo-code/types"
+import {
+	globalSettingsSchema,
+	providerSettingsSchema,
+	EVALS_SETTINGS,
+	getModelId,
+	type ProviderSettings,
+	type GlobalSettings,
+} from "@roo-code/types"
 
 import { createRun } from "@/actions/runs"
 import { getExercises } from "@/actions/exercises"
+
 import {
-	createRunSchema,
 	type CreateRun,
-	MODEL_DEFAULT,
+	createRunSchema,
 	CONCURRENCY_MIN,
 	CONCURRENCY_MAX,
 	CONCURRENCY_DEFAULT,
@@ -26,14 +32,19 @@ import {
 	TIMEOUT_DEFAULT,
 } from "@/lib/schemas"
 import { cn } from "@/lib/utils"
+
 import { useOpenRouterModels } from "@/hooks/use-open-router-models"
+import { useRooCodeCloudModels } from "@/hooks/use-roo-code-cloud-models"
+
 import {
 	Button,
+	Checkbox,
 	FormControl,
 	FormField,
 	FormItem,
 	FormLabel,
 	FormMessage,
+	Input,
 	Textarea,
 	Tabs,
 	TabsList,
@@ -48,36 +59,54 @@ import {
 	Popover,
 	PopoverContent,
 	PopoverTrigger,
-	ScrollArea,
-	ScrollBar,
 	Slider,
+	Label,
+	FormDescription,
 } from "@/components/ui"
 
 import { SettingsDiff } from "./settings-diff"
 
+type ImportedSettings = {
+	apiConfigs: Record<string, ProviderSettings>
+	globalSettings: GlobalSettings
+	currentApiConfigName: string
+}
+
 export function NewRun() {
 	const router = useRouter()
 
-	const [mode, setMode] = useState<"openrouter" | "settings">("openrouter")
-	const [modelSearchValue, setModelSearchValue] = useState("")
+	const [provider, setModelSource] = useState<"roo" | "openrouter" | "other">("roo")
 	const [modelPopoverOpen, setModelPopoverOpen] = useState(false)
+	const [useNativeToolProtocol, setUseNativeToolProtocol] = useState(true)
 
-	const modelSearchResultsRef = useRef<Map<string, number>>(new Map())
-	const modelSearchValueRef = useRef("")
+	// State for imported settings with config selection
+	const [importedSettings, setImportedSettings] = useState<ImportedSettings | null>(null)
+	const [selectedConfigName, setSelectedConfigName] = useState<string>("")
+	const [configPopoverOpen, setConfigPopoverOpen] = useState(false)
+
+	const openRouter = useOpenRouterModels()
+	const rooCodeCloud = useRooCodeCloudModels()
+	const models = provider === "openrouter" ? openRouter.data : rooCodeCloud.data
+	const searchValue = provider === "openrouter" ? openRouter.searchValue : rooCodeCloud.searchValue
+	const setSearchValue = provider === "openrouter" ? openRouter.setSearchValue : rooCodeCloud.setSearchValue
+	const onFilter = provider === "openrouter" ? openRouter.onFilter : rooCodeCloud.onFilter
 
-	const models = useOpenRouterModels()
 	const exercises = useQuery({ queryKey: ["getExercises"], queryFn: () => getExercises() })
 
+	// State for selected exercises (needed for language toggle buttons)
+	const [selectedExercises, setSelectedExercises] = useState<string[]>([])
+
 	const form = useForm<CreateRun>({
 		resolver: zodResolver(createRunSchema),
 		defaultValues: {
-			model: MODEL_DEFAULT,
+			model: "",
 			description: "",
 			suite: "full",
 			exercises: [],
 			settings: undefined,
 			concurrency: CONCURRENCY_DEFAULT,
 			timeout: TIMEOUT_DEFAULT,
+			jobToken: "",
 		},
 	})
 
@@ -90,11 +119,105 @@ export function NewRun() {
 
 	const [model, suite, settings] = watch(["model", "suite", "settings", "concurrency"])
 
+	// Load concurrency and timeout from localStorage on mount
+	useEffect(() => {
+		const savedConcurrency = localStorage.getItem("evals-concurrency")
+		if (savedConcurrency) {
+			const parsed = parseInt(savedConcurrency, 10)
+			if (!isNaN(parsed) && parsed >= CONCURRENCY_MIN && parsed <= CONCURRENCY_MAX) {
+				setValue("concurrency", parsed)
+			}
+		}
+		const savedTimeout = localStorage.getItem("evals-timeout")
+		if (savedTimeout) {
+			const parsed = parseInt(savedTimeout, 10)
+			if (!isNaN(parsed) && parsed >= TIMEOUT_MIN && parsed <= TIMEOUT_MAX) {
+				setValue("timeout", parsed)
+			}
+		}
+	}, [setValue])
+
+	// Extract unique languages from exercises
+	const languages = useMemo(() => {
+		if (!exercises.data) return []
+		const langs = new Set<string>()
+		for (const path of exercises.data) {
+			const lang = path.split("/")[0]
+			if (lang) langs.add(lang)
+		}
+		return Array.from(langs).sort()
+	}, [exercises.data])
+
+	// Get exercises for a specific language
+	const getExercisesForLanguage = useCallback(
+		(lang: string) => {
+			if (!exercises.data) return []
+			return exercises.data.filter((path) => path.startsWith(`${lang}/`))
+		},
+		[exercises.data],
+	)
+
+	// Toggle all exercises for a language
+	const toggleLanguage = useCallback(
+		(lang: string) => {
+			const langExercises = getExercisesForLanguage(lang)
+			const allSelected = langExercises.every((ex) => selectedExercises.includes(ex))
+
+			let newSelected: string[]
+			if (allSelected) {
+				// Remove all exercises for this language
+				newSelected = selectedExercises.filter((ex) => !ex.startsWith(`${lang}/`))
+			} else {
+				// Add all exercises for this language (avoiding duplicates)
+				const existing = new Set(selectedExercises)
+				for (const ex of langExercises) {
+					existing.add(ex)
+				}
+				newSelected = Array.from(existing)
+			}
+
+			setSelectedExercises(newSelected)
+			setValue("exercises", newSelected)
+		},
+		[getExercisesForLanguage, selectedExercises, setValue],
+	)
+
+	// Check if all exercises for a language are selected
+	const isLanguageSelected = useCallback(
+		(lang: string) => {
+			const langExercises = getExercisesForLanguage(lang)
+			return langExercises.length > 0 && langExercises.every((ex) => selectedExercises.includes(ex))
+		},
+		[getExercisesForLanguage, selectedExercises],
+	)
+
+	// Check if some (but not all) exercises for a language are selected
+	const isLanguagePartiallySelected = useCallback(
+		(lang: string) => {
+			const langExercises = getExercisesForLanguage(lang)
+			const selectedCount = langExercises.filter((ex) => selectedExercises.includes(ex)).length
+			return selectedCount > 0 && selectedCount < langExercises.length
+		},
+		[getExercisesForLanguage, selectedExercises],
+	)
+
 	const onSubmit = useCallback(
 		async (values: CreateRun) => {
 			try {
-				if (mode === "openrouter") {
-					values.settings = { ...(values.settings || {}), openRouterModelId: model }
+				if (provider === "openrouter") {
+					values.settings = {
+						...(values.settings || {}),
+						apiProvider: "openrouter",
+						openRouterModelId: model,
+						toolProtocol: useNativeToolProtocol ? "native" : "xml",
+					}
+				} else if (provider === "roo") {
+					values.settings = {
+						...(values.settings || {}),
+						apiProvider: "roo",
+						apiModelId: model,
+						toolProtocol: useNativeToolProtocol ? "native" : "xml",
+					}
 				}
 
 				const { id } = await createRun(values)
@@ -103,28 +226,7 @@ export function NewRun() {
 				toast.error(e instanceof Error ? e.message : "An unknown error occurred.")
 			}
 		},
-		[mode, model, router],
-	)
-
-	const onFilterModels = useCallback(
-		(value: string, search: string) => {
-			if (modelSearchValueRef.current !== search) {
-				modelSearchValueRef.current = search
-				modelSearchResultsRef.current.clear()
-
-				for (const {
-					obj: { id },
-					score,
-				} of fuzzysort.go(search, models.data || [], {
-					key: "name",
-				})) {
-					modelSearchResultsRef.current.set(id, score)
-				}
-			}
-
-			return modelSearchResultsRef.current.get(value) ?? 0
-		},
-		[models.data],
+		[provider, model, router, useNativeToolProtocol],
 	)
 
 	const onSelectModel = useCallback(
@@ -132,7 +234,7 @@ export function NewRun() {
 			setValue("model", model)
 			setModelPopoverOpen(false)
 		},
-		[setValue],
+		[setValue, setModelPopoverOpen],
 	)
 
 	const onImportSettings = useCallback(
@@ -156,11 +258,21 @@ export function NewRun() {
 					})
 					.parse(JSON.parse(await file.text()))
 
-				const providerSettings = providerProfiles.apiConfigs[providerProfiles.currentApiConfigName] ?? {}
+				// Store all imported configs for user selection
+				setImportedSettings({
+					apiConfigs: providerProfiles.apiConfigs,
+					globalSettings,
+					currentApiConfigName: providerProfiles.currentApiConfigName,
+				})
 
+				// Default to the current config
+				const defaultConfigName = providerProfiles.currentApiConfigName
+				setSelectedConfigName(defaultConfigName)
+
+				// Apply the default config
+				const providerSettings = providerProfiles.apiConfigs[defaultConfigName] ?? {}
 				setValue("model", getModelId(providerSettings) ?? "")
 				setValue("settings", { ...EVALS_SETTINGS, ...providerSettings, ...globalSettings })
-				setMode("settings")
 
 				event.target.value = ""
 			} catch (e) {
@@ -171,19 +283,123 @@ export function NewRun() {
 		[clearErrors, setValue],
 	)
 
+	const onSelectConfig = useCallback(
+		(configName: string) => {
+			if (!importedSettings) {
+				return
+			}
+
+			setSelectedConfigName(configName)
+			setConfigPopoverOpen(false)
+
+			const providerSettings = importedSettings.apiConfigs[configName] ?? {}
+			setValue("model", getModelId(providerSettings) ?? "")
+			setValue("settings", { ...EVALS_SETTINGS, ...providerSettings, ...importedSettings.globalSettings })
+		},
+		[importedSettings, setValue],
+	)
+
 	return (
 		<>
 			<FormProvider {...form}>
 				<form
 					onSubmit={form.handleSubmit(onSubmit)}
 					className="flex flex-col justify-center divide-y divide-primary *:py-5">
-					<div className="flex flex-row justify-between gap-4">
-						{mode === "openrouter" && (
-							<FormField
-								control={form.control}
-								name="model"
-								render={() => (
-									<FormItem className="flex-1">
+					<FormField
+						control={form.control}
+						name="model"
+						render={() => (
+							<FormItem>
+								<Tabs
+									value={provider}
+									onValueChange={(value) => setModelSource(value as "roo" | "openrouter" | "other")}>
+									<TabsList className="mb-2">
+										<TabsTrigger value="roo">Roo Code Cloud</TabsTrigger>
+										<TabsTrigger value="openrouter">OpenRouter</TabsTrigger>
+										<TabsTrigger value="other">Other</TabsTrigger>
+									</TabsList>
+								</Tabs>
+
+								{provider === "other" ? (
+									<div className="space-y-2 overflow-auto">
+										<Button
+											type="button"
+											variant="secondary"
+											onClick={() => document.getElementById("json-upload")?.click()}
+											className="w-full">
+											<SlidersHorizontal />
+											Import Settings
+										</Button>
+										<input
+											id="json-upload"
+											type="file"
+											accept="application/json"
+											className="hidden"
+											onChange={onImportSettings}
+										/>
+
+										{importedSettings && Object.keys(importedSettings.apiConfigs).length > 1 && (
+											<div className="space-y-1">
+												<Label>API Config</Label>
+												<Popover open={configPopoverOpen} onOpenChange={setConfigPopoverOpen}>
+													<PopoverTrigger asChild>
+														<Button
+															variant="input"
+															role="combobox"
+															aria-expanded={configPopoverOpen}
+															className="flex items-center justify-between w-full">
+															<div>{selectedConfigName || "Select config"}</div>
+															<ChevronsUpDown className="opacity-50" />
+														</Button>
+													</PopoverTrigger>
+													<PopoverContent className="p-0 w-[var(--radix-popover-trigger-width)]">
+														<Command>
+															<CommandInput
+																placeholder="Search configs..."
+																className="h-9"
+															/>
+															<CommandList>
+																<CommandEmpty>No config found.</CommandEmpty>
+																<CommandGroup>
+																	{Object.keys(importedSettings.apiConfigs).map(
+																		(configName) => (
+																			<CommandItem
+																				key={configName}
+																				value={configName}
+																				onSelect={onSelectConfig}>
+																				{configName}
+																				{configName ===
+																					importedSettings.currentApiConfigName && (
+																					<span className="ml-2 text-xs text-muted-foreground">
+																						(default)
+																					</span>
+																				)}
+																				<Check
+																					className={cn(
+																						"ml-auto size-4",
+																						configName ===
+																							selectedConfigName
+																							? "opacity-100"
+																							: "opacity-0",
+																					)}
+																				/>
+																			</CommandItem>
+																		),
+																	)}
+																</CommandGroup>
+															</CommandList>
+														</Command>
+													</PopoverContent>
+												</Popover>
+											</div>
+										)}
+
+										{settings && (
+											<SettingsDiff defaultSettings={EVALS_SETTINGS} customSettings={settings} />
+										)}
+									</div>
+								) : (
+									<>
 										<Popover open={modelPopoverOpen} onOpenChange={setModelPopoverOpen}>
 											<PopoverTrigger asChild>
 												<Button
@@ -192,25 +408,23 @@ export function NewRun() {
 													aria-expanded={modelPopoverOpen}
 													className="flex items-center justify-between">
 													<div>
-														{models.data?.find(({ id }) => id === model)?.name ||
-															model ||
-															"Select OpenRouter Model"}
+														{models?.find(({ id }) => id === model)?.name || `Select`}
 													</div>
 													<ChevronsUpDown className="opacity-50" />
 												</Button>
 											</PopoverTrigger>
 											<PopoverContent className="p-0 w-[var(--radix-popover-trigger-width)]">
-												<Command filter={onFilterModels}>
+												<Command filter={onFilter}>
 													<CommandInput
 														placeholder="Search"
-														value={modelSearchValue}
-														onValueChange={setModelSearchValue}
+														value={searchValue}
+														onValueChange={setSearchValue}
 														className="h-9"
 													/>
 													<CommandList>
 														<CommandEmpty>No model found.</CommandEmpty>
 														<CommandGroup>
-															{models.data?.map(({ id, name }) => (
+															{models?.map(({ id, name }) => (
 																<CommandItem
 																	key={id}
 																	value={id}
@@ -229,45 +443,49 @@ export function NewRun() {
 												</Command>
 											</PopoverContent>
 										</Popover>
-										<FormMessage />
-									</FormItem>
-								)}
-							/>
-						)}
 
-						<FormItem className="flex-1">
-							<Button
-								type="button"
-								variant="secondary"
-								onClick={() => document.getElementById("json-upload")?.click()}>
-								<SlidersHorizontal />
-								Import Settings
-							</Button>
-							<input
-								id="json-upload"
-								type="file"
-								accept="application/json"
-								className="hidden"
-								onChange={onImportSettings}
-							/>
-							{settings && (
-								<ScrollArea className="max-h-64 border rounded-sm">
-									<>
-										<div className="flex items-center gap-1 p-2 border-b">
-											<CircleCheck className="size-4 text-ring" />
-											<div className="text-sm">
-												Imported valid Kilo Code settings. Showing differences from default
-												settings.
-											</div>
+										<div className="flex items-center gap-1.5">
+											<Checkbox
+												id="native"
+												checked={useNativeToolProtocol}
+												onCheckedChange={(checked) =>
+													setUseNativeToolProtocol(checked === true)
+												}
+											/>
+											<Label htmlFor="native">Use Native Tool Calls</Label>
 										</div>
-										<SettingsDiff defaultSettings={EVALS_SETTINGS} customSettings={settings} />
 									</>
-									<ScrollBar orientation="horizontal" />
-								</ScrollArea>
+								)}
+
+								<FormMessage />
+							</FormItem>
+						)}
+					/>
+
+					{provider === "roo" && (
+						<FormField
+							control={form.control}
+							name="jobToken"
+							render={({ field }) => (
+								<FormItem>
+									<FormLabel>Roo Code Cloud Token</FormLabel>
+									<FormControl>
+										<Input type="password" {...field} />
+									</FormControl>
+									<FormMessage />
+									<FormDescription>
+										If you have access to the Roo Code Cloud repository then you can generate a
+										token with:
+										<br />
+										<code className="text-xs">
+											pnpm --filter @roo-code-cloud/auth production:create-job-token [org]
+											[timeout]
+										</code>
+									</FormDescription>
+								</FormItem>
 							)}
-							<FormMessage />
-						</FormItem>
-					</div>
+						/>
+					)}
 
 					<FormField
 						control={form.control}
@@ -275,18 +493,51 @@ export function NewRun() {
 						render={() => (
 							<FormItem>
 								<FormLabel>Exercises</FormLabel>
-								<Tabs
-									defaultValue="full"
-									onValueChange={(value) => setValue("suite", value as "full" | "partial")}>
-									<TabsList>
-										<TabsTrigger value="full">All</TabsTrigger>
-										<TabsTrigger value="partial">Some</TabsTrigger>
-									</TabsList>
-								</Tabs>
+								<div className="flex items-center gap-2 flex-wrap">
+									<Tabs
+										defaultValue="full"
+										onValueChange={(value) => {
+											setValue("suite", value as "full" | "partial")
+											if (value === "full") {
+												setSelectedExercises([])
+												setValue("exercises", [])
+											}
+										}}>
+										<TabsList>
+											<TabsTrigger value="full">All</TabsTrigger>
+											<TabsTrigger value="partial">Some</TabsTrigger>
+										</TabsList>
+									</Tabs>
+									{suite === "partial" && languages.length > 0 && (
+										<div className="flex items-center gap-1 flex-wrap">
+											{languages.map((lang) => (
+												<Button
+													key={lang}
+													type="button"
+													variant={
+														isLanguageSelected(lang)
+															? "default"
+															: isLanguagePartiallySelected(lang)
+																? "secondary"
+																: "outline"
+													}
+													size="sm"
+													onClick={() => toggleLanguage(lang)}
+													className="text-xs capitalize">
+													{lang}
+												</Button>
+											))}
+										</div>
+									)}
+								</div>
 								{suite === "partial" && (
 									<MultiSelect
 										options={exercises.data?.map((path) => ({ value: path, label: path })) || []}
-										onValueChange={(value) => setValue("exercises", value)}
+										value={selectedExercises}
+										onValueChange={(value) => {
+											setSelectedExercises(value)
+											setValue("exercises", value)
+										}}
 										placeholder="Select"
 										variant="inverted"
 										maxCount={4}
@@ -306,11 +557,14 @@ export function NewRun() {
 								<FormControl>
 									<div className="flex flex-row items-center gap-2">
 										<Slider
-											defaultValue={[field.value]}
+											value={[field.value]}
 											min={CONCURRENCY_MIN}
 											max={CONCURRENCY_MAX}
 											step={1}
-											onValueChange={(value) => field.onChange(value[0])}
+											onValueChange={(value) => {
+												field.onChange(value[0])
+												localStorage.setItem("evals-concurrency", String(value[0]))
+											}}
 										/>
 										<div>{field.value}</div>
 									</div>
@@ -329,11 +583,14 @@ export function NewRun() {
 								<FormControl>
 									<div className="flex flex-row items-center gap-2">
 										<Slider
-											defaultValue={[field.value]}
+											value={[field.value]}
 											min={TIMEOUT_MIN}
 											max={TIMEOUT_MAX}
 											step={1}
-											onValueChange={(value) => field.onChange(value[0])}
+											onValueChange={(value) => {
+												field.onChange(value[0])
+												localStorage.setItem("evals-timeout", String(value[0]))
+											}}
 										/>
 										<div>{field.value}</div>
 									</div>

+ 35 - 45
apps/web-evals/src/app/runs/new/settings-diff.tsx

@@ -1,12 +1,10 @@
-import { Fragment, HTMLAttributes } from "react"
-
 import { type Keys, type RooCodeSettings, GLOBAL_SETTINGS_KEYS, PROVIDER_SETTINGS_KEYS } from "@roo-code/types"
 
-import { cn } from "@/lib/utils"
+import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui"
 
 export const ROO_CODE_SETTINGS_KEYS = [...GLOBAL_SETTINGS_KEYS, ...PROVIDER_SETTINGS_KEYS] as Keys<RooCodeSettings>[]
 
-type SettingsDiffProps = HTMLAttributes<HTMLDivElement> & {
+type SettingsDiffProps = {
 	defaultSettings: RooCodeSettings
 	customSettings: RooCodeSettings
 }
@@ -14,53 +12,45 @@ type SettingsDiffProps = HTMLAttributes<HTMLDivElement> & {
 export function SettingsDiff({
 	customSettings: { experiments: customExperiments, ...customSettings },
 	defaultSettings: { experiments: defaultExperiments, ...defaultSettings },
-	className,
-	...props
 }: SettingsDiffProps) {
 	const defaults = { ...defaultSettings, ...defaultExperiments }
 	const custom = { ...customSettings, ...customExperiments }
 
 	return (
-		<div className={cn("grid grid-cols-3 gap-2 text-sm p-2", className)} {...props}>
-			<div className="font-medium text-muted-foreground">Setting</div>
-			<div className="font-medium text-muted-foreground">Default</div>
-			<div className="font-medium text-muted-foreground">Custom</div>
-			{ROO_CODE_SETTINGS_KEYS.map((key) => {
-				const defaultValue = defaults[key as keyof typeof defaults]
-				const customValue = custom[key as keyof typeof custom]
-				const isDefault = JSON.stringify(defaultValue) === JSON.stringify(customValue)
-
-				return isDefault ? null : (
-					<SettingDiff
-						key={key}
-						name={key}
-						defaultValue={JSON.stringify(defaultValue, null, 2)}
-						customValue={JSON.stringify(customValue, null, 2)}
-					/>
-				)
-			})}
+		<div className="border rounded-sm">
+			<Table>
+				<TableHeader>
+					<TableRow className="font-medium text-muted-foreground">
+						<TableHead>Setting</TableHead>
+						<TableHead>Default</TableHead>
+						<TableHead>Custom</TableHead>
+					</TableRow>
+				</TableHeader>
+				<TableBody>
+					{ROO_CODE_SETTINGS_KEYS.map((key) => {
+						const defaultValue = JSON.stringify(defaults[key as keyof typeof defaults], null, 2)
+						const customValue = JSON.stringify(custom[key as keyof typeof custom], null, 2)
+
+						return defaultValue === customValue ||
+							(isEmpty(defaultValue) && isEmpty(customValue)) ? null : (
+							<TableRow key={key}>
+								<TableCell className="font-mono" title={key}>
+									{key}
+								</TableCell>
+								<TableCell className="font-mono text-rose-500 line-through" title={defaultValue}>
+									{defaultValue}
+								</TableCell>
+								<TableCell className="font-mono text-teal-500" title={customValue}>
+									{customValue}
+								</TableCell>
+							</TableRow>
+						)
+					})}
+				</TableBody>
+			</Table>
 		</div>
 	)
 }
 
-type SettingDiffProps = HTMLAttributes<HTMLDivElement> & {
-	name: string
-	defaultValue?: string
-	customValue?: string
-}
-
-export function SettingDiff({ name, defaultValue, customValue, ...props }: SettingDiffProps) {
-	return (
-		<Fragment {...props}>
-			<div className="font-mono" title={name}>
-				{name}
-			</div>
-			<pre className="inline text-rose-500 line-through" title={defaultValue}>
-				{defaultValue}
-			</pre>
-			<pre className="inline text-teal-500" title={customValue}>
-				{customValue}
-			</pre>
-		</Fragment>
-	)
-}
+const isEmpty = (value: string | undefined) =>
+	value === undefined || value === "" || value === "null" || value === '""' || value === "[]" || value === "{}" // callers pass JSON.stringify output, so these are the serialized forms of missing/null/empty values

+ 76 - 19
apps/web-evals/src/components/home/run.tsx

@@ -1,11 +1,19 @@
 import { useCallback, useState, useRef } from "react"
 import Link from "next/link"
-import { Ellipsis, ClipboardList, Copy, Check, LoaderCircle, Trash } from "lucide-react"
+import { useRouter } from "next/navigation"
+import { Ellipsis, ClipboardList, Copy, Check, LoaderCircle, Trash, Settings } from "lucide-react"
 
 import type { Run as EvalsRun, TaskMetrics as EvalsTaskMetrics } from "@roo-code/evals"
+import type { ToolName } from "@roo-code/types"
 
 import { deleteRun } from "@/actions/runs"
-import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters"
+import {
+	formatCurrency,
+	formatDateTime,
+	formatDuration,
+	formatTokens,
+	formatToolUsageSuccessRate,
+} from "@/lib/formatters"
 import { useCopyRun } from "@/hooks/use-copy-run"
 import {
 	Button,
@@ -23,15 +31,23 @@ import {
 	AlertDialogFooter,
 	AlertDialogHeader,
 	AlertDialogTitle,
+	Dialog,
+	DialogContent,
+	DialogHeader,
+	DialogTitle,
+	ScrollArea,
 } from "@/components/ui"
 
 type RunProps = {
 	run: EvalsRun
 	taskMetrics: EvalsTaskMetrics | null
+	toolColumns: ToolName[]
 }
 
-export function Run({ run, taskMetrics }: RunProps) {
+export function Run({ run, taskMetrics, toolColumns }: RunProps) {
+	const router = useRouter()
 	const [deleteRunId, setDeleteRunId] = useState<number>()
+	const [showSettings, setShowSettings] = useState(false)
 	const continueRef = useRef<HTMLButtonElement>(null)
 	const { isPending, copyRun, copied } = useCopyRun(run.id)
 
@@ -48,10 +64,25 @@ export function Run({ run, taskMetrics }: RunProps) {
 		}
 	}, [deleteRunId])
 
+	const handleRowClick = useCallback(
+		(e: React.MouseEvent) => {
+			// Don't navigate if clicking on the dropdown menu
+			if ((e.target as HTMLElement).closest("[data-dropdown-trigger]")) {
+				return
+			}
+			router.push(`/runs/${run.id}`)
+		},
+		[router, run.id],
+	)
+
 	return (
 		<>
-			<TableRow>
-				<TableCell>{run.model}</TableCell>
+			<TableRow className="cursor-pointer hover:bg-muted/50" onClick={handleRowClick}>
+				<TableCell className="max-w-[200px] truncate">{run.model}</TableCell>
+				<TableCell>{run.settings?.apiProvider ?? "-"}</TableCell>
+				<TableCell className="text-sm text-muted-foreground whitespace-nowrap">
+					{formatDateTime(run.createdAt)}
+				</TableCell>
 				<TableCell>{run.passed}</TableCell>
 				<TableCell>{run.failed}</TableCell>
 				<TableCell>
@@ -61,27 +92,33 @@ export function Run({ run, taskMetrics }: RunProps) {
 				</TableCell>
 				<TableCell>
 					{taskMetrics && (
-						<div className="flex items-center gap-1.5">
-							<div>{formatTokens(taskMetrics.tokensIn)}</div>/
-							<div>{formatTokens(taskMetrics.tokensOut)}</div>
-						</div>
-					)}
-				</TableCell>
-				<TableCell>
-					{taskMetrics?.toolUsage?.apply_diff && (
-						<div className="flex flex-row items-center gap-1.5">
-							<div>{taskMetrics.toolUsage.apply_diff.attempts}</div>
-							<div>/</div>
-							<div>{formatToolUsageSuccessRate(taskMetrics.toolUsage.apply_diff)}</div>
+						<div className="flex items-center gap-1">
+							<span>{formatTokens(taskMetrics.tokensIn)}</span>/
+							<span>{formatTokens(taskMetrics.tokensOut)}</span>
 						</div>
 					)}
 				</TableCell>
+				{toolColumns.map((toolName) => {
+					const usage = taskMetrics?.toolUsage?.[toolName]
+					return (
+						<TableCell key={toolName} className="text-xs text-center">
+							{usage ? (
+								<div className="flex flex-col items-center">
+									<span className="font-medium">{usage.attempts}</span>
+									<span className="text-muted-foreground">{formatToolUsageSuccessRate(usage)}</span>
+								</div>
+							) : (
+								<span className="text-muted-foreground">-</span>
+							)}
+						</TableCell>
+					)
+				})}
 				<TableCell>{taskMetrics && formatCurrency(taskMetrics.cost)}</TableCell>
 				<TableCell>{taskMetrics && formatDuration(taskMetrics.duration)}</TableCell>
-				<TableCell>
+				<TableCell onClick={(e) => e.stopPropagation()}>
 					<DropdownMenu>
 						<Button variant="ghost" size="icon" asChild>
-							<DropdownMenuTrigger>
+							<DropdownMenuTrigger data-dropdown-trigger>
 								<Ellipsis />
 							</DropdownMenuTrigger>
 						</Button>
@@ -94,6 +131,14 @@ export function Run({ run, taskMetrics }: RunProps) {
 									</div>
 								</Link>
 							</DropdownMenuItem>
+							{run.settings && (
+								<DropdownMenuItem onClick={() => setShowSettings(true)}>
+									<div className="flex items-center gap-1">
+										<Settings />
+										<div>View Settings</div>
+									</div>
+								</DropdownMenuItem>
+							)}
 							{run.taskMetricsId && (
 								<DropdownMenuItem onClick={() => copyRun()} disabled={isPending || copied}>
 									<div className="flex items-center gap-1">
@@ -144,6 +189,18 @@ export function Run({ run, taskMetrics }: RunProps) {
 					</AlertDialogFooter>
 				</AlertDialogContent>
 			</AlertDialog>
+			<Dialog open={showSettings} onOpenChange={setShowSettings}>
+				<DialogContent className="max-w-2xl max-h-[80vh]">
+					<DialogHeader>
+						<DialogTitle>Run Settings</DialogTitle>
+					</DialogHeader>
+					<ScrollArea className="max-h-[60vh]">
+						<pre className="text-xs font-mono bg-muted p-4 rounded-md overflow-auto">
+							{JSON.stringify(run.settings, null, 2)}
+						</pre>
+					</ScrollArea>
+				</DialogContent>
+			</Dialog>
 		</>
 	)
 }

+ 198 - 14
apps/web-evals/src/components/home/runs.tsx

@@ -1,40 +1,224 @@
 "use client"
 
+import { useMemo, useState } from "react"
 import { useRouter } from "next/navigation"
-import { Rocket } from "lucide-react"
+import { ArrowDown, ArrowUp, ArrowUpDown, Rocket } from "lucide-react"
 
 import type { Run, TaskMetrics } from "@roo-code/evals"
+import type { ToolName } from "@roo-code/types"
 
-import { Button, Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui"
+import {
+	Button,
+	Table,
+	TableBody,
+	TableCell,
+	TableHead,
+	TableHeader,
+	TableRow,
+	Tooltip,
+	TooltipContent,
+	TooltipTrigger,
+} from "@/components/ui"
 import { Run as Row } from "@/components/home/run"
 
 type RunWithTaskMetrics = Run & { taskMetrics: TaskMetrics | null }
 
+type SortColumn = "model" | "provider" | "passed" | "failed" | "percent" | "cost" | "duration" | "createdAt"
+type SortDirection = "asc" | "desc"
+
+// Generate abbreviation from tool name (e.g., "read_file" -> "RF", "list_code_definition_names" -> "LCDN")
+function getToolAbbreviation(toolName: string): string {
+	return toolName
+		.split("_")
+		.map((word) => word[0]?.toUpperCase() ?? "")
+		.join("")
+}
+/** Sort indicator for a header cell: dimmed up/down icon when `column` is not the active sort column, else an up arrow for "asc" or a down arrow for "desc". */
+function SortIcon({
+	column,
+	sortColumn,
+	sortDirection,
+}: {
+	column: SortColumn
+	sortColumn: SortColumn | null
+	sortDirection: SortDirection
+}) {
+	if (sortColumn !== column) {
+		return <ArrowUpDown className="ml-1 h-3 w-3 opacity-50" />
+	}
+	return sortDirection === "asc" ? <ArrowUp className="ml-1 h-3 w-3" /> : <ArrowDown className="ml-1 h-3 w-3" />
+}
+
 export function Runs({ runs }: { runs: RunWithTaskMetrics[] }) {
 	const router = useRouter()
+	const [sortColumn, setSortColumn] = useState<SortColumn | null>("createdAt")
+	const [sortDirection, setSortDirection] = useState<SortDirection>("desc")
+
+	const handleSort = (column: SortColumn) => {
+		if (sortColumn === column) {
+			setSortDirection(sortDirection === "asc" ? "desc" : "asc")
+		} else {
+			setSortColumn(column)
+			setSortDirection("desc")
+		}
+	}
+
+	// Collect all unique tool names from all runs and sort by total attempts
+	const toolColumns = useMemo<ToolName[]>(() => {
+		const toolTotals = new Map<ToolName, number>()
+
+		for (const run of runs) {
+			if (run.taskMetrics?.toolUsage) {
+				for (const [toolName, usage] of Object.entries(run.taskMetrics.toolUsage)) {
+					const tool = toolName as ToolName
+					const current = toolTotals.get(tool) ?? 0
+					toolTotals.set(tool, current + usage.attempts)
+				}
+			}
+		}
+
+		// Sort by total attempts descending
+		return Array.from(toolTotals.entries())
+			.sort((a, b) => b[1] - a[1])
+			.map(([name]): ToolName => name)
+	}, [runs])
+
+	// Sort runs based on current sort column and direction
+	const sortedRuns = useMemo(() => {
+		if (!sortColumn) return runs
+
+		return [...runs].sort((a, b) => {
+			let aVal: string | number | Date | null = null
+			let bVal: string | number | Date | null = null
+
+			switch (sortColumn) {
+				case "model":
+					aVal = a.model
+					bVal = b.model
+					break
+				case "provider":
+					aVal = a.settings?.apiProvider ?? ""
+					bVal = b.settings?.apiProvider ?? ""
+					break
+				case "passed":
+					aVal = a.passed
+					bVal = b.passed
+					break
+				case "failed":
+					aVal = a.failed
+					bVal = b.failed
+					break
+				case "percent":
+					aVal = a.passed + a.failed > 0 ? a.passed / (a.passed + a.failed) : 0
+					bVal = b.passed + b.failed > 0 ? b.passed / (b.passed + b.failed) : 0
+					break
+				case "cost":
+					aVal = a.taskMetrics?.cost ?? 0
+					bVal = b.taskMetrics?.cost ?? 0
+					break
+				case "duration":
+					aVal = a.taskMetrics?.duration ?? 0
+					bVal = b.taskMetrics?.duration ?? 0
+					break
+				case "createdAt":
+					aVal = a.createdAt
+					bVal = b.createdAt
+					break
+			}
+
+			if (aVal === null || bVal === null) return 0
+
+			let comparison = 0
+			if (typeof aVal === "string" && typeof bVal === "string") {
+				comparison = aVal.localeCompare(bVal)
+			} else if (aVal instanceof Date && bVal instanceof Date) {
+				comparison = aVal.getTime() - bVal.getTime()
+			} else {
+				comparison = (aVal as number) - (bVal as number)
+			}
+
+			return sortDirection === "asc" ? comparison : -comparison
+		})
+	}, [runs, sortColumn, sortDirection])
+
+	// Calculate colSpan for empty state (7 base columns + dynamic tools + 3 end columns)
+	const totalColumns = 7 + toolColumns.length + 3
 
 	return (
 		<>
 			<Table className="border border-t-0">
 				<TableHeader>
 					<TableRow>
-						<TableHead>Model</TableHead>
-						<TableHead>Passed</TableHead>
-						<TableHead>Failed</TableHead>
-						<TableHead>% Correct</TableHead>
-						<TableHead>Tokens In / Out</TableHead>
-						<TableHead>Diff Edits</TableHead>
-						<TableHead>Cost</TableHead>
-						<TableHead>Duration</TableHead>
-						<TableHead />
+						<TableHead
+							className="max-w-[200px] cursor-pointer select-none"
+							onClick={() => handleSort("model")}>
+							<div className="flex items-center">
+								Model
+								<SortIcon column="model" sortColumn={sortColumn} sortDirection={sortDirection} />
+							</div>
+						</TableHead>
+						<TableHead className="cursor-pointer select-none" onClick={() => handleSort("provider")}>
+							<div className="flex items-center">
+								Provider
+								<SortIcon column="provider" sortColumn={sortColumn} sortDirection={sortDirection} />
+							</div>
+						</TableHead>
+						<TableHead className="cursor-pointer select-none" onClick={() => handleSort("createdAt")}>
+							<div className="flex items-center">
+								Created
+								<SortIcon column="createdAt" sortColumn={sortColumn} sortDirection={sortDirection} />
+							</div>
+						</TableHead>
+						<TableHead className="cursor-pointer select-none" onClick={() => handleSort("passed")}>
+							<div className="flex items-center">
+								Passed
+								<SortIcon column="passed" sortColumn={sortColumn} sortDirection={sortDirection} />
+							</div>
+						</TableHead>
+						<TableHead className="cursor-pointer select-none" onClick={() => handleSort("failed")}>
+							<div className="flex items-center">
+								Failed
+								<SortIcon column="failed" sortColumn={sortColumn} sortDirection={sortDirection} />
+							</div>
+						</TableHead>
+						<TableHead className="cursor-pointer select-none" onClick={() => handleSort("percent")}>
+							<div className="flex items-center">
+								%
+								<SortIcon column="percent" sortColumn={sortColumn} sortDirection={sortDirection} />
+							</div>
+						</TableHead>
+						<TableHead>Tokens</TableHead>
+						{toolColumns.map((toolName) => (
+							<TableHead key={toolName} className="text-xs text-center">
+								<Tooltip>
+									<TooltipTrigger>{getToolAbbreviation(toolName)}</TooltipTrigger>
+									<TooltipContent>{toolName}</TooltipContent>
+								</Tooltip>
+							</TableHead>
+						))}
+						<TableHead className="cursor-pointer select-none" onClick={() => handleSort("cost")}>
+							<div className="flex items-center">
+								Cost
+								<SortIcon column="cost" sortColumn={sortColumn} sortDirection={sortDirection} />
+							</div>
+						</TableHead>
+						<TableHead className="cursor-pointer select-none" onClick={() => handleSort("duration")}>
+							<div className="flex items-center">
+								Duration
+								<SortIcon column="duration" sortColumn={sortColumn} sortDirection={sortDirection} />
+							</div>
+						</TableHead>
+						<TableHead></TableHead>
 					</TableRow>
 				</TableHeader>
 				<TableBody>
-					{runs.length ? (
-						runs.map(({ taskMetrics, ...run }) => <Row key={run.id} run={run} taskMetrics={taskMetrics} />)
+					{sortedRuns.length ? (
+						sortedRuns.map(({ taskMetrics, ...run }) => (
+							<Row key={run.id} run={run} taskMetrics={taskMetrics} toolColumns={toolColumns} />
+						))
 					) : (
 						<TableRow>
-							<TableCell colSpan={9} className="text-center">
+							<TableCell colSpan={totalColumns} className="text-center">
 								No eval runs yet.
 								<Button variant="link" onClick={() => router.push("/runs/new")}>
 									Launch

+ 27 - 0
apps/web-evals/src/components/ui/checkbox.tsx

@@ -0,0 +1,27 @@
+"use client"
+
+import * as React from "react"
+import * as CheckboxPrimitive from "@radix-ui/react-checkbox"
+import { CheckIcon } from "lucide-react"
+
+import { cn } from "@/lib/utils"
+/** Wrapper around the Radix checkbox root: passes the default classes plus `className` through `cn`, spreads all other props onto the root, and renders a check icon as the indicator. */
+function Checkbox({ className, ...props }: React.ComponentProps<typeof CheckboxPrimitive.Root>) {
+	return (
+		<CheckboxPrimitive.Root
+			data-slot="checkbox"
+			className={cn(
+				"peer border-input dark:bg-input/30 data-[state=checked]:bg-primary data-[state=checked]:text-primary-foreground dark:data-[state=checked]:bg-primary data-[state=checked]:border-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive size-4 shrink-0 rounded-[4px] border shadow-xs transition-shadow outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50",
+				className,
+			)}
+			{...props}>
+			<CheckboxPrimitive.Indicator
+				data-slot="checkbox-indicator"
+				className="grid place-content-center text-current transition-none">
+				<CheckIcon className="size-3.5" />
+			</CheckboxPrimitive.Indicator>
+		</CheckboxPrimitive.Root>
+	)
+}
+
+export { Checkbox }

+ 1 - 0
apps/web-evals/src/components/ui/index.ts

@@ -1,6 +1,7 @@
 export * from "./alert-dialog"
 export * from "./badge"
 export * from "./button"
+export * from "./checkbox"
 export * from "./command"
 export * from "./dialog"
 export * from "./drawer"

+ 22 - 9
apps/web-evals/src/components/ui/multi-select.tsx

@@ -48,7 +48,10 @@ interface MultiSelectProps extends React.HTMLAttributes<HTMLDivElement>, Variant
 	 */
 	onValueChange: (value: string[]) => void
 
-	/** The default selected values when the component mounts. */
+	/** The controlled selected values. When provided, the component becomes controlled. */
+	value?: string[]
+
+	/** The default selected values when the component mounts (uncontrolled mode). */
 	defaultValue?: string[]
 
 	/**
@@ -89,6 +92,7 @@ export const MultiSelect = React.forwardRef<HTMLDivElement, MultiSelectProps>(
 			options,
 			onValueChange,
 			variant,
+			value,
 			defaultValue = [],
 			placeholder = "Select options",
 			maxCount = 3,
@@ -98,17 +102,30 @@ export const MultiSelect = React.forwardRef<HTMLDivElement, MultiSelectProps>(
 		},
 		ref,
 	) => {
-		const [selectedValues, setSelectedValues] = React.useState<string[]>(defaultValue)
+		const [internalSelectedValues, setInternalSelectedValues] = React.useState<string[]>(defaultValue)
 		const [isPopoverOpen, setIsPopoverOpen] = React.useState(false)
 
+		// Use controlled value if provided, otherwise use internal state. NOTE(review): when controlled, selectedValues is the caller's own array, so the in-place selectedValues.sort() further down in this component would mutate it — confirm, or copy before sorting.
+		const isControlled = value !== undefined
+		const selectedValues = isControlled ? value : internalSelectedValues
+		// Single update path: writes internal state only when uncontrolled, and always notifies onValueChange.
+		const setSelectedValues = React.useCallback(
+			(newValues: string[]) => {
+				if (!isControlled) {
+					setInternalSelectedValues(newValues)
+				}
+				onValueChange(newValues)
+			},
+			[isControlled, onValueChange],
+		)
+
 		const handleInputKeyDown = (event: React.KeyboardEvent<HTMLInputElement>) => {
 			if (event.key === "Enter") {
 				setIsPopoverOpen(true)
 			} else if (event.key === "Backspace" && !event.currentTarget.value) {
-				const newSelectedValues = [...selectedValues]
-				newSelectedValues.pop()
+				if (!selectedValues.length) return
+				const newSelectedValues = selectedValues.slice(0, -1)
 				setSelectedValues(newSelectedValues)
-				onValueChange(newSelectedValues)
 			}
 		}
 
@@ -117,7 +134,6 @@ export const MultiSelect = React.forwardRef<HTMLDivElement, MultiSelectProps>(
 				? selectedValues.filter((value) => value !== option)
 				: [...selectedValues, option]
 			setSelectedValues(newSelectedValues)
-			onValueChange(newSelectedValues)
 		}
 
 		const handleTogglePopover = () => {
@@ -127,7 +143,6 @@ export const MultiSelect = React.forwardRef<HTMLDivElement, MultiSelectProps>(
 		const clearExtraOptions = () => {
 			const newSelectedValues = selectedValues.slice(0, maxCount)
 			setSelectedValues(newSelectedValues)
-			onValueChange(newSelectedValues)
 		}
 
 		const searchResultsRef = React.useRef<Map<string, number>>(new Map())
@@ -141,12 +156,10 @@ export const MultiSelect = React.forwardRef<HTMLDivElement, MultiSelectProps>(
 				selectedValues.sort().join(",") === values.sort().join(",")
 			) {
 				setSelectedValues([])
-				onValueChange([])
 				return
 			}
 
 			setSelectedValues(values)
-			onValueChange(values)
 		}
 
 		const onFilter = React.useCallback(

+ 37 - 0
apps/web-evals/src/hooks/use-fuzzy-model-search.ts

@@ -0,0 +1,37 @@
+import { useCallback, useRef, useState } from "react"
+import fuzzysort from "fuzzysort"
+
+interface ModelWithId {
+	id: string
+	name: string
+}
+/** Hook that holds a search string and exposes `onFilter(value, search)`, which returns the fuzzysort score (matched on `name`, looked up by `id`) of an item in `data`. */
+export const useFuzzyModelSearch = <T extends ModelWithId>(data: T[] | undefined) => {
+	const [searchValue, setSearchValue] = useState("")
+	// Cache of id -> score for the most recently scored search string (kept in refs, so it survives re-renders).
+	const searchResultsRef = useRef<Map<string, number>>(new Map())
+	const searchValueRef = useRef("")
+	// NOTE(review): the cache is rebuilt only when `search` changes, so scores computed from an earlier `data` are reused if `data` changes under the same search string — confirm this is acceptable.
+	const onFilter = useCallback(
+		(value: string, search: string) => {
+			if (searchValueRef.current !== search) {
+				searchValueRef.current = search
+				searchResultsRef.current.clear()
+
+				for (const {
+					obj: { id },
+					score,
+				} of fuzzysort.go(search, data || [], {
+					key: "name",
+				})) {
+					searchResultsRef.current.set(id, score)
+				}
+			}
+			// Ids without a cached score yield 0.
+			return searchResultsRef.current.get(value) ?? 0
+		},
+		[data],
+	)
+
+	return { searchValue, setSearchValue, onFilter }
+}

+ 8 - 2
apps/web-evals/src/hooks/use-open-router-models.ts

@@ -1,5 +1,6 @@
 import { z } from "zod"
 import { useQuery } from "@tanstack/react-query"
+import { useFuzzyModelSearch } from "./use-fuzzy-model-search"
 
 export const openRouterModelSchema = z.object({
 	id: z.string(),
@@ -25,8 +26,13 @@ export const getOpenRouterModels = async (): Promise<OpenRouterModel[]> => {
 	return result.data.data.sort((a, b) => a.name.localeCompare(b.name))
 }
 
-export const useOpenRouterModels = () =>
-	useQuery({
+export const useOpenRouterModels = () => {
+	const query = useQuery({
 		queryKey: ["getOpenRouterModels"],
 		queryFn: getOpenRouterModels,
 	})
+
+	const { searchValue, setSearchValue, onFilter } = useFuzzyModelSearch(query.data)
+
+	return { ...query, searchValue, setSearchValue, onFilter }
+}

+ 66 - 0
apps/web-evals/src/hooks/use-roo-code-cloud-models.ts

@@ -0,0 +1,66 @@
+import { z } from "zod"
+import { useQuery } from "@tanstack/react-query"
+import { useFuzzyModelSearch } from "./use-fuzzy-model-search"
+/** Zod schema for a single model entry; used below to validate each element of the models list response. */
+export const rooCodeCloudModelSchema = z.object({
+	object: z.literal("model"),
+	id: z.string(),
+	name: z.string(),
+	description: z.string().optional(),
+	context_window: z.number(),
+	max_tokens: z.number(),
+	supports_images: z.boolean().optional(),
+	supports_prompt_cache: z.boolean().optional(),
+	type: z.literal("language"),
+	tags: z.array(z.string()).optional(),
+	deprecationMessage: z.string().optional(),
+	owned_by: z.string(),
+	pricing: z.object({
+		input: z.string(),
+		output: z.string(),
+		input_cache_read: z.string().optional(),
+		input_cache_write: z.string().optional(),
+	}),
+	evals: z
+		.object({
+			score: z.number().min(0).max(100),
+		})
+		.optional(),
+	created: z.number(),
+	deprecated: z.boolean().optional(),
+})
+
+export type RooCodeCloudModel = z.infer<typeof rooCodeCloudModelSchema>
+/** Fetches the model list, validates it, and returns non-deprecated models sorted by name; returns [] on a non-OK response or a validation failure (the latter is logged). */
+export const getRooCodeCloudModels = async (): Promise<RooCodeCloudModel[]> => {
+	const response = await fetch("https://api.roocode.com/proxy/v1/models")
+
+	if (!response.ok) {
+		return []
+	}
+
+	const result = z
+		.object({
+			object: z.literal("list"),
+			data: z.array(rooCodeCloudModelSchema),
+		})
+		.safeParse(await response.json())
+
+	if (!result.success) {
+		console.error(result.error)
+		return []
+	}
+
+	return result.data.data.filter((model) => !model.deprecated).sort((a, b) => a.name.localeCompare(b.name))
+}
+/** Query hook for `getRooCodeCloudModels`; returns the query result spread together with the fuzzy-search state and `onFilter` built over the fetched models. */
+export const useRooCodeCloudModels = () => {
+	const query = useQuery({
+		queryKey: ["getRooCodeCloudModels"],
+		queryFn: getRooCodeCloudModels,
+	})
+
+	const { searchValue, setSearchValue, onFilter } = useFuzzyModelSearch(query.data)
+
+	return { ...query, searchValue, setSearchValue, onFilter }
+}

+ 10 - 0
apps/web-evals/src/lib/formatters.ts

@@ -46,3 +46,13 @@ export const formatTokens = (tokens: number) => {
 
 export const formatToolUsageSuccessRate = (usage: { attempts: number; failures: number }) =>
 	usage.attempts === 0 ? "0%" : `${(((usage.attempts - usage.failures) / usage.attempts) * 100).toFixed(1)}%`
+/** Formats a Date with the en-US locale as short month, numeric day, and 12-hour time with 2-digit minutes; the year is not included. */
+export const formatDateTime = (date: Date) => {
+	return new Intl.DateTimeFormat("en-US", {
+		month: "short",
+		day: "numeric",
+		hour: "numeric",
+		minute: "2-digit",
+		hour12: true,
+	}).format(date)
+}

+ 1 - 3
apps/web-evals/src/lib/schemas.ts

@@ -6,8 +6,6 @@ import { rooCodeSettingsSchema } from "@roo-code/types"
  * CreateRun
  */
 
-export const MODEL_DEFAULT = "anthropic/claude-sonnet-4"
-
 export const CONCURRENCY_MIN = 1
 export const CONCURRENCY_MAX = 25
 export const CONCURRENCY_DEFAULT = 1
@@ -25,7 +23,7 @@ export const createRunSchema = z
 		settings: rooCodeSettingsSchema.optional(),
 		concurrency: z.number().int().min(CONCURRENCY_MIN).max(CONCURRENCY_MAX),
 		timeout: z.number().int().min(TIMEOUT_MIN).max(TIMEOUT_MAX),
-		systemPrompt: z.string().optional(),
+		jobToken: z.string().optional(),
 	})
 	.refine((data) => data.suite === "full" || (data.exercises || []).length > 0, {
 		message: "Exercises are required when running a partial suite.",

+ 0 - 1
apps/web-roo-code/.env.example

@@ -1,7 +1,6 @@
 # PostHog Analytics Configuration
 # Replace these values with your actual PostHog API key and host
 NEXT_PUBLIC_POSTHOG_KEY=your_posthog_api_key_here
-NEXT_PUBLIC_POSTHOG_HOST=https://us.i.posthog.com
 
 # Basin Form Endpoint for Static Form Submissions
 # Replace this with your actual Basin form endpoint (e.g., https://usebasin.com/f/your-form-id)

+ 78 - 42
apps/web-roo-code/src/app/pricing/page.tsx

@@ -1,4 +1,4 @@
-import { Users, Building2, ArrowRight, Star, LucideIcon, Check, Cloud } from "lucide-react"
+import { Users, Building2, ArrowRight, Star, LucideIcon, Check, Cloud, PlugZap } from "lucide-react"
 import type { Metadata } from "next"
 import Link from "next/link"
 
@@ -64,7 +64,6 @@ interface PricingTier {
 	period?: string
 	creditPrice?: string
 	trial?: string
-	cancellation?: string
 	description: string
 	featuresIntro?: string
 	features: string[]
@@ -80,13 +79,12 @@ const pricingTiers: PricingTier[] = [
 		name: "Cloud Free",
 		icon: Cloud,
 		price: "$0",
-		cancellation: "Cancel anytime",
 		description: "For folks just getting started",
 		features: [
 			"Token usage analytics",
+			"Access to the Roo Code Cloud Provider, including early access to free stealth models",
 			"Follow your tasks from anywhere",
 			"Share tasks with friends and co-workers",
-			"Early access to free AI Models",
 			"Community support",
 		],
 		cta: {
@@ -95,18 +93,18 @@ const pricingTiers: PricingTier[] = [
 		},
 	},
 	{
-		name: "Pro",
+		name: "Cloud Pro",
 		icon: Star,
 		price: "$20",
 		period: "/mo",
-		trial: "Free 14-day trial · ",
+		trial: "Free for 14 days, then",
 		creditPrice: `$${PRICE_CREDITS}`,
-		cancellation: "Cancel anytime",
 		description: "For pro Roo coders",
 		featuresIntro: "Everything in Free +",
 		features: [
-			"Cloud Agents: PR Reviewer and more",
-			"Roomote Control: Start, stop and control tasks from anywhere",
+			"Cloud Agents: Coder, Explainer, Planner, Reviewer, Fixer and more",
+			"Start tasks from Slack",
+			"Roomote Control: Start, stop and control extension tasks from anywhere",
 			"Paid support",
 		],
 		cta: {
@@ -115,13 +113,12 @@ const pricingTiers: PricingTier[] = [
 		},
 	},
 	{
-		name: "Team",
+		name: "Cloud Team",
 		icon: Users,
 		price: "$99",
 		period: "/mo",
 		creditPrice: `$${PRICE_CREDITS}`,
-		trial: "Free 14-day trial · ",
-		cancellation: "Cancel anytime",
+		trial: "Free for 14 days, then",
 		description: "For AI-forward teams",
 		featuresIntro: "Everything in Pro +",
 		features: ["Unlimited users (no per-seat cost)", "Shared configuration & policies", "Centralized billing"],
@@ -151,12 +148,30 @@ export default function PricingPage() {
 				</div>
 			</section>
 
-			{/* Free Extension Notice */}
-			<div className="mx-auto max-w-6xl">
-				<div className="rounded-xl p-4 mb-8 text-center bg-gradient-to-r from-blue-500/10 via-cyan-500/10 to-purple-500/10 border border-blue-500/20 dark:border-white/20">
+			<div className="mx-6 md:mx-auto max-w-6xl">
+				<div className="rounded-xl p-4 mb-8 text-center bg-gradient-to-r from-blue-500/10 via-cyan-500/10 to-purple-500/10 border border-blue-500/20 dark:border-white/20 ">
 					<p className="text-center">
-						<strong className="font-semibold">The Roo Code extension is free! </strong>
-						Roo Code Cloud is an optional service which takes it to the next level.
+						<strong className="font-semibold">The Roo Code extension is totally free! </strong>
+						But Cloud takes you so much further.
+					</p>
+				</div>
+			</div>
+
+			<div className="mx-6 md:mx-auto max-w-6xl p-7 mb-4 relative flex flex-col justify-start bg-background border rounded-2xl transition-all shadow-none hover:shadow-lg">
+				<h3 className="text-xl font-semibold flex items-center gap-2 justify-between">
+					Roo Code Provider
+					<PlugZap className="size-6" />
+				</h3>
+				<div className="text-sm text-muted-foreground space-y-1 mt-2">
+					<p className="">
+						On any plan, you can bring your own provider key or use the built-in Roo Code Cloud provider.
+					</p>
+					<p className="text-sm text-muted-foreground">
+						We offer a select mix of tested state of the art closed and open weight LLMs for you to choose,
+						with no markup.
+						<Link href="/provider/pricing" className="underline hover:no-underline ml-1">
+							See detailed pricing
+						</Link>
 					</p>
 				</div>
 			</div>
@@ -183,7 +198,7 @@ export default function PricingPage() {
 										<p className="text-sm text-muted-foreground font-light mb-2">
 											{tier.featuresIntro}&nbsp;
 										</p>
-										<ul className="space-y-3 my-0 h-[148px]">
+										<ul className="space-y-3 my-0 h-[168px]">
 											{tier.features.map((feature) => (
 												<li key={feature} className="flex items-start gap-2">
 													<Check className="mt-0.5 h-4 w-4 text-muted-foreground shrink-0" />
@@ -193,20 +208,28 @@ export default function PricingPage() {
 										</ul>
 									</div>
 
-									<p className="text-2xl mt-0 mb-1 tracking-tight">
-										<strong>{tier.price}</strong>
-										{tier.period}
-									</p>
+									<p className="text-base font-light">{tier.trial}</p>
 
-									{tier.creditPrice && (
-										<p className="text-sm text-muted-foreground mb-1">
-											+ {tier.creditPrice}/hour for Cloud tasks
-										</p>
-									)}
+									<p className="text-xl my-1 tracking-tight font-light">
+										<strong className="font-bold">{tier.price}</strong>
+										{tier.period} + prepaid credits
+									</p>
 
-									<p className="text-xs text-muted-foreground mb-4">
-										{tier.trial}
-										{tier.cancellation}
+									<p className="text-sm text-muted-foreground mb-3">
+										{tier.creditPrice && (
+											<>
+												Cloud Agents: {tier.creditPrice}/hour if used
+												<br />
+											</>
+										)}
+										Inference:{" "}
+										<Link href="/provider/pricing" className="underline hover:no-underline">
+											Roo Provider pricing
+										</Link>{" "}
+										or{" "}
+										<abbr title="Bring Your Own Key" className="cursor-help">
+											BYOK
+										</abbr>
 									</p>
 
 									{tier.cta.isContactForm ? (
@@ -249,7 +272,7 @@ export default function PricingPage() {
 						<h2 className="text-3xl font-bold tracking-tight sm:text-4xl">Frequently Asked Questions</h2>
 					</div>
 					<div className="mx-auto mt-12 grid max-w-5xl gap-8 md:grid-cols-2">
-						<div className="rounded-lg border border-border bg-card p-6">
+						<div className="rounded-xl border border-border bg-card p-6">
 							<h3 className="font-semibold">Wait, is Roo Code free or not?</h3>
 							<p className="mt-2 text-sm text-muted-foreground">
 								Yes! The Roo Code VS Code extension is open source and free forever. The extension acts
@@ -257,7 +280,7 @@ export default function PricingPage() {
 								Code Cloud.
 							</p>
 						</div>
-						<div className="rounded-lg border border-border bg-card p-6">
+						<div className="rounded-xl border border-border bg-card p-6">
 							<h3 className="font-semibold">Is there a free trial?</h3>
 							<p className="mt-2 text-sm text-muted-foreground">
 								Yes, all paid plans come with a 14-day free trial to try out functionality.
@@ -266,12 +289,25 @@ export default function PricingPage() {
 								To use Cloud Agents, you can buy credits.
 							</p>
 						</div>
-						<div className="rounded-lg border border-border bg-card p-6">
-							<h3 className="font-semibold">How do Cloud Agent credits work?</h3>
+						<div className="rounded-xl border border-border bg-card p-6">
+							<h3 className="font-semibold">How do credits work?</h3>
 							<p className="mt-2 text-sm text-muted-foreground">
-								Cloud Agents are a version of Roo running in the cloud without depending on your IDE.
-								You can run as many as you want, and bring your own inference provider key.
+								Roo Code Cloud credits can be used in two ways:
 							</p>
+							<ul className="mt-2 list-disc pl-5 text-sm text-muted-foreground">
+								<li>To pay for Cloud Agents running time (${PRICE_CREDITS}/hour)</li>
+								<li>
+									To pay for AI model inference costs (
+									<a
+										href="https://app.roocode.com/provider/pricing"
+										target="_blank"
+										rel="noopener noreferrer"
+										className="underline">
+										varies by model
+									</a>
+									)
+								</li>
+							</ul>
 							<p className="mt-2 text-sm text-muted-foreground">
 								To cover our infrastructure costs, we charge ${PRICE_CREDITS}/hour while the agent is
 								running (independent of inference costs).
@@ -280,25 +316,25 @@ export default function PricingPage() {
 								There are no markups, no tiers, no dumbing-down of models to increase our profit.
 							</p>
 						</div>
-						<div className="rounded-lg border border-border bg-card p-6">
+						<div className="rounded-xl border border-border bg-card p-6">
 							<h3 className="font-semibold">Do I need a credit card for the free trial?</h3>
 							<p className="mt-2 text-sm text-muted-foreground">
 								Yes, but you won&apos;t be charged until your trial ends, except for credit purchases.
 							</p>
 							<p className="mt-2 text-sm text-muted-foreground">You can cancel anytime with one click.</p>
 						</div>
-						<div className="rounded-lg border border-border bg-card p-6">
+						<div className="rounded-xl border border-border bg-card p-6">
 							<h3 className="font-semibold">What payment methods do you accept?</h3>
 							<p className="mt-2 text-sm text-muted-foreground">
 								We accept all major credit cards, debit cards, and can arrange invoice billing for
 								Enterprise customers.
 							</p>
 						</div>
-						<div className="rounded-lg border border-border bg-card p-6">
-							<h3 className="font-semibold">Can I change plans anytime?</h3>
+						<div className="rounded-xl border border-border bg-card p-6">
+							<h3 className="font-semibold">Can I cancel or change plans?</h3>
 							<p className="mt-2 text-sm text-muted-foreground">
-								Yes, you can upgrade or downgrade your plan at any time. Changes will be reflected in
-								your next billing cycle.
+								Yes, you can upgrade, downgrade or cancel your plan at any time. Changes will be
+								reflected in your next billing cycle.
 							</p>
 						</div>
 					</div>

+ 190 - 0
apps/web-roo-code/src/app/provider/pricing/components/model-card.tsx

@@ -0,0 +1,190 @@
+import { ModelWithTotalPrice } from "@/lib/types/models"
+import { formatCurrency, formatTokens } from "@/lib/formatters"
+import {
+	ArrowLeftToLine,
+	ArrowRightToLine,
+	Building2,
+	Check,
+	Expand,
+	Gift,
+	HardDriveDownload,
+	HardDriveUpload,
+	RulerDimensionLine,
+	ChevronDown,
+	ChevronUp,
+} from "lucide-react"
+import { useState } from "react"
+
+/** Props accepted by the ModelCard component. */
+interface ModelCardProps {
+	// Model to display; carries the precomputed totalPrice in addition to the base Model fields.
+	model: ModelWithTotalPrice
+}
+
+/**
+ * Card showing a single model: name, description, provider, context window,
+ * max output tokens, input/output/cache prices and feature tags.
+ *
+ * Below the `sm` breakpoint the card starts collapsed: the description is
+ * clamped to two lines and the max-output, cache-price and feature rows are
+ * hidden until the user presses the "More" toggle. From `sm` upwards every row
+ * is always visible and the toggle row is hidden.
+ *
+ * @param model Model to render.
+ */
+export function ModelCard({ model }: ModelCardProps) {
+	// Prices are per token, multiply by 1M to get price per million tokens
+	const inputPrice = parseFloat(model.pricing.input) * 1_000_000
+	const outputPrice = parseFloat(model.pricing.output) * 1_000_000
+	// A missing or empty cache price falls back to "0", which hides the matching row below.
+	const cacheReadPrice = parseFloat(model.pricing.input_cache_read || "0") * 1_000_000
+	const cacheWritePrice = parseFloat(model.pricing.input_cache_write || "0") * 1_000_000
+
+	// The "free" tag drives the "Free!" badge next to the model name.
+	const free = model.tags.includes("free")
+	// Filter tags to only show vision and reasoning
+	const displayTags = model.tags.filter((tag) => tag === "vision" || tag === "reasoning")
+
+	// Mobile collapsed/expanded state
+	const [expanded, setExpanded] = useState(false)
+
+	return (
+		<div
+			className={[
+				"relative cursor-default px-8 pt-7 pb-5 flex flex-col justify-start bg-background border rounded-3xl transition-all hover:shadow-xl",
+				// On mobile, visually hint at expandability
+				"sm:cursor-default",
+			].join(" ")}>
+			{/* Header: always visible */}
+			<div className="mb-4">
+				<h3 className="text-xl font-semibold tracking-tight mb-2 flex items-center gap-2 justify-between">
+					{model.name}
+					{free && (
+						<span className="inline-flex items-center text-sm font-medium text-green-500">
+							<Gift className="size-4 mr-1" />
+							Free!
+						</span>
+					)}
+				</h3>
+				<p
+					className={[
+						"text-sm text-muted-foreground",
+						// On mobile + collapsed: clamp description
+						"sm:line-clamp-none",
+						!expanded ? "line-clamp-2" : "",
+					]
+						.join(" ")
+						.trim()}>
+					{model.description}
+				</p>
+			</div>
+
+			{/* Content - pinned to bottom */}
+			<div className="overflow-x-auto mt-auto">
+				<table className="w-full text-xs">
+					<tbody>
+						{/* Provider: always visible if present */}
+						{model.owned_by && (
+							<tr className="border-b border-border">
+								<td className="py-1.5 font-medium text-muted-foreground">
+									<Building2 className="size-4 inline-block mr-1.5" />
+									Provider
+								</td>
+								<td className="py-1.5 text-right">{model.owned_by}</td>
+							</tr>
+						)}
+
+						{/* Context Window: always visible */}
+						<tr className="border-b border-border">
+							<td className="py-1.5 font-medium text-muted-foreground">
+								<RulerDimensionLine className="size-4 inline-block mr-1.5" />
+								Context Window
+							</td>
+							<td className="py-1.5 text-right font-mono">{formatTokens(model.context_window)}</td>
+						</tr>
+
+						{/* Max Output Tokens: always visible on >=sm, expandable on mobile */}
+						<tr
+							className={["border-b border-border", expanded ? "table-row" : "hidden sm:table-row"].join(
+								" ",
+							)}>
+							<td className="py-1.5 font-medium text-muted-foreground">
+								<Expand className="size-4 inline-block mr-1.5" />
+								Max Output Tokens
+							</td>
+							<td className="py-1.5 text-right font-mono">{formatTokens(model.max_tokens)}</td>
+						</tr>
+
+						{/* Input Price: always visible */}
+						<tr className="border-b border-border">
+							<td className="py-1.5 font-medium text-muted-foreground">
+								<ArrowRightToLine className="size-4 inline-block mr-1.5" />
+								Input Price
+							</td>
+							<td className="py-1.5 text-right">
+								{inputPrice === 0 ? "Free" : `${formatCurrency(inputPrice)}/1M tokens`}
+							</td>
+						</tr>
+
+						{/* Output Price: always visible */}
+						<tr
+							className={[
+								"border-b border-border",
+								// Add subtle separation from toggle on mobile
+							].join(" ")}>
+							<td className="py-1.5 font-medium text-muted-foreground">
+								<ArrowLeftToLine className="size-4 inline-block mr-1.5" />
+								Output Price
+							</td>
+							<td className="py-1.5 text-right">
+								{outputPrice === 0 ? "Free" : `${formatCurrency(outputPrice)}/1M tokens`}
+							</td>
+						</tr>
+
+						{/* Cache pricing: only visible on mobile when expanded, always visible on >=sm */}
+						{cacheReadPrice > 0 && (
+							<tr
+								className={[
+									"border-b border-border",
+									expanded ? "table-row" : "hidden sm:table-row",
+								].join(" ")}>
+								<td className="py-1.5 font-medium text-muted-foreground">
+									<HardDriveUpload className="size-4 inline-block mr-1.5" />
+									Cache Read
+								</td>
+								<td className="py-1.5 text-right">{formatCurrency(cacheReadPrice)}/1M tokens</td>
+							</tr>
+						)}
+
+						{cacheWritePrice > 0 && (
+							<tr
+								className={[
+									"border-b border-border",
+									expanded ? "table-row" : "hidden sm:table-row",
+								].join(" ")}>
+								<td className="py-1.5 font-medium text-muted-foreground">
+									<HardDriveDownload className="size-4 inline-block mr-1.5" />
+									Cache Write
+								</td>
+								<td className="py-1.5 text-right">{formatCurrency(cacheWritePrice)}/1M tokens</td>
+							</tr>
+						)}
+
+						{/* Tags row: only show if there are vision or reasoning tags */}
+						{displayTags.length > 0 && (
+							<tr className={[expanded ? "table-row" : "hidden sm:table-row"].join(" ")}>
+								<td className="py-1.5 font-medium text-muted-foreground align-top">Features</td>
+								<td className="py-1.5">
+									{displayTags.map((tag) => (
+										<span key={tag} className="flex justify-end items-center text-xs capitalize">
+											<Check className="size-3 m-1" />
+											{tag}
+										</span>
+									))}
+								</td>
+							</tr>
+						)}
+
+						{/* Mobile-only toggle row */}
+						<tr className="sm:hidden">
+							<td colSpan={2} className="pt-3">
+								<button
+									type="button"
+									onClick={() => setExpanded((v) => !v)}
+									className="w-full inline-flex items-center justify-center gap-1.5 text-xs font-medium text-primary">
+									{expanded ? "Less" : "More"}
+									{expanded ? <ChevronUp className="size-3" /> : <ChevronDown className="size-3" />}
+								</button>
+							</td>
+						</tr>
+					</tbody>
+				</table>
+			</div>
+		</div>
+	)
+}

+ 253 - 0
apps/web-roo-code/src/app/provider/pricing/page.tsx

@@ -0,0 +1,253 @@
+"use client"
+
+import { useEffect, useMemo, useState } from "react"
+import { ModelCard } from "./components/model-card"
+import { Model, ModelWithTotalPrice, ModelsResponse, SortOption } from "@/lib/types/models"
+import Link from "next/link"
+import { ChevronDown, CircleX, Loader, LoaderCircle, Search } from "lucide-react"
+
+const API_URL = "https://api.roocode.com/proxy/v1/models?include_paid=true"
+
+// Static FAQ entries shown at the bottom of the provider pricing page.
+// Each `answer` is either a plain string or a JSX fragment (paragraphs / links).
+const faqs = [
+	{
+		question: "What are AI model providers?",
+		answer: "AI model providers offer various language models with different capabilities and pricing.",
+	},
+	{
+		question: "How is pricing calculated?",
+		answer: "Pricing is based on token usage for input and output, measured per million tokens, like pretty much any other provider out there.",
+	},
+	{
+		question: "What is the Roo Code Cloud Provider?",
+		answer: (
+			<>
+				<p>This is our very own model provider, optimized to work seamlessly with Roo Code Cloud.</p>
+				<p>
+					It offers a selection of state-of-the-art LLMs (both closed and open weight) we know work well with
+					Roo for you to choose, with no markup.
+				</p>
+				<p>
+					We also often feature 100% free models which labs share with us for the community to use and provide
+					feedback.
+				</p>
+			</>
+		),
+	},
+	{
+		question: "But how much does the Roo Code Cloud service cost?",
+		answer: (
+			<>
+				Our{" "}
+				<Link href="/pricing" className="underline hover:no-underline">
+					service pricing is here.
+				</Link>
+			</>
+		),
+	},
+]
+
+/**
+ * Adds a model's input and output prices into one number, used as the key for
+ * the price sort options.
+ *
+ * Both prices arrive as strings and are parsed with parseFloat, so an
+ * unparsable value yields NaN.
+ */
+function calculateTotalPrice(model: Model): number {
+	const { input, output } = model.pricing
+	return parseFloat(input) + parseFloat(output)
+}
+
+/** Returns a shallow copy of `model` with the computed `totalPrice` attached. */
+function enrichModelWithTotalPrice(model: Model): ModelWithTotalPrice {
+	const totalPrice = calculateTotalPrice(model)
+	return { ...model, totalPrice }
+}
+
+/**
+ * Provider pricing page.
+ *
+ * On mount it fetches the model list from API_URL, attaches a total price to
+ * every model, and then renders a searchable, sortable grid of ModelCard
+ * components followed by the FAQ section. Deprecated models are never listed.
+ *
+ * Rendering states: a spinner while loading, an error notice when the fetch
+ * fails, an empty-state message when nothing matches the search, otherwise
+ * the card grid.
+ */
+export default function ProviderPricingPage() {
+	const [models, setModels] = useState<ModelWithTotalPrice[]>([])
+	const [loading, setLoading] = useState(true)
+	const [error, setError] = useState<string | null>(null)
+	const [searchQuery, setSearchQuery] = useState("")
+	const [sortOption, setSortOption] = useState<SortOption>("alphabetical")
+
+	// Load the model list once, when the page mounts.
+	useEffect(() => {
+		async function fetchModels() {
+			try {
+				setLoading(true)
+				setError(null)
+				const response = await fetch(API_URL)
+				if (!response.ok) {
+					throw new Error(`Failed to fetch models: ${response.statusText}`)
+				}
+				const data: ModelsResponse = await response.json()
+				const enrichedModels = data.data.map(enrichModelWithTotalPrice)
+				setModels(enrichedModels)
+			} catch (err) {
+				setError(err instanceof Error ? err.message : "An error occurred while fetching models")
+			} finally {
+				setLoading(false)
+			}
+		}
+
+		fetchModels()
+	}, [])
+
+	const filteredAndSortedModels = useMemo(() => {
+		// Filter out deprecated models
+		let filtered = models.filter((model) => !model.deprecated)
+
+		// Filter by search query
+		if (searchQuery.trim()) {
+			const query = searchQuery.toLowerCase()
+			filtered = filtered.filter((model) => {
+				return (
+					model.name.toLowerCase().includes(query) ||
+					model.owned_by?.toLowerCase().includes(query) ||
+					model.description.toLowerCase().includes(query)
+				)
+			})
+		}
+
+		// Sort filtered results (on a copy, so the filtered array is not mutated)
+		const sorted = [...filtered]
+		switch (sortOption) {
+			case "alphabetical":
+				sorted.sort((a, b) => a.name.localeCompare(b.name))
+				break
+			case "price-asc":
+				sorted.sort((a, b) => a.totalPrice - b.totalPrice)
+				break
+			case "price-desc":
+				sorted.sort((a, b) => b.totalPrice - a.totalPrice)
+				break
+			case "context-window-asc":
+				sorted.sort((a, b) => a.context_window - b.context_window)
+				break
+			case "context-window-desc":
+				sorted.sort((a, b) => b.context_window - a.context_window)
+				break
+		}
+
+		return sorted
+	}, [models, searchQuery, sortOption])
+
+	// Count non-deprecated models for the display
+	const nonDeprecatedCount = useMemo(() => models.filter((model) => !model.deprecated).length, [models])
+
+	return (
+		<>
+			<section className="relative overflow-hidden py-16">
+				<div className="container relative z-10 mx-auto px-4 sm:px-6 lg:px-8">
+					<div className="text-center">
+						<h1 className="text-4xl md:text-5xl font-bold tracking-tight">
+							Roo Code Cloud Provider Pricing
+						</h1>
+						<p className="mx-auto mt-4 max-w-2xl md:text-lg text-muted-foreground">
+							See pricing and features for all models we offer in our selection.
+							<br />
+							You can always bring your own key (
+							<Link href="#faq" className="underline hover:no-underline">
+								FAQ
+							</Link>
+							).
+						</p>
+					</div>
+				</div>
+			</section>
+
+			<section className="py-10 relative border-t border-b">
+				<div className="absolute inset-0 bg-gradient-to-br from-violet-500/0 via-violet-500/10 to-violet-500/0 dark:from-blue-500/10 dark:via-cyan-500/10 dark:to-purple-500/10" />
+				<div className="container mx-auto px-4 sm:px-6 lg:px-8">
+					<div className="mx-auto max-w-4xl">
+						<div className="flex flex-col gap-4 sm:flex-row sm:items-center sm:justify-between">
+							<div className="flex-1">
+								<div className="relative">
+									<Search className="absolute left-3 top-1/2 h-4 w-4 -translate-y-1/2 text-muted-foreground" />
+									<input
+										type="text"
+										placeholder="Search models..."
+										value={searchQuery}
+										onChange={(e) => setSearchQuery(e.target.value)}
+										className="w-full rounded-full border border-input bg-background px-10 py-2 text-base ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2"
+									/>
+
+									<div className="text-sm cursor-default text-muted-foreground absolute bg-background right-0 top-0 m-0.5 px-3 py-2 rounded-full">
+										{filteredAndSortedModels.length} of {nonDeprecatedCount} models
+									</div>
+								</div>
+							</div>
+							<div className="flex-shrink-0">
+								<div className="flex items-center gap-2 relative">
+									<select
+										id="sort"
+										value={sortOption}
+										onChange={(e) => setSortOption(e.target.value as SortOption)}
+										className="rounded-full cursor-pointer border border-input bg-background hover:bg-muted pl-4 w-full md:w-auto pr-9 py-2.5 text-base ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 relative appearance-none">
+										<option value="alphabetical">Alphabetical</option>
+										<option value="price-asc">Price: Low to High</option>
+										<option value="price-desc">Price: High to Low</option>
+										<option value="context-window-asc">Context Window: Small to Large</option>
+										<option value="context-window-desc">Context Window: Large to Small</option>
+									</select>
+									<ChevronDown className="size-4 absolute right-3" />
+								</div>
+							</div>
+						</div>
+					</div>
+				</div>
+
+				<div className="container mx-auto px-4 sm:px-6 lg:px-8 ">
+					<div className="mx-auto max-w-6xl">
+						{loading && (
+							<div className="text-center pt-12 space-y-2 mb-4">
+								<LoaderCircle className="size-8 text-muted-foreground mx-auto animate-spin" />
+								<p className="text-lg">Loading model list...</p>
+							</div>
+						)}
+
+						{error && (
+							<div className="text-center pt-12 space-y-2">
+								<CircleX className="size-8 text-muted-foreground mx-auto mb-4" />
+								<p className="text-lg">Oops, couldn&apos;t load the model list.</p>
+								<p className="text-muted-foreground">Try again in a bit please.</p>
+							</div>
+						)}
+
+						{!loading && !error && filteredAndSortedModels.length === 0 && (
+							<div className="text-center pt-12 space-y-2">
+								<Loader className="size-8 text-muted-foreground mx-auto mb-4" />
+								<p className="text-lg">No models match your search.</p>
+								<p className="text-muted-foreground">
+									Keep in mind we don&apos;t have every model under the sun – only the ones we think
+									are worth using.
+									<br />
+									You can always use a third-party provider to access a wider selection.
+								</p>
+							</div>
+						)}
+
+						{!loading && !error && filteredAndSortedModels.length > 0 && (
+							<div className="grid gap-4 pt-8 md:grid-cols-2 lg:grid-cols-3">
+								{filteredAndSortedModels.map((model) => (
+									<ModelCard key={model.id} model={model} />
+								))}
+							</div>
+						)}
+					</div>
+				</div>
+			</section>
+
+			{/* FAQ Section */}
+			<section className="bg-background my-16 relative z-50">
+				<a id="faq" />
+				<div className="container mx-auto px-4 sm:px-6 lg:px-8">
+					<div className="mx-auto max-w-3xl text-center">
+						<h2 className="text-3xl font-bold tracking-tight sm:text-4xl">Frequently Asked Questions</h2>
+					</div>
+					<div className="mx-auto mt-12 grid max-w-5xl gap-8 md:grid-cols-2">
+						{faqs.map((faq, index) => (
+							<div key={index} className="rounded-lg border border-border bg-card p-6">
+								<h3 className="font-semibold">{faq.question}</h3>
+								{/* A div, not a p: some answers contain their own <p> elements, and a <p> nested in a <p> is invalid HTML that breaks hydration */}
+								<div className="mt-2 text-sm text-muted-foreground">{faq.answer}</div>
+							</div>
+						))}
+					</div>
+				</div>
+			</section>
+		</>
+	)
+}

+ 2 - 9
apps/web-roo-code/src/components/providers/posthog-provider.tsx

@@ -32,7 +32,6 @@ export function PostHogProvider({ children }: { children: React.ReactNode }) {
 		// Initialize PostHog immediately on the client side
 		if (typeof window !== "undefined" && !posthog.__loaded) {
 			const posthogKey = process.env.NEXT_PUBLIC_POSTHOG_KEY
-			const posthogHost = process.env.NEXT_PUBLIC_POSTHOG_HOST
 
 			// Check if environment variables are set
 			if (!posthogKey) {
@@ -43,19 +42,13 @@ export function PostHogProvider({ children }: { children: React.ReactNode }) {
 				return
 			}
 
-			if (!posthogHost) {
-				console.warn(
-					"PostHog host URL is missing. Using default host. " +
-						"Please set NEXT_PUBLIC_POSTHOG_HOST in your .env file.",
-				)
-			}
-
 			// Check if user has already consented to cookies
 			const userHasConsented = hasConsent()
 
 			// Initialize PostHog with appropriate persistence based on consent
 			posthog.init(posthogKey, {
-				api_host: posthogHost || "https://us.i.posthog.com",
+				api_host: "https://ph.roocode.com",
+				ui_host: "https://us.posthog.com",
 				capture_pageview: false, // We handle pageview tracking manually
 				loaded: (posthogInstance) => {
 					if (process.env.NODE_ENV === "development") {

+ 1 - 1
apps/web-roo-code/src/lib/constants.ts

@@ -26,7 +26,7 @@ export const EXTERNAL_LINKS = {
 	TESTIMONIALS: "https://roocode.com/#testimonials",
 	CLOUD_APP_LOGIN: "https://app.roocode.com/sign-in",
 	CLOUD_APP_SIGNUP: "https://app.roocode.com/sign-up",
-	CLOUD_APP_SIGNUP_PRO: "https://app.roocode.com/sign-up?redirect_url=/cloud-agents/welcome",
+	CLOUD_APP_SIGNUP_PRO: "https://app.roocode.com/sign-up?redirect_url=/cloud-agents/setup",
 }
 
 export const INTERNAL_LINKS = {

+ 22 - 0
apps/web-roo-code/src/lib/formatters.ts

@@ -0,0 +1,22 @@
+// Single shared en-US / USD formatter, built once at module load.
+const usdFormatter = new Intl.NumberFormat("en-US", { style: "currency", currency: "USD" })
+
+/** Formats a number as a US-dollar amount, e.g. 1234.5 becomes "$1,234.50". */
+export const formatCurrency = (amount: number) => {
+	return usdFormatter.format(amount)
+}
+
+/**
+ * Formats a token count compactly with one decimal and a K / M / B suffix,
+ * e.g. 128000 becomes "128.0K" and 1500000 becomes "1.5M". Counts below 1000
+ * are returned unchanged as a plain number string.
+ *
+ * A value that would round up to "1000.0" in its unit is promoted to the next
+ * unit, so 999_999 renders as "1.0M" rather than "1000.0K". The largest unit
+ * (B) is never promoted.
+ *
+ * @param tokens Token count to format.
+ * @returns The formatted string.
+ */
+export const formatTokens = (tokens: number) => {
+	if (tokens < 1000) {
+		return tokens.toString()
+	}
+
+	const units: Array<[number, string]> = [
+		[1_000, "K"],
+		[1_000_000, "M"],
+		[1_000_000_000, "B"],
+	]
+
+	for (let i = 0; i < units.length; i++) {
+		const [divisor, suffix] = units[i]!
+		const scaled = (tokens / divisor).toFixed(1)
+		// Use this unit if it is the last one, or if the rounded value still fits below 1000.
+		if (i === units.length - 1 || Number(scaled) < 1000) {
+			return `${scaled}${suffix}`
+		}
+	}
+
+	// Unreachable: the loop always returns on the last unit.
+	return tokens.toString()
+}

+ 31 - 0
apps/web-roo-code/src/lib/types/models.ts

@@ -0,0 +1,31 @@
+/**
+ * Price fields of a model, as decimal strings (consumers parse them with parseFloat).
+ * NOTE(review): the model card treats these as per-token prices and falls back to "0"
+ * for the cache fields, so the API may omit them even though they are typed as required — confirm.
+ */
+export interface ModelPricing {
+	input: string
+	output: string
+	input_cache_read: string
+	input_cache_write: string
+}
+
+/** A single model entry as returned in the models API response. */
+export interface Model {
+	id: string
+	object: string
+	created: number
+	// Shown as the "Provider" row on the model card when non-empty.
+	owned_by: string
+	name: string
+	description: string
+	// Token counts; displayed through formatTokens.
+	context_window: number
+	max_tokens: number
+	type: string
+	// The UI looks for "free", "vision" and "reasoning" in this list.
+	tags: string[]
+	pricing: ModelPricing
+	// Deprecated models are filtered out of the pricing page.
+	deprecated?: boolean
+}
+
+/** Envelope of the models API response: the model list lives under `data`. */
+export interface ModelsResponse {
+	data: Model[]
+}
+
+/** A Model extended with a precomputed total price, used for sorting by price. */
+export interface ModelWithTotalPrice extends Model {
+	// Sum of the parsed input and output prices.
+	totalPrice: number
+}
+
+/** Sort orders offered by the pricing page's sort dropdown. */
+export type SortOption = "alphabetical" | "price-asc" | "price-desc" | "context-window-asc" | "context-window-desc"

+ 1 - 1
knip.json

@@ -16,7 +16,7 @@
 			"project": ["**/*.ts"]
 		},
 		"webview-ui": {
-			"entry": ["src/index.tsx"],
+			"entry": ["src/index.tsx", "src/browser-panel.tsx"],
 			"project": ["src/**/*.{ts,tsx}", "../src/shared/*.ts"]
 		},
 		"packages/{build,cloud,evals,ipc,telemetry,types}": {

+ 5 - 4
package.json

@@ -50,7 +50,7 @@
 		"@vscode/vsce": "3.3.2",
 		"esbuild": "^0.25.0",
 		"eslint": "^9.27.0",
-		"glob": "^11.0.3",
+		"glob": "^11.1.0",
 		"husky": "^9.1.7",
 		"knip": "^5.44.4",
 		"lint-staged": "^16.0.0",
@@ -70,12 +70,13 @@
 	},
 	"pnpm": {
 		"overrides": {
-			"tar-fs": ">=2.1.3",
+			"tar-fs": ">=3.1.1",
 			"esbuild": ">=0.25.0",
-			"brace-expansion": ">=2.0.2",
+			"undici": ">=5.29.0",
+			"brace-expansion": "^2.0.2",
 			"form-data": ">=4.0.4",
 			"bluebird": ">=3.7.2",
-			"type-fest": "2.19.0"
+			"glob": ">=11.1.0"
 		}
 	}
 }

+ 0 - 12
packages/build/src/__tests__/index.test.ts

@@ -100,12 +100,6 @@ describe("generatePackageJson", () => {
 								default: "",
 								description: "%settings.customStoragePath.description%",
 							},
-							"roo-cline.toolProtocol": {
-								type: "string",
-								enum: ["xml", "native"],
-								default: "xml",
-								description: "%settings.toolProtocol.description%",
-							},
 						},
 					},
 				},
@@ -219,12 +213,6 @@ describe("generatePackageJson", () => {
 							default: "",
 							description: "%settings.customStoragePath.description%",
 						},
-						"roo-code-nightly.toolProtocol": {
-							type: "string",
-							enum: ["xml", "native"],
-							default: "xml",
-							description: "%settings.toolProtocol.description%",
-						},
 					},
 				},
 			},

+ 10 - 0
packages/cloud/src/CloudAPI.ts

@@ -134,4 +134,14 @@ export class CloudAPI {
 					.parse(data),
 		})
 	}
+
+	/**
+	 * Fetches the credit balance from the extension credit-balance endpoint.
+	 *
+	 * The response body is validated to be an object with a numeric `balance`
+	 * field; validation failure throws from the schema's `parse`.
+	 *
+	 * @returns The numeric balance from the response.
+	 */
+	async creditBalance(): Promise<number> {
+		const balanceSchema = z.object({ balance: z.number() })
+
+		return this.request("/api/extension/credit-balance", {
+			method: "GET",
+			parseResponse: (data) => balanceSchema.parse(data).balance,
+		})
+	}
 }

+ 4 - 3
packages/cloud/src/CloudService.ts

@@ -178,9 +178,9 @@ export class CloudService extends EventEmitter<CloudServiceEvents> implements Di
 
 	// AuthService
 
-	public async login(landingPageSlug?: string): Promise<void> {
+	public async login(landingPageSlug?: string, useProviderSignup: boolean = false): Promise<void> {
 		this.ensureInitialized()
-		return this.authService!.login(landingPageSlug)
+		return this.authService!.login(landingPageSlug, useProviderSignup)
 	}
 
 	public async logout(): Promise<void> {
@@ -245,9 +245,10 @@ export class CloudService extends EventEmitter<CloudServiceEvents> implements Di
 		code: string | null,
 		state: string | null,
 		organizationId?: string | null,
+		providerModel?: string | null,
 	): Promise<void> {
 		this.ensureInitialized()
-		return this.authService!.handleCallback(code, state, organizationId)
+		return this.authService!.handleCallback(code, state, organizationId, providerModel)
 	}
 
 	public async switchOrganization(organizationId: string | null): Promise<void> {

+ 2 - 1
packages/cloud/src/StaticTokenAuthService.ts

@@ -47,7 +47,7 @@ export class StaticTokenAuthService extends EventEmitter<AuthServiceEvents> impl
 		this.emit("user-info", { userInfo: this.userInfo })
 	}
 
-	public async login(): Promise<void> {
+	public async login(_landingPageSlug?: string, _useProviderSignup?: boolean): Promise<void> {
 		throw new Error("Authentication methods are disabled in StaticTokenAuthService")
 	}
 
@@ -59,6 +59,7 @@ export class StaticTokenAuthService extends EventEmitter<AuthServiceEvents> impl
 		_code: string | null,
 		_state: string | null,
 		_organizationId?: string | null,
+		_providerModel?: string | null,
 	): Promise<void> {
 		throw new Error("Authentication methods are disabled in StaticTokenAuthService")
 	}

+ 14 - 3
packages/cloud/src/WebAuthService.ts

@@ -252,8 +252,9 @@ export class WebAuthService extends EventEmitter<AuthServiceEvents> implements A
 	 * and opening the browser to the authorization URL.
 	 *
 	 * @param landingPageSlug Optional slug of a specific landing page (e.g., "supernova", "special-offer", etc.)
+	 * @param useProviderSignup If true, uses provider signup flow (/extension/provider-sign-up). If false, uses standard sign-in (/extension/sign-in). Defaults to false.
 	 */
-	public async login(landingPageSlug?: string): Promise<void> {
+	public async login(landingPageSlug?: string, useProviderSignup: boolean = false): Promise<void> {
 		try {
 			const vscode = await importVscode()
 
@@ -272,10 +273,12 @@ export class WebAuthService extends EventEmitter<AuthServiceEvents> implements A
 				auth_redirect: `${vscode.env.uriScheme}://${publisher}.${name}`,
 			})
 
-			// Use landing page URL if slug is provided, otherwise use default sign-in URL
+			// Use landing page URL if slug is provided, otherwise use provider sign-up or sign-in URL based on parameter
 			const url = landingPageSlug
 				? `${getRooCodeApiUrl()}/l/${landingPageSlug}?${params.toString()}`
-				: `${getRooCodeApiUrl()}/extension/sign-in?${params.toString()}`
+				: useProviderSignup
+					? `${getRooCodeApiUrl()}/extension/provider-sign-up?${params.toString()}`
+					: `${getRooCodeApiUrl()}/extension/sign-in?${params.toString()}`
 
 			await vscode.env.openExternal(vscode.Uri.parse(url))
 		} catch (error) {
@@ -294,11 +297,13 @@ export class WebAuthService extends EventEmitter<AuthServiceEvents> implements A
 	 * @param code The authorization code from the callback
 	 * @param state The state parameter from the callback
 	 * @param organizationId The organization ID from the callback (null for personal accounts)
+	 * @param providerModel The model ID selected during signup (optional)
 	 */
 	public async handleCallback(
 		code: string | null,
 		state: string | null,
 		organizationId?: string | null,
+		providerModel?: string | null,
 	): Promise<void> {
 		if (!code || !state) {
 			const vscode = await importVscode()
@@ -326,6 +331,12 @@ export class WebAuthService extends EventEmitter<AuthServiceEvents> implements A
 
 			await this.storeCredentials(credentials)
 
+			// Store the provider model if provided
+			if (providerModel) {
+				await this.context.globalState.update("roo-provider-model", providerModel)
+				this.log(`[auth] Stored provider model: ${providerModel}`)
+			}
+
 			const vscode = await importVscode()
 
 			if (vscode) {

+ 96 - 0
packages/cloud/src/__tests__/CloudAPI.creditBalance.spec.ts

@@ -0,0 +1,96 @@
+import { describe, it, expect, vi, beforeEach, type Mock } from "vitest"
+import { CloudAPI } from "../CloudAPI.js"
+import { AuthenticationError, CloudAPIError } from "../errors.js"
+import type { AuthService } from "@roo-code/types"
+
+// Mock the config module
+vi.mock("../config.js", () => ({
+	getRooCodeApiUrl: () => "https://api.test.com",
+}))
+
+// Mock the utils module
+vi.mock("../utils.js", () => ({
+	getUserAgent: () => "test-user-agent",
+}))
+
+describe("CloudAPI.creditBalance", () => {
+	let mockAuthService: {
+		getSessionToken: Mock<() => string | undefined>
+	}
+	let cloudAPI: CloudAPI
+
+	beforeEach(() => {
+		mockAuthService = {
+			getSessionToken: vi.fn(),
+		}
+		cloudAPI = new CloudAPI(mockAuthService as unknown as AuthService)
+
+		// Reset fetch mock
+		global.fetch = vi.fn()
+	})
+
+	it("should fetch credit balance successfully", async () => {
+		const mockBalance = 12.34
+		mockAuthService.getSessionToken.mockReturnValue("test-session-token")
+
+		global.fetch = vi.fn().mockResolvedValue({
+			ok: true,
+			json: async () => ({ balance: mockBalance }),
+		})
+
+		const balance = await cloudAPI.creditBalance()
+
+		expect(balance).toBe(mockBalance)
+		expect(global.fetch).toHaveBeenCalledWith(
+			"https://api.test.com/api/extension/credit-balance",
+			expect.objectContaining({
+				method: "GET",
+				headers: expect.objectContaining({
+					Authorization: "Bearer test-session-token",
+					"Content-Type": "application/json",
+					"User-Agent": "test-user-agent",
+				}),
+			}),
+		)
+	})
+
+	it("should throw AuthenticationError when session token is missing", async () => {
+		mockAuthService.getSessionToken.mockReturnValue(undefined)
+
+		await expect(cloudAPI.creditBalance()).rejects.toThrow(AuthenticationError)
+	})
+
+	it("should handle API errors", async () => {
+		mockAuthService.getSessionToken.mockReturnValue("test-session-token")
+
+		global.fetch = vi.fn().mockResolvedValue({
+			ok: false,
+			status: 500,
+			statusText: "Internal Server Error",
+			json: async () => ({ error: "Server error" }),
+		})
+
+		await expect(cloudAPI.creditBalance()).rejects.toThrow(CloudAPIError)
+	})
+
+	it("should handle network errors", async () => {
+		mockAuthService.getSessionToken.mockReturnValue("test-session-token")
+
+		global.fetch = vi.fn().mockRejectedValue(new TypeError("fetch failed"))
+
+		await expect(cloudAPI.creditBalance()).rejects.toThrow(
+			"Network error while calling /api/extension/credit-balance",
+		)
+	})
+
+	it("should handle invalid response format", async () => {
+		mockAuthService.getSessionToken.mockReturnValue("test-session-token")
+
+		global.fetch = vi.fn().mockResolvedValue({
+			ok: true,
+			json: async () => ({ invalid: "response" }),
+		})
+
+		await expect(cloudAPI.creditBalance()).rejects.toThrow()
+	})
+})

+ 12 - 2
packages/cloud/src/__tests__/CloudService.test.ts

@@ -296,12 +296,22 @@ describe("CloudService", () => {
 
 		it("should delegate handleAuthCallback to AuthService", async () => {
 			await cloudService.handleAuthCallback("code", "state")
-			expect(mockAuthService.handleCallback).toHaveBeenCalledWith("code", "state", undefined)
+			expect(mockAuthService.handleCallback).toHaveBeenCalledWith("code", "state", undefined, undefined)
 		})
 
 		it("should delegate handleAuthCallback with organizationId to AuthService", async () => {
 			await cloudService.handleAuthCallback("code", "state", "org_123")
-			expect(mockAuthService.handleCallback).toHaveBeenCalledWith("code", "state", "org_123")
+			expect(mockAuthService.handleCallback).toHaveBeenCalledWith("code", "state", "org_123", undefined)
+		})
+
+		it("should delegate handleAuthCallback with providerModel to AuthService", async () => {
+			await cloudService.handleAuthCallback("code", "state", "org_123", "xai/grok-code-fast-1")
+			expect(mockAuthService.handleCallback).toHaveBeenCalledWith(
+				"code",
+				"state",
+				"org_123",
+				"xai/grok-code-fast-1",
+			)
 		})
 
 		it("should return stored organization ID from AuthService", () => {

+ 48 - 1
packages/cloud/src/__tests__/WebAuthService.spec.ts

@@ -261,7 +261,7 @@ describe("WebAuthService", () => {
 			)
 		})
 
-		it("should use package.json values for redirect URI", async () => {
+		it("should use package.json values for redirect URI with default sign-in endpoint", async () => {
 			const mockOpenExternal = vi.fn()
 			const vscode = await import("vscode")
 			vi.mocked(vscode.env.openExternal).mockImplementation(mockOpenExternal)
@@ -281,6 +281,26 @@ describe("WebAuthService", () => {
 			expect(calledUri.toString()).toBe(expectedUrl)
 		})
 
+		it("should use provider signup URL when useProviderSignup is true", async () => {
+			const mockOpenExternal = vi.fn()
+			const vscode = await import("vscode")
+			vi.mocked(vscode.env.openExternal).mockImplementation(mockOpenExternal)
+
+			await authService.login(undefined, true)
+
+			const expectedUrl =
+				"https://api.test.com/extension/provider-sign-up?state=746573742d72616e646f6d2d6279746573&auth_redirect=vscode%3A%2F%2FRooVeterinaryInc.roo-cline"
+			expect(mockOpenExternal).toHaveBeenCalledWith(
+				expect.objectContaining({
+					toString: expect.any(Function),
+				}),
+			)
+
+			// Verify the actual URL
+			const calledUri = mockOpenExternal.mock.calls[0]?.[0]
+			expect(calledUri.toString()).toBe(expectedUrl)
+		})
+
 		it("should handle errors during login", async () => {
 			vi.mocked(crypto.randomBytes).mockImplementation(() => {
 				throw new Error("Crypto error")
@@ -351,6 +371,33 @@ describe("WebAuthService", () => {
 			expect(mockShowInfo).toHaveBeenCalledWith("Successfully authenticated with Roo Code Cloud")
 		})
 
+		it("should store provider model when provided in callback", async () => {
+			const storedState = "valid-state"
+			mockContext.globalState.get.mockReturnValue(storedState)
+
+			// Mock successful Clerk sign-in response
+			const mockResponse = {
+				ok: true,
+				json: () =>
+					Promise.resolve({
+						response: { created_session_id: "session-123" },
+					}),
+				headers: {
+					get: (header: string) => (header === "authorization" ? "Bearer token-123" : null),
+				},
+			}
+			mockFetch.mockResolvedValue(mockResponse)
+
+			const vscode = await import("vscode")
+			const mockShowInfo = vi.fn()
+			vi.mocked(vscode.window.showInformationMessage).mockImplementation(mockShowInfo)
+
+			await authService.handleCallback("auth-code", storedState, null, "xai/grok-code-fast-1")
+
+			expect(mockContext.globalState.update).toHaveBeenCalledWith("roo-provider-model", "xai/grok-code-fast-1")
+			expect(mockLog).toHaveBeenCalledWith("[auth] Stored provider model: xai/grok-code-fast-1")
+		})
+
 		it("should handle Clerk API errors", async () => {
 			const storedState = "valid-state"
 			mockContext.globalState.get.mockReturnValue(storedState)

+ 87 - 128
packages/evals/scripts/setup.sh

@@ -1,13 +1,5 @@
 #!/bin/bash
 
-has_asdf_plugin() {
-  local plugin="$1"
-  case "$plugin" in
-    nodejs|python|golang|rust) echo "true" ;;
-    *) echo "false" ;;
-  esac
-}
-
 build_extension() {
   echo "🔨 Building the Kilo Code extension..."
   pnpm -w vsix -- --out ../bin/kilo-code-$(git rev-parse --short HEAD).vsix || exit 1
@@ -147,40 +139,33 @@ else
   echo "✅ Homebrew is installed ($BREW_VERSION)"
 fi
 
-ASDF_PATH="$(brew --prefix asdf)/libexec/asdf.sh"
-
-if ! command -v asdf &>/dev/null; then
-  if [[ -f "$ASDF_PATH" ]]; then
-    echo "⚠️ asdf is installed but not in your PATH"
-    exit 1
-  fi
-
-  read -p "🛠️ asdf (https://asdf-vm.com) is required. Install it? (Y/n): " install_asdf
+if ! command -v mise &>/dev/null; then
+  read -p "🛠️ mise (https://mise.jdx.dev) is required. Install it? (Y/n): " install_mise
 
-  if [[ "$install_asdf" =~ ^[Yy]|^$ ]]; then
-    echo "🛠️ Installing asdf..."
-    brew install asdf || exit 1
+  if [[ "$install_mise" =~ ^[Yy]|^$ ]]; then
+    echo "🛠️ Installing mise..."
+    brew install mise || exit 1
     # Can be undone with:
-    # brew uninstall asdf
-    # rm -rvf ~/.asdf
+    # brew uninstall mise
+    # rm -rvf ~/.local/share/mise ~/.config/mise
 
-    . "$ASDF_PATH"
+    eval "$(mise activate bash)"
 
-    if [[ "$SHELL" == "/bin/zsh" ]] && ! grep -q 'source "$(brew --prefix asdf)/libexec/asdf.sh"' ~/.zshrc; then
-      echo '[[ -s "/opt/homebrew/bin/brew" ]] && [[ -s "$(brew --prefix asdf)/libexec/asdf.sh" ]] && source "$(brew --prefix asdf)/libexec/asdf.sh"' >>~/.zprofile
-    elif [[ "$SHELL" == "/bin/bash" ]] && ! grep -q 'source "$(brew --prefix asdf)/libexec/asdf.sh"' ~/.bash_profile; then
-      echo '[[ -s "/opt/homebrew/bin/brew" ]] && [[ -s "$(brew --prefix asdf)/libexec/asdf.sh" ]] && source "$(brew --prefix asdf)/libexec/asdf.sh"' >>~/.bash_profile
+    if [[ "$SHELL" == "/bin/zsh" ]] && ! grep -q 'mise activate zsh' ~/.zprofile; then
+      echo 'eval "$(mise activate zsh)"' >>~/.zprofile
+    elif [[ "$SHELL" == "/bin/bash" ]] && ! grep -q 'mise activate bash' ~/.bash_profile; then
+      echo 'eval "$(mise activate bash)"' >>~/.bash_profile
     fi
 
-    ASDF_VERSION=$(asdf --version)
-    echo "✅ asdf is installed ($ASDF_VERSION)"
+    MISE_VERSION=$(mise --version)
+    echo "✅ mise is installed ($MISE_VERSION)"
   else
     exit 1
   fi
 else
-  ASDF_VERSION=$(asdf --version)
-  echo "✅ asdf is installed ($ASDF_VERSION)"
-  . "$ASDF_PATH"
+  MISE_VERSION=$(mise --version)
+  echo "✅ mise is installed ($MISE_VERSION)"
+  eval "$(mise activate bash)"
 fi
 
 if ! command -v gh &>/dev/null; then
@@ -197,108 +182,82 @@ else
   echo "✅ gh is installed ($GH_VERSION)"
 fi
 
-options=("nodejs" "python" "golang" "rust" "java")
-binaries=("node" "python" "go" "rustc" "javac")
-
-for i in "${!options[@]}"; do
-  plugin="${options[$i]}"
-  binary="${binaries[$i]}"
-
-  if [[ "$(has_asdf_plugin "$plugin")" == "true" ]]; then
-    if ! asdf plugin list | grep -q "^${plugin}$" && ! command -v "${binary}" &>/dev/null; then
-      echo "📦 Installing ${plugin} asdf plugin..."
-      asdf plugin add "${plugin}" || exit 1
-      echo "✅ asdf ${plugin} plugin installed successfully"
-    fi
-  fi
-
-  case "${plugin}" in
-  "nodejs")
-    if ! command -v node &>/dev/null; then
-      asdf install nodejs 20.19.2 || exit 1
-      asdf set nodejs 20.19.2 || exit 1
-      NODE_VERSION=$(node --version)
-      echo "✅ Node.js is installed ($NODE_VERSION)"
-    else
-      NODE_VERSION=$(node --version)
-      echo "✅ Node.js is installed ($NODE_VERSION)"
-    fi
+# Install language runtimes via mise
+if ! command -v node &>/dev/null; then
+  echo "📦 Installing Node.js via mise..."
+  mise install node@20.19.2 || exit 1
+  mise use --global node@20.19.2 || exit 1
+  eval "$(mise activate bash)"
+  NODE_VERSION=$(node --version)
+  echo "✅ Node.js is installed ($NODE_VERSION)"
+else
+  NODE_VERSION=$(node --version)
+  echo "✅ Node.js is installed ($NODE_VERSION)"
+fi
 
-    if [[ $(node --version) != "v20.19.2" ]]; then
-      NODE_VERSION=$(node --version)
-      echo "🚨 You have the wrong version of node installed ($NODE_VERSION)."
-      echo "💡 If you are using nvm then run 'nvm install' to install the version specified by the repo's .nvmrc."
-      exit 1
-    fi
-    ;;
-
-  "python")
-    if ! command -v python &>/dev/null; then
-      asdf install python 3.13.2 || exit 1
-      asdf set python 3.13.2 || exit 1
-      PYTHON_VERSION=$(python --version)
-      echo "✅ Python is installed ($PYTHON_VERSION)"
-    else
-      PYTHON_VERSION=$(python --version)
-      echo "✅ Python is installed ($PYTHON_VERSION)"
-    fi
+if [[ $(node --version) != "v20.19.2" ]]; then
+  NODE_VERSION=$(node --version)
+  echo "🚨 You have the wrong version of node installed ($NODE_VERSION)."
+  echo "💡 If you are using nvm then run 'nvm install' to install the version specified by the repo's .nvmrc."
+  exit 1
+fi
 
-    if ! command -v uv &>/dev/null; then
-      brew install uv || exit 1
-      UV_VERSION=$(uv --version)
-      echo "✅ uv is installed ($UV_VERSION)"
-    else
-      UV_VERSION=$(uv --version)
-      echo "✅ uv is installed ($UV_VERSION)"
-    fi
-    ;;
-
-  "golang")
-    if ! command -v go &>/dev/null; then
-      asdf install golang 1.24.2 || exit 1
-      asdf set golang 1.24.2 || exit 1
-      GO_VERSION=$(go version)
-      echo "✅ Go is installed ($GO_VERSION)"
-    else
-      GO_VERSION=$(go version)
-      echo "✅ Go is installed ($GO_VERSION)"
-    fi
-    ;;
-
-  "rust")
-    if ! command -v rustc &>/dev/null; then
-      asdf install rust 1.85.1 || exit 1
-      asdf set rust 1.85.1 || exit 1
-      RUST_VERSION=$(rustc --version)
-      echo "✅ Rust is installed ($RUST_VERSION)"
-    else
-      RUST_VERSION=$(rustc --version)
-      echo "✅ Rust is installed ($RUST_VERSION)"
-    fi
-    ;;
+if ! command -v python &>/dev/null; then
+  echo "📦 Installing Python via mise..."
+  mise install python@3.13.2 || exit 1
+  mise use --global python@3.13.2 || exit 1
+  eval "$(mise activate bash)"
+  PYTHON_VERSION=$(python --version)
+  echo "✅ Python is installed ($PYTHON_VERSION)"
+else
+  PYTHON_VERSION=$(python --version)
+  echo "✅ Python is installed ($PYTHON_VERSION)"
+fi
 
-  "java")
-    if ! command -v javac &>/dev/null || ! javac --version &>/dev/null; then
-      echo "☕ Installing Java..."
-      brew install openjdk@17 || exit 1
+if ! command -v uv &>/dev/null; then
+  brew install uv || exit 1
+  UV_VERSION=$(uv --version)
+  echo "✅ uv is installed ($UV_VERSION)"
+else
+  UV_VERSION=$(uv --version)
+  echo "✅ uv is installed ($UV_VERSION)"
+fi
 
-      export PATH="/opt/homebrew/opt/openjdk@17/bin:$PATH"
+if ! command -v go &>/dev/null; then
+  echo "📦 Installing Go via mise..."
+  mise install go@1.24.2 || exit 1
+  mise use --global go@1.24.2 || exit 1
+  eval "$(mise activate bash)"
+  GO_VERSION=$(go version)
+  echo "✅ Go is installed ($GO_VERSION)"
+else
+  GO_VERSION=$(go version)
+  echo "✅ Go is installed ($GO_VERSION)"
+fi
 
-      if [[ "$SHELL" == "/bin/zsh" ]] && ! grep -q 'export PATH="/opt/homebrew/opt/openjdk@17/bin:$PATH"' ~/.zprofile; then
-        echo 'export PATH="/opt/homebrew/opt/openjdk@17/bin:$PATH"' >> ~/.zprofile
-      elif [[ "$SHELL" == "/bin/bash" ]] && ! grep -q 'export PATH="/opt/homebrew/opt/openjdk@17/bin:$PATH"' ~/.bash_profile; then
-        echo 'export PATH="/opt/homebrew/opt/openjdk@17/bin:$PATH"' >> ~/.bash_profile
-      fi
+if ! command -v rustc &>/dev/null; then
+  echo "📦 Installing Rust via mise..."
+  mise install rust@1.85.1 || exit 1
+  mise use --global rust@1.85.1 || exit 1
+  eval "$(mise activate bash)"
+  RUST_VERSION=$(rustc --version)
+  echo "✅ Rust is installed ($RUST_VERSION)"
+else
+  RUST_VERSION=$(rustc --version)
+  echo "✅ Rust is installed ($RUST_VERSION)"
+fi
 
-      JAVA_VERSION=$(javac --version | head -n 1)
-      echo "✅ Java is installed ($JAVA_VERSION)"
-    else
-      JAVA_VERSION=$(javac --version | head -n 1)
-      echo "✅ Java is installed ($JAVA_VERSION)"
-    fi
-    ;;
-  esac
-done
+if ! command -v javac &>/dev/null || ! javac --version &>/dev/null; then
+  echo "☕ Installing Java via mise..."
+  mise install java@openjdk-17 || exit 1
+  mise use --global java@openjdk-17 || exit 1
+  eval "$(mise activate bash)"
+  JAVA_VERSION=$(javac --version | head -n 1)
+  echo "✅ Java is installed ($JAVA_VERSION)"
+else
+  JAVA_VERSION=$(javac --version | head -n 1)
+  echo "✅ Java is installed ($JAVA_VERSION)"
+fi
 
 if ! command -v pnpm &>/dev/null; then
   brew install pnpm || exit 1

+ 1 - 1
packages/evals/src/cli/index.ts

@@ -28,7 +28,7 @@ const main = async () => {
 					} else if (runId !== -1) {
 						await runEvals(runId)
 					} else if (taskId !== -1) {
-						await processTask({ taskId })
+						await processTask({ taskId, jobToken: process.env.ROO_CODE_CLOUD_TOKEN || null })
 					} else {
 						throw new Error("Either runId or taskId must be provided.")
 					}

+ 2 - 2
packages/evals/src/cli/runEvals.ts

@@ -44,9 +44,9 @@ export const runEvals = async (runId: number) => {
 				.map((task) => async () => {
 					try {
 						if (containerized) {
-							await processTaskInContainer({ taskId: task.id, logger })
+							await processTaskInContainer({ taskId: task.id, jobToken: run.jobToken, logger })
 						} else {
-							await processTask({ taskId: task.id, logger })
+							await processTask({ taskId: task.id, jobToken: run.jobToken, logger })
 						}
 					} catch (error) {
 						logger.error("error processing task", error)

+ 39 - 4
packages/evals/src/cli/runTask.ts

@@ -38,7 +38,15 @@ class SubprocessTimeoutError extends Error {
 	}
 }
 
-export const processTask = async ({ taskId, logger }: { taskId: number; logger?: Logger }) => {
+export const processTask = async ({
+	taskId,
+	jobToken,
+	logger,
+}: {
+	taskId: number
+	jobToken: string | null
+	logger?: Logger
+}) => {
 	const task = await findTask(taskId)
 	const { language, exercise } = task
 	const run = await findRun(task.runId)
@@ -61,7 +69,7 @@ export const processTask = async ({ taskId, logger }: { taskId: number; logger?:
 		}
 
 		logger.info(`running task ${task.id} (${language}/${exercise})...`)
-		await runTask({ run, task, publish, logger })
+		await runTask({ run, task, jobToken, publish, logger })
 
 		logger.info(`testing task ${task.id} (${language}/${exercise})...`)
 		const passed = await runUnitTest({ task, logger })
@@ -80,10 +88,12 @@ export const processTask = async ({ taskId, logger }: { taskId: number; logger?:
 
 export const processTaskInContainer = async ({
 	taskId,
+	jobToken,
 	logger,
 	maxRetries = 10,
 }: {
 	taskId: number
+	jobToken: string | null
 	logger: Logger
 	maxRetries?: number
 }) => {
@@ -95,6 +105,10 @@ export const processTaskInContainer = async ({
 		"-e HOST_EXECUTION_METHOD=docker",
 	]
 
+	if (jobToken) {
+		baseArgs.push(`-e ROO_CODE_CLOUD_TOKEN=${jobToken}`)
+	}
+
 	const command = `pnpm --filter @roo-code/evals cli --taskId ${taskId}`
 	logger.info(command)
 
@@ -144,11 +158,12 @@ export const processTaskInContainer = async ({
 type RunTaskOptions = {
 	run: Run
 	task: Task
+	jobToken: string | null
 	publish: (taskEvent: TaskEvent) => Promise<void>
 	logger: Logger
 }
 
-export const runTask = async ({ run, task, publish, logger }: RunTaskOptions) => {
+export const runTask = async ({ run, task, publish, logger, jobToken }: RunTaskOptions) => {
 	const { language, exercise } = task
 	const prompt = fs.readFileSync(path.resolve(EVALS_REPO_PATH, `prompts/${language}.md`), "utf-8")
 	const workspacePath = path.resolve(EVALS_REPO_PATH, language, exercise)
@@ -158,10 +173,14 @@ export const runTask = async ({ run, task, publish, logger }: RunTaskOptions) =>
 	const cancelSignal = controller.signal
 	const containerized = isDockerContainer()
 
-	const codeCommand = containerized
+	let codeCommand = containerized
 		? `xvfb-run --auto-servernum --server-num=1 code --wait --log trace --disable-workspace-trust --disable-gpu --disable-lcd-text --no-sandbox --user-data-dir /roo/.vscode --password-store="basic" -n ${workspacePath}`
 		: `code --disable-workspace-trust -n ${workspacePath}`
 
+	if (jobToken) {
+		codeCommand = `ROO_CODE_CLOUD_TOKEN=${jobToken} ${codeCommand}`
+	}
+
 	logger.info(codeCommand)
 
 	// Sleep for a random amount of time between 5 and 10 seconds, unless we're
@@ -217,11 +236,23 @@ export const runTask = async ({ run, task, publish, logger }: RunTaskOptions) =>
 		"diff_error",
 		"condense_context",
 		"condense_context_error",
+		"api_req_retry_delayed",
+		"api_req_retried",
 	]
 
+	let isApiUnstable = false
+
 	client.on(IpcMessageType.TaskEvent, async (taskEvent) => {
 		const { eventName, payload } = taskEvent
 
+		if (
+			eventName === RooCodeEventName.Message &&
+			payload[0].message.say &&
+			["api_req_retry_delayed", "api_req_retried"].includes(payload[0].message.say)
+		) {
+			isApiUnstable = true
+		}
+
 		// Publish all events except for these to Redis.
 		if (!ignoreEvents.broadcast.includes(eventName)) {
 			await publish({ ...taskEvent, taskId: task.id })
@@ -388,4 +419,8 @@ export const runTask = async ({ run, task, publish, logger }: RunTaskOptions) =>
 	}
 
 	logger.close()
+
+	if (isApiUnstable) {
+		throw new Error("API is unstable, throwing to trigger a retry.")
+	}
 }

+ 1 - 0
packages/evals/src/db/migrations/0003_simple_retro_girl.sql

@@ -0,0 +1 @@
+ALTER TABLE "runs" ADD COLUMN "jobToken" text;

+ 459 - 0
packages/evals/src/db/migrations/meta/0003_snapshot.json

@@ -0,0 +1,459 @@
+{
+	"id": "853d308a-3946-4ea8-9039-236bfce3c6c0",
+	"prevId": "3d2b8423-6170-4cb2-9f62-1c86756da97a",
+	"version": "7",
+	"dialect": "postgresql",
+	"tables": {
+		"public.runs": {
+			"name": "runs",
+			"schema": "",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"identity": {
+						"type": "always",
+						"name": "runs_id_seq",
+						"schema": "public",
+						"increment": "1",
+						"startWith": "1",
+						"minValue": "1",
+						"maxValue": "2147483647",
+						"cache": "1",
+						"cycle": false
+					}
+				},
+				"task_metrics_id": {
+					"name": "task_metrics_id",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"model": {
+					"name": "model",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"name": {
+					"name": "name",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"description": {
+					"name": "description",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"contextWindow": {
+					"name": "contextWindow",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"inputPrice": {
+					"name": "inputPrice",
+					"type": "real",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"outputPrice": {
+					"name": "outputPrice",
+					"type": "real",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"cacheWritesPrice": {
+					"name": "cacheWritesPrice",
+					"type": "real",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"cacheReadsPrice": {
+					"name": "cacheReadsPrice",
+					"type": "real",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"settings": {
+					"name": "settings",
+					"type": "jsonb",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"jobToken": {
+					"name": "jobToken",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"pid": {
+					"name": "pid",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"socket_path": {
+					"name": "socket_path",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"concurrency": {
+					"name": "concurrency",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"default": 2
+				},
+				"timeout": {
+					"name": "timeout",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"default": 5
+				},
+				"passed": {
+					"name": "passed",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"default": 0
+				},
+				"failed": {
+					"name": "failed",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"default": 0
+				},
+				"created_at": {
+					"name": "created_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": true
+				}
+			},
+			"indexes": {},
+			"foreignKeys": {
+				"runs_task_metrics_id_taskMetrics_id_fk": {
+					"name": "runs_task_metrics_id_taskMetrics_id_fk",
+					"tableFrom": "runs",
+					"tableTo": "taskMetrics",
+					"columnsFrom": ["task_metrics_id"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				}
+			},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"policies": {},
+			"checkConstraints": {},
+			"isRLSEnabled": false
+		},
+		"public.taskMetrics": {
+			"name": "taskMetrics",
+			"schema": "",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"identity": {
+						"type": "always",
+						"name": "taskMetrics_id_seq",
+						"schema": "public",
+						"increment": "1",
+						"startWith": "1",
+						"minValue": "1",
+						"maxValue": "2147483647",
+						"cache": "1",
+						"cycle": false
+					}
+				},
+				"tokens_in": {
+					"name": "tokens_in",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"tokens_out": {
+					"name": "tokens_out",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"tokens_context": {
+					"name": "tokens_context",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"cache_writes": {
+					"name": "cache_writes",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"cache_reads": {
+					"name": "cache_reads",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"cost": {
+					"name": "cost",
+					"type": "real",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"duration": {
+					"name": "duration",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"tool_usage": {
+					"name": "tool_usage",
+					"type": "jsonb",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"created_at": {
+					"name": "created_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": true
+				}
+			},
+			"indexes": {},
+			"foreignKeys": {},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"policies": {},
+			"checkConstraints": {},
+			"isRLSEnabled": false
+		},
+		"public.tasks": {
+			"name": "tasks",
+			"schema": "",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"identity": {
+						"type": "always",
+						"name": "tasks_id_seq",
+						"schema": "public",
+						"increment": "1",
+						"startWith": "1",
+						"minValue": "1",
+						"maxValue": "2147483647",
+						"cache": "1",
+						"cycle": false
+					}
+				},
+				"run_id": {
+					"name": "run_id",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"task_metrics_id": {
+					"name": "task_metrics_id",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"language": {
+					"name": "language",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"exercise": {
+					"name": "exercise",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"passed": {
+					"name": "passed",
+					"type": "boolean",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"started_at": {
+					"name": "started_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"finished_at": {
+					"name": "finished_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"created_at": {
+					"name": "created_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": true
+				}
+			},
+			"indexes": {
+				"tasks_language_exercise_idx": {
+					"name": "tasks_language_exercise_idx",
+					"columns": [
+						{
+							"expression": "run_id",
+							"isExpression": false,
+							"asc": true,
+							"nulls": "last"
+						},
+						{
+							"expression": "language",
+							"isExpression": false,
+							"asc": true,
+							"nulls": "last"
+						},
+						{
+							"expression": "exercise",
+							"isExpression": false,
+							"asc": true,
+							"nulls": "last"
+						}
+					],
+					"isUnique": true,
+					"concurrently": false,
+					"method": "btree",
+					"with": {}
+				}
+			},
+			"foreignKeys": {
+				"tasks_run_id_runs_id_fk": {
+					"name": "tasks_run_id_runs_id_fk",
+					"tableFrom": "tasks",
+					"tableTo": "runs",
+					"columnsFrom": ["run_id"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				},
+				"tasks_task_metrics_id_taskMetrics_id_fk": {
+					"name": "tasks_task_metrics_id_taskMetrics_id_fk",
+					"tableFrom": "tasks",
+					"tableTo": "taskMetrics",
+					"columnsFrom": ["task_metrics_id"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				}
+			},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"policies": {},
+			"checkConstraints": {},
+			"isRLSEnabled": false
+		},
+		"public.toolErrors": {
+			"name": "toolErrors",
+			"schema": "",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"identity": {
+						"type": "always",
+						"name": "toolErrors_id_seq",
+						"schema": "public",
+						"increment": "1",
+						"startWith": "1",
+						"minValue": "1",
+						"maxValue": "2147483647",
+						"cache": "1",
+						"cycle": false
+					}
+				},
+				"run_id": {
+					"name": "run_id",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"task_id": {
+					"name": "task_id",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"tool_name": {
+					"name": "tool_name",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"error": {
+					"name": "error",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"created_at": {
+					"name": "created_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": true
+				}
+			},
+			"indexes": {},
+			"foreignKeys": {
+				"toolErrors_run_id_runs_id_fk": {
+					"name": "toolErrors_run_id_runs_id_fk",
+					"tableFrom": "toolErrors",
+					"tableTo": "runs",
+					"columnsFrom": ["run_id"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				},
+				"toolErrors_task_id_tasks_id_fk": {
+					"name": "toolErrors_task_id_tasks_id_fk",
+					"tableFrom": "toolErrors",
+					"tableTo": "tasks",
+					"columnsFrom": ["task_id"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				}
+			},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"policies": {},
+			"checkConstraints": {},
+			"isRLSEnabled": false
+		}
+	},
+	"enums": {},
+	"schemas": {},
+	"sequences": {},
+	"roles": {},
+	"policies": {},
+	"views": {},
+	"_meta": {
+		"columns": {},
+		"schemas": {},
+		"tables": {}
+	}
+}

+ 7 - 0
packages/evals/src/db/migrations/meta/_journal.json

@@ -22,6 +22,13 @@
 			"when": 1757191027855,
 			"tag": "0002_bouncy_blazing_skull",
 			"breakpoints": true
+		},
+		{
+			"idx": 3,
+			"version": "7",
+			"when": 1763797232454,
+			"tag": "0003_simple_retro_girl",
+			"breakpoints": true
 		}
 	]
 }

+ 1 - 0
packages/evals/src/db/schema.ts

@@ -21,6 +21,7 @@ export const runs = pgTable("runs", {
 	cacheWritesPrice: real(),
 	cacheReadsPrice: real(),
 	settings: jsonb().$type<RooCodeSettings>(),
+	jobToken: text(),
 	pid: integer(),
 	socketPath: text("socket_path").notNull(),
 	concurrency: integer().default(2).notNull(),

+ 2 - 2
packages/telemetry/src/TelemetryService.ts

@@ -114,8 +114,8 @@ export class TelemetryService {
 		this.captureEvent(TelemetryEventName.MODE_SWITCH, { taskId, newMode })
 	}
 
-	public captureToolUsage(taskId: string, tool: string): void {
-		this.captureEvent(TelemetryEventName.TOOL_USED, { taskId, tool })
+	public captureToolUsage(taskId: string, tool: string, toolProtocol: string): void {
+		this.captureEvent(TelemetryEventName.TOOL_USED, { taskId, tool, toolProtocol })
 	}
 
 	public captureCheckpointCreated(taskId: string): void {

+ 1 - 1
packages/types/npm/package.metadata.json

@@ -1,6 +1,6 @@
 {
 	"name": "@roo-code/types",
-	"version": "1.85.0",
+	"version": "1.87.0",
 	"description": "TypeScript type definitions for Roo Code.",
 	"publishConfig": {
 		"access": "public",

+ 7 - 2
packages/types/src/cloud.ts

@@ -239,9 +239,14 @@ export interface AuthService extends EventEmitter<AuthServiceEvents> {
 	broadcast(): void
 
 	// Authentication methods
-	login(landingPageSlug?: string): Promise<void>
+	login(landingPageSlug?: string, useProviderSignup?: boolean): Promise<void>
 	logout(): Promise<void>
-	handleCallback(code: string | null, state: string | null, organizationId?: string | null): Promise<void>
+	handleCallback(
+		code: string | null,
+		state: string | null,
+		organizationId?: string | null,
+		providerModel?: string | null,
+	): Promise<void>
 	switchOrganization(organizationId: string | null): Promise<void>
 
 	// State methods

+ 14 - 1
packages/types/src/codebase-index.ts

@@ -22,7 +22,16 @@ export const codebaseIndexConfigSchema = z.object({
 	codebaseIndexEnabled: z.boolean().optional(),
 	codebaseIndexQdrantUrl: z.string().optional(),
 	codebaseIndexEmbedderProvider: z
-		.enum(["openai", "ollama", "openai-compatible", "gemini", "mistral", "vercel-ai-gateway", "openrouter"])
+		.enum([
+			"openai",
+			"ollama",
+			"openai-compatible",
+			"gemini",
+			"mistral",
+			"vercel-ai-gateway",
+			"bedrock",
+			"openrouter",
+		])
 		.optional(),
 	// kilocode_change start
 	codebaseIndexVectorStoreProvider: z.enum(["lancedb", "qdrant"]).optional(),
@@ -40,6 +49,9 @@ export const codebaseIndexConfigSchema = z.object({
 	// OpenAI Compatible specific fields
 	codebaseIndexOpenAiCompatibleBaseUrl: z.string().optional(),
 	codebaseIndexOpenAiCompatibleModelDimension: z.number().optional(),
+	// Bedrock specific fields
+	codebaseIndexBedrockRegion: z.string().optional(),
+	codebaseIndexBedrockProfile: z.string().optional(),
 })
 
 export type CodebaseIndexConfig = z.infer<typeof codebaseIndexConfigSchema>
@@ -56,6 +68,7 @@ export const codebaseIndexModelsSchema = z.object({
 	mistral: z.record(z.string(), z.object({ dimension: z.number() })).optional(),
 	"vercel-ai-gateway": z.record(z.string(), z.object({ dimension: z.number() })).optional(),
 	openrouter: z.record(z.string(), z.object({ dimension: z.number() })).optional(),
+	bedrock: z.record(z.string(), z.object({ dimension: z.number() })).optional(),
 })
 
 export type CodebaseIndexModels = z.infer<typeof codebaseIndexModelsSchema>

+ 2 - 0
packages/types/src/experiment.ts

@@ -13,6 +13,7 @@ export const experimentIds = [
 	"preventFocusDisruption",
 	"imageGeneration",
 	"runSlashCommand",
+	"multipleNativeToolCalls",
 ] as const
 
 export const experimentIdsSchema = z.enum([...experimentIds, ...kilocodeExperimentIds])
@@ -30,6 +31,7 @@ export const experimentsSchema = z.object({
 	preventFocusDisruption: z.boolean().optional(),
 	imageGeneration: z.boolean().optional(),
 	runSlashCommand: z.boolean().optional(),
+	multipleNativeToolCalls: z.boolean().optional(),
 })
 
 export type Experiments = z.infer<typeof experimentsSchema>

+ 9 - 0
packages/types/src/global-settings.ts

@@ -60,6 +60,7 @@ export const globalSettingsSchema = z.object({
 	dismissedUpsells: z.array(z.string()).optional(),
 
 	// Image generation settings (experimental) - flattened for simplicity
+	imageGenerationProvider: z.enum(["openrouter", "roo"]).optional(),
 	openRouterImageApiKey: z.string().optional(),
 	openRouterImageGenerationSelectedModel: z.string().optional(),
 	kiloCodeImageApiKey: z.string().optional(),
@@ -108,6 +109,12 @@ export const globalSettingsSchema = z.object({
 	 * @default true
 	 */
 	includeCurrentCost: z.boolean().optional(),
+	/**
+	 * Maximum number of git status file entries to include in the environment details.
+	 * Set to 0 to disable git status. The header (branch, commits) is always included when > 0.
+	 * @default 0
+	 */
+	maxGitStatusFiles: z.number().optional(),
 
 	/**
 	 * Whether to include diagnostic messages (errors, warnings) in tool outputs
@@ -284,6 +291,7 @@ export const SECRET_STATE_KEYS = [
 	"ioIntelligenceApiKey",
 	"vercelAiGatewayApiKey",
 	"sapAiCoreServiceKey", // kilocode_change
+	"basetenApiKey",
 ] as const
 
 // Global secrets that are part of GlobalSettings (not ProviderSettings)
@@ -395,6 +403,7 @@ export const EVALS_SETTINGS: RooCodeSettings = {
 	rateLimitSeconds: 0,
 	maxOpenTabsContext: 20,
 	maxWorkspaceFiles: 200,
+	maxGitStatusFiles: 20,
 	showRooIgnoredFiles: true,
 	maxReadFileLine: -1, // -1 to enable full file reading.
 

+ 59 - 0
packages/types/src/image-generation.ts

@@ -0,0 +1,59 @@
+/**
+ * Image generation model constants
+ */
+
+/**
+ * API method used for image generation (optional per model, see `ImageGenerationModel.apiMethod`)
+ */
+export type ImageGenerationApiMethod = "chat_completions" | "images_api" // NOTE(review): presumably a chat-completions-style call vs. a dedicated images endpoint — confirm against the consumers
+
+export interface ImageGenerationModel {
+	value: string // model identifier; these values are collected into IMAGE_GENERATION_MODEL_IDS
+	label: string // human-readable name for the entry, e.g. "GPT-5 Image"
+	provider: ImageGenerationProvider // provider this entry belongs to ("openrouter" | "roo")
+	apiMethod?: ImageGenerationApiMethod // optional; NOTE(review): what consumers do when it is omitted is not visible here — confirm the default
+}
+
+export const IMAGE_GENERATION_MODELS: ImageGenerationModel[] = [ // only the OpenRouter entries are active; the Roo Code Cloud entries below are commented out
+	// OpenRouter models
+	{ value: "google/gemini-2.5-flash-image", label: "Gemini 2.5 Flash Image", provider: "openrouter" },
+	{ value: "google/gemini-3-pro-image-preview", label: "Gemini 3 Pro Image Preview", provider: "openrouter" },
+	{ value: "openai/gpt-5-image", label: "GPT-5 Image", provider: "openrouter" },
+	{ value: "openai/gpt-5-image-mini", label: "GPT-5 Image Mini", provider: "openrouter" },
+	{ value: "black-forest-labs/flux.2-flex", label: "Black Forest Labs FLUX.2 Flex", provider: "openrouter" },
+	{ value: "black-forest-labs/flux.2-pro", label: "Black Forest Labs FLUX.2 Pro", provider: "openrouter" },
+	// Roo Code Cloud models
+	// kilocode_change start: disable roo cloud models
+	// { value: "google/gemini-2.5-flash-image", label: "Gemini 2.5 Flash Image", provider: "roo" },
+	// { value: "google/gemini-3-pro-image", label: "Gemini 3 Pro Image", provider: "roo" },
+	// {
+	// 	value: "bfl/flux-2-pro:free",
+	// 	label: "Black Forest Labs FLUX.2 Pro (Free)",
+	// 	provider: "roo",
+	// 	apiMethod: "images_api",
+	// },
+	// kilocode_change end
+]
+
+/**
+ * Array of model values (IDs) only, derived from IMAGE_GENERATION_MODELS (for backend validation)
+ */
+export const IMAGE_GENERATION_MODEL_IDS = IMAGE_GENERATION_MODELS.map((m) => m.value)
+
+/**
+ * Image generation provider type (used by `ImageGenerationModel.provider` and `getImageGenerationProvider`)
+ */
+export type ImageGenerationProvider = "openrouter" | "roo"
+
+/**
+ * Resolve the image generation provider, with backwards compatibility
+ * - If `explicitProvider` is not `undefined`, it is returned as-is
+ * - Otherwise, if `hasExistingModel` is true (existing users), returns "openrouter"
+ * - Otherwise returns "roo" (new users)
+ */
+export function getImageGenerationProvider(
+	explicitProvider: ImageGenerationProvider | undefined,
+	hasExistingModel: boolean,
+): ImageGenerationProvider {
+	return explicitProvider !== undefined ? explicitProvider : hasExistingModel ? "openrouter" : "roo" // NOTE(review): IMAGE_GENERATION_MODELS currently has no active "roo" entries — confirm "roo" is still the intended default
+}

+ 1 - 0
packages/types/src/index.ts

@@ -9,6 +9,7 @@ export * from "./feature-flags.js"
 export * from "./followup.js"
 export * from "./global-settings.js"
 export * from "./history.js"
+export * from "./image-generation.js"
 export * from "./ipc.js"
 export * from "./marketplace.js"
 export * from "./mcp.js"

+ 1 - 0
packages/types/src/message.ts

@@ -183,6 +183,7 @@ export const clineSays = [
 	"shell_integration_warning",
 	"browser_action",
 	"browser_action_result",
+	"browser_session_status",
 	"mcp_server_request_started",
 	"mcp_server_response",
 	"subtask_result",

+ 2 - 0
packages/types/src/model.ts

@@ -114,6 +114,8 @@ export const modelInfoSchema = z.object({
 	isFree: z.boolean().optional(),
 	// Flag to indicate if the model supports native tool calling (OpenAI-style function calling)
 	supportsNativeTools: z.boolean().optional(),
+	// Default tool protocol preferred by this model (if not specified, falls back to capability/provider defaults)
+	defaultToolProtocol: z.enum(["xml", "native"]).optional(),
 	/**
 	 * Service tiers with pricing information.
 	 * Each tier can have a name (for OpenAI service tiers) and pricing overrides.

+ 11 - 1
packages/types/src/provider-settings.ts

@@ -5,6 +5,7 @@ import { codebaseIndexProviderSchema } from "./codebase-index.js"
 import { profileTypeSchema } from "./profile-type.js" // kilocode_change
 import {
 	anthropicModels,
+	basetenModels,
 	bedrockModels,
 	cerebrasModels,
 	claudeCodeModels,
@@ -133,6 +134,7 @@ export const providerNames = [
 	...fauxProviders,
 	"anthropic",
 	"bedrock",
+	"baseten",
 	"cerebras",
 	"claude-code",
 	"doubao",
@@ -528,6 +530,10 @@ const sapAiCoreSchema = baseProviderSettingsSchema.extend({
 })
 // kilocode_change end
 
+const basetenSchema = apiModelIdProviderModelSchema.extend({
+	basetenApiKey: z.string().optional(),
+})
+
 const defaultSchema = z.object({
 	apiProvider: z.undefined(),
 })
@@ -566,6 +572,7 @@ export const providerSettingsSchemaDiscriminated = z.discriminatedUnion("apiProv
 	inceptionSchema.merge(z.object({ apiProvider: z.literal("inception") })),
 	// kilocode_change end
 	groqSchema.merge(z.object({ apiProvider: z.literal("groq") })),
+	basetenSchema.merge(z.object({ apiProvider: z.literal("baseten") })),
 	huggingFaceSchema.merge(z.object({ apiProvider: z.literal("huggingface") })),
 	chutesSchema.merge(z.object({ apiProvider: z.literal("chutes") })),
 	litellmSchema.merge(z.object({ apiProvider: z.literal("litellm") })),
@@ -617,6 +624,7 @@ export const providerSettingsSchema = z.object({
 	...fakeAiSchema.shape,
 	...xaiSchema.shape,
 	...groqSchema.shape,
+	...basetenSchema.shape,
 	...huggingFaceSchema.shape,
 	...chutesSchema.shape,
 	...litellmSchema.shape,
@@ -718,6 +726,7 @@ export const modelIdKeysByProvider: Record<TypicalProvider, ModelIdKey> = {
 	"sap-ai-core": "sapAiCoreModelId",
 	// kilocode_change end
 	groq: "apiModelId",
+	baseten: "apiModelId",
 	chutes: "apiModelId",
 	litellm: "litellmModelId",
 	huggingface: "huggingFaceModelId",
@@ -737,7 +746,7 @@ export const modelIdKeysByProvider: Record<TypicalProvider, ModelIdKey> = {
  */
 
 // Providers that use Anthropic-style API protocol.
-export const ANTHROPIC_STYLE_PROVIDERS: ProviderName[] = ["anthropic", "claude-code", "bedrock"]
+export const ANTHROPIC_STYLE_PROVIDERS: ProviderName[] = ["anthropic", "claude-code", "bedrock", "minimax"]
 
 export const getApiProtocol = (provider: ProviderName | undefined, modelId?: string): "anthropic" | "openai" => {
 	if (provider && ANTHROPIC_STYLE_PROVIDERS.includes(provider)) {
@@ -858,6 +867,7 @@ export const MODELS_BY_PROVIDER: Record<
 	},
 	xai: { id: "xai", label: "xAI (Grok)", models: Object.keys(xaiModels) },
 	zai: { id: "zai", label: "Zai", models: Object.keys(internationalZAiModels) },
+	baseten: { id: "baseten", label: "BaseTen", models: Object.keys(basetenModels) },
 
 	// Dynamic providers; models pulled from remote APIs.
 	glama: { id: "glama", label: "Glama", models: [] },

+ 17 - 5
packages/types/src/providers/anthropic.ts

@@ -11,6 +11,7 @@ export const anthropicModels = {
 		contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
 		supportsImages: true,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 3.0, // $3 per million input tokens (≤200K context)
 		outputPrice: 15.0, // $15 per million output tokens (≤200K context)
 		cacheWritesPrice: 3.75, // $3.75 per million tokens
@@ -32,6 +33,7 @@ export const anthropicModels = {
 		contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
 		supportsImages: true,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 3.0, // $3 per million input tokens (≤200K context)
 		outputPrice: 15.0, // $15 per million output tokens (≤200K context)
 		cacheWritesPrice: 3.75, // $3.75 per million tokens
@@ -49,14 +51,15 @@ export const anthropicModels = {
 		],
 	},
 	"claude-opus-4-5-20251101": {
-		maxTokens: 32_000,
+		maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false.
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: true,
-		inputPrice: 5.0,
-		outputPrice: 25.0,
-		cacheWritesPrice: 6.25,
-		cacheReadsPrice: 0.5,
+		supportsNativeTools: true,
+		inputPrice: 5.0, // $5 per million input tokens
+		outputPrice: 25.0, // $25 per million output tokens
+		cacheWritesPrice: 6.25, // $6.25 per million tokens
+		cacheReadsPrice: 0.5, // $0.50 per million tokens
 		supportsReasoningBudget: true,
 		supportsVerbosity: true, // kilocode_change
 	},
@@ -65,6 +68,7 @@ export const anthropicModels = {
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 15.0, // $15 per million input tokens
 		outputPrice: 75.0, // $75 per million output tokens
 		cacheWritesPrice: 18.75, // $18.75 per million tokens
@@ -76,6 +80,7 @@ export const anthropicModels = {
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 15.0, // $15 per million input tokens
 		outputPrice: 75.0, // $75 per million output tokens
 		cacheWritesPrice: 18.75, // $18.75 per million tokens
@@ -87,6 +92,7 @@ export const anthropicModels = {
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 3.0, // $3 per million input tokens
 		outputPrice: 15.0, // $15 per million output tokens
 		cacheWritesPrice: 3.75, // $3.75 per million tokens
@@ -99,6 +105,7 @@ export const anthropicModels = {
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 3.0, // $3 per million input tokens
 		outputPrice: 15.0, // $15 per million output tokens
 		cacheWritesPrice: 3.75, // $3.75 per million tokens
@@ -109,6 +116,7 @@ export const anthropicModels = {
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 3.0, // $3 per million input tokens
 		outputPrice: 15.0, // $15 per million output tokens
 		cacheWritesPrice: 3.75, // $3.75 per million tokens
@@ -119,6 +127,7 @@ export const anthropicModels = {
 		contextWindow: 200_000,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 1.0,
 		outputPrice: 5.0,
 		cacheWritesPrice: 1.25,
@@ -129,6 +138,7 @@ export const anthropicModels = {
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 15.0,
 		outputPrice: 75.0,
 		cacheWritesPrice: 18.75,
@@ -139,6 +149,7 @@ export const anthropicModels = {
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 0.25,
 		outputPrice: 1.25,
 		cacheWritesPrice: 0.3,
@@ -149,6 +160,7 @@ export const anthropicModels = {
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 1.0,
 		outputPrice: 5.0,
 		cacheWritesPrice: 1.25,

+ 126 - 0
packages/types/src/providers/baseten.ts

@@ -0,0 +1,126 @@
+import type { ModelInfo } from "../model.js"
+
+// Baseten
+// https://baseten.co/products/model-apis/
+
+export const basetenModels = { // static model catalog keyed by Baseten model ID; each value is checked against ModelInfo
+	"moonshotai/Kimi-K2-Thinking": {
+		maxTokens: 163_800,
+		contextWindow: 262_000,
+		supportsImages: false,
+		supportsPromptCache: false,
+		supportsNativeTools: true,
+		inputPrice: 0.6, // NOTE(review): prices are presumably USD per million tokens — confirm against ModelInfo
+		outputPrice: 2.5,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description: "Kimi K2 Thinking - A model with enhanced reasoning capabilities from Kimi K2",
+	},
+	"zai-org/GLM-4.6": {
+		maxTokens: 200_000,
+		contextWindow: 200_000,
+		supportsImages: false,
+		supportsPromptCache: false,
+		supportsNativeTools: true,
+		inputPrice: 0.6,
+		outputPrice: 2.2,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description: "Frontier open model with advanced agentic, reasoning and coding capabilities",
+	},
+	"deepseek-ai/DeepSeek-R1": {
+		maxTokens: 131_072,
+		contextWindow: 163_840,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 2.55,
+		outputPrice: 5.95,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description: "DeepSeek's first-generation reasoning model",
+	},
+	"deepseek-ai/DeepSeek-R1-0528": {
+		maxTokens: 131_072,
+		contextWindow: 163_840,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 2.55,
+		outputPrice: 5.95,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description: "The latest revision of DeepSeek's first-generation reasoning model",
+	},
+	"deepseek-ai/DeepSeek-V3-0324": {
+		maxTokens: 131_072,
+		contextWindow: 163_840,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 0.77,
+		outputPrice: 0.77,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description: "Fast general-purpose LLM with enhanced reasoning capabilities",
+	},
+	"deepseek-ai/DeepSeek-V3.1": {
+		maxTokens: 131_072,
+		contextWindow: 163_840,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 0.5,
+		outputPrice: 1.5,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description:
+			"Extremely capable general-purpose LLM with hybrid reasoning capabilities and advanced tool calling",
+	},
+	"Qwen/Qwen3-235B-A22B-Instruct-2507": {
+		maxTokens: 262_144,
+		contextWindow: 262_144,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 0.22,
+		outputPrice: 0.8,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description: "Mixture-of-experts LLM with math and reasoning capabilities",
+	},
+	"Qwen/Qwen3-Coder-480B-A35B-Instruct": {
+		maxTokens: 262_144,
+		contextWindow: 262_144,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 0.38,
+		outputPrice: 1.53,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description: "Mixture-of-experts LLM with advanced coding and reasoning capabilities",
+	},
+	"openai/gpt-oss-120b": {
+		maxTokens: 128_072, // NOTE(review): 128_072 is not 128 * 1024 (131_072, the value used by the DeepSeek entries) — confirm this is not a typo
+		contextWindow: 128_072,
+		supportsImages: false,
+		supportsPromptCache: false,
+		supportsNativeTools: true,
+		inputPrice: 0.1,
+		outputPrice: 0.5,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description: "Extremely capable general-purpose LLM with strong, controllable reasoning capabilities",
+	},
+	"moonshotai/Kimi-K2-Instruct-0905": {
+		maxTokens: 168_000,
+		contextWindow: 262_000,
+		supportsImages: false,
+		supportsPromptCache: false,
+		supportsNativeTools: true,
+		inputPrice: 0.6,
+		outputPrice: 2.5,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description: "State of the art language model for agentic and coding tasks. September Update.",
+	},
+} as const satisfies Record<string, ModelInfo> // `as const` keeps the literal keys (BasetenModelId derives from them); `satisfies` validates each entry without widening
+
+export type BasetenModelId = keyof typeof basetenModels // union of the literal model-ID keys of basetenModels
+
+export const basetenDefaultModelId = "zai-org/GLM-4.6" satisfies BasetenModelId // `satisfies` makes the compiler reject an ID that is not a key of basetenModels

+ 17 - 1
packages/types/src/providers/bedrock.ts

@@ -105,6 +105,20 @@ export const bedrockModels = {
 		maxCachePoints: 4,
 		cachableFields: ["system", "messages", "tools"],
 	},
+	"anthropic.claude-opus-4-5-20251101-v1:0": {
+		maxTokens: 8192,
+		contextWindow: 200_000,
+		supportsImages: true,
+		supportsPromptCache: true,
+		supportsReasoningBudget: true,
+		inputPrice: 5.0,
+		outputPrice: 25.0,
+		cacheWritesPrice: 6.25,
+		cacheReadsPrice: 0.5,
+		minTokensPerCachePoint: 1024,
+		maxCachePoints: 4,
+		cachableFields: ["system", "messages", "tools"],
+	},
 	"anthropic.claude-opus-4-20250514-v1:0": {
 		maxTokens: 8192,
 		contextWindow: 200_000,
@@ -455,12 +469,14 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [
 ] as const
 
 // Amazon Bedrock models that support Global Inference profiles
-// As of Oct 2025, AWS supports Global Inference for:
+// As of Nov 2025, AWS supports Global Inference for:
 // - Claude Sonnet 4
 // - Claude Sonnet 4.5
 // - Claude Haiku 4.5
+// - Claude Opus 4.5
 export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
 	"anthropic.claude-sonnet-4-20250514-v1:0",
 	"anthropic.claude-sonnet-4-5-20250929-v1:0",
 	"anthropic.claude-haiku-4-5-20251001-v1:0",
+	"anthropic.claude-opus-4-5-20251101-v1:0",
 ] as const

+ 0 - 30
packages/types/src/providers/cerebras.ts

@@ -15,26 +15,6 @@ export const cerebrasModels = {
 		outputPrice: 0,
 		description: "Highly intelligent general purpose model with up to 1,000 tokens/s",
 	},
-	"qwen-3-coder-480b-free": {
-		maxTokens: 40000,
-		contextWindow: 64000,
-		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-		description:
-			"[SOON TO BE DEPRECATED] SOTA coding model with ~2000 tokens/s ($0 free tier)\n\n• Use this if you don't have a Cerebras subscription\n• 64K context window\n• Rate limits: 150K TPM, 1M TPH/TPD, 10 RPM, 100 RPH/RPD\n\nUpgrade for higher limits: [https://cloud.cerebras.ai/?utm=roocode](https://cloud.cerebras.ai/?utm=roocode)",
-	},
-	"qwen-3-coder-480b": {
-		maxTokens: 40000,
-		contextWindow: 128000,
-		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-		description:
-			"[SOON TO BE DEPRECATED] SOTA coding model with ~2000 tokens/s ($50/$250 paid tiers)\n\n• Use this if you have a Cerebras subscription\n• 131K context window with higher rate limits",
-	},
 	"qwen-3-235b-a22b-instruct-2507": {
 		maxTokens: 64000,
 		contextWindow: 64000,
@@ -62,16 +42,6 @@ export const cerebrasModels = {
 		outputPrice: 0,
 		description: "SOTA coding performance with ~2500 tokens/s",
 	},
-	"qwen-3-235b-a22b-thinking-2507": {
-		maxTokens: 40000,
-		contextWindow: 65000,
-		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-		description: "SOTA performance with ~1500 tokens/s",
-		supportsReasoningEffort: true,
-	},
 	"gpt-oss-120b": {
 		maxTokens: 8000,
 		contextWindow: 64000,

+ 38 - 0
packages/types/src/providers/claude-code.ts

@@ -47,6 +47,9 @@ export const claudeCodeModels = {
 		supportsReasoningEffort: false,
 		supportsReasoningBudget: false,
 		requiredReasoningBudget: false,
+		// Claude Code manages its own tools and temperature via the CLI
+		supportsNativeTools: false,
+		supportsTemperature: false,
 	},
 	"claude-sonnet-4-5-20250929[1m]": {
 		...anthropicModels["claude-sonnet-4-5"],
@@ -56,6 +59,9 @@ export const claudeCodeModels = {
 		supportsReasoningEffort: false,
 		supportsReasoningBudget: false,
 		requiredReasoningBudget: false,
+		// Claude Code manages its own tools and temperature via the CLI
+		supportsNativeTools: false,
+		supportsTemperature: false,
 	},
 	"claude-sonnet-4-20250514": {
 		...anthropicModels["claude-sonnet-4-20250514"],
@@ -64,6 +70,20 @@ export const claudeCodeModels = {
 		supportsReasoningEffort: false,
 		supportsReasoningBudget: false,
 		requiredReasoningBudget: false,
+		// Claude Code manages its own tools and temperature via the CLI
+		supportsNativeTools: false,
+		supportsTemperature: false,
+	},
+	"claude-opus-4-5-20251101": {
+		...anthropicModels["claude-opus-4-5-20251101"],
+		supportsImages: false,
+		supportsPromptCache: true, // Claude Code does report cache tokens
+		supportsReasoningEffort: false,
+		supportsReasoningBudget: false,
+		requiredReasoningBudget: false,
+		// Claude Code manages its own tools and temperature via the CLI
+		supportsNativeTools: false,
+		supportsTemperature: false,
 	},
 	"claude-opus-4-5-20251101": {
 		...anthropicModels["claude-opus-4-5-20251101"],
@@ -80,6 +100,9 @@ export const claudeCodeModels = {
 		supportsReasoningEffort: false,
 		supportsReasoningBudget: false,
 		requiredReasoningBudget: false,
+		// Claude Code manages its own tools and temperature via the CLI
+		supportsNativeTools: false,
+		supportsTemperature: false,
 	},
 	"claude-opus-4-20250514": {
 		...anthropicModels["claude-opus-4-20250514"],
@@ -88,6 +111,9 @@ export const claudeCodeModels = {
 		supportsReasoningEffort: false,
 		supportsReasoningBudget: false,
 		requiredReasoningBudget: false,
+		// Claude Code manages its own tools and temperature via the CLI
+		supportsNativeTools: false,
+		supportsTemperature: false,
 	},
 	"claude-3-7-sonnet-20250219": {
 		...anthropicModels["claude-3-7-sonnet-20250219"],
@@ -96,6 +122,9 @@ export const claudeCodeModels = {
 		supportsReasoningEffort: false,
 		supportsReasoningBudget: false,
 		requiredReasoningBudget: false,
+		// Claude Code manages its own tools and temperature via the CLI
+		supportsNativeTools: false,
+		supportsTemperature: false,
 	},
 	"claude-3-5-sonnet-20241022": {
 		...anthropicModels["claude-3-5-sonnet-20241022"],
@@ -104,6 +133,9 @@ export const claudeCodeModels = {
 		supportsReasoningEffort: false,
 		supportsReasoningBudget: false,
 		requiredReasoningBudget: false,
+		// Claude Code manages its own tools and temperature via the CLI
+		supportsNativeTools: false,
+		supportsTemperature: false,
 	},
 	"claude-3-5-haiku-20241022": {
 		...anthropicModels["claude-3-5-haiku-20241022"],
@@ -112,6 +144,9 @@ export const claudeCodeModels = {
 		supportsReasoningEffort: false,
 		supportsReasoningBudget: false,
 		requiredReasoningBudget: false,
+		// Claude Code manages its own tools and temperature via the CLI
+		supportsNativeTools: false,
+		supportsTemperature: false,
 	},
 	"claude-haiku-4-5-20251001": {
 		...anthropicModels["claude-haiku-4-5-20251001"],
@@ -120,5 +155,8 @@ export const claudeCodeModels = {
 		supportsReasoningEffort: false,
 		supportsReasoningBudget: false,
 		requiredReasoningBudget: false,
+		// Claude Code manages its own tools and temperature via the CLI
+		supportsNativeTools: false,
+		supportsTemperature: false,
 	},
 } as const satisfies Record<string, ModelInfo>

+ 60 - 243
packages/types/src/providers/gemini.ts

@@ -6,15 +6,16 @@ export type GeminiModelId = keyof typeof geminiModels
 export const geminiDefaultModelId: GeminiModelId = "gemini-3-pro-preview" // kilocode_change
 
 export const geminiModels = {
-	// Latest models (pointing to the most recent stable versions)
 	"gemini-3-pro-preview": {
 		maxTokens: 65_536,
 		contextWindow: 1_048_576,
 		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
-		supportsReasoningEffort: true,
+		supportsReasoningEffort: ["low", "high"],
 		reasoningEffort: "low",
 		supportsTemperature: true,
+		defaultTemperature: 1,
 		inputPrice: 4.0,
 		outputPrice: 18.0,
 		tiers: [
@@ -30,136 +31,12 @@ export const geminiModels = {
 			},
 		],
 	},
-	"gemini-flash-latest": {
-		maxTokens: 65_536,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: true,
-		inputPrice: 0.3,
-		outputPrice: 2.5,
-		cacheReadsPrice: 0.075,
-		cacheWritesPrice: 1.0,
-		maxThinkingTokens: 24_576,
-		supportsReasoningBudget: true,
-	},
-	"gemini-flash-lite-latest": {
-		maxTokens: 65_536,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: true,
-		inputPrice: 0.1,
-		outputPrice: 0.4,
-		cacheReadsPrice: 0.025,
-		cacheWritesPrice: 1.0,
-		supportsReasoningBudget: true,
-		maxThinkingTokens: 24_576,
-	},
-
-	// 2.5 Flash models (09-2025 versions - most recent)
-	"gemini-2.5-flash-preview-09-2025": {
-		maxTokens: 65_536,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: true,
-		inputPrice: 0.3,
-		outputPrice: 2.5,
-		cacheReadsPrice: 0.075,
-		cacheWritesPrice: 1.0,
-		maxThinkingTokens: 24_576,
-		supportsReasoningBudget: true,
-	},
-	"gemini-2.5-flash-lite-preview-09-2025": {
-		maxTokens: 65_536,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: true,
-		inputPrice: 0.1,
-		outputPrice: 0.4,
-		cacheReadsPrice: 0.025,
-		cacheWritesPrice: 1.0,
-		supportsReasoningBudget: true,
-		maxThinkingTokens: 24_576,
-	},
-
-	// 2.5 Flash models (06-17 version)
-	"gemini-2.5-flash-lite-preview-06-17": {
-		maxTokens: 64_000,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: true,
-		inputPrice: 0.1,
-		outputPrice: 0.4,
-		cacheReadsPrice: 0.025,
-		cacheWritesPrice: 1.0,
-		supportsReasoningBudget: true,
-		maxThinkingTokens: 24_576,
-	},
-
-	// 2.5 Flash models (05-20 versions)
-	"gemini-2.5-flash-preview-05-20:thinking": {
-		maxTokens: 65_535,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: true,
-		inputPrice: 0.15,
-		outputPrice: 3.5,
-		cacheReadsPrice: 0.0375,
-		cacheWritesPrice: 1.0,
-		maxThinkingTokens: 24_576,
-		supportsReasoningBudget: true,
-		requiredReasoningBudget: true,
-	},
-	"gemini-2.5-flash-preview-05-20": {
-		maxTokens: 65_535,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: true,
-		inputPrice: 0.15,
-		outputPrice: 0.6,
-		cacheReadsPrice: 0.0375,
-		cacheWritesPrice: 1.0,
-	},
-
-	// 2.5 Flash models (04-17 versions)
-	"gemini-2.5-flash-preview-04-17:thinking": {
-		maxTokens: 65_535,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0.15,
-		outputPrice: 3.5,
-		maxThinkingTokens: 24_576,
-		supportsReasoningBudget: true,
-		requiredReasoningBudget: true,
-	},
-	"gemini-2.5-flash-preview-04-17": {
-		maxTokens: 65_535,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0.15,
-		outputPrice: 0.6,
-	},
-
-	// 2.5 Flash stable
-	"gemini-2.5-flash": {
-		maxTokens: 64_000,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: true,
-		inputPrice: 0.3,
-		outputPrice: 2.5,
-		cacheReadsPrice: 0.075,
-		cacheWritesPrice: 1.0,
-		maxThinkingTokens: 24_576,
-		supportsReasoningBudget: true,
-	},
-
 	// 2.5 Pro models
-	"gemini-2.5-pro-preview-06-05": {
-		maxTokens: 65_535,
+	"gemini-2.5-pro": {
+		maxTokens: 64_000,
 		contextWindow: 1_048_576,
 		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
 		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
 		outputPrice: 15,
@@ -167,6 +44,7 @@ export const geminiModels = {
 		cacheWritesPrice: 4.5,
 		maxThinkingTokens: 32_768,
 		supportsReasoningBudget: true,
+		requiredReasoningBudget: true,
 		tiers: [
 			{
 				contextWindow: 200_000,
@@ -182,15 +60,18 @@ export const geminiModels = {
 			},
 		],
 	},
-	"gemini-2.5-pro-preview-05-06": {
+	"gemini-2.5-pro-preview-06-05": {
 		maxTokens: 65_535,
 		contextWindow: 1_048_576,
 		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
 		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
 		outputPrice: 15,
 		cacheReadsPrice: 0.625,
 		cacheWritesPrice: 4.5,
+		maxThinkingTokens: 32_768,
+		supportsReasoningBudget: true,
 		tiers: [
 			{
 				contextWindow: 200_000,
@@ -206,17 +87,16 @@ export const geminiModels = {
 			},
 		],
 	},
-	"gemini-2.5-pro-preview-03-25": {
+	"gemini-2.5-pro-preview-05-06": {
 		maxTokens: 65_535,
 		contextWindow: 1_048_576,
 		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
 		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
 		outputPrice: 15,
 		cacheReadsPrice: 0.625,
 		cacheWritesPrice: 4.5,
-		maxThinkingTokens: 32_768,
-		supportsReasoningBudget: true,
 		tiers: [
 			{
 				contextWindow: 200_000,
@@ -232,18 +112,11 @@ export const geminiModels = {
 			},
 		],
 	},
-	"gemini-2.5-pro-exp-03-25": {
+	"gemini-2.5-pro-preview-03-25": {
 		maxTokens: 65_535,
 		contextWindow: 1_048_576,
 		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-	},
-	"gemini-2.5-pro": {
-		maxTokens: 64_000,
-		contextWindow: 1_048_576,
-		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
 		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
 		outputPrice: 15,
@@ -251,7 +124,6 @@ export const geminiModels = {
 		cacheWritesPrice: 4.5,
 		maxThinkingTokens: 32_768,
 		supportsReasoningBudget: true,
-		requiredReasoningBudget: true,
 		tiers: [
 			{
 				contextWindow: 200_000,
@@ -268,127 +140,72 @@ export const geminiModels = {
 		],
 	},
 
-	// 2.0 Flash models
-	"gemini-2.0-flash-lite-preview-02-05": {
-		maxTokens: 8192,
+	// 2.5 Flash models
+	"gemini-flash-latest": {
+		maxTokens: 65_536,
 		contextWindow: 1_048_576,
 		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
+		supportsNativeTools: true,
+		supportsPromptCache: true,
+		inputPrice: 0.3,
+		outputPrice: 2.5,
+		cacheReadsPrice: 0.075,
+		cacheWritesPrice: 1.0,
+		maxThinkingTokens: 24_576,
+		supportsReasoningBudget: true,
 	},
-	"gemini-2.0-flash-thinking-exp-01-21": {
+	"gemini-2.5-flash-preview-09-2025": {
 		maxTokens: 65_536,
 		contextWindow: 1_048_576,
 		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-	},
-	"gemini-2.0-flash-thinking-exp-1219": {
-		maxTokens: 8192,
-		contextWindow: 32_767,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
+		supportsNativeTools: true,
+		supportsPromptCache: true,
+		inputPrice: 0.3,
+		outputPrice: 2.5,
+		cacheReadsPrice: 0.075,
+		cacheWritesPrice: 1.0,
+		maxThinkingTokens: 24_576,
+		supportsReasoningBudget: true,
 	},
-	"gemini-2.0-flash-exp": {
-		maxTokens: 8192,
+	"gemini-2.5-flash": {
+		maxTokens: 64_000,
 		contextWindow: 1_048_576,
 		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
+		supportsNativeTools: true,
+		supportsPromptCache: true,
+		inputPrice: 0.3,
+		outputPrice: 2.5,
+		cacheReadsPrice: 0.075,
+		cacheWritesPrice: 1.0,
+		maxThinkingTokens: 24_576,
+		supportsReasoningBudget: true,
 	},
-	"gemini-2.0-flash-001": {
-		maxTokens: 8192,
+
+	// 2.5 Flash Lite models
+	"gemini-flash-lite-latest": {
+		maxTokens: 65_536,
 		contextWindow: 1_048_576,
 		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
 		inputPrice: 0.1,
 		outputPrice: 0.4,
 		cacheReadsPrice: 0.025,
 		cacheWritesPrice: 1.0,
+		supportsReasoningBudget: true,
+		maxThinkingTokens: 24_576,
 	},
-
-	// 2.0 Pro models
-	"gemini-2.0-pro-exp-02-05": {
-		maxTokens: 8192,
-		contextWindow: 2_097_152,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-	},
-
-	// 1.5 Flash models
-	"gemini-1.5-flash-002": {
-		maxTokens: 8192,
+	"gemini-2.5-flash-lite-preview-09-2025": {
+		maxTokens: 65_536,
 		contextWindow: 1_048_576,
 		supportsImages: true,
+		supportsNativeTools: true,
 		supportsPromptCache: true,
-		inputPrice: 0.15, // This is the pricing for prompts above 128k tokens.
-		outputPrice: 0.6,
-		cacheReadsPrice: 0.0375,
+		inputPrice: 0.1,
+		outputPrice: 0.4,
+		cacheReadsPrice: 0.025,
 		cacheWritesPrice: 1.0,
-		tiers: [
-			{
-				contextWindow: 128_000,
-				inputPrice: 0.075,
-				outputPrice: 0.3,
-				cacheReadsPrice: 0.01875,
-			},
-			{
-				contextWindow: Infinity,
-				inputPrice: 0.15,
-				outputPrice: 0.6,
-				cacheReadsPrice: 0.0375,
-			},
-		],
-	},
-	"gemini-1.5-flash-exp-0827": {
-		maxTokens: 8192,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-	},
-	"gemini-1.5-flash-8b-exp-0827": {
-		maxTokens: 8192,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-	},
-
-	// 1.5 Pro models
-	"gemini-1.5-pro-002": {
-		maxTokens: 8192,
-		contextWindow: 2_097_152,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-	},
-	"gemini-1.5-pro-exp-0827": {
-		maxTokens: 8192,
-		contextWindow: 2_097_152,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-	},
-
-	// Experimental models
-	"gemini-exp-1206": {
-		maxTokens: 8192,
-		contextWindow: 2_097_152,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
+		supportsReasoningBudget: true,
+		maxThinkingTokens: 24_576,
 	},
 } as const satisfies Record<string, ModelInfo>

+ 4 - 0
packages/types/src/providers/index.ts

@@ -1,4 +1,5 @@
 export * from "./anthropic.js"
+export * from "./baseten.js"
 export * from "./bedrock.js"
 export * from "./cerebras.js"
 export * from "./chutes.js"
@@ -41,6 +42,7 @@ export * from "./deepinfra.js"
 export * from "./minimax.js"
 
 import { anthropicDefaultModelId } from "./anthropic.js"
+import { basetenDefaultModelId } from "./baseten.js"
 import { bedrockDefaultModelId } from "./bedrock.js"
 import { cerebrasDefaultModelId } from "./cerebras.js"
 import { chutesDefaultModelId } from "./chutes.js"
@@ -101,6 +103,8 @@ export function getProviderDefaultModelId(
 			return "meta-llama/Llama-3.3-70B-Instruct"
 		case "chutes":
 			return chutesDefaultModelId
+		case "baseten":
+			return basetenDefaultModelId
 		case "bedrock":
 			return bedrockDefaultModelId
 		case "vertex":

+ 7 - 3
packages/types/src/providers/minimax.ts

@@ -13,11 +13,12 @@ export const minimaxModels = {
 		contextWindow: 192_000,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
+		preserveReasoning: true,
 		inputPrice: 0.3,
 		outputPrice: 1.2,
 		cacheWritesPrice: 0.375,
 		cacheReadsPrice: 0.03,
-		preserveReasoning: true,
 		description:
 			"MiniMax M2, a model born for Agents and code, featuring Top-tier Coding Capabilities, Powerful Agentic Performance, and Ultimate Cost-Effectiveness & Speed.",
 	},
@@ -26,15 +27,18 @@ export const minimaxModels = {
 		contextWindow: 192_000,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
+		preserveReasoning: true,
 		inputPrice: 0.3,
 		outputPrice: 1.2,
 		cacheWritesPrice: 0.375,
 		cacheReadsPrice: 0.03,
-		preserveReasoning: true,
 		description:
 			"MiniMax M2 Stable (High Concurrency, Commercial Use), a model born for Agents and code, featuring Top-tier Coding Capabilities, Powerful Agentic Performance, and Ultimate Cost-Effectiveness & Speed.",
 	},
 } as const satisfies Record<string, ModelInfo>
 
-export const MINIMAX_DEFAULT_MAX_TOKENS = 16384 // kilocode_change
+export const minimaxDefaultModelInfo: ModelInfo = minimaxModels[minimaxDefaultModelId]
+
+export const MINIMAX_DEFAULT_MAX_TOKENS = 16_384
 export const MINIMAX_DEFAULT_TEMPERATURE = 1.0

+ 18 - 9
packages/types/src/providers/mistral.ts

@@ -11,62 +11,70 @@ export const mistralModels = {
 		contextWindow: 128_000,
 		supportsImages: true,
 		supportsPromptCache: false,
+		supportsNativeTools: true,
 		inputPrice: 2.0,
 		outputPrice: 5.0,
 	},
 	"devstral-medium-latest": {
-		maxTokens: 131_000,
+		maxTokens: 8192,
 		contextWindow: 131_000,
 		supportsImages: true,
 		supportsPromptCache: false,
+		supportsNativeTools: true,
 		inputPrice: 0.4,
 		outputPrice: 2.0,
 	},
 	"mistral-medium-latest": {
-		maxTokens: 131_000,
+		maxTokens: 8192,
 		contextWindow: 131_000,
 		supportsImages: true,
 		supportsPromptCache: false,
+		supportsNativeTools: true,
 		inputPrice: 0.4,
 		outputPrice: 2.0,
 	},
 	"codestral-latest": {
-		maxTokens: 256_000,
+		maxTokens: 8192,
 		contextWindow: 256_000,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsNativeTools: true,
 		inputPrice: 0.3,
 		outputPrice: 0.9,
 	},
 	"mistral-large-latest": {
-		maxTokens: 131_000,
+		maxTokens: 8192,
 		contextWindow: 131_000,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsNativeTools: true,
 		inputPrice: 2.0,
 		outputPrice: 6.0,
 	},
 	"ministral-8b-latest": {
-		maxTokens: 131_000,
+		maxTokens: 8192,
 		contextWindow: 131_000,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsNativeTools: true,
 		inputPrice: 0.1,
 		outputPrice: 0.1,
 	},
 	"ministral-3b-latest": {
-		maxTokens: 131_000,
+		maxTokens: 8192,
 		contextWindow: 131_000,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsNativeTools: true,
 		inputPrice: 0.04,
 		outputPrice: 0.04,
 	},
 	"mistral-small-latest": {
-		maxTokens: 32_000,
+		maxTokens: 8192,
 		contextWindow: 32_000,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsNativeTools: true,
 		inputPrice: 0.2,
 		outputPrice: 0.6,
 	},
@@ -89,13 +97,14 @@ export const mistralModels = {
 	},
 	// kilocode_change end
 	"pixtral-large-latest": {
-		maxTokens: 131_000,
+		maxTokens: 8192,
 		contextWindow: 131_000,
 		supportsImages: true,
 		supportsPromptCache: false,
+		supportsNativeTools: true,
 		inputPrice: 2.0,
 		outputPrice: 6.0,
 	},
 } as const satisfies Record<string, ModelInfo>
 
-export const MISTRAL_DEFAULT_TEMPERATURE = 0
+export const MISTRAL_DEFAULT_TEMPERATURE = 1

+ 4 - 0
packages/types/src/providers/moonshot.ts

@@ -11,6 +11,7 @@ export const moonshotModels = {
 		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 0.6, // $0.60 per million tokens (cache miss)
 		outputPrice: 2.5, // $2.50 per million tokens
 		cacheWritesPrice: 0, // $0 per million tokens (cache miss)
@@ -22,6 +23,7 @@ export const moonshotModels = {
 		contextWindow: 262144,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 0.6,
 		outputPrice: 2.5,
 		cacheReadsPrice: 0.15,
@@ -33,6 +35,7 @@ export const moonshotModels = {
 		contextWindow: 262_144,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 2.4, // $2.40 per million tokens (cache miss)
 		outputPrice: 10, // $10.00 per million tokens
 		cacheWritesPrice: 0, // $0 per million tokens (cache miss)
@@ -44,6 +47,7 @@ export const moonshotModels = {
 		contextWindow: 262_144, // 262,144 tokens
 		supportsImages: false, // Text-only (no image/vision support)
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 0.6, // $0.60 per million tokens (cache miss)
 		outputPrice: 2.5, // $2.50 per million tokens
 		cacheWritesPrice: 0, // $0 per million tokens (cache miss)

+ 30 - 0
packages/types/src/providers/openai.ts

@@ -9,6 +9,7 @@ export const openAiNativeModels = {
 	"gpt-5.1": {
 		maxTokens: 128000,
 		contextWindow: 400000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		promptCacheRetention: "24h",
@@ -28,6 +29,7 @@ export const openAiNativeModels = {
 	"gpt-5.1-codex": {
 		maxTokens: 128000,
 		contextWindow: 400000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		promptCacheRetention: "24h",
@@ -43,6 +45,7 @@ export const openAiNativeModels = {
 	"gpt-5.1-codex-mini": {
 		maxTokens: 128000,
 		contextWindow: 400000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		promptCacheRetention: "24h",
@@ -57,6 +60,7 @@ export const openAiNativeModels = {
 	"gpt-5": {
 		maxTokens: 128000,
 		contextWindow: 400000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		supportsReasoningEffort: ["minimal", "low", "medium", "high"],
@@ -75,6 +79,7 @@ export const openAiNativeModels = {
 	"gpt-5-mini": {
 		maxTokens: 128000,
 		contextWindow: 400000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		supportsReasoningEffort: ["minimal", "low", "medium", "high"],
@@ -93,6 +98,7 @@ export const openAiNativeModels = {
 	"gpt-5-codex": {
 		maxTokens: 128000,
 		contextWindow: 400000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		supportsReasoningEffort: ["low", "medium", "high"],
@@ -107,6 +113,7 @@ export const openAiNativeModels = {
 	"gpt-5-nano": {
 		maxTokens: 128000,
 		contextWindow: 400000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		supportsReasoningEffort: ["minimal", "low", "medium", "high"],
@@ -122,6 +129,7 @@ export const openAiNativeModels = {
 	"gpt-5-chat-latest": {
 		maxTokens: 128000,
 		contextWindow: 400000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 1.25,
@@ -132,6 +140,7 @@ export const openAiNativeModels = {
 	"gpt-4.1": {
 		maxTokens: 32_768,
 		contextWindow: 1_047_576,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 2,
@@ -145,6 +154,7 @@ export const openAiNativeModels = {
 	"gpt-4.1-mini": {
 		maxTokens: 32_768,
 		contextWindow: 1_047_576,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 0.4,
@@ -158,6 +168,7 @@ export const openAiNativeModels = {
 	"gpt-4.1-nano": {
 		maxTokens: 32_768,
 		contextWindow: 1_047_576,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 0.1,
@@ -171,6 +182,7 @@ export const openAiNativeModels = {
 	o3: {
 		maxTokens: 100_000,
 		contextWindow: 200_000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 2.0,
@@ -187,6 +199,7 @@ export const openAiNativeModels = {
 	"o3-high": {
 		maxTokens: 100_000,
 		contextWindow: 200_000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 2.0,
@@ -198,6 +211,7 @@ export const openAiNativeModels = {
 	"o3-low": {
 		maxTokens: 100_000,
 		contextWindow: 200_000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 2.0,
@@ -209,6 +223,7 @@ export const openAiNativeModels = {
 	"o4-mini": {
 		maxTokens: 100_000,
 		contextWindow: 200_000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 1.1,
@@ -225,6 +240,7 @@ export const openAiNativeModels = {
 	"o4-mini-high": {
 		maxTokens: 100_000,
 		contextWindow: 200_000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 1.1,
@@ -236,6 +252,7 @@ export const openAiNativeModels = {
 	"o4-mini-low": {
 		maxTokens: 100_000,
 		contextWindow: 200_000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 1.1,
@@ -247,6 +264,7 @@ export const openAiNativeModels = {
 	"o3-mini": {
 		maxTokens: 100_000,
 		contextWindow: 200_000,
+		supportsNativeTools: true,
 		supportsImages: false,
 		supportsPromptCache: true,
 		inputPrice: 1.1,
@@ -259,6 +277,7 @@ export const openAiNativeModels = {
 	"o3-mini-high": {
 		maxTokens: 100_000,
 		contextWindow: 200_000,
+		supportsNativeTools: true,
 		supportsImages: false,
 		supportsPromptCache: true,
 		inputPrice: 1.1,
@@ -270,6 +289,7 @@ export const openAiNativeModels = {
 	"o3-mini-low": {
 		maxTokens: 100_000,
 		contextWindow: 200_000,
+		supportsNativeTools: true,
 		supportsImages: false,
 		supportsPromptCache: true,
 		inputPrice: 1.1,
@@ -281,6 +301,7 @@ export const openAiNativeModels = {
 	o1: {
 		maxTokens: 100_000,
 		contextWindow: 200_000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 15,
@@ -291,6 +312,7 @@ export const openAiNativeModels = {
 	"o1-preview": {
 		maxTokens: 32_768,
 		contextWindow: 128_000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 15,
@@ -301,6 +323,7 @@ export const openAiNativeModels = {
 	"o1-mini": {
 		maxTokens: 65_536,
 		contextWindow: 128_000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 1.1,
@@ -311,6 +334,7 @@ export const openAiNativeModels = {
 	"gpt-4o": {
 		maxTokens: 16_384,
 		contextWindow: 128_000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 2.5,
@@ -324,6 +348,7 @@ export const openAiNativeModels = {
 	"gpt-4o-mini": {
 		maxTokens: 16_384,
 		contextWindow: 128_000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 0.15,
@@ -337,6 +362,7 @@ export const openAiNativeModels = {
 	"codex-mini-latest": {
 		maxTokens: 16_384,
 		contextWindow: 200_000,
+		supportsNativeTools: true,
 		supportsImages: false,
 		supportsPromptCache: false,
 		inputPrice: 1.5,
@@ -350,6 +376,7 @@ export const openAiNativeModels = {
 	"gpt-5-2025-08-07": {
 		maxTokens: 128000,
 		contextWindow: 400000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		supportsReasoningEffort: ["minimal", "low", "medium", "high"],
@@ -368,6 +395,7 @@ export const openAiNativeModels = {
 	"gpt-5-mini-2025-08-07": {
 		maxTokens: 128000,
 		contextWindow: 400000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		supportsReasoningEffort: ["minimal", "low", "medium", "high"],
@@ -386,6 +414,7 @@ export const openAiNativeModels = {
 	"gpt-5-nano-2025-08-07": {
 		maxTokens: 128000,
 		contextWindow: 400000,
+		supportsNativeTools: true,
 		supportsImages: true,
 		supportsPromptCache: true,
 		supportsReasoningEffort: ["minimal", "low", "medium", "high"],
@@ -407,6 +436,7 @@ export const openAiModelInfoSaneDefaults: ModelInfo = {
 	supportsPromptCache: false,
 	inputPrice: 0,
 	outputPrice: 0,
+	supportsNativeTools: true,
 }
 
 // https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation

+ 2 - 0
packages/types/src/providers/openrouter.ts

@@ -43,6 +43,7 @@ export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([
 	"anthropic/claude-opus-4.1",
 	"anthropic/claude-opus-4.5",
 	"anthropic/claude-haiku-4.5",
+	"anthropic/claude-opus-4.5",
 	"google/gemini-2.5-flash-preview",
 	"google/gemini-2.5-flash-preview:thinking",
 	"google/gemini-2.5-flash-preview-05-20",
@@ -74,6 +75,7 @@ export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([
 	"anthropic/claude-opus-4.5",
 	"anthropic/claude-sonnet-4",
 	"anthropic/claude-sonnet-4.5",
+	"anthropic/claude-opus-4.5",
 	"anthropic/claude-haiku-4.5",
 	"google/gemini-2.5-pro-preview",
 	"google/gemini-2.5-pro",

+ 3 - 2
packages/types/src/providers/vertex.ts

@@ -11,9 +11,10 @@ export const vertexModels = {
 		contextWindow: 1_048_576,
 		supportsImages: true,
 		supportsPromptCache: true,
-		supportsReasoningEffort: true,
+		supportsReasoningEffort: ["low", "high"],
 		reasoningEffort: "low",
 		supportsTemperature: true,
+		defaultTemperature: 1,
 		inputPrice: 4.0,
 		outputPrice: 18.0,
 		tiers: [
@@ -220,7 +221,7 @@ export const vertexModels = {
 		supportsReasoningBudget: true,
 	},
 	"claude-opus-4-5@20251101": {
-		maxTokens: 64000,
+		maxTokens: 8192,
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: true,

+ 15 - 0
packages/types/src/providers/zai.ts

@@ -16,6 +16,7 @@ export const internationalZAiModels = {
 		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		supportsReasoningBinary: true,
 		inputPrice: 0.6,
 		outputPrice: 2.2,
@@ -29,6 +30,7 @@ export const internationalZAiModels = {
 		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 0.2,
 		outputPrice: 1.1,
 		cacheWritesPrice: 0,
@@ -41,6 +43,7 @@ export const internationalZAiModels = {
 		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 2.2,
 		outputPrice: 8.9,
 		cacheWritesPrice: 0,
@@ -53,6 +56,7 @@ export const internationalZAiModels = {
 		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 1.1,
 		outputPrice: 4.5,
 		cacheWritesPrice: 0,
@@ -64,6 +68,7 @@ export const internationalZAiModels = {
 		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 0,
 		outputPrice: 0,
 		cacheWritesPrice: 0,
@@ -75,6 +80,7 @@ export const internationalZAiModels = {
 		contextWindow: 131_072,
 		supportsImages: true,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 0.6,
 		outputPrice: 1.8,
 		cacheWritesPrice: 0,
@@ -87,6 +93,7 @@ export const internationalZAiModels = {
 		contextWindow: 200_000,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		supportsReasoningBinary: true,
 		inputPrice: 0.6,
 		outputPrice: 2.2,
@@ -100,6 +107,7 @@ export const internationalZAiModels = {
 		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsNativeTools: true,
 		inputPrice: 0.1,
 		outputPrice: 0.1,
 		cacheWritesPrice: 0,
@@ -116,6 +124,7 @@ export const mainlandZAiModels = {
 		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		supportsReasoningBinary: true,
 		inputPrice: 0.29,
 		outputPrice: 1.14,
@@ -129,6 +138,7 @@ export const mainlandZAiModels = {
 		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 0.1,
 		outputPrice: 0.6,
 		cacheWritesPrice: 0,
@@ -141,6 +151,7 @@ export const mainlandZAiModels = {
 		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 0.29,
 		outputPrice: 1.14,
 		cacheWritesPrice: 0,
@@ -153,6 +164,7 @@ export const mainlandZAiModels = {
 		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 0.1,
 		outputPrice: 0.6,
 		cacheWritesPrice: 0,
@@ -164,6 +176,7 @@ export const mainlandZAiModels = {
 		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 0,
 		outputPrice: 0,
 		cacheWritesPrice: 0,
@@ -175,6 +188,7 @@ export const mainlandZAiModels = {
 		contextWindow: 131_072,
 		supportsImages: true,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		inputPrice: 0.29,
 		outputPrice: 0.93,
 		cacheWritesPrice: 0,
@@ -187,6 +201,7 @@ export const mainlandZAiModels = {
 		contextWindow: 204_800,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsNativeTools: true,
 		supportsReasoningBinary: true,
 		inputPrice: 0.29,
 		outputPrice: 1.14,

+ 2 - 0
packages/types/src/telemetry.ts

@@ -97,6 +97,7 @@ export enum TelemetryEventName {
 	CONSECUTIVE_MISTAKE_ERROR = "Consecutive Mistake Error",
 	CODE_INDEX_ERROR = "Code Index Error",
 	TELEMETRY_SETTINGS_CHANGED = "Telemetry Settings Changed",
+	MODEL_CACHE_EMPTY_RESPONSE = "Model Cache Empty Response",
 }
 
 /**
@@ -245,6 +246,7 @@ export const rooCodeTelemetryEventSchema = z.discriminatedUnion("type", [
 			TelemetryEventName.SHELL_INTEGRATION_ERROR,
 			TelemetryEventName.CONSECUTIVE_MISTAKE_ERROR,
 			TelemetryEventName.CODE_INDEX_ERROR,
+			TelemetryEventName.MODEL_CACHE_EMPTY_RESPONSE,
 			TelemetryEventName.CONTEXT_CONDENSED,
 			TelemetryEventName.SLIDING_WINDOW_TRUNCATION,
 			TelemetryEventName.TAB_SHOWN,

File diff suppressed because it is too large
+ 179 - 174
pnpm-lock.yaml


BIN
releases/3.32.1-release.png


BIN
releases/3.33.1-release.png


BIN
releases/3.33.3-release.png


BIN
releases/3.34.0-release.png


BIN
releases/3.34.2-release.png


BIN
releases/3.34.3-release.png


BIN
releases/3.34.4-release.png


BIN
releases/3.34.5-release.png


BIN
releases/3.34.6-release.png


BIN
releases/3.34.7-release.png


BIN
releases/v3.33.0-release.png


+ 2 - 0
src/activate/handleUri.ts

@@ -59,11 +59,13 @@ export const handleUri = async (uri: vscode.Uri) => {
 			const code = query.get("code")
 			const state = query.get("state")
 			const organizationId = query.get("organizationId")
+			const providerModel = query.get("provider_model")
 
 			await CloudService.instance.handleAuthCallback(
 				code,
 				state,
 				organizationId === "null" ? null : organizationId,
+				providerModel,
 			)
 			break
 		}

+ 10 - 0
src/api/index.ts

@@ -51,6 +51,7 @@ import {
 	VercelAiGatewayHandler,
 	DeepInfraHandler,
 	// MiniMaxHandler, // kilocode_change
+	BasetenHandler,
 } from "./providers"
 // kilocode_change start
 import { KilocodeOpenrouterHandler } from "./providers/kilocode-openrouter"
@@ -109,6 +110,13 @@ export interface ApiHandlerCreateMessageMetadata {
 	 * Used by providers to determine whether to include native tool definitions.
 	 */
 	toolProtocol?: ToolProtocol
+	/**
+	 * Controls whether the model can return multiple tool calls in a single response.
+	 * When true, parallel tool calls are enabled (OpenAI's parallel_tool_calls=true).
+	 * When false (default), only one tool call is returned per response.
+	 * Only applies when toolProtocol is "native".
+	 */
+	parallelToolCalls?: boolean
 }
 
 export interface ApiHandler {
@@ -235,6 +243,8 @@ export function buildApiHandler(configuration: ProviderSettings): ApiHandler {
 			return new VercelAiGatewayHandler(options)
 		case "minimax":
 			return new MiniMaxAnthropicHandler(options) // kilocode_change: anthropic
+		case "baseten":
+			return new BasetenHandler(options)
 		default:
 			apiProvider satisfies "gemini-cli" | undefined
 			return new AnthropicHandler(options)

+ 140 - 0
src/api/providers/__tests__/anthropic-vertex.spec.ts

@@ -601,6 +601,146 @@ describe("VertexHandler", () => {
 				text: "Second thinking block",
 			})
 		})
+
+		it("should filter out internal reasoning blocks before sending to API", async () => {
+			handler = new AnthropicVertexHandler({
+				apiModelId: "claude-3-5-sonnet-v2@20241022",
+				vertexProjectId: "test-project",
+				vertexRegion: "us-central1",
+			})
+
+			const mockCreate = vitest.fn().mockImplementation(async (options) => {
+				return {
+					async *[Symbol.asyncIterator]() {
+						yield {
+							type: "message_start",
+							message: {
+								usage: {
+									input_tokens: 10,
+									output_tokens: 0,
+								},
+							},
+						}
+						yield {
+							type: "content_block_start",
+							index: 0,
+							content_block: {
+								type: "text",
+								text: "Response",
+							},
+						}
+					},
+				}
+			})
+			;(handler["client"].messages as any).create = mockCreate
+
+			// Messages with internal reasoning blocks (from stored conversation history)
+			const messagesWithReasoning: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Hello",
+				},
+				{
+					role: "assistant",
+					content: [
+						{
+							type: "reasoning" as any,
+							text: "This is internal reasoning that should be filtered",
+						},
+						{
+							type: "text",
+							text: "This is the response",
+						},
+					],
+				},
+				{
+					role: "user",
+					content: "Continue",
+				},
+			]
+
+			const stream = handler.createMessage(systemPrompt, messagesWithReasoning)
+			const chunks: ApiStreamChunk[] = []
+
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Verify the API was called with filtered messages (no reasoning blocks)
+			const calledMessages = mockCreate.mock.calls[0][0].messages
+			expect(calledMessages).toHaveLength(3)
+
+			// Check user message 1
+			expect(calledMessages[0]).toMatchObject({
+				role: "user",
+			})
+
+			// Check assistant message - should have reasoning block filtered out
+			const assistantMessage = calledMessages.find((m: any) => m.role === "assistant")
+			expect(assistantMessage).toBeDefined()
+			expect(assistantMessage.content).toEqual([{ type: "text", text: "This is the response" }])
+
+			// Verify reasoning blocks were NOT sent to the API
+			expect(assistantMessage.content).not.toContainEqual(expect.objectContaining({ type: "reasoning" }))
+		})
+
+		it("should filter empty messages after removing all reasoning blocks", async () => {
+			handler = new AnthropicVertexHandler({
+				apiModelId: "claude-3-5-sonnet-v2@20241022",
+				vertexProjectId: "test-project",
+				vertexRegion: "us-central1",
+			})
+
+			const mockCreate = vitest.fn().mockImplementation(async (options) => {
+				return {
+					async *[Symbol.asyncIterator]() {
+						yield {
+							type: "message_start",
+							message: {
+								usage: {
+									input_tokens: 10,
+									output_tokens: 0,
+								},
+							},
+						}
+					},
+				}
+			})
+			;(handler["client"].messages as any).create = mockCreate
+
+			// Message with only reasoning content (should be completely filtered)
+			const messagesWithOnlyReasoning: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Hello",
+				},
+				{
+					role: "assistant",
+					content: [
+						{
+							type: "reasoning" as any,
+							text: "Only reasoning, no actual text",
+						},
+					],
+				},
+				{
+					role: "user",
+					content: "Continue",
+				},
+			]
+
+			const stream = handler.createMessage(systemPrompt, messagesWithOnlyReasoning)
+			const chunks: ApiStreamChunk[] = []
+
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Verify empty message was filtered out
+			const calledMessages = mockCreate.mock.calls[0][0].messages
+			expect(calledMessages).toHaveLength(2) // Only the two user messages
+			expect(calledMessages.every((m: any) => m.role === "user")).toBe(true)
+		})
 	})
 
 	describe("completePrompt", () => {

+ 438 - 0
src/api/providers/__tests__/anthropic.spec.ts

@@ -289,4 +289,442 @@ describe("AnthropicHandler", () => {
 			expect(model.info.outputPrice).toBe(22.5)
 		})
 	})
+
+	describe("reasoning block filtering", () => {
+		const systemPrompt = "You are a helpful assistant."
+
+		it("should filter out internal reasoning blocks before sending to API", async () => {
+			handler = new AnthropicHandler({
+				apiKey: "test-api-key",
+				apiModelId: "claude-3-5-sonnet-20241022",
+			})
+
+			// Messages with internal reasoning blocks (from stored conversation history)
+			const messagesWithReasoning: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Hello",
+				},
+				{
+					role: "assistant",
+					content: [
+						{
+							type: "reasoning" as any,
+							text: "This is internal reasoning that should be filtered",
+						},
+						{
+							type: "text",
+							text: "This is the response",
+						},
+					],
+				},
+				{
+					role: "user",
+					content: "Continue",
+				},
+			]
+
+			const stream = handler.createMessage(systemPrompt, messagesWithReasoning)
+			const chunks: any[] = []
+
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Verify the API was called with filtered messages (no reasoning blocks)
+			const calledMessages = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0].messages
+			expect(calledMessages).toHaveLength(3)
+
+			// Check assistant message - should have reasoning block filtered out
+			const assistantMessage = calledMessages.find((m: any) => m.role === "assistant")
+			expect(assistantMessage).toBeDefined()
+			expect(assistantMessage.content).toEqual([{ type: "text", text: "This is the response" }])
+
+			// Verify reasoning blocks were NOT sent to the API
+			expect(assistantMessage.content).not.toContainEqual(expect.objectContaining({ type: "reasoning" }))
+		})
+
+		it("should filter empty messages after removing all reasoning blocks", async () => {
+			handler = new AnthropicHandler({
+				apiKey: "test-api-key",
+				apiModelId: "claude-3-5-sonnet-20241022",
+			})
+
+			// Assistant turn holds only a reasoning block, so nothing is left of it once reasoning is stripped
+			const messagesWithOnlyReasoning: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Hello",
+				},
+				{
+					role: "assistant",
+					content: [
+						{
+							type: "reasoning" as any,
+							text: "Only reasoning, no actual text",
+						},
+					],
+				},
+				{
+					role: "user",
+					content: "Continue",
+				},
+			]
+
+			const stream = handler.createMessage(systemPrompt, messagesWithOnlyReasoning)
+			const chunks: any[] = []
+
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Inspect the most recent create() call: the emptied assistant message must not be sent to the API
+			const calledMessages = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0].messages
+			expect(calledMessages).toHaveLength(2) // Only the two user messages
+			expect(calledMessages.every((m: any) => m.role === "user")).toBe(true)
+		})
+	})
+
+	describe("native tool calling", () => {
+		const systemPrompt = "You are a helpful assistant."
+		const messages: Anthropic.Messages.MessageParam[] = [
+			{
+				role: "user",
+				content: [{ type: "text" as const, text: "What's the weather in London?" }],
+			},
+		]
+
+		const mockTools = [
+			{
+				type: "function" as const,
+				function: {
+					name: "get_weather",
+					description: "Get the current weather",
+					parameters: {
+						type: "object",
+						properties: {
+							location: { type: "string" },
+						},
+						required: ["location"],
+					},
+				},
+			},
+		]
+
+		it("should include tools in request when toolProtocol is native", async () => {
+			const stream = handler.createMessage(systemPrompt, messages, {
+				taskId: "test-task",
+				tools: mockTools,
+				toolProtocol: "native",
+			})
+
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+				// Just consume
+			}
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					tools: expect.arrayContaining([
+						expect.objectContaining({
+							name: "get_weather",
+							description: "Get the current weather",
+							input_schema: expect.objectContaining({
+								type: "object",
+								properties: expect.objectContaining({
+									location: { type: "string" },
+								}),
+							}),
+						}),
+					]),
+				}),
+				expect.anything(),
+			)
+		})
+
+		it("should not include tools when toolProtocol is xml", async () => {
+			const stream = handler.createMessage(systemPrompt, messages, {
+				taskId: "test-task",
+				tools: mockTools,
+				toolProtocol: "xml",
+			})
+
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+				// Just consume
+			}
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.not.objectContaining({
+					tools: expect.anything(),
+				}),
+				expect.anything(),
+			)
+		})
+
+		it("should not include tools when no tools are provided", async () => {
+			const stream = handler.createMessage(systemPrompt, messages, {
+				taskId: "test-task",
+				toolProtocol: "native",
+			})
+
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+				// Just consume
+			}
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.not.objectContaining({
+					tools: expect.anything(),
+				}),
+				expect.anything(),
+			)
+		})
+
+		it("should convert tool_choice 'auto' to Anthropic format", async () => {
+			const stream = handler.createMessage(systemPrompt, messages, {
+				taskId: "test-task",
+				tools: mockTools,
+				toolProtocol: "native",
+				tool_choice: "auto",
+			})
+
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+				// Just consume
+			}
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					tool_choice: { type: "auto", disable_parallel_tool_use: true },
+				}),
+				expect.anything(),
+			)
+		})
+
+		it("should convert tool_choice 'required' to Anthropic 'any' format", async () => {
+			const stream = handler.createMessage(systemPrompt, messages, {
+				taskId: "test-task",
+				tools: mockTools,
+				toolProtocol: "native",
+				tool_choice: "required",
+			})
+
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+				// Just consume
+			}
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					tool_choice: { type: "any", disable_parallel_tool_use: true },
+				}),
+				expect.anything(),
+			)
+		})
+
+		it("should omit both tools and tool_choice when tool_choice is 'none'", async () => {
+			const stream = handler.createMessage(systemPrompt, messages, {
+				taskId: "test-task",
+				tools: mockTools,
+				toolProtocol: "native",
+				tool_choice: "none",
+			})
+
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+				// Just consume
+			}
+
+			// Verify that neither tools nor tool_choice are included in the request
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.not.objectContaining({
+					tools: expect.anything(),
+				}),
+				expect.anything(),
+			)
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.not.objectContaining({
+					tool_choice: expect.anything(),
+				}),
+				expect.anything(),
+			)
+		})
+
+		it("should convert specific tool_choice to Anthropic 'tool' format", async () => {
+			const stream = handler.createMessage(systemPrompt, messages, {
+				taskId: "test-task",
+				tools: mockTools,
+				toolProtocol: "native",
+				tool_choice: { type: "function" as const, function: { name: "get_weather" } },
+			})
+
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+				// Just consume
+			}
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					tool_choice: { type: "tool", name: "get_weather", disable_parallel_tool_use: true },
+				}),
+				expect.anything(),
+			)
+		})
+
+		it("should enable parallel tool calls when parallelToolCalls is true", async () => {
+			const stream = handler.createMessage(systemPrompt, messages, {
+				taskId: "test-task",
+				tools: mockTools,
+				toolProtocol: "native",
+				tool_choice: "auto",
+				parallelToolCalls: true,
+			})
+
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+				// Just consume
+			}
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					tool_choice: { type: "auto", disable_parallel_tool_use: false },
+				}),
+				expect.anything(),
+			)
+		})
+
+		it("should handle tool_use blocks in stream and emit tool_call_partial", async () => {
+			mockCreate.mockImplementationOnce(async () => ({
+				async *[Symbol.asyncIterator]() {
+					yield {
+						type: "message_start",
+						message: {
+							usage: {
+								input_tokens: 100,
+								output_tokens: 50,
+							},
+						},
+					}
+					yield {
+						type: "content_block_start",
+						index: 0,
+						content_block: {
+							type: "tool_use",
+							id: "toolu_123",
+							name: "get_weather",
+						},
+					}
+				},
+			}))
+
+			const stream = handler.createMessage(systemPrompt, messages, {
+				taskId: "test-task",
+				tools: mockTools,
+				toolProtocol: "native",
+			})
+
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Find the tool_call_partial chunk
+			const toolCallChunk = chunks.find((chunk) => chunk.type === "tool_call_partial")
+			expect(toolCallChunk).toBeDefined()
+			expect(toolCallChunk).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: "toolu_123",
+				name: "get_weather",
+				arguments: undefined,
+			})
+		})
+
+		it("should handle input_json_delta in stream and emit tool_call_partial arguments", async () => {
+			mockCreate.mockImplementationOnce(async () => ({
+				async *[Symbol.asyncIterator]() {
+					yield {
+						type: "message_start",
+						message: {
+							usage: {
+								input_tokens: 100,
+								output_tokens: 50,
+							},
+						},
+					}
+					yield {
+						type: "content_block_start",
+						index: 0,
+						content_block: {
+							type: "tool_use",
+							id: "toolu_123",
+							name: "get_weather",
+						},
+					}
+					yield {
+						type: "content_block_delta",
+						index: 0,
+						delta: {
+							type: "input_json_delta",
+							partial_json: '{"location":',
+						},
+					}
+					yield {
+						type: "content_block_delta",
+						index: 0,
+						delta: {
+							type: "input_json_delta",
+							partial_json: '"London"}',
+						},
+					}
+					yield {
+						type: "content_block_stop",
+						index: 0,
+					}
+				},
+			}))
+
+			const stream = handler.createMessage(systemPrompt, messages, {
+				taskId: "test-task",
+				tools: mockTools,
+				toolProtocol: "native",
+			})
+
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Find the tool_call_partial chunks
+			const toolCallChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial")
+			expect(toolCallChunks).toHaveLength(3)
+
+			// First chunk has id and name
+			expect(toolCallChunks[0]).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: "toolu_123",
+				name: "get_weather",
+				arguments: undefined,
+			})
+
+			// Subsequent chunks have arguments
+			expect(toolCallChunks[1]).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: undefined,
+				name: undefined,
+				arguments: '{"location":',
+			})
+
+			expect(toolCallChunks[2]).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: undefined,
+				name: undefined,
+				arguments: '"London"}',
+			})
+		})
+	})
 })

+ 1 - 1
src/api/providers/__tests__/base-openai-compatible-provider.spec.ts

@@ -380,7 +380,7 @@ describe("BaseOpenAiCompatibleProvider", () => {
 			const firstChunk = await stream.next()
 
 			expect(firstChunk.done).toBe(false)
-			expect(firstChunk.value).toEqual({ type: "usage", inputTokens: 100, outputTokens: 50 })
+			expect(firstChunk.value).toMatchObject({ type: "usage", inputTokens: 100, outputTokens: 50 })
 		})
 	})
 })

+ 41 - 0
src/api/providers/__tests__/bedrock-inference-profiles.spec.ts

@@ -254,5 +254,46 @@ describe("Amazon Bedrock Inference Profiles", () => {
 			const usModel = usHandler.getModel()
 			expect(usModel.id).toBe("us.anthropic.claude-3-sonnet-20240229-v1:0")
 		})
+
+		it("should prioritize global inference over cross-region inference when both are enabled", () => {
+			// When both global inference and cross-region inference are enabled,
+			// global inference should take precedence
+			const handler = createHandler({
+				awsUseCrossRegionInference: true,
+				awsUseGlobalInference: true,
+				awsRegion: "us-east-1",
+				apiModelId: "anthropic.claude-sonnet-4-20250514-v1:0", // Model that supports global inference
+			})
+
+			const model = handler.getModel()
+			// Should use global. prefix, not us. prefix
+			expect(model.id).toBe("global.anthropic.claude-sonnet-4-20250514-v1:0")
+		})
+
+		it("should fall back to cross-region inference when global inference is disabled", () => {
+			const handler = createHandler({
+				awsUseCrossRegionInference: true,
+				awsUseGlobalInference: false,
+				awsRegion: "us-east-1",
+				apiModelId: "anthropic.claude-sonnet-4-20250514-v1:0",
+			})
+
+			const model = handler.getModel()
+			// Should use cross-region prefix since global is disabled
+			expect(model.id).toBe("us.anthropic.claude-sonnet-4-20250514-v1:0")
+		})
+
+		it("should not apply global inference prefix to unsupported models even when enabled", () => {
+			const handler = createHandler({
+				awsUseCrossRegionInference: true,
+				awsUseGlobalInference: true,
+				awsRegion: "us-east-1",
+				apiModelId: "anthropic.claude-3-sonnet-20240229-v1:0", // Model that does NOT support global inference
+			})
+
+			const model = handler.getModel()
+			// Should fall back to cross-region prefix since model doesn't support global inference
+			expect(model.id).toBe("us.anthropic.claude-3-sonnet-20240229-v1:0")
+		})
 	})
 })

+ 6 - 10
src/api/providers/__tests__/featherless.spec.ts

@@ -123,11 +123,9 @@ describe("FeatherlessHandler", () => {
 			chunks.push(chunk)
 		}
 
-		expect(chunks).toEqual([
-			{ type: "reasoning", text: "Thinking..." },
-			{ type: "text", text: "Hello" },
-			{ type: "usage", inputTokens: 10, outputTokens: 5 },
-		])
+		expect(chunks[0]).toEqual({ type: "reasoning", text: "Thinking..." })
+		expect(chunks[1]).toEqual({ type: "text", text: "Hello" })
+		expect(chunks[2]).toMatchObject({ type: "usage", inputTokens: 10, outputTokens: 5 })
 	})
 
 	it("should fall back to base provider for non-DeepSeek models", async () => {
@@ -145,10 +143,8 @@ describe("FeatherlessHandler", () => {
 			chunks.push(chunk)
 		}
 
-		expect(chunks).toEqual([
-			{ type: "text", text: "Test response" },
-			{ type: "usage", inputTokens: 10, outputTokens: 5 },
-		])
+		expect(chunks[0]).toEqual({ type: "text", text: "Test response" })
+		expect(chunks[1]).toMatchObject({ type: "usage", inputTokens: 10, outputTokens: 5 })
 	})
 
 	it("should return default model when no model is specified", () => {
@@ -226,7 +222,7 @@ describe("FeatherlessHandler", () => {
 		const firstChunk = await stream.next()
 
 		expect(firstChunk.done).toBe(false)
-		expect(firstChunk.value).toEqual({ type: "usage", inputTokens: 10, outputTokens: 20 })
+		expect(firstChunk.value).toMatchObject({ type: "usage", inputTokens: 10, outputTokens: 20 })
 	})
 
 	it("createMessage should pass correct parameters to Featherless client for DeepSeek R1", async () => {

+ 4 - 6
src/api/providers/__tests__/fireworks.spec.ts

@@ -384,7 +384,7 @@ describe("FireworksHandler", () => {
 		const firstChunk = await stream.next()
 
 		expect(firstChunk.done).toBe(false)
-		expect(firstChunk.value).toEqual({ type: "usage", inputTokens: 10, outputTokens: 20 })
+		expect(firstChunk.value).toMatchObject({ type: "usage", inputTokens: 10, outputTokens: 20 })
 	})
 
 	it("createMessage should pass correct parameters to Fireworks client", async () => {
@@ -494,10 +494,8 @@ describe("FireworksHandler", () => {
 			chunks.push(chunk)
 		}
 
-		expect(chunks).toEqual([
-			{ type: "text", text: "Hello" },
-			{ type: "text", text: " world" },
-			{ type: "usage", inputTokens: 5, outputTokens: 10 },
-		])
+		expect(chunks[0]).toEqual({ type: "text", text: "Hello" })
+		expect(chunks[1]).toEqual({ type: "text", text: " world" })
+		expect(chunks[2]).toMatchObject({ type: "usage", inputTokens: 5, outputTokens: 10 })
 	})
 })

+ 9 - 28
src/api/providers/__tests__/gemini.spec.ts

@@ -7,7 +7,7 @@ import { type ModelInfo, geminiDefaultModelId } from "@roo-code/types"
 import { t } from "i18next"
 import { GeminiHandler } from "../gemini"
 
-const GEMINI_20_FLASH_THINKING_NAME = "gemini-2.0-flash-thinking-exp-1219"
+const GEMINI_MODEL_NAME = geminiDefaultModelId
 
 // kilocode_change start
 const getGeminiModelsMock = vi.hoisted(() => vi.fn())
@@ -44,7 +44,7 @@ describe("GeminiHandler", () => {
 
 		handler = new GeminiHandler({
 			apiKey: "test-key",
-			apiModelId: GEMINI_20_FLASH_THINKING_NAME,
+			apiModelId: GEMINI_MODEL_NAME,
 			geminiApiKey: "test-key",
 		})
 
@@ -61,7 +61,7 @@ describe("GeminiHandler", () => {
 	describe("constructor", () => {
 		it("should initialize with provided config", () => {
 			expect(handler["options"].geminiApiKey).toBe("test-key")
-			expect(handler["options"].apiModelId).toBe(GEMINI_20_FLASH_THINKING_NAME)
+			expect(handler["options"].apiModelId).toBe(GEMINI_MODEL_NAME)
 		})
 	})
 
@@ -100,14 +100,14 @@ describe("GeminiHandler", () => {
 			expect(chunks.length).toBe(3)
 			expect(chunks[0]).toEqual({ type: "text", text: "Hello" })
 			expect(chunks[1]).toEqual({ type: "text", text: " world!" })
-			expect(chunks[2]).toEqual({ type: "usage", inputTokens: 10, outputTokens: 5 })
+			expect(chunks[2]).toMatchObject({ type: "usage", inputTokens: 10, outputTokens: 5 })
 
 			// Verify the call to generateContentStream
 			expect(handler["client"].models.generateContentStream).toHaveBeenCalledWith(
 				expect.objectContaining({
-					model: GEMINI_20_FLASH_THINKING_NAME,
+					model: GEMINI_MODEL_NAME,
 					config: expect.objectContaining({
-						temperature: 0,
+						temperature: 1,
 						systemInstruction: systemPrompt,
 					}),
 				}),
@@ -140,11 +140,11 @@ describe("GeminiHandler", () => {
 
 			// Verify the call to generateContent
 			expect(handler["client"].models.generateContent).toHaveBeenCalledWith({
-				model: GEMINI_20_FLASH_THINKING_NAME,
+				model: GEMINI_MODEL_NAME,
 				contents: [{ role: "user", parts: [{ text: "Test prompt" }] }],
 				config: {
 					httpOptions: undefined,
-					temperature: 0,
+					temperature: 1,
 				},
 			})
 		})
@@ -172,10 +172,8 @@ describe("GeminiHandler", () => {
 	describe("getModel", () => {
 		it("should return correct model info", () => {
 			const modelInfo = handler.getModel()
-			expect(modelInfo.id).toBe(GEMINI_20_FLASH_THINKING_NAME)
+			expect(modelInfo.id).toBe(GEMINI_MODEL_NAME)
 			expect(modelInfo.info).toBeDefined()
-			expect(modelInfo.info.maxTokens).toBe(8192)
-			expect(modelInfo.info.contextWindow).toBe(32_767)
 		})
 
 		it("should return default model if invalid model specified", () => {
@@ -232,23 +230,6 @@ describe("GeminiHandler", () => {
 			expect(handler.calculateCost({ info: mockInfo, inputTokens: 0, outputTokens })).toBeCloseTo(expectedCost)
 		})
 
-		it("should calculate cost with cache write tokens", () => {
-			const inputTokens = 10000
-			const outputTokens = 20000
-			const cacheWriteTokens = 5000
-			const CACHE_TTL = 5 // Match the constant in gemini.ts
-
-			// Added non-null assertions (!)
-			const expectedInputCost = (inputTokens / 1_000_000) * mockInfo.inputPrice!
-			const expectedOutputCost = (outputTokens / 1_000_000) * mockInfo.outputPrice!
-			const expectedCacheWriteCost =
-				mockInfo.cacheWritesPrice! * (cacheWriteTokens / 1_000_000) * (CACHE_TTL / 60)
-			const expectedCost = expectedInputCost + expectedOutputCost + expectedCacheWriteCost
-
-			const cost = handler.calculateCost({ info: mockInfo, inputTokens, outputTokens })
-			expect(cost).toBeCloseTo(expectedCost)
-		})
-
 		it("should calculate cost with cache read tokens", () => {
 			const inputTokens = 10000 // Total logical input
 			const outputTokens = 20000

+ 5 - 3
src/api/providers/__tests__/groq.spec.ts

@@ -119,9 +119,10 @@ describe("GroqHandler", () => {
 			type: "usage",
 			inputTokens: 10,
 			outputTokens: 20,
-			cacheWriteTokens: 0,
-			cacheReadTokens: 0,
 		})
+		// cacheWriteTokens and cacheReadTokens will be undefined when 0
+		expect(firstChunk.value.cacheWriteTokens).toBeUndefined()
+		expect(firstChunk.value.cacheReadTokens).toBeUndefined()
 		// Check that totalCost is a number (we don't need to test the exact value as that's tested in cost.spec.ts)
 		expect(typeof firstChunk.value.totalCost).toBe("number")
 	})
@@ -158,9 +159,10 @@ describe("GroqHandler", () => {
 			type: "usage",
 			inputTokens: 100,
 			outputTokens: 50,
-			cacheWriteTokens: 0,
 			cacheReadTokens: 30,
 		})
+		// cacheWriteTokens will be undefined when 0
+		expect(firstChunk.value.cacheWriteTokens).toBeUndefined()
 		expect(typeof firstChunk.value.totalCost).toBe("number")
 	})
 

+ 2 - 2
src/api/providers/__tests__/io-intelligence.spec.ts

@@ -178,7 +178,7 @@ describe("IOIntelligenceHandler", () => {
 		expect(results).toHaveLength(3)
 		expect(results[0]).toEqual({ type: "text", text: "Hello" })
 		expect(results[1]).toEqual({ type: "text", text: " world" })
-		expect(results[2]).toEqual({
+		expect(results[2]).toMatchObject({
 			type: "usage",
 			inputTokens: 10,
 			outputTokens: 5,
@@ -243,7 +243,7 @@ describe("IOIntelligenceHandler", () => {
 		const firstChunk = await stream.next()
 
 		expect(firstChunk.done).toBe(false)
-		expect(firstChunk.value).toEqual({ type: "usage", inputTokens: 10, outputTokens: 20 })
+		expect(firstChunk.value).toMatchObject({ type: "usage", inputTokens: 10, outputTokens: 20 })
 	})
 
 	it("should return model info from cache when available", () => {

+ 152 - 80
src/api/providers/__tests__/minimax.spec.ts

@@ -8,27 +8,35 @@ vitest.mock("vscode", () => ({
 	},
 }))
 
-import OpenAI from "openai"
 import { Anthropic } from "@anthropic-ai/sdk"
 
 import { type MinimaxModelId, minimaxDefaultModelId, minimaxModels } from "@roo-code/types"
 
 import { MiniMaxHandler } from "../minimax"
 
-vitest.mock("openai", () => {
-	const createMock = vitest.fn()
+vitest.mock("@anthropic-ai/sdk", () => {
+	const mockCreate = vitest.fn()
+	const mockCountTokens = vitest.fn()
 	return {
-		default: vitest.fn(() => ({ chat: { completions: { create: createMock } } })),
+		Anthropic: vitest.fn(() => ({
+			messages: {
+				create: mockCreate,
+				countTokens: mockCountTokens,
+			},
+		})),
 	}
 })
 
 describe("MiniMaxHandler", () => {
 	let handler: MiniMaxHandler
 	let mockCreate: any
+	let mockCountTokens: any
 
 	beforeEach(() => {
 		vitest.clearAllMocks()
-		mockCreate = (OpenAI as unknown as any)().chat.completions.create
+		const anthropicInstance = (Anthropic as unknown as any)()
+		mockCreate = anthropicInstance.messages.create
+		mockCountTokens = anthropicInstance.messages.countTokens
 	})
 
 	describe("International MiniMax (default)", () => {
@@ -41,9 +49,21 @@ describe("MiniMaxHandler", () => {
 
 		it("should use the correct international MiniMax base URL by default", () => {
 			new MiniMaxHandler({ minimaxApiKey: "test-minimax-api-key" })
-			expect(OpenAI).toHaveBeenCalledWith(
+			expect(Anthropic).toHaveBeenCalledWith(
 				expect.objectContaining({
-					baseURL: "https://api.minimax.io/v1",
+					baseURL: "https://api.minimax.io/anthropic",
+				}),
+			)
+		})
+
+		it("should convert /v1 endpoint to /anthropic endpoint", () => {
+			new MiniMaxHandler({
+				minimaxApiKey: "test-minimax-api-key",
+				minimaxBaseUrl: "https://api.minimax.io/v1",
+			})
+			expect(Anthropic).toHaveBeenCalledWith(
+				expect.objectContaining({
+					baseURL: "https://api.minimax.io/anthropic",
 				}),
 			)
 		})
@@ -51,7 +71,7 @@ describe("MiniMaxHandler", () => {
 		it("should use the provided API key", () => {
 			const minimaxApiKey = "test-minimax-api-key"
 			new MiniMaxHandler({ minimaxApiKey })
-			expect(OpenAI).toHaveBeenCalledWith(expect.objectContaining({ apiKey: minimaxApiKey }))
+			expect(Anthropic).toHaveBeenCalledWith(expect.objectContaining({ apiKey: minimaxApiKey }))
 		})
 
 		it("should return default model when no model is specified", () => {
@@ -117,15 +137,25 @@ describe("MiniMaxHandler", () => {
 				minimaxApiKey: "test-minimax-api-key",
 				minimaxBaseUrl: "https://api.minimaxi.com/anthropic", // kilocode_change: anthropic
 			})
-			expect(OpenAI).toHaveBeenCalledWith(
-				expect.objectContaining({ baseURL: "https://api.minimaxi.com/anthropic" }), // kilocode_change: anthropic
+			expect(Anthropic).toHaveBeenCalledWith(
+				expect.objectContaining({ baseURL: "https://api.minimaxi.com/anthropic" }),
+			)
+		})
+
+		it("should convert China /v1 endpoint to /anthropic endpoint", () => {
+			new MiniMaxHandler({
+				minimaxApiKey: "test-minimax-api-key",
+				minimaxBaseUrl: "https://api.minimaxi.com/v1", // pass the /v1 URL so the conversion named in the title is what gets exercised
+			})
+			expect(Anthropic).toHaveBeenCalledWith(
+				expect.objectContaining({ baseURL: "https://api.minimaxi.com/anthropic" }),
+			)
+		})
 
 		it("should use the provided API key for China", () => {
 			const minimaxApiKey = "test-minimax-api-key"
 			new MiniMaxHandler({ minimaxApiKey, minimaxBaseUrl: "https://api.minimaxi.com/anthropic" }) // kilocode_change: anthropic
-			expect(OpenAI).toHaveBeenCalledWith(expect.objectContaining({ apiKey: minimaxApiKey }))
+			expect(Anthropic).toHaveBeenCalledWith(expect.objectContaining({ apiKey: minimaxApiKey }))
 		})
 
 		it("should return default model when no model is specified", () => {
@@ -138,9 +168,9 @@ describe("MiniMaxHandler", () => {
 	describe("Default behavior", () => {
 		it("should default to international base URL when none is specified", () => {
 			const handlerDefault = new MiniMaxHandler({ minimaxApiKey: "test-minimax-api-key" })
-			expect(OpenAI).toHaveBeenCalledWith(
+			expect(Anthropic).toHaveBeenCalledWith(
 				expect.objectContaining({
-					baseURL: "https://api.minimax.io/v1",
+					baseURL: "https://api.minimax.io/anthropic",
 				}),
 			)
 
@@ -163,7 +193,9 @@ describe("MiniMaxHandler", () => {
 
 		it("completePrompt method should return text from MiniMax API", async () => {
 			const expectedResponse = "This is a test response from MiniMax"
-			mockCreate.mockResolvedValueOnce({ choices: [{ message: { content: expectedResponse } }] })
+			mockCreate.mockResolvedValueOnce({
+				content: [{ type: "text", text: expectedResponse }],
+			})
 			const result = await handler.completePrompt("test prompt")
 			expect(result).toBe(expectedResponse)
 		})
@@ -177,18 +209,20 @@ describe("MiniMaxHandler", () => {
 		it("createMessage should yield text content from stream", async () => {
 			const testContent = "This is test content from MiniMax stream"
 
-			mockCreate.mockImplementationOnce(() => {
-				return {
-					[Symbol.asyncIterator]: () => ({
-						next: vitest
-							.fn()
-							.mockResolvedValueOnce({
-								done: false,
-								value: { choices: [{ delta: { content: testContent } }] },
-							})
-							.mockResolvedValueOnce({ done: true }),
-					}),
-				}
+			mockCreate.mockResolvedValueOnce({
+				[Symbol.asyncIterator]: () => ({
+					next: vitest
+						.fn()
+						.mockResolvedValueOnce({
+							done: false,
+							value: {
+								type: "content_block_start",
+								index: 0,
+								content_block: { type: "text", text: testContent },
+							},
+						})
+						.mockResolvedValueOnce({ done: true }),
+				}),
 			})
 
 			const stream = handler.createMessage("system prompt", [])
@@ -199,21 +233,24 @@ describe("MiniMaxHandler", () => {
 		})
 
 		it("createMessage should yield usage data from stream", async () => {
-			mockCreate.mockImplementationOnce(() => {
-				return {
-					[Symbol.asyncIterator]: () => ({
-						next: vitest
-							.fn()
-							.mockResolvedValueOnce({
-								done: false,
-								value: {
-									choices: [{ delta: {} }],
-									usage: { prompt_tokens: 10, completion_tokens: 20 },
+			mockCreate.mockResolvedValueOnce({
+				[Symbol.asyncIterator]: () => ({
+					next: vitest
+						.fn()
+						.mockResolvedValueOnce({
+							done: false,
+							value: {
+								type: "message_start",
+								message: {
+									usage: {
+										input_tokens: 10,
+										output_tokens: 20,
+									},
 								},
-							})
-							.mockResolvedValueOnce({ done: true }),
-					}),
-				}
+							},
+						})
+						.mockResolvedValueOnce({ done: true }),
+				}),
 			})
 
 			const stream = handler.createMessage("system prompt", [])
@@ -231,14 +268,12 @@ describe("MiniMaxHandler", () => {
 				minimaxApiKey: "test-minimax-api-key",
 			})
 
-			mockCreate.mockImplementationOnce(() => {
-				return {
-					[Symbol.asyncIterator]: () => ({
-						async next() {
-							return { done: true }
-						},
-					}),
-				}
+			mockCreate.mockResolvedValueOnce({
+				[Symbol.asyncIterator]: () => ({
+					async next() {
+						return { done: true }
+					},
+				}),
 			})
 
 			const systemPrompt = "Test system prompt for MiniMax"
@@ -252,23 +287,20 @@ describe("MiniMaxHandler", () => {
 					model: modelId,
 					max_tokens: Math.min(modelInfo.maxTokens, Math.ceil(modelInfo.contextWindow * 0.2)),
 					temperature: 1,
-					messages: expect.arrayContaining([{ role: "system", content: systemPrompt }]),
+					system: expect.any(Array),
+					messages: expect.any(Array),
 					stream: true,
-					stream_options: { include_usage: true },
 				}),
-				undefined,
 			)
 		})
 
 		it("should use temperature 1 by default", async () => {
-			mockCreate.mockImplementationOnce(() => {
-				return {
-					[Symbol.asyncIterator]: () => ({
-						async next() {
-							return { done: true }
-						},
-					}),
-				}
+			mockCreate.mockResolvedValueOnce({
+				[Symbol.asyncIterator]: () => ({
+					async next() {
+						return { done: true }
+					},
+				}),
 			})
 
 			const messageGenerator = handler.createMessage("test", [])
@@ -278,36 +310,76 @@ describe("MiniMaxHandler", () => {
 				expect.objectContaining({
 					temperature: 1,
 				}),
-				undefined,
 			)
 		})
 
-		it("should handle streaming chunks with null choices array", async () => {
-			const testContent = "Content after null choices"
-
-			mockCreate.mockImplementationOnce(() => {
-				return {
-					[Symbol.asyncIterator]: () => ({
-						next: vitest
-							.fn()
-							.mockResolvedValueOnce({
-								done: false,
-								value: { choices: null },
-							})
-							.mockResolvedValueOnce({
-								done: false,
-								value: { choices: [{ delta: { content: testContent } }] },
-							})
-							.mockResolvedValueOnce({ done: true }),
-					}),
-				}
+		it("should handle thinking blocks in stream", async () => {
+			const thinkingContent = "Let me think about this..."
+
+			mockCreate.mockResolvedValueOnce({
+				[Symbol.asyncIterator]: () => ({
+					next: vitest
+						.fn()
+						.mockResolvedValueOnce({
+							done: false,
+							value: {
+								type: "content_block_start",
+								index: 0,
+								content_block: { type: "thinking", thinking: thinkingContent },
+							},
+						})
+						.mockResolvedValueOnce({ done: true }),
+				}),
 			})
 
 			const stream = handler.createMessage("system prompt", [])
 			const firstChunk = await stream.next()
 
 			expect(firstChunk.done).toBe(false)
-			expect(firstChunk.value).toEqual({ type: "text", text: testContent })
+			expect(firstChunk.value).toEqual({ type: "reasoning", text: thinkingContent })
+		})
+
+		it("should handle tool calls in stream", async () => {
+			mockCreate.mockResolvedValueOnce({
+				[Symbol.asyncIterator]: () => ({
+					next: vitest
+						.fn()
+						.mockResolvedValueOnce({
+							done: false,
+							value: {
+								type: "content_block_start",
+								index: 0,
+								content_block: {
+									type: "tool_use",
+									id: "tool-123",
+									name: "get_weather",
+									input: { city: "London" },
+								},
+							},
+						})
+						.mockResolvedValueOnce({
+							done: false,
+							value: {
+								type: "content_block_stop",
+								index: 0,
+							},
+						})
+						.mockResolvedValueOnce({ done: true }),
+				}),
+			})
+
+			const stream = handler.createMessage("system prompt", [])
+			const firstChunk = await stream.next()
+
+			expect(firstChunk.done).toBe(false)
+			// Provider now yields tool_call_partial chunks, NativeToolCallParser handles reassembly
+			expect(firstChunk.value).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: "tool-123",
+				name: "get_weather",
+				arguments: undefined,
+			})
 		})
 	})
 

+ 220 - 1
src/api/providers/__tests__/mistral.spec.ts

@@ -39,9 +39,11 @@ vi.mock("@mistralai/mistralai", () => {
 })
 
 import type { Anthropic } from "@anthropic-ai/sdk"
+import type OpenAI from "openai"
 import { MistralHandler } from "../mistral"
 import type { ApiHandlerOptions } from "../../../shared/api"
-import type { ApiStreamTextChunk, ApiStreamReasoningChunk } from "../../transform/stream"
+import type { ApiHandlerCreateMessageMetadata } from "../../index"
+import type { ApiStreamTextChunk, ApiStreamReasoningChunk, ApiStreamToolCallPartialChunk } from "../../transform/stream"
 
 describe("MistralHandler", () => {
 	let handler: MistralHandler
@@ -223,6 +225,223 @@ describe("MistralHandler", () => {
 		})
 	})
 
+	describe("native tool calling", () => {
+		const systemPrompt = "You are a helpful assistant."
+		const messages: Anthropic.Messages.MessageParam[] = [
+			{
+				role: "user",
+				content: [{ type: "text", text: "What's the weather?" }],
+			},
+		]
+
+		const mockTools: OpenAI.Chat.ChatCompletionTool[] = [
+			{
+				type: "function",
+				function: {
+					name: "get_weather",
+					description: "Get the current weather",
+					parameters: {
+						type: "object",
+						properties: {
+							location: { type: "string" },
+						},
+						required: ["location"],
+					},
+				},
+			},
+		]
+
+		it("should include tools in request when toolProtocol is native", async () => {
+			const metadata: ApiHandlerCreateMessageMetadata = {
+				taskId: "test-task",
+				tools: mockTools,
+				toolProtocol: "native",
+			}
+
+			const iterator = handler.createMessage(systemPrompt, messages, metadata)
+			await iterator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					tools: expect.arrayContaining([
+						expect.objectContaining({
+							type: "function",
+							function: expect.objectContaining({
+								name: "get_weather",
+								description: "Get the current weather",
+								parameters: expect.any(Object),
+							}),
+						}),
+					]),
+					toolChoice: "any",
+				}),
+			)
+		})
+
+		it("should not include tools when toolProtocol is xml", async () => {
+			const metadata: ApiHandlerCreateMessageMetadata = {
+				taskId: "test-task",
+				tools: mockTools,
+				toolProtocol: "xml",
+			}
+
+			const iterator = handler.createMessage(systemPrompt, messages, metadata)
+			await iterator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.not.objectContaining({
+					tools: expect.anything(),
+				}),
+			)
+		})
+
+		it("should handle tool calls in streaming response", async () => {
+			// Mock stream with tool calls
+			mockCreate.mockImplementationOnce(async (_options) => {
+				const stream = {
+					[Symbol.asyncIterator]: async function* () {
+						yield {
+							data: {
+								choices: [
+									{
+										delta: {
+											toolCalls: [
+												{
+													id: "call_123",
+													type: "function",
+													function: {
+														name: "get_weather",
+														arguments: '{"location":"New York"}',
+													},
+												},
+											],
+										},
+										index: 0,
+									},
+								],
+							},
+						}
+					},
+				}
+				return stream
+			})
+
+			const metadata: ApiHandlerCreateMessageMetadata = {
+				taskId: "test-task",
+				tools: mockTools,
+				toolProtocol: "native",
+			}
+
+			const iterator = handler.createMessage(systemPrompt, messages, metadata)
+			const results: ApiStreamToolCallPartialChunk[] = []
+
+			for await (const chunk of iterator) {
+				if (chunk.type === "tool_call_partial") {
+					results.push(chunk)
+				}
+			}
+
+			expect(results).toHaveLength(1)
+			expect(results[0]).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: "call_123",
+				name: "get_weather",
+				arguments: '{"location":"New York"}',
+			})
+		})
+
+		it("should handle multiple tool calls in a single response", async () => {
+			// Mock stream with multiple tool calls
+			mockCreate.mockImplementationOnce(async (_options) => {
+				const stream = {
+					[Symbol.asyncIterator]: async function* () {
+						yield {
+							data: {
+								choices: [
+									{
+										delta: {
+											toolCalls: [
+												{
+													id: "call_1",
+													type: "function",
+													function: {
+														name: "get_weather",
+														arguments: '{"location":"NYC"}',
+													},
+												},
+												{
+													id: "call_2",
+													type: "function",
+													function: {
+														name: "get_weather",
+														arguments: '{"location":"LA"}',
+													},
+												},
+											],
+										},
+										index: 0,
+									},
+								],
+							},
+						}
+					},
+				}
+				return stream
+			})
+
+			const metadata: ApiHandlerCreateMessageMetadata = {
+				taskId: "test-task",
+				tools: mockTools,
+				toolProtocol: "native",
+			}
+
+			const iterator = handler.createMessage(systemPrompt, messages, metadata)
+			const results: ApiStreamToolCallPartialChunk[] = []
+
+			for await (const chunk of iterator) {
+				if (chunk.type === "tool_call_partial") {
+					results.push(chunk)
+				}
+			}
+
+			expect(results).toHaveLength(2)
+			expect(results[0]).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: "call_1",
+				name: "get_weather",
+				arguments: '{"location":"NYC"}',
+			})
+			expect(results[1]).toEqual({
+				type: "tool_call_partial",
+				index: 1,
+				id: "call_2",
+				name: "get_weather",
+				arguments: '{"location":"LA"}',
+			})
+		})
+
+		it("should always set toolChoice to 'any' when tools are provided", async () => {
+			// Even if tool_choice is provided in metadata, the handler overrides it and sends toolChoice "any"
+			const metadata: ApiHandlerCreateMessageMetadata = {
+				taskId: "test-task",
+				tools: mockTools,
+				toolProtocol: "native",
+				tool_choice: "auto", // This should be ignored
+			}
+
+			const iterator = handler.createMessage(systemPrompt, messages, metadata)
+			await iterator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					toolChoice: "any",
+				}),
+			)
+		})
+	})
+
 	describe("completePrompt", () => {
 		it("should complete prompt successfully", async () => {
 			const prompt = "Test prompt"

+ 6 - 0
src/api/providers/__tests__/openai-native.spec.ts

@@ -159,6 +159,9 @@ describe("OpenAiNativeHandler", () => {
 						},
 					],
 				}),
+				expect.objectContaining({
+					signal: expect.any(Object),
+				}),
 			)
 		})
 
@@ -1136,6 +1139,9 @@ describe("GPT-5 streaming event coverage (additional)", () => {
 					stream: false,
 					store: false,
 				}),
+				expect.objectContaining({
+					signal: expect.any(Object),
+				}),
 			)
 		})
 

+ 341 - 0
src/api/providers/__tests__/openai.spec.ts

@@ -158,6 +158,55 @@ describe("OpenAiHandler", () => {
 			expect(usageChunk?.outputTokens).toBe(5)
 		})
 
+		it("should handle tool calls in non-streaming mode", async () => {
+			mockCreate.mockResolvedValueOnce({
+				choices: [
+					{
+						message: {
+							role: "assistant",
+							content: null,
+							tool_calls: [
+								{
+									id: "call_1",
+									type: "function",
+									function: {
+										name: "test_tool",
+										arguments: '{"arg":"value"}',
+									},
+								},
+							],
+						},
+						finish_reason: "tool_calls",
+					},
+				],
+				usage: {
+					prompt_tokens: 10,
+					completion_tokens: 5,
+					total_tokens: 15,
+				},
+			})
+
+			const handler = new OpenAiHandler({
+				...mockOptions,
+				openAiStreamingEnabled: false,
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const toolCallChunks = chunks.filter((chunk) => chunk.type === "tool_call")
+			expect(toolCallChunks).toHaveLength(1)
+			expect(toolCallChunks[0]).toEqual({
+				type: "tool_call",
+				id: "call_1",
+				name: "test_tool",
+				arguments: '{"arg":"value"}',
+			})
+		})
+
 		it("should handle streaming responses", async () => {
 			const stream = handler.createMessage(systemPrompt, messages)
 			const chunks: any[] = []
@@ -171,6 +220,135 @@ describe("OpenAiHandler", () => {
 			expect(textChunks[0].text).toBe("Test response")
 		})
 
+		it("should handle tool calls in streaming responses", async () => {
+			mockCreate.mockImplementation(async (options) => {
+				return {
+					[Symbol.asyncIterator]: async function* () {
+						yield {
+							choices: [
+								{
+									delta: {
+										tool_calls: [
+											{
+												index: 0,
+												id: "call_1",
+												function: { name: "test_tool", arguments: "" },
+											},
+										],
+									},
+									finish_reason: null,
+								},
+							],
+						}
+						yield {
+							choices: [
+								{
+									delta: {
+										tool_calls: [{ index: 0, function: { arguments: '{"arg":' } }],
+									},
+									finish_reason: null,
+								},
+							],
+						}
+						yield {
+							choices: [
+								{
+									delta: {
+										tool_calls: [{ index: 0, function: { arguments: '"value"}' } }],
+									},
+									finish_reason: "tool_calls",
+								},
+							],
+						}
+					},
+				}
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Provider now yields tool_call_partial chunks; NativeToolCallParser handles reassembly
+			const toolCallPartialChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial")
+			expect(toolCallPartialChunks).toHaveLength(3)
+			// First chunk has id and name
+			expect(toolCallPartialChunks[0]).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: "call_1",
+				name: "test_tool",
+				arguments: "",
+			})
+			// Subsequent chunks have arguments
+			expect(toolCallPartialChunks[1]).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: undefined,
+				name: undefined,
+				arguments: '{"arg":',
+			})
+			expect(toolCallPartialChunks[2]).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: undefined,
+				name: undefined,
+				arguments: '"value"}',
+			})
+		})
+
+		it("should yield tool calls even when finish_reason is not set (fallback behavior)", async () => {
+			mockCreate.mockImplementation(async (options) => {
+				return {
+					[Symbol.asyncIterator]: async function* () {
+						yield {
+							choices: [
+								{
+									delta: {
+										tool_calls: [
+											{
+												index: 0,
+												id: "call_fallback",
+												function: { name: "fallback_tool", arguments: '{"test":"fallback"}' },
+											},
+										],
+									},
+									finish_reason: null,
+								},
+							],
+						}
+						// Stream ends without finish_reason being set to "tool_calls"
+						yield {
+							choices: [
+								{
+									delta: {},
+									finish_reason: "stop", // Different finish reason
+								},
+							],
+						}
+					},
+				}
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Provider now yields tool_call_partial chunks; NativeToolCallParser handles reassembly
+			const toolCallPartialChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial")
+			expect(toolCallPartialChunks).toHaveLength(1)
+			expect(toolCallPartialChunks[0]).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: "call_fallback",
+				name: "fallback_tool",
+				arguments: '{"test":"fallback"}',
+			})
+		})
+
 		it("should include reasoning_effort when reasoning effort is enabled", async () => {
 			const reasoningOptions: ApiHandlerOptions = {
 				...mockOptions,
@@ -619,6 +797,120 @@ describe("OpenAiHandler", () => {
 			)
 		})
 
+		it("should handle tool calls with O3 model in streaming mode", async () => {
+			const o3Handler = new OpenAiHandler(o3Options)
+
+			mockCreate.mockImplementation(async (options) => {
+				return {
+					[Symbol.asyncIterator]: async function* () {
+						yield {
+							choices: [
+								{
+									delta: {
+										tool_calls: [
+											{
+												index: 0,
+												id: "call_1",
+												function: { name: "test_tool", arguments: "" },
+											},
+										],
+									},
+									finish_reason: null,
+								},
+							],
+						}
+						yield {
+							choices: [
+								{
+									delta: {
+										tool_calls: [{ index: 0, function: { arguments: "{}" } }],
+									},
+									finish_reason: "tool_calls",
+								},
+							],
+						}
+					},
+				}
+			})
+
+			const stream = o3Handler.createMessage("system", [])
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Provider now yields tool_call_partial chunks; NativeToolCallParser handles reassembly
+			const toolCallPartialChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial")
+			expect(toolCallPartialChunks).toHaveLength(2)
+			expect(toolCallPartialChunks[0]).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: "call_1",
+				name: "test_tool",
+				arguments: "",
+			})
+			expect(toolCallPartialChunks[1]).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: undefined,
+				name: undefined,
+				arguments: "{}",
+			})
+		})
+
+		it("should yield tool calls for O3 model even when finish_reason is not set (fallback behavior)", async () => {
+			const o3Handler = new OpenAiHandler(o3Options)
+
+			mockCreate.mockImplementation(async (options) => {
+				return {
+					[Symbol.asyncIterator]: async function* () {
+						yield {
+							choices: [
+								{
+									delta: {
+										tool_calls: [
+											{
+												index: 0,
+												id: "call_o3_fallback",
+												function: { name: "o3_fallback_tool", arguments: '{"o3":"test"}' },
+											},
+										],
+									},
+									finish_reason: null,
+								},
+							],
+						}
+						// Stream ends with a different finish reason
+						yield {
+							choices: [
+								{
+									delta: {},
+									finish_reason: "length", // Different finish reason
+								},
+							],
+						}
+					},
+				}
+			})
+
+			const stream = o3Handler.createMessage("system", [])
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Provider now yields tool_call_partial chunks; NativeToolCallParser handles reassembly
+			const toolCallPartialChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial")
+			expect(toolCallPartialChunks).toHaveLength(1)
+			expect(toolCallPartialChunks[0]).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: "call_o3_fallback",
+				name: "o3_fallback_tool",
+				arguments: '{"o3":"test"}',
+			})
+		})
+
 		it("should handle O3 model with streaming and exclude max_tokens when includeMaxTokens is false", async () => {
 			const o3Handler = new OpenAiHandler({
 				...o3Options,
@@ -706,6 +998,55 @@ describe("OpenAiHandler", () => {
 			expect(callArgs).not.toHaveProperty("stream")
 		})
 
+		it("should handle tool calls with O3 model in non-streaming mode", async () => {
+			const o3Handler = new OpenAiHandler({
+				...o3Options,
+				openAiStreamingEnabled: false,
+			})
+
+			mockCreate.mockResolvedValueOnce({
+				choices: [
+					{
+						message: {
+							role: "assistant",
+							content: null,
+							tool_calls: [
+								{
+									id: "call_1",
+									type: "function",
+									function: {
+										name: "test_tool",
+										arguments: "{}",
+									},
+								},
+							],
+						},
+						finish_reason: "tool_calls",
+					},
+				],
+				usage: {
+					prompt_tokens: 10,
+					completion_tokens: 5,
+					total_tokens: 15,
+				},
+			})
+
+			const stream = o3Handler.createMessage("system", [])
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const toolCallChunks = chunks.filter((chunk) => chunk.type === "tool_call")
+			expect(toolCallChunks).toHaveLength(1)
+			expect(toolCallChunks[0]).toEqual({
+				type: "tool_call",
+				id: "call_1",
+				name: "test_tool",
+				arguments: "{}",
+			})
+		})
+
 		it("should use default temperature of 0 when not specified for O3 models", async () => {
 			const o3Handler = new OpenAiHandler({
 				...o3Options,

+ 0 - 1
src/api/providers/__tests__/openrouter.spec.ts

@@ -305,7 +305,6 @@ describe("OpenRouterHandler", () => {
 				{
 					model: mockOptions.openRouterModelId,
 					max_tokens: 8192,
-					thinking: undefined,
 					temperature: 0,
 					messages: [{ role: "user", content: "test prompt" }],
 					stream: false,

+ 347 - 0
src/api/providers/__tests__/roo.spec.ts

@@ -102,6 +102,24 @@ vitest.mock("../../providers/fetchers/modelCache", () => ({
 					inputPrice: 0,
 					outputPrice: 0,
 				},
+				"minimax/minimax-m2": {
+					maxTokens: 32_768,
+					contextWindow: 1_000_000,
+					supportsImages: false,
+					supportsPromptCache: true,
+					supportsNativeTools: true,
+					inputPrice: 0.15,
+					outputPrice: 0.6,
+				},
+				"anthropic/claude-haiku-4.5": {
+					maxTokens: 8_192,
+					contextWindow: 200_000,
+					supportsImages: true,
+					supportsPromptCache: true,
+					supportsNativeTools: true,
+					inputPrice: 0.8,
+					outputPrice: 4,
+				},
 			}
 		}
 		return {}
@@ -402,6 +420,41 @@ describe("RooHandler", () => {
 				expect(modelInfo.info.contextWindow).toBeDefined()
 			}
 		})
+
+		it("should apply defaultToolProtocol: native for minimax/minimax-m2", () => {
+			const handlerWithMinimax = new RooHandler({
+				apiModelId: "minimax/minimax-m2",
+			})
+			const modelInfo = handlerWithMinimax.getModel()
+			expect(modelInfo.id).toBe("minimax/minimax-m2")
+			expect((modelInfo.info as any).defaultToolProtocol).toBe("native")
+			// Verify cached model info is preserved
+			expect(modelInfo.info.maxTokens).toBe(32_768)
+			expect(modelInfo.info.contextWindow).toBe(1_000_000)
+		})
+
+		it("should apply defaultToolProtocol: native for anthropic/claude-haiku-4.5", () => {
+			const handlerWithHaiku = new RooHandler({
+				apiModelId: "anthropic/claude-haiku-4.5",
+			})
+			const modelInfo = handlerWithHaiku.getModel()
+			expect(modelInfo.id).toBe("anthropic/claude-haiku-4.5")
+			expect((modelInfo.info as any).defaultToolProtocol).toBe("native")
+			// Verify cached model info is preserved
+			expect(modelInfo.info.maxTokens).toBe(8_192)
+			expect(modelInfo.info.contextWindow).toBe(200_000)
+		})
+
+		it("should not override existing properties when applying MODEL_DEFAULTS", () => {
+			const handlerWithMinimax = new RooHandler({
+				apiModelId: "minimax/minimax-m2",
+			})
+			const modelInfo = handlerWithMinimax.getModel()
+			// The defaults should be merged in without overwriting existing cached values
+			expect(modelInfo.info.supportsNativeTools).toBe(true)
+			expect(modelInfo.info.inputPrice).toBe(0.15)
+			expect(modelInfo.info.outputPrice).toBe(0.6)
+		})
 	})
 
 	describe("temperature and model configuration", () => {
@@ -630,4 +683,298 @@ describe("RooHandler", () => {
 			)
 		})
 	})
+
+	describe("tool calls handling", () => {
+		beforeEach(() => {
+			handler = new RooHandler(mockOptions)
+		})
+
+		it("should yield raw tool call chunks when tool_calls present", async () => {
+			mockCreate.mockResolvedValueOnce({
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						choices: [
+							{
+								delta: {
+									tool_calls: [
+										{
+											index: 0,
+											id: "call_123",
+											function: { name: "read_file", arguments: '{"path":"' },
+										},
+									],
+								},
+								index: 0,
+							},
+						],
+					}
+					yield {
+						choices: [
+							{
+								delta: {
+									tool_calls: [
+										{
+											index: 0,
+											function: { arguments: 'test.ts"}' },
+										},
+									],
+								},
+								index: 0,
+							},
+						],
+					}
+					yield {
+						choices: [
+							{
+								delta: {},
+								finish_reason: "tool_calls",
+								index: 0,
+							},
+						],
+						usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
+					}
+				},
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Verify we get raw tool call chunks
+			const rawChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial")
+
+			expect(rawChunks).toHaveLength(2)
+			expect(rawChunks[0]).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: "call_123",
+				name: "read_file",
+				arguments: '{"path":"',
+			})
+			expect(rawChunks[1]).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: undefined,
+				name: undefined,
+				arguments: 'test.ts"}',
+			})
+		})
+
+		it("should yield raw tool call chunks even when finish_reason is not tool_calls", async () => {
+			mockCreate.mockResolvedValueOnce({
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						choices: [
+							{
+								delta: {
+									tool_calls: [
+										{
+											index: 0,
+											id: "call_456",
+											function: {
+												name: "write_to_file",
+												arguments: '{"path":"test.ts","content":"hello"}',
+											},
+										},
+									],
+								},
+								index: 0,
+							},
+						],
+					}
+					yield {
+						choices: [
+							{
+								delta: {},
+								finish_reason: "stop",
+								index: 0,
+							},
+						],
+						usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
+					}
+				},
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const rawChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial")
+
+			expect(rawChunks).toHaveLength(1)
+			expect(rawChunks[0]).toEqual({
+				type: "tool_call_partial",
+				index: 0,
+				id: "call_456",
+				name: "write_to_file",
+				arguments: '{"path":"test.ts","content":"hello"}',
+			})
+		})
+
+		it("should handle multiple tool calls with different indices", async () => {
+			mockCreate.mockResolvedValueOnce({
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						choices: [
+							{
+								delta: {
+									tool_calls: [
+										{
+											index: 0,
+											id: "call_1",
+											function: { name: "read_file", arguments: '{"path":"file1.ts"}' },
+										},
+									],
+								},
+								index: 0,
+							},
+						],
+					}
+					yield {
+						choices: [
+							{
+								delta: {
+									tool_calls: [
+										{
+											index: 1,
+											id: "call_2",
+											function: { name: "read_file", arguments: '{"path":"file2.ts"}' },
+										},
+									],
+								},
+								index: 0,
+							},
+						],
+					}
+					yield {
+						choices: [
+							{
+								delta: {},
+								finish_reason: "tool_calls",
+								index: 0,
+							},
+						],
+						usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
+					}
+				},
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const rawChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial")
+
+			expect(rawChunks).toHaveLength(2)
+			expect(rawChunks[0].index).toBe(0)
+			expect(rawChunks[0].id).toBe("call_1")
+			expect(rawChunks[1].index).toBe(1)
+			expect(rawChunks[1].id).toBe("call_2")
+		})
+
+		it("should emit raw chunks for streaming arguments", async () => {
+			mockCreate.mockResolvedValueOnce({
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						choices: [
+							{
+								delta: {
+									tool_calls: [
+										{
+											index: 0,
+											id: "call_789",
+											function: { name: "execute_command", arguments: '{"command":"' },
+										},
+									],
+								},
+								index: 0,
+							},
+						],
+					}
+					yield {
+						choices: [
+							{
+								delta: {
+									tool_calls: [
+										{
+											index: 0,
+											function: { arguments: "npm install" },
+										},
+									],
+								},
+								index: 0,
+							},
+						],
+					}
+					yield {
+						choices: [
+							{
+								delta: {
+									tool_calls: [
+										{
+											index: 0,
+											function: { arguments: '"}' },
+										},
+									],
+								},
+								index: 0,
+							},
+						],
+					}
+					yield {
+						choices: [
+							{
+								delta: {},
+								finish_reason: "tool_calls",
+								index: 0,
+							},
+						],
+						usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
+					}
+				},
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const rawChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial")
+
+			expect(rawChunks).toHaveLength(3)
+			expect(rawChunks[0].arguments).toBe('{"command":"')
+			expect(rawChunks[1].arguments).toBe("npm install")
+			expect(rawChunks[2].arguments).toBe('"}')
+		})
+
+		it("should not yield tool call chunks when no tool calls present", async () => {
+			mockCreate.mockResolvedValueOnce({
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						choices: [{ delta: { content: "Regular text response" }, index: 0 }],
+					}
+					yield {
+						choices: [{ delta: {}, finish_reason: "stop", index: 0 }],
+						usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
+					}
+				},
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const rawChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial")
+			expect(rawChunks).toHaveLength(0)
+		})
+	})
 })

+ 1 - 1
src/api/providers/__tests__/sambanova.spec.ts

@@ -120,7 +120,7 @@ describe("SambaNovaHandler", () => {
 		const firstChunk = await stream.next()
 
 		expect(firstChunk.done).toBe(false)
-		expect(firstChunk.value).toEqual({ type: "usage", inputTokens: 10, outputTokens: 20 })
+		expect(firstChunk.value).toMatchObject({ type: "usage", inputTokens: 10, outputTokens: 20 })
 	})
 
 	it("createMessage should pass correct parameters to SambaNova client", async () => {

Some files were not shown because too many files changed in this diff