2 mesi fa · 3f0a6971ca
--- a/apps/web-evals/package.json
+++ b/apps/web-evals/package.json
@@ -29,6 +29,7 @@
 
				 		"@roo-code/evals": "workspace:^",
			
 
				 		"@roo-code/types": "workspace:^",
			
 
				 		"@tanstack/react-query": "^5.69.0",
			
 
				+		"archiver": "^7.0.1",
			
 
				 		"class-variance-authority": "^0.7.1",
			
 
				 		"clsx": "^2.1.1",
			
 
				 		"cmdk": "^1.1.0",
			
@@ -52,6 +53,7 @@
 
				 		"@roo-code/config-eslint": "workspace:^",
			
 
				 		"@roo-code/config-typescript": "workspace:^",
			
 
				 		"@tailwindcss/postcss": "^4",
			
 
				+		"@types/archiver": "^7.0.0",
			
 
				 		"@types/ps-tree": "^1.1.6",
			
 
				 		"@types/react": "^18.3.23",
			
 
				 		"@types/react-dom": "^18.3.5",
			
--- a/apps/web-evals/src/actions/runs.ts
+++ b/apps/web-evals/src/actions/runs.ts
@@ -21,7 +21,7 @@ import { CreateRun } from "@/lib/schemas"
 
				 
			
 
				 const EVALS_REPO_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals")
			
 
				 
			
 
				-export async function createRun({ suite, exercises = [], timeout, ...values }: CreateRun) {
			
 
				+export async function createRun({ suite, exercises = [], timeout, iterations = 1, ...values }: CreateRun) {
			
 
				 	const run = await _createRun({
			
 
				 		...values,
			
 
				 		timeout,
			
@@ -36,15 +36,34 @@ export async function createRun({ suite, exercises = [], timeout, ...values }: C
 
				 				throw new Error("Invalid exercise path: " + path)
			
 
				 			}
			
 
				 
			
 
				-			await createTask({ ...values, runId: run.id, language: language as ExerciseLanguage, exercise })
			
 
				+			// Create multiple tasks for each iteration
			
 
				+			for (let iteration = 1; iteration <= iterations; iteration++) {
			
 
				+				await createTask({
			
 
				+					...values,
			
 
				+					runId: run.id,
			
 
				+					language: language as ExerciseLanguage,
			
 
				+					exercise,
			
 
				+					iteration,
			
 
				+				})
			
 
				+			}
			
 
				 		}
			
 
				 	} else {
			
 
				 		for (const language of exerciseLanguages) {
			
 
				-			const exercises = await getExercisesForLanguage(EVALS_REPO_PATH, language)
			
 
				+			const languageExercises = await getExercisesForLanguage(EVALS_REPO_PATH, language)
			
 
				+
			
 
				+			// Create tasks for all iterations of each exercise
			
 
				+			const tasksToCreate: Array<{ language: ExerciseLanguage; exercise: string; iteration: number }> = []
			
 
				+			for (const exercise of languageExercises) {
			
 
				+				for (let iteration = 1; iteration <= iterations; iteration++) {
			
 
				+					tasksToCreate.push({ language, exercise, iteration })
			
 
				+				}
			
 
				+			}
			
 
				 
			
 
				-			await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), {
			
 
				-				concurrency: 10,
			
 
				-			})
			
 
				+			await pMap(
			
 
				+				tasksToCreate,
			
 
				+				({ language, exercise, iteration }) => createTask({ runId: run.id, language, exercise, iteration }),
			
 
				+				{ concurrency: 10 },
			
 
				+			)
			
 
				 		}
			
 
				 	}
			
 
				 
			
--- a/apps/web-evals/src/app/api/runs/[id]/logs/[taskId]/route.ts
+++ b/apps/web-evals/src/app/api/runs/[id]/logs/[taskId]/route.ts
@@ -0,0 +1,74 @@
 
				+import { NextResponse } from "next/server"
			
 
				+import type { NextRequest } from "next/server"
			
 
				+import * as fs from "node:fs/promises"
			
 
				+import * as path from "node:path"
			
 
				+
			
 
				+import { findTask, findRun } from "@roo-code/evals"
			
 
				+
			
 
				+export const dynamic = "force-dynamic"
			
 
				+
			
 
				+const LOG_BASE_PATH = "/tmp/evals/runs"
			
 
				+
			
 
				/**
				 * Neutralises a single path segment so it cannot escape the directory it is
				 * joined onto.
				 *
				 * Path separators, the drive/stream separator `:`, NUL and the wildcard or
				 * redirection characters `* ? " < > |` are each replaced (not removed) by `_`,
				 * so the result has the same length as the input.
				 *
				 * @param component Raw segment (for example a task's language or exercise name).
				 * @returns The segment with every dangerous character replaced by `_`.
				 */
				function sanitizePathComponent(component: string): string {
					const cleaned = component.replace(/[/\\:\0*?"<>|]/g, "_")

					// "." and ".." contain none of the characters above, yet they still
					// navigate the directory tree when used as a whole segment.
					return cleaned === "." || cleaned === ".." ? "_".repeat(cleaned.length) : cleaned
				}
			
 
				+
			
 
				/**
				 * GET handler that returns the log of a single task as JSON.
				 *
				 * Responses:
				 * - 200 `{ logContent }` when the log file was read.
				 * - 200 `{ error, logContent: null }` when the file does not exist on disk.
				 * - 400 when either id is not an integer or the computed path leaves the log root.
				 * - 404 when the task belongs to a different run, or a lookup throws an error
				 *   named `RecordNotFoundError`.
				 * - 500 for any other failure.
				 *
				 * @param request Incoming request (unused; required by the route signature).
				 * @param params  Route parameters: `id` is the run id, `taskId` the task id.
				 */
				export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string; taskId: string }> }) {
					const { id, taskId } = await params

					try {
						const runId = Number(id)
						const taskIdNum = Number(taskId)

						// isNaN alone is too lax: Number("") and Number(" ") are 0, and
						// Number("1.5") is a perfectly valid (non-integer) number.
						const idsAreValid =
							id.trim() !== "" &&
							taskId.trim() !== "" &&
							Number.isSafeInteger(runId) &&
							Number.isSafeInteger(taskIdNum)

						if (!idsAreValid) {
							return NextResponse.json({ error: "Invalid run ID or task ID" }, { status: 400 })
						}

						// Verify the run exists; the result itself is not needed.
						await findRun(runId)

						// The task supplies the language and exercise used to build the file name.
						const task = await findTask(taskIdNum)

						// Refuse to serve a task's log through another run's URL.
						if (task.runId !== runId) {
							return NextResponse.json({ error: "Task does not belong to this run" }, { status: 404 })
						}

						const safeLanguage = sanitizePathComponent(task.language)
						const safeExercise = sanitizePathComponent(task.exercise)

						const logFileName = `${safeLanguage}-${safeExercise}.log`
						const logFilePath = path.join(LOG_BASE_PATH, String(runId), logFileName)

						// Defense in depth: the resolved file must sit strictly below the log
						// root. The trailing separator matters - a bare prefix test would also
						// accept sibling directories whose name merely starts with the root's.
						const expectedBase = path.resolve(LOG_BASE_PATH) + path.sep
						if (!path.resolve(logFilePath).startsWith(expectedBase)) {
							return NextResponse.json({ error: "Invalid log path" }, { status: 400 })
						}

						try {
							const logContent = await fs.readFile(logFilePath, "utf-8")
							return NextResponse.json({ logContent })
						} catch (err) {
							// A missing file is reported with status 200 and a null payload.
							if ((err as NodeJS.ErrnoException).code === "ENOENT") {
								return NextResponse.json({ error: "Log file not found", logContent: null }, { status: 200 })
							}
							throw err
						}
					} catch (error) {
						console.error("Error reading task log:", error)

						if (error instanceof Error && error.name === "RecordNotFoundError") {
							return NextResponse.json({ error: "Task or run not found" }, { status: 404 })
						}

						return NextResponse.json({ error: "Failed to read log file" }, { status: 500 })
					}
				}
			
--- a/apps/web-evals/src/app/api/runs/[id]/logs/failed/route.ts
+++ b/apps/web-evals/src/app/api/runs/[id]/logs/failed/route.ts
@@ -0,0 +1,129 @@
 
				+import { NextResponse } from "next/server"
			
 
				+import type { NextRequest } from "next/server"
			
 
				+import * as fs from "node:fs"
			
 
				+import * as path from "node:path"
			
 
				+import archiver from "archiver"
			
 
				+
			
 
				+import { findRun, getTasks } from "@roo-code/evals"
			
 
				+
			
 
				+export const dynamic = "force-dynamic"
			
 
				+
			
 
				+const LOG_BASE_PATH = "/tmp/evals/runs"
			
 
				+
			
 
				/**
				 * Neutralises a single path segment so it cannot escape the directory it is
				 * joined onto.
				 *
				 * Path separators, the drive/stream separator `:`, NUL and the wildcard or
				 * redirection characters `* ? " < > |` are each replaced (not removed) by `_`,
				 * so the result has the same length as the input.
				 *
				 * @param component Raw segment (for example a task's language or exercise name).
				 * @returns The segment with every dangerous character replaced by `_`.
				 */
				function sanitizePathComponent(component: string): string {
					const cleaned = component.replace(/[/\\:\0*?"<>|]/g, "_")

					// "." and ".." contain none of the characters above, yet they still
					// navigate the directory tree when used as a whole segment.
					return cleaned === "." || cleaned === ".." ? "_".repeat(cleaned.length) : cleaned
				}
			
 
				+
			
 
				/**
				 * GET handler that bundles the log files of a run's failed tasks into a zip
				 * and returns it as a download.
				 *
				 * Responses:
				 * - 200 `application/zip` attachment named `run-<id>-failed-logs.zip`.
				 * - 400 when the id is not an integer or the run has no failed tasks.
				 * - 404 when none of the expected log files exist on disk, or a lookup throws
				 *   an error named `RecordNotFoundError`.
				 * - 500 for any other failure.
				 *
				 * @param request Incoming request (unused; required by the route signature).
				 * @param params  Route parameters: `id` is the run id.
				 */
				export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string }> }) {
					const { id } = await params

					try {
						const runId = Number(id)

						// isNaN alone is too lax: Number("") is 0 and Number("1.5") is not an id.
						if (id.trim() === "" || !Number.isSafeInteger(runId)) {
							return NextResponse.json({ error: "Invalid run ID" }, { status: 400 })
						}

						// Verify the run exists; the result itself is not needed.
						await findRun(runId)

						const tasks = await getTasks(runId)

						// Only tasks explicitly marked as failed (not pending ones) are exported.
						const failedTasks = tasks.filter((task) => task.passed === false)

						if (failedTasks.length === 0) {
							return NextResponse.json({ error: "No failed tasks to export" }, { status: 400 })
						}

						// Work out which files to ship before creating the archive, so the
						// "nothing to export" case never has to abort a half-built stream.
						const logDir = path.join(LOG_BASE_PATH, String(runId))
						// The trailing separator stops sibling directories that merely share
						// the root's name as a prefix from passing the containment test.
						const expectedBase = path.resolve(LOG_BASE_PATH) + path.sep
						const seenNames = new Set<string>()
						const filesToArchive: Array<{ name: string; filePath: string }> = []

						for (const task of failedTasks) {
							const safeLanguage = sanitizePathComponent(task.language)
							const safeExercise = sanitizePathComponent(task.exercise)
							const logFileName = `${safeLanguage}-${safeExercise}.log`

							// The name depends only on language and exercise, so several failed
							// tasks (e.g. iterations of one exercise) can point at the same file.
							// Queue it once instead of adding duplicate zip entries.
							if (seenNames.has(logFileName)) {
								continue
							}
							seenNames.add(logFileName)

							const logFilePath = path.join(logDir, logFileName)

							// Defense in depth: skip anything that resolves outside the log root.
							if (!path.resolve(logFilePath).startsWith(expectedBase)) {
								continue
							}

							if (fs.existsSync(logFilePath)) {
								filesToArchive.push({ name: logFileName, filePath: logFilePath })
							}
						}

						if (filesToArchive.length === 0) {
							return NextResponse.json(
								{ error: "No log files found - they may have been cleared from disk" },
								{ status: 404 },
							)
						}

						const archive = archiver("zip", { zlib: { level: 9 } })

						// Buffer the stream output so the response can carry a Content-Length.
						const chunks: Buffer[] = []
						archive.on("data", (chunk: Buffer) => {
							chunks.push(chunk)
						})

						// Listeners are registered before any entry is queued.
						const archiveEndPromise = new Promise<void>((resolve, reject) => {
							archive.on("end", resolve)
							archive.on("error", reject)
						})

						for (const { name, filePath } of filesToArchive) {
							archive.file(filePath, { name })
						}

						// Await both together: if finalize() and the "error" event both fail,
						// neither rejection is left without a handler.
						await Promise.all([archiveEndPromise, archive.finalize()])

						const zipBuffer = Buffer.concat(chunks)

						return new NextResponse(zipBuffer, {
							status: 200,
							headers: {
								"Content-Type": "application/zip",
								"Content-Disposition": `attachment; filename="run-${runId}-failed-logs.zip"`,
								"Content-Length": String(zipBuffer.length),
							},
						})
					} catch (error) {
						console.error("Error exporting failed logs:", error)

						if (error instanceof Error && error.name === "RecordNotFoundError") {
							return NextResponse.json({ error: "Run not found" }, { status: 404 })
						}

						return NextResponse.json({ error: "Failed to export logs" }, { status: 500 })
					}
				}
			
--- a/apps/web-evals/src/app/runs/[id]/run.tsx
+++ b/apps/web-evals/src/app/runs/[id]/run.tsx
@@ -1,9 +1,10 @@
 
				 "use client"
			
 
				 
			
 
				-import { useMemo } from "react"
			
 
				-import { LoaderCircle } from "lucide-react"
			
 
				+import { useMemo, useState, useCallback, useEffect } from "react"
			
 
				+import { toast } from "sonner"
			
 
				+import { LoaderCircle, FileText, Copy, Check } from "lucide-react"
			
 
				 
			
 
				-import type { Run, TaskMetrics as _TaskMetrics } from "@roo-code/evals"
			
 
				+import type { Run, TaskMetrics as _TaskMetrics, Task } from "@roo-code/evals"
			
 
				 
			
 
				 import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters"
			
 
				 import { useRunStatus } from "@/hooks/use-run-status"
			
@@ -17,6 +18,12 @@ import {
 
				 	Tooltip,
			
 
				 	TooltipContent,
			
 
				 	TooltipTrigger,
			
 
				+	Dialog,
			
 
				+	DialogContent,
			
 
				+	DialogHeader,
			
 
				+	DialogTitle,
			
 
				+	ScrollArea,
			
 
				+	Button,
			
 
				 } from "@/components/ui"
			
 
				 
			
 
				 import { TaskStatus } from "./task-status"
			
@@ -35,10 +42,169 @@ function getToolAbbreviation(toolName: string): string {
 
				 		.join("")
			
 
				 }
			
 
				 
			
 
				+// Pattern definitions for syntax highlighting
			
 
				+type HighlightPattern = {
			
 
				+	pattern: RegExp
			
 
				+	className: string
			
 
				+	// Index of the capture group to highlight; when omitted, the entire match is highlighted
			
 
				+	wrapGroup?: number
			
 
				+}
			
 
				+
			
 
				+const HIGHLIGHT_PATTERNS: HighlightPattern[] = [
			
 
				+	// Timestamps [YYYY-MM-DDTHH:MM:SS.sssZ]
			
 
				+	{ pattern: /\[(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z)\]/g, className: "text-blue-400" },
			
 
				+	// Log levels
			
 
				+	{ pattern: /\|\s*(INFO)\s*\|/g, className: "text-green-400", wrapGroup: 1 },
			
 
				+	{ pattern: /\|\s*(WARN|WARNING)\s*\|/g, className: "text-yellow-400", wrapGroup: 1 },
			
 
				+	{ pattern: /\|\s*(ERROR)\s*\|/g, className: "text-red-400", wrapGroup: 1 },
			
 
				+	{ pattern: /\|\s*(DEBUG)\s*\|/g, className: "text-gray-400", wrapGroup: 1 },
			
 
				+	// Task identifiers
			
 
				+	{ pattern: /(taskCreated|taskFocused|taskStarted|taskCompleted|EvalPass|EvalFail)/g, className: "text-purple-400" },
			
 
				+	// Message arrows
			
 
				+	{ pattern: /→/g, className: "text-cyan-400" },
			
 
				+]
			
 
				+
			
 
				/**
				 * Turns one log line into React nodes, wrapping every fragment matched by
				 * HIGHLIGHT_PATTERNS in a coloured <span>. Only React elements and plain
				 * strings are produced (no raw HTML), so the log text is never interpreted
				 * as markup.
				 *
				 * When two matches overlap, the one starting first wins; on a tie, the
				 * pattern listed earlier wins.
				 *
				 * @param line A single log line without its trailing newline.
				 * @returns Alternating plain strings and highlighted spans; `[line]` if
				 *          nothing was produced.
				 */
				function formatLine(line: string): React.ReactNode[] {
					type Span = { start: number; end: number; text: string; className: string }

					// Pass 1: collect every candidate span from every pattern.
					const candidates: Span[] = []
					HIGHLIGHT_PATTERNS.forEach(({ pattern, className, wrapGroup }) => {
						// The regexes are global and shared, so rewind before scanning.
						pattern.lastIndex = 0
						for (let hit = pattern.exec(line); hit !== null; hit = pattern.exec(line)) {
							const text = wrapGroup === undefined ? hit[0] : hit[wrapGroup]
							// An empty or unmatched capture contributes nothing.
							if (!text) {
								continue
							}
							// For a capture group, locate it inside the full match.
							const offsetInMatch = wrapGroup === undefined ? 0 : hit[0].indexOf(text)
							const start = hit.index + offsetInMatch
							candidates.push({ start, end: start + text.length, text, className })
						}
					})

					// Pass 2: order by position and drop spans that overlap an accepted one.
					candidates.sort((a, b) => a.start - b.start)
					const accepted: Span[] = []
					let acceptedUpTo = 0
					for (const span of candidates) {
						if (span.start >= acceptedUpTo) {
							accepted.push(span)
							acceptedUpTo = span.end
						}
					}

					// Pass 3: interleave untouched text with the highlighted spans.
					const nodes: React.ReactNode[] = []
					let cursor = 0
					accepted.forEach((span, i) => {
						if (cursor < span.start) {
							nodes.push(line.slice(cursor, span.start))
						}
						nodes.push(
							<span key={`${i}-${span.start}`} className={span.className}>
								{span.text}
							</span>,
						)
						cursor = span.end
					})

					if (cursor < line.length) {
						nodes.push(line.slice(cursor))
					}

					return nodes.length > 0 ? nodes : [line]
				}
			
 
				+
			
 
				/**
				 * Renders a whole log as one <div> per line, each highlighted by formatLine.
				 * Only React elements are produced (no dangerouslySetInnerHTML).
				 *
				 * @param log Full log text; lines are separated by "\n".
				 * @returns One element per line, keyed by line index.
				 */
				function formatLogContent(log: string): React.ReactNode[] {
					return log.split("\n").map((line, index) => (
						<div key={index} className="hover:bg-white/5">
							{/* Blank lines get an explicit non-breaking space: a normal space
							    would collapse and the row would render with zero height. */}
							{line ? formatLine(line) : "\u00a0"}
						</div>
					))
				}
			
 
				+
			
 
				 export function Run({ run }: { run: Run }) {
			
 
				 	const runStatus = useRunStatus(run)
			
 
				 	const { tasks, tokenUsage, usageUpdatedAt } = runStatus
			
 
				 
			
 
				+	const [selectedTask, setSelectedTask] = useState<Task | null>(null)
			
 
				+	const [taskLog, setTaskLog] = useState<string | null>(null)
			
 
				+	const [isLoadingLog, setIsLoadingLog] = useState(false)
			
 
				+	const [copied, setCopied] = useState(false)
			
 
				+
			
 
				+	const onCopyLog = useCallback(async () => {
			
 
				+		if (!taskLog) return
			
 
				+
			
 
				+		try {
			
 
				+			await navigator.clipboard.writeText(taskLog)
			
 
				+			setCopied(true)
			
 
				+			toast.success("Log copied to clipboard")
			
 
				+			setTimeout(() => setCopied(false), 2000)
			
 
				+		} catch (error) {
			
 
				+			console.error("Failed to copy log:", error)
			
 
				+			toast.error("Failed to copy log")
			
 
				+		}
			
 
				+	}, [taskLog])
			
 
				+
			
 
				+	// Handle ESC key to close the dialog
			
 
				+	useEffect(() => {
			
 
				+		const handleKeyDown = (e: KeyboardEvent) => {
			
 
				+			if (e.key === "Escape" && selectedTask) {
			
 
				+				setSelectedTask(null)
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		document.addEventListener("keydown", handleKeyDown)
			
 
				+		return () => document.removeEventListener("keydown", handleKeyDown)
			
 
				+	}, [selectedTask])
			
 
				+
			
 
				+	const onViewTaskLog = useCallback(
			
 
				+		async (task: Task) => {
			
 
				+			// Only allow viewing logs for completed tasks
			
 
				+			if (task.passed === null || task.passed === undefined) {
			
 
				+				toast.error("Task is still running")
			
 
				+				return
			
 
				+			}
			
 
				+
			
 
				+			setSelectedTask(task)
			
 
				+			setIsLoadingLog(true)
			
 
				+			setTaskLog(null)
			
 
				+
			
 
				+			try {
			
 
				+				const response = await fetch(`/api/runs/${run.id}/logs/${task.id}`)
			
 
				+
			
 
				+				if (!response.ok) {
			
 
				+					const error = await response.json()
			
 
				+					toast.error(error.error || "Failed to load log")
			
 
				+					setSelectedTask(null)
			
 
				+					return
			
 
				+				}
			
 
				+
			
 
				+				const data = await response.json()
			
 
				+				setTaskLog(data.logContent)
			
 
				+			} catch (error) {
			
 
				+				console.error("Error loading task log:", error)
			
 
				+				toast.error("Failed to load log")
			
 
				+				setSelectedTask(null)
			
 
				+			} finally {
			
 
				+				setIsLoadingLog(false)
			
 
				+			}
			
 
				+		},
			
 
				+		[run.id],
			
 
				+	)
			
 
				+
			
 
				 	const taskMetrics: Record<number, TaskMetrics> = useMemo(() => {
			
 
				 		const metrics: Record<number, TaskMetrics> = {}
			
 
				 
			
@@ -241,15 +407,33 @@ export function Run({ run }: { run: Run }) {
 
				 						</TableHeader>
			
 
				 						<TableBody>
			
 
				 							{tasks.map((task) => (
			
 
				-								<TableRow key={task.id}>
			
 
				+								<TableRow
			
 
				+									key={task.id}
			
 
				+									className={task.finishedAt ? "cursor-pointer hover:bg-muted/50" : ""}
			
 
				+									onClick={() => task.finishedAt && onViewTaskLog(task)}>
			
 
				 									<TableCell>
			
 
				 										<div className="flex items-center gap-2">
			
 
				 											<TaskStatus
			
 
				 												task={task}
			
 
				 												running={!!task.startedAt || !!tokenUsage.get(task.id)}
			
 
				 											/>
			
 
				-											<div>
			
 
				-												{task.language}/{task.exercise}
			
 
				+											<div className="flex items-center gap-2">
			
 
				+												<span>
			
 
				+													{task.language}/{task.exercise}
			
 
				+													{task.iteration > 1 && (
			
 
				+														<span className="text-muted-foreground ml-1">
			
 
				+															(#{task.iteration})
			
 
				+														</span>
			
 
				+													)}
			
 
				+												</span>
			
 
				+												{task.finishedAt && (
			
 
				+													<Tooltip>
			
 
				+														<TooltipTrigger asChild>
			
 
				+															<FileText className="size-3 text-muted-foreground" />
			
 
				+														</TooltipTrigger>
			
 
				+														<TooltipContent>Click to view log</TooltipContent>
			
 
				+													</Tooltip>
			
 
				+												)}
			
 
				 											</div>
			
 
				 										</div>
			
 
				 									</TableCell>
			
@@ -282,6 +466,63 @@ export function Run({ run }: { run: Run }) {
 
				 					</Table>
			
 
				 				)}
			
 
				 			</div>
			
 
				+
			
 
				+			{/* Task Log Dialog - Full Screen */}
			
 
				+			<Dialog open={!!selectedTask} onOpenChange={() => setSelectedTask(null)}>
			
 
				+				<DialogContent className="w-[95vw] !max-w-[95vw] h-[90vh] flex flex-col">
			
 
				+					<DialogHeader className="flex-shrink-0">
			
 
				+						<div className="flex items-center justify-between pr-8">
			
 
				+							<DialogTitle className="flex items-center gap-2">
			
 
				+								<FileText className="size-4" />
			
 
				+								{selectedTask?.language}/{selectedTask?.exercise}
			
 
				+								{selectedTask?.iteration && selectedTask.iteration > 1 && (
			
 
				+									<span className="text-muted-foreground">(#{selectedTask.iteration})</span>
			
 
				+								)}
			
 
				+								<span
			
 
				+									className={`ml-2 text-sm ${selectedTask?.passed ? "text-green-600" : "text-red-600"}`}>
			
 
				+									({selectedTask?.passed ? "Passed" : "Failed"})
			
 
				+								</span>
			
 
				+							</DialogTitle>
			
 
				+							{taskLog && (
			
 
				+								<Button
			
 
				+									variant="outline"
			
 
				+									size="sm"
			
 
				+									onClick={onCopyLog}
			
 
				+									className="flex items-center gap-1">
			
 
				+									{copied ? (
			
 
				+										<>
			
 
				+											<Check className="size-4" />
			
 
				+											Copied!
			
 
				+										</>
			
 
				+									) : (
			
 
				+										<>
			
 
				+											<Copy className="size-4" />
			
 
				+											Copy Log
			
 
				+										</>
			
 
				+									)}
			
 
				+								</Button>
			
 
				+							)}
			
 
				+						</div>
			
 
				+					</DialogHeader>
			
 
				+					<div className="flex-1 min-h-0 overflow-hidden">
			
 
				+						{isLoadingLog ? (
			
 
				+							<div className="flex items-center justify-center h-full">
			
 
				+								<LoaderCircle className="size-6 animate-spin" />
			
 
				+							</div>
			
 
				+						) : taskLog ? (
			
 
				+							<ScrollArea className="h-full w-full">
			
 
				+								<div className="text-xs font-mono bg-muted p-4 rounded-md overflow-x-auto">
			
 
				+									{formatLogContent(taskLog)}
			
 
				+								</div>
			
 
				+							</ScrollArea>
			
 
				+						) : (
			
 
				+							<div className="flex items-center justify-center h-full text-muted-foreground">
			
 
				+								Log file not available (may have been cleared)
			
 
				+							</div>
			
 
				+						)}
			
 
				+					</div>
			
 
				+				</DialogContent>
			
 
				+			</Dialog>
			
 
				 		</>
			
 
				 	)
			
 
				 }
			
--- a/apps/web-evals/src/app/runs/new/new-run.tsx
+++ b/apps/web-evals/src/app/runs/new/new-run.tsx
@@ -7,7 +7,7 @@ import { useQuery } from "@tanstack/react-query"
 
				 import { useForm, FormProvider } from "react-hook-form"
			
 
				 import { zodResolver } from "@hookform/resolvers/zod"
			
 
				 import { toast } from "sonner"
			
 
				-import { X, Rocket, Check, ChevronsUpDown, SlidersHorizontal } from "lucide-react"
			
 
				+import { X, Rocket, Check, ChevronsUpDown, SlidersHorizontal, Info } from "lucide-react"
			
 
				 
			
 
				 import {
			
 
				 	globalSettingsSchema,
			
@@ -16,6 +16,7 @@ import {
 
				 	getModelId,
			
 
				 	type ProviderSettings,
			
 
				 	type GlobalSettings,
			
 
				+	type ReasoningEffort,
			
 
				 } from "@roo-code/types"
			
 
				 
			
 
				 import { createRun } from "@/actions/runs"
			
@@ -30,6 +31,9 @@ import {
 
				 	TIMEOUT_MIN,
			
 
				 	TIMEOUT_MAX,
			
 
				 	TIMEOUT_DEFAULT,
			
 
				+	ITERATIONS_MIN,
			
 
				+	ITERATIONS_MAX,
			
 
				+	ITERATIONS_DEFAULT,
			
 
				 } from "@/lib/schemas"
			
 
				 import { cn } from "@/lib/utils"
			
 
				 
			
@@ -40,6 +44,7 @@ import {
 
				 	Button,
			
 
				 	Checkbox,
			
 
				 	FormControl,
			
 
				+	FormDescription,
			
 
				 	FormField,
			
 
				 	FormItem,
			
 
				 	FormLabel,
			
@@ -61,7 +66,14 @@ import {
 
				 	PopoverTrigger,
			
 
				 	Slider,
			
 
				 	Label,
			
 
				-	FormDescription,
			
 
				+	Select,
			
 
				+	SelectContent,
			
 
				+	SelectItem,
			
 
				+	SelectTrigger,
			
 
				+	SelectValue,
			
 
				+	Tooltip,
			
 
				+	TooltipContent,
			
 
				+	TooltipTrigger,
			
 
				 } from "@/components/ui"
			
 
				 
			
 
				 import { SettingsDiff } from "./settings-diff"
			
@@ -78,6 +90,8 @@ export function NewRun() {
 
				 	const [provider, setModelSource] = useState<"roo" | "openrouter" | "other">("roo")
			
 
				 	const [modelPopoverOpen, setModelPopoverOpen] = useState(false)
			
 
				 	const [useNativeToolProtocol, setUseNativeToolProtocol] = useState(true)
			
 
				+	const [useMultipleNativeToolCalls, setUseMultipleNativeToolCalls] = useState(true)
			
 
				+	const [reasoningEffort, setReasoningEffort] = useState<ReasoningEffort | "">("")
			
 
				 
			
 
				 	// State for imported settings with config selection
			
 
				 	const [importedSettings, setImportedSettings] = useState<ImportedSettings | null>(null)
			
@@ -106,6 +120,7 @@ export function NewRun() {
 
				 			settings: undefined,
			
 
				 			concurrency: CONCURRENCY_DEFAULT,
			
 
				 			timeout: TIMEOUT_DEFAULT,
			
 
				+			iterations: ITERATIONS_DEFAULT,
			
 
				 			jobToken: "",
			
 
				 		},
			
 
				 	})
			
@@ -204,12 +219,24 @@ export function NewRun() {
 
				 	const onSubmit = useCallback(
			
 
				 		async (values: CreateRun) => {
			
 
				 			try {
			
 
				+				// Validate jobToken for Roo Code Cloud provider
			
 
				+				if (provider === "roo" && !values.jobToken?.trim()) {
			
 
				+					toast.error("Roo Code Cloud Token is required")
			
 
				+					return
			
 
				+				}
			
 
				+
			
 
				+				// Build experiments settings
			
 
				+				const experimentsSettings = useMultipleNativeToolCalls
			
 
				+					? { experiments: { multipleNativeToolCalls: true } }
			
 
				+					: {}
			
 
				+
			
 
				 				if (provider === "openrouter") {
			
 
				 					values.settings = {
			
 
				 						...(values.settings || {}),
			
 
				 						apiProvider: "openrouter",
			
 
				 						openRouterModelId: model,
			
 
				 						toolProtocol: useNativeToolProtocol ? "native" : "xml",
			
 
				+						...experimentsSettings,
			
 
				 					}
			
 
				 				} else if (provider === "roo") {
			
 
				 					values.settings = {
			
@@ -217,6 +244,20 @@ export function NewRun() {
 
				 						apiProvider: "roo",
			
 
				 						apiModelId: model,
			
 
				 						toolProtocol: useNativeToolProtocol ? "native" : "xml",
			
 
				+						...experimentsSettings,
			
 
				+						...(reasoningEffort
			
 
				+							? {
			
 
				+									enableReasoningEffort: true,
			
 
				+									reasoningEffort: reasoningEffort as ReasoningEffort,
			
 
				+								}
			
 
				+							: {}),
			
 
				+					}
			
 
				+				} else if (provider === "other" && values.settings) {
			
 
				+					// For imported settings, merge in experiments and tool protocol
			
 
				+					values.settings = {
			
 
				+						...values.settings,
			
 
				+						toolProtocol: useNativeToolProtocol ? "native" : "xml",
			
 
				+						...experimentsSettings,
			
 
				 					}
			
 
				 				}
			
 
				 
			
@@ -226,7 +267,7 @@ export function NewRun() {
 
				 				toast.error(e instanceof Error ? e.message : "An unknown error occurred.")
			
 
				 			}
			
 
				 		},
			
 
				-		[provider, model, router, useNativeToolProtocol],
			
 
				+		[provider, model, router, useNativeToolProtocol, useMultipleNativeToolCalls, reasoningEffort],
			
 
				 	)
			
 
				 
			
 
				 	const onSelectModel = useCallback(
			
@@ -394,6 +435,38 @@ export function NewRun() {
 
				 											</div>
			
 
				 										)}
			
 
				 
			
 
				+										<div className="mt-4 p-4 rounded-md bg-muted/30 border border-border space-y-3">
			
 
				+											<Label className="text-sm font-medium text-muted-foreground">
			
 
				+												Tool Protocol Options
			
 
				+											</Label>
			
 
				+											<div className="flex flex-col gap-2.5 pl-1">
			
 
				+												<label
			
 
				+													htmlFor="native-other"
			
 
				+													className="flex items-center gap-2 cursor-pointer">
			
 
				+													<Checkbox
			
 
				+														id="native-other"
			
 
				+														checked={useNativeToolProtocol}
			
 
				+														onCheckedChange={(checked) =>
			
 
				+															setUseNativeToolProtocol(checked === true)
			
 
				+														}
			
 
				+													/>
			
 
				+													<span className="text-sm">Use Native Tool Calls</span>
			
 
				+												</label>
			
 
				+												<label
			
 
				+													htmlFor="multipleNativeToolCalls-other"
			
 
				+													className="flex items-center gap-2 cursor-pointer">
			
 
				+													<Checkbox
			
 
				+														id="multipleNativeToolCalls-other"
			
 
				+														checked={useMultipleNativeToolCalls}
			
 
				+														onCheckedChange={(checked) =>
			
 
				+															setUseMultipleNativeToolCalls(checked === true)
			
 
				+														}
			
 
				+													/>
			
 
				+													<span className="text-sm">Use Multiple Native Tool Calls</span>
			
 
				+												</label>
			
 
				+											</div>
			
 
				+										</div>
			
 
				+
			
 
				 										{settings && (
			
 
				 											<SettingsDiff defaultSettings={EVALS_SETTINGS} customSettings={settings} />
			
 
				 										)}
			
@@ -444,15 +517,66 @@ export function NewRun() {
 
				 											</PopoverContent>
			
 
				 										</Popover>
			
 
				 
			
 
				-										<div className="flex items-center gap-1.5">
			
 
				-											<Checkbox
			
 
				-												id="native"
			
 
				-												checked={useNativeToolProtocol}
			
 
				-												onCheckedChange={(checked) =>
			
 
				-													setUseNativeToolProtocol(checked === true)
			
 
				-												}
			
 
				-											/>
			
 
				-											<Label htmlFor="native">Use Native Tool Calls</Label>
			
 
				+										<div className="mt-4 p-4 rounded-md bg-muted/30 border border-border space-y-4">
			
 
				+											<div className="space-y-3">
			
 
				+												<Label className="text-sm font-medium text-muted-foreground">
			
 
				+													Tool Protocol Options
			
 
				+												</Label>
			
 
				+												<div className="flex flex-col gap-2.5 pl-1">
			
 
				+													<label
			
 
				+														htmlFor="native"
			
 
				+														className="flex items-center gap-2 cursor-pointer">
			
 
				+														<Checkbox
			
 
				+															id="native"
			
 
				+															checked={useNativeToolProtocol}
			
 
				+															onCheckedChange={(checked) =>
			
 
				+																setUseNativeToolProtocol(checked === true)
			
 
				+															}
			
 
				+														/>
			
 
				+														<span className="text-sm">Use Native Tool Calls</span>
			
 
				+													</label>
			
 
				+													<label
			
 
				+														htmlFor="multipleNativeToolCalls"
			
 
				+														className="flex items-center gap-2 cursor-pointer">
			
 
				+														<Checkbox
			
 
				+															id="multipleNativeToolCalls"
			
 
				+															checked={useMultipleNativeToolCalls}
			
 
				+															onCheckedChange={(checked) =>
			
 
				+																setUseMultipleNativeToolCalls(checked === true)
			
 
				+															}
			
 
				+														/>
			
 
				+														<span className="text-sm">Use Multiple Native Tool Calls</span>
			
 
				+													</label>
			
 
				+												</div>
			
 
				+											</div>
			
 
				+
			
 
				+											{provider === "roo" && (
			
 
				+												<div className="space-y-2 pt-2 border-t border-border">
			
 
				+													<Label className="text-sm font-medium text-muted-foreground">
			
 
				+														Reasoning Effort
			
 
				+													</Label>
			
 
				+													<Select
			
 
				+														value={reasoningEffort || "none"}
			
 
				+														onValueChange={(value) =>
			
 
				+															setReasoningEffort(
			
 
				+																value === "none" ? "" : (value as ReasoningEffort),
			
 
				+															)
			
 
				+														}>
			
 
				+														<SelectTrigger className="w-full">
			
 
				+															<SelectValue placeholder="None (default)" />
			
 
				+														</SelectTrigger>
			
 
				+														<SelectContent>
			
 
				+															<SelectItem value="none">None (default)</SelectItem>
			
 
				+															<SelectItem value="low">Low</SelectItem>
			
 
				+															<SelectItem value="medium">Medium</SelectItem>
			
 
				+															<SelectItem value="high">High</SelectItem>
			
 
				+														</SelectContent>
			
 
				+													</Select>
			
 
				+													<p className="text-xs text-muted-foreground pl-1">
			
 
				+														When set, enableReasoningEffort will be automatically enabled
			
 
				+													</p>
			
 
				+												</div>
			
 
				+											)}
			
 
				 										</div>
			
 
				 									</>
			
 
				 								)}
			
@@ -468,20 +592,28 @@ export function NewRun() {
 
				 							name="jobToken"
			
 
				 							render={({ field }) => (
			
 
				 								<FormItem>
			
 
				-									<FormLabel>Roo Code Cloud Token</FormLabel>
			
 
				+									<div className="flex items-center gap-1">
			
 
				+										<FormLabel>Roo Code Cloud Token</FormLabel>
			
 
				+										<Tooltip>
			
 
				+											<TooltipTrigger asChild>
			
 
				+												<Info className="size-4 text-muted-foreground cursor-help" />
			
 
				+											</TooltipTrigger>
			
 
				+											<TooltipContent side="right" className="max-w-xs">
			
 
				+												<p>
			
 
				+													If you have access to the Roo Code Cloud repository, generate a
			
 
				+													token with:
			
 
				+												</p>
			
 
				+												<code className="text-xs block mt-1">
			
 
				+													pnpm --filter @roo-code-cloud/auth production:create-job-token [org]
			
 
				+													[timeout]
			
 
				+												</code>
			
 
				+											</TooltipContent>
			
 
				+										</Tooltip>
			
 
				+									</div>
			
 
				 									<FormControl>
			
 
				-										<Input type="password" {...field} />
			
 
				+										<Input type="password" placeholder="Required" {...field} />
			
 
				 									</FormControl>
			
 
				 									<FormMessage />
			
 
				-									<FormDescription>
			
 
				-										If you have access to the Roo Code Cloud repository then you can generate a
			
 
				-										token with:
			
 
				-										<br />
			
 
				-										<code className="text-xs">
			
 
				-											pnpm --filter @roo-code-cloud/auth production:create-job-token [org]
			
 
				-											[timeout]
			
 
				-										</code>
			
 
				-									</FormDescription>
			
 
				 								</FormItem>
			
 
				 							)}
			
 
				 						/>
			
@@ -600,6 +732,32 @@ export function NewRun() {
 
				 						)}
			
 
				 					/>
			
 
				 
			
 
				+					<FormField
			
 
				+						control={form.control}
			
 
				+						name="iterations"
			
 
				+						render={({ field }) => (
			
 
				+							<FormItem>
			
 
				+								<FormLabel>Iterations per Exercise</FormLabel>
			
 
				+								<FormControl>
			
 
				+									<div className="flex flex-row items-center gap-2">
			
 
				+										<Slider
			
 
				+											value={[field.value]}
			
 
				+											min={ITERATIONS_MIN}
			
 
				+											max={ITERATIONS_MAX}
			
 
				+											step={1}
			
 
				+											onValueChange={(value) => {
			
 
				+												field.onChange(value[0])
			
 
				+											}}
			
 
				+										/>
			
 
				+										<div>{field.value}</div>
			
 
				+									</div>
			
 
				+								</FormControl>
			
 
				+								<FormDescription>Run each exercise multiple times to compare results</FormDescription>
			
 
				+								<FormMessage />
			
 
				+							</FormItem>
			
 
				+						)}
			
 
				+					/>
			
 
				+
			
 
				 					<FormField
			
 
				 						control={form.control}
			
 
				 						name="description"
			
--- a/apps/web-evals/src/components/home/run.tsx
+++ b/apps/web-evals/src/components/home/run.tsx
@@ -1,7 +1,8 @@
 
				 import { useCallback, useState, useRef } from "react"
			
 
				 import Link from "next/link"
			
 
				 import { useRouter } from "next/navigation"
			
 
				-import { Ellipsis, ClipboardList, Copy, Check, LoaderCircle, Trash, Settings } from "lucide-react"
			
 
				+import { toast } from "sonner"
			
 
				+import { Ellipsis, ClipboardList, Copy, Check, LoaderCircle, Trash, Settings, FileDown } from "lucide-react"
			
 
				 
			
 
				 import type { Run as EvalsRun, TaskMetrics as EvalsTaskMetrics } from "@roo-code/evals"
			
 
				 import type { ToolName } from "@roo-code/types"
			
@@ -48,9 +49,46 @@ export function Run({ run, taskMetrics, toolColumns }: RunProps) {
 
				 	const router = useRouter()
			
 
				 	const [deleteRunId, setDeleteRunId] = useState<number>()
			
 
				 	const [showSettings, setShowSettings] = useState(false)
			
 
				+	const [isExportingLogs, setIsExportingLogs] = useState(false)
			
 
				 	const continueRef = useRef<HTMLButtonElement>(null)
			
 
				 	const { isPending, copyRun, copied } = useCopyRun(run.id)
			
 
				 
			
 
				+	const onExportFailedLogs = useCallback(async () => {
			
 
				+		if (run.failed === 0) {
			
 
				+			toast.error("No failed tasks to export")
			
 
				+			return
			
 
				+		}
			
 
				+
			
 
				+		setIsExportingLogs(true)
			
 
				+		try {
			
 
				+			const response = await fetch(`/api/runs/${run.id}/logs/failed`)
			
 
				+
			
 
				+			if (!response.ok) {
			
 
				+				const error = await response.json()
			
 
				+				toast.error(error.error || "Failed to export logs")
			
 
				+				return
			
 
				+			}
			
 
				+
			
 
				+			// Download the zip file
			
 
				+			const blob = await response.blob()
			
 
				+			const url = window.URL.createObjectURL(blob)
			
 
				+			const a = document.createElement("a")
			
 
				+			a.href = url
			
 
				+			a.download = `run-${run.id}-failed-logs.zip`
			
 
				+			document.body.appendChild(a)
			
 
				+			a.click()
			
 
				+			window.URL.revokeObjectURL(url)
			
 
				+			document.body.removeChild(a)
			
 
				+
			
 
				+			toast.success("Failed logs exported successfully")
			
 
				+		} catch (error) {
			
 
				+			console.error("Error exporting logs:", error)
			
 
				+			toast.error("Failed to export logs")
			
 
				+		} finally {
			
 
				+			setIsExportingLogs(false)
			
 
				+		}
			
 
				+	}, [run.id, run.failed])
			
 
				+
			
 
				 	const onConfirmDelete = useCallback(async () => {
			
 
				 		if (!deleteRunId) {
			
 
				 			return
			
@@ -161,6 +199,23 @@ export function Run({ run, taskMetrics, toolColumns }: RunProps) {
 
				 									</div>
			
 
				 								</DropdownMenuItem>
			
 
				 							)}
			
 
				+							{run.failed > 0 && (
			
 
				+								<DropdownMenuItem onClick={onExportFailedLogs} disabled={isExportingLogs}>
			
 
				+									<div className="flex items-center gap-1">
			
 
				+										{isExportingLogs ? (
			
 
				+											<>
			
 
				+												<LoaderCircle className="animate-spin" />
			
 
				+												Exporting...
			
 
				+											</>
			
 
				+										) : (
			
 
				+											<>
			
 
				+												<FileDown />
			
 
				+												Export Failed Logs
			
 
				+											</>
			
 
				+										)}
			
 
				+									</div>
			
 
				+								</DropdownMenuItem>
			
 
				+							)}
			
 
				 							<DropdownMenuItem
			
 
				 								onClick={() => {
			
 
				 									setDeleteRunId(run.id)
			
--- a/apps/web-evals/src/lib/schemas.ts
+++ b/apps/web-evals/src/lib/schemas.ts
@@ -14,6 +14,10 @@ export const TIMEOUT_MIN = 5
 
				 export const TIMEOUT_MAX = 10
			
 
				 export const TIMEOUT_DEFAULT = 5
			
 
				 
			
 
				+export const ITERATIONS_MIN = 1
			
 
				+export const ITERATIONS_MAX = 10
			
 
				+export const ITERATIONS_DEFAULT = 1
			
 
				+
			
 
				 export const createRunSchema = z
			
 
				 	.object({
			
 
				 		model: z.string().min(1, { message: "Model is required." }),
			
@@ -23,6 +27,7 @@ export const createRunSchema = z
 
				 		settings: rooCodeSettingsSchema.optional(),
			
 
				 		concurrency: z.number().int().min(CONCURRENCY_MIN).max(CONCURRENCY_MAX),
			
 
				 		timeout: z.number().int().min(TIMEOUT_MIN).max(TIMEOUT_MAX),
			
 
				+		iterations: z.number().int().min(ITERATIONS_MIN).max(ITERATIONS_MAX),
			
 
				 		jobToken: z.string().optional(),
			
 
				 	})
			
 
				 	.refine((data) => data.suite === "full" || (data.exercises || []).length > 0, {
			
--- a/packages/evals/docker-compose.override.yml
+++ b/packages/evals/docker-compose.override.yml
@@ -0,0 +1,45 @@
 
				+# Development overrides - automatically loaded by docker compose
			
 
				+# These settings only apply when running locally for development
			
 
				+#
			
 
				+# For production, use: docker compose -f docker-compose.yml up
			
 
				+# (explicitly exclude override file)
			
 
				+
			
 
				+services:
			
 
				+    web:
			
 
				+        environment:
			
 
				+            - NODE_ENV=development
			
 
				+        volumes:
			
 
				+            # Mount log files so web can access task logs
			
 
				+            - /tmp/evals:/tmp/evals:ro
			
 
				+            # Mount source code for hot reload in development
			
 
				+            - ../../apps/web-evals:/roo/repo/apps/web-evals:delegated
			
 
				+            - ../../packages/evals:/roo/repo/packages/evals:delegated
			
 
				+            - ../../packages/types:/roo/repo/packages/types:delegated
			
 
				+            - ../../packages/ipc:/roo/repo/packages/ipc:delegated
			
 
				+            - ../../packages/cloud:/roo/repo/packages/cloud:delegated
			
 
				+            # Exclude node_modules from being overwritten
			
 
				+            - /roo/repo/node_modules
			
 
				+            - /roo/repo/apps/web-evals/node_modules
			
 
				+            - /roo/repo/packages/evals/node_modules
			
 
				+            - /roo/repo/packages/types/node_modules
			
 
				+            - /roo/repo/packages/ipc/node_modules
			
 
				+            - /roo/repo/packages/cloud/node_modules
			
 
				+        entrypoint: []
			
 
				+        command:
			
 
				+            - sh
			
 
				+            - -c
			
 
				+            - |
			
 
				+                echo '🚀 Starting evals web service in development mode...'
			
 
				+                wait_for_db() {
			
 
				+                    echo '⏳ Waiting for database...'
			
 
				+                    until pg_isready -h db -p 5432 -U postgres -d evals_development > /dev/null 2>&1; do
			
 
				+                        echo '⏳ Database not ready yet, waiting 2 seconds...'
			
 
				+                        sleep 2
			
 
				+                    done
			
 
				+                    echo '✅ Database is ready'
			
 
				+                }
			
 
				+                wait_for_db
			
 
				+                echo '🔄 Running database migrations...'
			
 
				+                pnpm --filter @roo-code/evals db:migrate
			
 
				+                echo '🌐 Starting Next.js dev server...'
			
 
				+                cd /roo/repo/apps/web-evals && npx next dev -p 3446
			
--- a/packages/evals/docker-compose.yml
+++ b/packages/evals/docker-compose.yml
@@ -55,8 +55,11 @@ services:
 
				             - "${EVALS_WEB_PORT:-3446}:3446"
			
 
				         environment:
			
 
				             - HOST_EXECUTION_METHOD=docker
			
 
				+            - PRODUCTION_DATABASE_URL
			
 
				         volumes:
			
 
				             - /var/run/docker.sock:/var/run/docker.sock
			
 
				+            # Mount log files so web can access task logs
			
 
				+            - /tmp/evals:/tmp/evals:ro
			
 
				         depends_on:
			
 
				             db:
			
 
				                 condition: service_healthy
			
--- a/packages/evals/src/db/migrations/0004_sloppy_black_knight.sql
+++ b/packages/evals/src/db/migrations/0004_sloppy_black_knight.sql
@@ -0,0 +1,3 @@
 
				+DROP INDEX "tasks_language_exercise_idx";--> statement-breakpoint
			
 
				+ALTER TABLE "tasks" ADD COLUMN "iteration" integer DEFAULT 1 NOT NULL;--> statement-breakpoint
			
 
				+CREATE UNIQUE INDEX "tasks_language_exercise_iteration_idx" ON "tasks" USING btree ("run_id","language","exercise","iteration");
			
--- a/packages/evals/src/db/migrations/meta/0004_snapshot.json
+++ b/packages/evals/src/db/migrations/meta/0004_snapshot.json
@@ -0,0 +1,472 @@
 
				+{
			
 
				+	"id": "9caa4487-e146-4084-907d-fbf9cc3e03b9",
			
 
				+	"prevId": "853d308a-3946-4ea8-9039-236bfce3c6c0",
			
 
				+	"version": "7",
			
 
				+	"dialect": "postgresql",
			
 
				+	"tables": {
			
 
				+		"public.runs": {
			
 
				+			"name": "runs",
			
 
				+			"schema": "",
			
 
				+			"columns": {
			
 
				+				"id": {
			
 
				+					"name": "id",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": true,
			
 
				+					"notNull": true,
			
 
				+					"identity": {
			
 
				+						"type": "always",
			
 
				+						"name": "runs_id_seq",
			
 
				+						"schema": "public",
			
 
				+						"increment": "1",
			
 
				+						"startWith": "1",
			
 
				+						"minValue": "1",
			
 
				+						"maxValue": "2147483647",
			
 
				+						"cache": "1",
			
 
				+						"cycle": false
			
 
				+					}
			
 
				+				},
			
 
				+				"task_metrics_id": {
			
 
				+					"name": "task_metrics_id",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"model": {
			
 
				+					"name": "model",
			
 
				+					"type": "text",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				},
			
 
				+				"name": {
			
 
				+					"name": "name",
			
 
				+					"type": "text",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"description": {
			
 
				+					"name": "description",
			
 
				+					"type": "text",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"contextWindow": {
			
 
				+					"name": "contextWindow",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"inputPrice": {
			
 
				+					"name": "inputPrice",
			
 
				+					"type": "real",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"outputPrice": {
			
 
				+					"name": "outputPrice",
			
 
				+					"type": "real",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"cacheWritesPrice": {
			
 
				+					"name": "cacheWritesPrice",
			
 
				+					"type": "real",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"cacheReadsPrice": {
			
 
				+					"name": "cacheReadsPrice",
			
 
				+					"type": "real",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"settings": {
			
 
				+					"name": "settings",
			
 
				+					"type": "jsonb",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"jobToken": {
			
 
				+					"name": "jobToken",
			
 
				+					"type": "text",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"pid": {
			
 
				+					"name": "pid",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"socket_path": {
			
 
				+					"name": "socket_path",
			
 
				+					"type": "text",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				},
			
 
				+				"concurrency": {
			
 
				+					"name": "concurrency",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true,
			
 
				+					"default": 2
			
 
				+				},
			
 
				+				"timeout": {
			
 
				+					"name": "timeout",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true,
			
 
				+					"default": 5
			
 
				+				},
			
 
				+				"passed": {
			
 
				+					"name": "passed",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true,
			
 
				+					"default": 0
			
 
				+				},
			
 
				+				"failed": {
			
 
				+					"name": "failed",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true,
			
 
				+					"default": 0
			
 
				+				},
			
 
				+				"created_at": {
			
 
				+					"name": "created_at",
			
 
				+					"type": "timestamp",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				}
			
 
				+			},
			
 
				+			"indexes": {},
			
 
				+			"foreignKeys": {
			
 
				+				"runs_task_metrics_id_taskMetrics_id_fk": {
			
 
				+					"name": "runs_task_metrics_id_taskMetrics_id_fk",
			
 
				+					"tableFrom": "runs",
			
 
				+					"tableTo": "taskMetrics",
			
 
				+					"columnsFrom": ["task_metrics_id"],
			
 
				+					"columnsTo": ["id"],
			
 
				+					"onDelete": "no action",
			
 
				+					"onUpdate": "no action"
			
 
				+				}
			
 
				+			},
			
 
				+			"compositePrimaryKeys": {},
			
 
				+			"uniqueConstraints": {},
			
 
				+			"policies": {},
			
 
				+			"checkConstraints": {},
			
 
				+			"isRLSEnabled": false
			
 
				+		},
			
 
				+		"public.taskMetrics": {
			
 
				+			"name": "taskMetrics",
			
 
				+			"schema": "",
			
 
				+			"columns": {
			
 
				+				"id": {
			
 
				+					"name": "id",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": true,
			
 
				+					"notNull": true,
			
 
				+					"identity": {
			
 
				+						"type": "always",
			
 
				+						"name": "taskMetrics_id_seq",
			
 
				+						"schema": "public",
			
 
				+						"increment": "1",
			
 
				+						"startWith": "1",
			
 
				+						"minValue": "1",
			
 
				+						"maxValue": "2147483647",
			
 
				+						"cache": "1",
			
 
				+						"cycle": false
			
 
				+					}
			
 
				+				},
			
 
				+				"tokens_in": {
			
 
				+					"name": "tokens_in",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				},
			
 
				+				"tokens_out": {
			
 
				+					"name": "tokens_out",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				},
			
 
				+				"tokens_context": {
			
 
				+					"name": "tokens_context",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				},
			
 
				+				"cache_writes": {
			
 
				+					"name": "cache_writes",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				},
			
 
				+				"cache_reads": {
			
 
				+					"name": "cache_reads",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				},
			
 
				+				"cost": {
			
 
				+					"name": "cost",
			
 
				+					"type": "real",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				},
			
 
				+				"duration": {
			
 
				+					"name": "duration",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				},
			
 
				+				"tool_usage": {
			
 
				+					"name": "tool_usage",
			
 
				+					"type": "jsonb",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"created_at": {
			
 
				+					"name": "created_at",
			
 
				+					"type": "timestamp",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				}
			
 
				+			},
			
 
				+			"indexes": {},
			
 
				+			"foreignKeys": {},
			
 
				+			"compositePrimaryKeys": {},
			
 
				+			"uniqueConstraints": {},
			
 
				+			"policies": {},
			
 
				+			"checkConstraints": {},
			
 
				+			"isRLSEnabled": false
			
 
				+		},
			
 
				+		"public.tasks": {
			
 
				+			"name": "tasks",
			
 
				+			"schema": "",
			
 
				+			"columns": {
			
 
				+				"id": {
			
 
				+					"name": "id",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": true,
			
 
				+					"notNull": true,
			
 
				+					"identity": {
			
 
				+						"type": "always",
			
 
				+						"name": "tasks_id_seq",
			
 
				+						"schema": "public",
			
 
				+						"increment": "1",
			
 
				+						"startWith": "1",
			
 
				+						"minValue": "1",
			
 
				+						"maxValue": "2147483647",
			
 
				+						"cache": "1",
			
 
				+						"cycle": false
			
 
				+					}
			
 
				+				},
			
 
				+				"run_id": {
			
 
				+					"name": "run_id",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				},
			
 
				+				"task_metrics_id": {
			
 
				+					"name": "task_metrics_id",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"language": {
			
 
				+					"name": "language",
			
 
				+					"type": "text",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				},
			
 
				+				"exercise": {
			
 
				+					"name": "exercise",
			
 
				+					"type": "text",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				},
			
 
				+				"iteration": {
			
 
				+					"name": "iteration",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true,
			
 
				+					"default": 1
			
 
				+				},
			
 
				+				"passed": {
			
 
				+					"name": "passed",
			
 
				+					"type": "boolean",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"started_at": {
			
 
				+					"name": "started_at",
			
 
				+					"type": "timestamp",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"finished_at": {
			
 
				+					"name": "finished_at",
			
 
				+					"type": "timestamp",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"created_at": {
			
 
				+					"name": "created_at",
			
 
				+					"type": "timestamp",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				}
			
 
				+			},
			
 
				+			"indexes": {
			
 
				+				"tasks_language_exercise_iteration_idx": {
			
 
				+					"name": "tasks_language_exercise_iteration_idx",
			
 
				+					"columns": [
			
 
				+						{
			
 
				+							"expression": "run_id",
			
 
				+							"isExpression": false,
			
 
				+							"asc": true,
			
 
				+							"nulls": "last"
			
 
				+						},
			
 
				+						{
			
 
				+							"expression": "language",
			
 
				+							"isExpression": false,
			
 
				+							"asc": true,
			
 
				+							"nulls": "last"
			
 
				+						},
			
 
				+						{
			
 
				+							"expression": "exercise",
			
 
				+							"isExpression": false,
			
 
				+							"asc": true,
			
 
				+							"nulls": "last"
			
 
				+						},
			
 
				+						{
			
 
				+							"expression": "iteration",
			
 
				+							"isExpression": false,
			
 
				+							"asc": true,
			
 
				+							"nulls": "last"
			
 
				+						}
			
 
				+					],
			
 
				+					"isUnique": true,
			
 
				+					"concurrently": false,
			
 
				+					"method": "btree",
			
 
				+					"with": {}
			
 
				+				}
			
 
				+			},
			
 
				+			"foreignKeys": {
			
 
				+				"tasks_run_id_runs_id_fk": {
			
 
				+					"name": "tasks_run_id_runs_id_fk",
			
 
				+					"tableFrom": "tasks",
			
 
				+					"tableTo": "runs",
			
 
				+					"columnsFrom": ["run_id"],
			
 
				+					"columnsTo": ["id"],
			
 
				+					"onDelete": "no action",
			
 
				+					"onUpdate": "no action"
			
 
				+				},
			
 
				+				"tasks_task_metrics_id_taskMetrics_id_fk": {
			
 
				+					"name": "tasks_task_metrics_id_taskMetrics_id_fk",
			
 
				+					"tableFrom": "tasks",
			
 
				+					"tableTo": "taskMetrics",
			
 
				+					"columnsFrom": ["task_metrics_id"],
			
 
				+					"columnsTo": ["id"],
			
 
				+					"onDelete": "no action",
			
 
				+					"onUpdate": "no action"
			
 
				+				}
			
 
				+			},
			
 
				+			"compositePrimaryKeys": {},
			
 
				+			"uniqueConstraints": {},
			
 
				+			"policies": {},
			
 
				+			"checkConstraints": {},
			
 
				+			"isRLSEnabled": false
			
 
				+		},
			
 
				+		"public.toolErrors": {
			
 
				+			"name": "toolErrors",
			
 
				+			"schema": "",
			
 
				+			"columns": {
			
 
				+				"id": {
			
 
				+					"name": "id",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": true,
			
 
				+					"notNull": true,
			
 
				+					"identity": {
			
 
				+						"type": "always",
			
 
				+						"name": "toolErrors_id_seq",
			
 
				+						"schema": "public",
			
 
				+						"increment": "1",
			
 
				+						"startWith": "1",
			
 
				+						"minValue": "1",
			
 
				+						"maxValue": "2147483647",
			
 
				+						"cache": "1",
			
 
				+						"cycle": false
			
 
				+					}
			
 
				+				},
			
 
				+				"run_id": {
			
 
				+					"name": "run_id",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"task_id": {
			
 
				+					"name": "task_id",
			
 
				+					"type": "integer",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": false
			
 
				+				},
			
 
				+				"tool_name": {
			
 
				+					"name": "tool_name",
			
 
				+					"type": "text",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				},
			
 
				+				"error": {
			
 
				+					"name": "error",
			
 
				+					"type": "text",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				},
			
 
				+				"created_at": {
			
 
				+					"name": "created_at",
			
 
				+					"type": "timestamp",
			
 
				+					"primaryKey": false,
			
 
				+					"notNull": true
			
 
				+				}
			
 
				+			},
			
 
				+			"indexes": {},
			
 
				+			"foreignKeys": {
			
 
				+				"toolErrors_run_id_runs_id_fk": {
			
 
				+					"name": "toolErrors_run_id_runs_id_fk",
			
 
				+					"tableFrom": "toolErrors",
			
 
				+					"tableTo": "runs",
			
 
				+					"columnsFrom": ["run_id"],
			
 
				+					"columnsTo": ["id"],
			
 
				+					"onDelete": "no action",
			
 
				+					"onUpdate": "no action"
			
 
				+				},
			
 
				+				"toolErrors_task_id_tasks_id_fk": {
			
 
				+					"name": "toolErrors_task_id_tasks_id_fk",
			
 
				+					"tableFrom": "toolErrors",
			
 
				+					"tableTo": "tasks",
			
 
				+					"columnsFrom": ["task_id"],
			
 
				+					"columnsTo": ["id"],
			
 
				+					"onDelete": "no action",
			
 
				+					"onUpdate": "no action"
			
 
				+				}
			
 
				+			},
			
 
				+			"compositePrimaryKeys": {},
			
 
				+			"uniqueConstraints": {},
			
 
				+			"policies": {},
			
 
				+			"checkConstraints": {},
			
 
				+			"isRLSEnabled": false
			
 
				+		}
			
 
				+	},
			
 
				+	"enums": {},
			
 
				+	"schemas": {},
			
 
				+	"sequences": {},
			
 
				+	"roles": {},
			
 
				+	"policies": {},
			
 
				+	"views": {},
			
 
				+	"_meta": {
			
 
				+		"columns": {},
			
 
				+		"schemas": {},
			
 
				+		"tables": {}
			
 
				+	}
			
 
				+}
			
--- a/packages/evals/src/db/migrations/meta/_journal.json
+++ b/packages/evals/src/db/migrations/meta/_journal.json
@@ -29,6 +29,13 @@
 
				 			"when": 1763797232454,
			
 
				 			"tag": "0003_simple_retro_girl",
			
 
				 			"breakpoints": true
			
 
				+		},
			
 
				+		{
			
 
				+			"idx": 4,
			
 
				+			"version": "7",
			
 
				+			"when": 1764201678953,
			
 
				+			"tag": "0004_sloppy_black_knight",
			
 
				+			"breakpoints": true
			
 
				 		}
			
 
				 	]
			
 
				 }
			
--- a/packages/evals/src/db/schema.ts
+++ b/packages/evals/src/db/schema.ts
@@ -55,12 +55,20 @@ export const tasks = pgTable(
 
				 		taskMetricsId: integer("task_metrics_id").references(() => taskMetrics.id),
			
 
				 		language: text().notNull().$type<ExerciseLanguage>(),
			
 
				 		exercise: text().notNull(),
			
 
				+		iteration: integer().default(1).notNull(),
			
 
				 		passed: boolean(),
			
 
				 		startedAt: timestamp("started_at"),
			
 
				 		finishedAt: timestamp("finished_at"),
			
 
				 		createdAt: timestamp("created_at").notNull(),
			
 
				 	},
			
 
				-	(table) => [uniqueIndex("tasks_language_exercise_idx").on(table.runId, table.language, table.exercise)],
			
 
				+	(table) => [
			
 
				+		uniqueIndex("tasks_language_exercise_iteration_idx").on(
			
 
				+			table.runId,
			
 
				+			table.language,
			
 
				+			table.exercise,
			
 
				+			table.iteration,
			
 
				+		),
			
 
				+	],
			
 
				 )
			
 
				 
			
 
				 export const tasksRelations = relations(tasks, ({ one }) => ({
			
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -176,6 +176,9 @@ importers:
 
				       '@tanstack/react-query':
			
 
				         specifier: ^5.69.0
			
 
				         version: 5.76.1([email protected])
			
 
				+      archiver:
			
 
				+        specifier: ^7.0.1
			
 
				+        version: 7.0.1
			
 
				       class-variance-authority:
			
 
				         specifier: ^0.7.1
			
 
				         version: 0.7.1
			
@@ -240,6 +243,9 @@ importers:
 
				       '@tailwindcss/postcss':
			
 
				         specifier: ^4
			
 
				         version: 4.1.8
			
 
				+      '@types/archiver':
			
 
				+        specifier: ^7.0.0
			
 
				+        version: 7.0.0
			
 
				       '@types/ps-tree':
			
 
				         specifier: ^1.1.6
			
 
				         version: 1.1.6
			
@@ -3904,6 +3910,9 @@ packages:
 
				   '@tybys/[email protected]':
			
 
				     resolution: {integrity: sha512-6+7nlbMVX/PVDCwaIQ8nTOPveOcFLSt8GcXdx8hD0bt39uWxYT88uXzqTd4fTvqta7oeUJqudepapKNt2DYJFw==}
			
 
				 
			
 
				+  '@types/archiver@7.0.0':
			
 
				+    resolution: {integrity: sha512-/3vwGwx9n+mCQdYZ2IKGGHEFL30I96UgBlk8EtRDDFQ9uxM1l4O5Ci6r00EMAkiDaTqD9DQ6nVrWRICnBPtzzg==}
			
 
				+
			
 
				   '@types/[email protected]':
			
 
				     resolution: {integrity: sha512-rfT93uj5s0PRL7EzccGMs3brplhcrghnDoV26NqKhCAS1hVo+WdNsPvE/yb6ilfr5hi2MEk6d5EWJTKdxg8jVw==}
			
 
				 
			
@@ -4142,6 +4151,9 @@ packages:
 
				   '@types/[email protected]':
			
 
				     resolution: {integrity: sha512-/LDXMQh55EzZQ0uVAZmKKhfENivEvWz6E+EYzh+/MCjMhNsotd+ZHhBGIjFDTi6+fz0OhQQQLbTgdQIxxCsC0w==}
			
 
				 
			
 
				+  '@types/readdir-glob@1.1.5':
			
 
				+    resolution: {integrity: sha512-raiuEPUYqXu+nvtY2Pe8s8FEmZ3x5yAH4VkLdihcPdalvsHltomrRC9BzuStrJ9yk06470hS0Crw0f1pXqD+Hg==}
			
 
				+
			
 
				   '@types/[email protected]':
			
 
				     resolution: {integrity: sha512-3xSjTp3v03X/lSQLkczaN9UIEwJMoMCA1+Nb5HfbJEQWogdeQIyVtTvxPXDQjZ5zws8rFQfVfRdz03ARihPJgw==}
			
 
				 
			
@@ -4471,10 +4483,18 @@ packages:
 
				     resolution: {integrity: sha512-KVgf4XQVrTjhyWmx6cte4RxonPLR9onExufI1jhvw/MQ4BB6IsZD5gT8Lq+u/+pRkWna/6JoHpiQioaqFP5Rzw==}
			
 
				     engines: {node: '>= 10'}
			
 
				 
			
 
				+  archiver-utils@5.0.2:
			
 
				+    resolution: {integrity: sha512-wuLJMmIBQYCsGZgYLTy5FIB2pF6Lfb6cXMSF8Qywwk3t20zWnAi7zLcQFdKQmIB8wyZpY5ER38x08GbwtR2cLA==}
			
 
				+    engines: {node: '>= 14'}
			
 
				+
			
 
				   [email protected]:
			
 
				     resolution: {integrity: sha512-+25nxyyznAXF7Nef3y0EbBeqmGZgeN/BxHX29Rs39djAfaFalmQ89SE6CWyDCHzGL0yt/ycBtNOmGTW0FyGWNw==}
			
 
				     engines: {node: '>= 10'}
			
 
				 
			
 
				+  archiver@7.0.1:
			
 
				+    resolution: {integrity: sha512-ZcbTaIqJOfCc03QwD468Unz/5Ir8ATtvAHsK+FdXbDIbGfihqh9mrvdcYunQzqn4HrvWWaFyaxJhGZagaJJpPQ==}
			
 
				+    engines: {node: '>= 14'}
			
 
				+
			
 
				   [email protected]:
			
 
				     resolution: {integrity: sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg==}
			
 
				 
			
@@ -4676,6 +4696,10 @@ packages:
 
				   [email protected]:
			
 
				     resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==}
			
 
				 
			
 
				+  buffer-crc32@1.0.0:
			
 
				+    resolution: {integrity: sha512-Db1SbgBS/fg/392AblrMJk97KggmvYhr4pB5ZIMTWtaivCPMWLkmb7m21cJvpvgK+J3nsU2CmmixNBZx4vFj/w==}
			
 
				+    engines: {node: '>=8.0.0'}
			
 
				+
			
 
				   [email protected]:
			
 
				     resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==}
			
 
				 
			
@@ -4689,6 +4713,9 @@ packages:
 
				   [email protected]:
			
 
				     resolution: {integrity: sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==}
			
 
				 
			
 
				+  buffer@6.0.3:
			
 
				+    resolution: {integrity: sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==}
			
 
				+
			
 
				   [email protected]:
			
 
				     resolution: {integrity: sha512-9q/rDEGSb/Qsvv2qvzIzdluL5k7AaJOTrw23z9reQthrbF7is4CtlT0DXyO1oei2DCp4uojjzQ7igaSHp1kAEQ==}
			
 
				     engines: {node: '>=0.2.0'}
			
@@ -4978,6 +5005,10 @@ packages:
 
				     resolution: {integrity: sha512-D3uMHtGc/fcO1Gt1/L7i1e33VOvD4A9hfQLP+6ewd+BvG/gQ84Yh4oftEhAdjSMgBgwGL+jsppT7JYNpo6MHHg==}
			
 
				     engines: {node: '>= 10'}
			
 
				 
			
 
				+  compress-commons@6.0.2:
			
 
				+    resolution: {integrity: sha512-6FqVXeETqWPoGcfzrXb37E50NP0LXT8kAMu5ooZayhWWdgEY4lBEEcbQNXtkuKQsGduxiIcI4gOTsxTmuq/bSg==}
			
 
				+    engines: {node: '>= 14'}
			
 
				+
			
 
				   [email protected]:
			
 
				     resolution: {integrity: sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w==}
			
 
				 
			
@@ -5042,6 +5073,10 @@ packages:
 
				     resolution: {integrity: sha512-NT7w2JVU7DFroFdYkeq8cywxrgjPHWkdX1wjpRQXPX5Asews3tA+Ght6lddQO5Mkumffp3X7GEqku3epj2toIw==}
			
 
				     engines: {node: '>= 10'}
			
 
				 
			
 
				+  crc32-stream@6.0.0:
			
 
				+    resolution: {integrity: sha512-piICUB6ei4IlTv1+653yq5+KoqfBYmj9bw6LqXoOneTMDXk5nM1qt12mFW1caG3LlJXEKW1Bp0WggEmIfQB34g==}
			
 
				+    engines: {node: '>= 14'}
			
 
				+
			
 
				   [email protected]:
			
 
				     resolution: {integrity: sha512-e4a5N8lVvuLgAWgnCrLr2PP0YyDOTHa9H/Rj54dirp61qXnNq46m82bRhNqIA5VccJtWBvPTFRV3TtvHUKPB1g==}
			
 
				 
			
@@ -5896,6 +5931,10 @@ packages:
 
				   [email protected]:
			
 
				     resolution: {integrity: sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA==}
			
 
				 
			
 
				+  events@3.3.0:
			
 
				+    resolution: {integrity: sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==}
			
 
				+    engines: {node: '>=0.8.x'}
			
 
				+
			
 
				   [email protected]:
			
 
				     resolution: {integrity: sha512-6RxOBZ/cYgd8usLwsEl+EC09Au/9BcmCKYF2/xbml6DNczf7nv0MQb+7BA2F+li6//I+28VNlQR37XfQtcAJuA==}
			
 
				     engines: {node: '>=18.0.0'}
			
@@ -8362,6 +8401,10 @@ packages:
 
				   [email protected]:
			
 
				     resolution: {integrity: sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==}
			
 
				 
			
 
				+  process@0.11.10:
			
 
				+    resolution: {integrity: sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==}
			
 
				+    engines: {node: '>= 0.6.0'}
			
 
				+
			
 
				   [email protected]:
			
 
				     resolution: {integrity: sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==}
			
 
				     engines: {node: '>=0.4.0'}
			
@@ -8614,6 +8657,10 @@ packages:
 
				     resolution: {integrity: sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==}
			
 
				     engines: {node: '>= 6'}
			
 
				 
			
 
				+  readable-stream@4.7.0:
			
 
				+    resolution: {integrity: sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==}
			
 
				+    engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0}
			
 
				+
			
 
				   [email protected]:
			
 
				     resolution: {integrity: sha512-v05I2k7xN8zXvPD9N+z/uhXPaj0sUFCe2rcWZIpBsqxfP7xXFQ0tipAd/wjj1YxWyWtUS5IDJpOG82JKt2EAVA==}
			
 
				 
			
@@ -9180,6 +9227,9 @@ packages:
 
				   [email protected]:
			
 
				     resolution: {integrity: sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==}
			
 
				 
			
 
				+  string_decoder@1.3.0:
			
 
				+    resolution: {integrity: sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==}
			
 
				+
			
 
				   [email protected]:
			
 
				     resolution: {integrity: sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==}
			
 
				 
			
@@ -10271,6 +10321,10 @@ packages:
 
				     resolution: {integrity: sha512-9qv4rlDiopXg4E69k+vMHjNN63YFMe9sZMrdlvKnCjlCRWeCBswPPMPUfx+ipsAWq1LXHe70RcbaHdJJpS6hyQ==}
			
 
				     engines: {node: '>= 10'}
			
 
				 
			
 
				+  zip-stream@6.0.1:
			
 
				+    resolution: {integrity: sha512-zK7YHHz4ZXpW89AHXUPbQVGKI7uvkd3hzusTdotCg1UxyaVtg0zFJSTfW/Dq5f7OBBVnq6cZIaC8Ti4hb6dtCA==}
			
 
				+    engines: {node: '>= 14'}
			
 
				+
			
 
				   [email protected]:
			
 
				     resolution: {integrity: sha512-/AuWwMP+YqiPbsJx5D6TfgRTc4kTLjsh5SOcd4bLsfUg2RcEXrFMJl1DGgdHy2aCfsIA/cr/1JM0xcB2GZji8g==}
			
 
				     peerDependencies:
			
@@ -13557,6 +13611,10 @@ snapshots:
 
				       tslib: 2.8.1
			
 
				     optional: true
			
 
				 
			
 
				+  '@types/archiver@7.0.0':
			
 
				+    dependencies:
			
 
				+      '@types/readdir-glob': 1.1.5
			
 
				+
			
 
				   '@types/[email protected]': {}
			
 
				 
			
 
				   '@types/[email protected]':
			
@@ -13831,6 +13889,10 @@ snapshots:
 
				       '@types/prop-types': 15.7.14
			
 
				       csstype: 3.1.3
			
 
				 
			
 
				+  '@types/readdir-glob@1.1.5':
			
 
				+    dependencies:
			
 
				+      '@types/node': 24.2.1
			
 
				+
			
 
				   '@types/[email protected]': {}
			
 
				 
			
 
				   '@types/[email protected]': {}
			
@@ -14052,7 +14114,7 @@ snapshots:
 
				       sirv: 3.0.1
			
 
				       tinyglobby: 0.2.14
			
 
				       tinyrainbow: 2.0.0
			
 
				-      vitest: 3.2.4(@types/[email protected])(@types/node@20.17.57)(@vitest/[email protected])([email protected])([email protected])([email protected])([email protected])([email protected])
			
 
				+      vitest: 3.2.4(@types/[email protected])(@types/node@24.2.1)(@vitest/[email protected])([email protected])([email protected])([email protected])([email protected])([email protected])
			
 
				 
			
 
				   '@vitest/[email protected]':
			
 
				     dependencies:
			
@@ -14262,6 +14324,16 @@ snapshots:
 
				       normalize-path: 3.0.0
			
 
				       readable-stream: 3.6.2
			
 
				 
			
 
				+  archiver-utils@5.0.2:
			
 
				+    dependencies:
			
 
				+      glob: 11.1.0
			
 
				+      graceful-fs: 4.2.11
			
 
				+      is-stream: 2.0.1
			
 
				+      lazystream: 1.0.1
			
 
				+      lodash: 4.17.21
			
 
				+      normalize-path: 3.0.0
			
 
				+      readable-stream: 4.7.0
			
 
				+
			
 
				   [email protected]:
			
 
				     dependencies:
			
 
				       archiver-utils: 2.1.0
			
@@ -14272,6 +14344,16 @@ snapshots:
 
				       tar-stream: 2.2.0
			
 
				       zip-stream: 4.1.1
			
 
				 
			
 
				+  archiver@7.0.1:
			
 
				+    dependencies:
			
 
				+      archiver-utils: 5.0.2
			
 
				+      async: 3.2.6
			
 
				+      buffer-crc32: 1.0.0
			
 
				+      readable-stream: 4.7.0
			
 
				+      readdir-glob: 1.1.3
			
 
				+      tar-stream: 3.1.7
			
 
				+      zip-stream: 6.0.1
			
 
				+
			
 
				   [email protected]: {}
			
 
				 
			
 
				   [email protected]:
			
@@ -14502,6 +14584,8 @@ snapshots:
 
				 
			
 
				   [email protected]: {}
			
 
				 
			
 
				+  buffer-crc32@1.0.0: {}
			
 
				+
			
 
				   [email protected]: {}
			
 
				 
			
 
				   [email protected]: {}
			
@@ -14513,6 +14597,11 @@ snapshots:
 
				       base64-js: 1.5.1
			
 
				       ieee754: 1.2.1
			
 
				 
			
 
				+  buffer@6.0.3:
			
 
				+    dependencies:
			
 
				+      base64-js: 1.5.1
			
 
				+      ieee754: 1.2.1
			
 
				+
			
 
				   [email protected]: {}
			
 
				 
			
 
				   [email protected]:
			
@@ -14823,6 +14912,14 @@ snapshots:
 
				       normalize-path: 3.0.0
			
 
				       readable-stream: 3.6.2
			
 
				 
			
 
				+  compress-commons@6.0.2:
			
 
				+    dependencies:
			
 
				+      crc-32: 1.2.2
			
 
				+      crc32-stream: 6.0.0
			
 
				+      is-stream: 2.0.1
			
 
				+      normalize-path: 3.0.0
			
 
				+      readable-stream: 4.7.0
			
 
				+
			
 
				   [email protected]: {}
			
 
				 
			
 
				   [email protected]: {}
			
@@ -14881,6 +14978,11 @@ snapshots:
 
				       crc-32: 1.2.2
			
 
				       readable-stream: 3.6.2
			
 
				 
			
 
				+  crc32-stream@6.0.0:
			
 
				+    dependencies:
			
 
				+      crc-32: 1.2.2
			
 
				+      readable-stream: 4.7.0
			
 
				+
			
 
				   [email protected]:
			
 
				     dependencies:
			
 
				       node-fetch: 2.7.0
			
@@ -15789,6 +15891,8 @@ snapshots:
 
				 
			
 
				   [email protected]: {}
			
 
				 
			
 
				+  events@3.3.0: {}
			
 
				+
			
 
				   [email protected]: {}
			
 
				 
			
 
				   [email protected]:
			
@@ -18692,6 +18796,8 @@ snapshots:
 
				 
			
 
				   [email protected]: {}
			
 
				 
			
 
				+  process@0.11.10: {}
			
 
				+
			
 
				   [email protected]: {}
			
 
				 
			
 
				   [email protected]:
			
@@ -19021,6 +19127,14 @@ snapshots:
 
				       string_decoder: 1.1.1
			
 
				       util-deprecate: 1.0.2
			
 
				 
			
 
				+  readable-stream@4.7.0:
			
 
				+    dependencies:
			
 
				+      abort-controller: 3.0.0
			
 
				+      buffer: 6.0.3
			
 
				+      events: 3.3.0
			
 
				+      process: 0.11.10
			
 
				+      string_decoder: 1.3.0
			
 
				+
			
 
				   [email protected]:
			
 
				     dependencies:
			
 
				       minimatch: 5.1.6
			
@@ -19766,6 +19880,10 @@ snapshots:
 
				     dependencies:
			
 
				       safe-buffer: 5.1.2
			
 
				 
			
 
				+  string_decoder@1.3.0:
			
 
				+    dependencies:
			
 
				+      safe-buffer: 5.2.1
			
 
				+
			
 
				   [email protected]:
			
 
				     dependencies:
			
 
				       character-entities-html4: 2.1.0
			
@@ -21059,6 +21177,12 @@ snapshots:
 
				       compress-commons: 4.1.2
			
 
				       readable-stream: 3.6.2
			
 
				 
			
 
				+  zip-stream@6.0.1:
			
 
				+    dependencies:
			
 
				+      archiver-utils: 5.0.2
			
 
				+      compress-commons: 6.0.2
			
 
				+      readable-stream: 4.7.0
			
 
				+
			
 
				   [email protected]([email protected]):
			
 
				     dependencies:
			
 
				       zod: 3.25.61