Просмотр исходного кода

Add model info to eval runs table (#7749)

Chris Estreich 3 месяца назад
Родитель
Commit
247da38b02

+ 24 - 99
apps/web-roo-code/src/app/evals/evals.tsx

@@ -1,59 +1,33 @@
 "use client"
 
 import { useMemo } from "react"
-import { ScatterChart, Scatter, XAxis, YAxis, Label, Customized, Cross } from "recharts"
-
-import type { TaskMetrics, Run } from "@roo-code/evals"
 
 import { formatTokens, formatCurrency, formatDuration, formatScore } from "@/lib"
 import { useOpenRouterModels } from "@/lib/hooks"
-import {
-	ChartContainer,
-	ChartTooltip,
-	ChartTooltipContent,
-	ChartConfig,
-	ChartLegend,
-	ChartLegendContent,
-	Table,
-	TableBody,
-	TableCaption,
-	TableCell,
-	TableHead,
-	TableHeader,
-	TableRow,
-} from "@/components/ui"
+import { Table, TableBody, TableCaption, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui"
+
+import type { EvalRun } from "./types"
+import { Plot } from "./plot"
 
-export function Evals({
-	runs,
-}: {
-	runs: (Run & {
-		label: string
-		score: number
-		languageScores?: Record<"go" | "java" | "javascript" | "python" | "rust", number>
-		taskMetrics: TaskMetrics
-		modelId?: string
-	})[]
-}) {
+export function Evals({ runs }: { runs: EvalRun[] }) {
 	const { data: openRouterModels } = useOpenRouterModels()
 
-	const tableData = useMemo(
+	const tableData: (EvalRun & { label: string; cost: number })[] = useMemo(
 		() =>
-			runs.map((run) => ({
-				...run,
-				label: run.description || run.model,
-				score: run.score,
-				cost: run.taskMetrics.cost,
-				model: openRouterModels?.[run.modelId ?? ""],
-				modelInfo: openRouterModels?.[run.modelId ?? ""]?.modelInfo,
-			})),
-		[runs, openRouterModels],
-	)
+			runs.map((run) => {
+				const openRouterModelInfo = openRouterModels?.[run.modelId ?? ""]?.modelInfo
 
-	const chartData = useMemo(() => tableData.filter(({ cost }) => cost < 100), [tableData])
-
-	const chartConfig = useMemo(
-		() => chartData.reduce((acc, run) => ({ ...acc, [run.label]: run }), {} as ChartConfig),
-		[chartData],
+				return {
+					...run,
+					label: run.name || run.description || run.model,
+					cost: run.taskMetrics.cost,
+					description: run.description ?? openRouterModelInfo?.description ?? null,
+					contextWindow: run.contextWindow ?? openRouterModelInfo?.contextWindow ?? null,
+					inputPrice: run.inputPrice ?? openRouterModelInfo?.inputPrice ?? null,
+					outputPrice: run.outputPrice ?? openRouterModelInfo?.outputPrice ?? null,
+				}
+			}),
+		[runs, openRouterModels],
 	)
 
 	return (
@@ -127,15 +101,15 @@ export function Evals({
 				<TableBody className="font-mono">
 					{tableData.map((run) => (
 						<TableRow key={run.id}>
-							<TableCell title={run.model?.description}>
+							<TableCell title={run.description ?? undefined}>
 								<div className="font-sans">{run.label}</div>
-								<div className="text-xs opacity-50">{formatTokens(run.modelInfo?.contextWindow)}</div>
+								<div className="text-xs opacity-50">{formatTokens(run.contextWindow)}</div>
 							</TableCell>
 							<TableCell className="border-r">
 								<div className="flex flex-row gap-2">
-									<div>{formatCurrency(run.modelInfo?.inputPrice)}</div>
+									<div>{formatCurrency(run.inputPrice)}</div>
 									<div className="opacity-25">/</div>
-									<div>{formatCurrency(run.modelInfo?.outputPrice)}</div>
+									<div>{formatCurrency(run.outputPrice)}</div>
 								</div>
 							</TableCell>
 							<TableCell className="font-mono">{formatDuration(run.taskMetrics.duration)}</TableCell>
@@ -167,58 +141,9 @@ export function Evals({
 					))}
 				</TableBody>
 				<TableCaption>
-					<div className="pb-4 font-medium">Cost Versus Score</div>
-					<ChartContainer config={chartConfig} className="h-[500px] w-full">
-						<ScatterChart margin={{ top: 0, right: 0, bottom: 0, left: 20 }}>
-							<XAxis
-								type="number"
-								dataKey="cost"
-								name="Cost"
-								domain={[
-									(dataMin: number) => Math.round((dataMin - 5) / 5) * 5,
-									(dataMax: number) => Math.round((dataMax + 5) / 5) * 5,
-								]}
-								tickFormatter={(value) => formatCurrency(value)}>
-								<Label value="Cost" position="bottom" offset={0} />
-							</XAxis>
-							<YAxis
-								type="number"
-								dataKey="score"
-								name="Score"
-								domain={[
-									(dataMin: number) => Math.max(0, Math.round((dataMin - 5) / 5) * 5),
-									(dataMax: number) => Math.min(100, Math.round((dataMax + 5) / 5) * 5),
-								]}
-								tickFormatter={(value) => `${value}%`}>
-								<Label value="Score" angle={-90} position="left" dy={-15} />
-							</YAxis>
-							<ChartTooltip content={<ChartTooltipContent labelKey="label" hideIndicator />} />
-							<Customized component={renderQuadrant} />
-							{chartData.map((d, i) => (
-								<Scatter key={d.label} name={d.label} data={[d]} fill={`hsl(var(--chart-${i + 1}))`} />
-							))}
-							<ChartLegend content={<ChartLegendContent />} />
-						</ScatterChart>
-					</ChartContainer>
-					<div className="py-4 text-xs opacity-50">
-						(Note: Very expensive models are excluded from the scatter plot.)
-					</div>
+					<Plot tableData={tableData} />
 				</TableCaption>
 			</Table>
 		</div>
 	)
 }
-
-// eslint-disable-next-line @typescript-eslint/no-explicit-any
-const renderQuadrant = (props: any) => (
-	<Cross
-		width={props.width}
-		height={props.height}
-		x={props.width / 2 + 35}
-		y={props.height / 2 - 15}
-		top={0}
-		left={0}
-		stroke="currentColor"
-		opacity={0.1}
-	/>
-)

+ 336 - 0
apps/web-roo-code/src/app/evals/plot.tsx

@@ -0,0 +1,336 @@
+"use client"
+
+import { useMemo } from "react"
+import { ScatterChart, Scatter, XAxis, YAxis, Customized, Cross, LabelList } from "recharts"
+
+import { formatCurrency } from "@/lib"
+import { ChartContainer, ChartTooltip, ChartConfig } from "@/components/ui"
+
+import type { EvalRun } from "./types"
+
+type PlotProps = {
+	tableData: (EvalRun & { label: string; cost: number })[]
+}
+
+type LabelPosition = "top" | "bottom" | "left" | "right"
+
+export const Plot = ({ tableData }: PlotProps) => {
+	const chartData = useMemo(() => tableData.filter(({ cost }) => cost < 50), [tableData])
+
+	const chartConfig = useMemo(
+		() => chartData.reduce((acc, run) => ({ ...acc, [run.label]: run }), {} as ChartConfig),
+		[chartData],
+	)
+
+	// Calculate label positions to avoid overlaps.
+	const labelPositions = useMemo(() => {
+		const positions: Record<string, LabelPosition> = {}
+
+		// Track placed labels with their data coordinates (cost, score) and chosen position.
+		const placedLabels: Array<{
+			cost: number
+			score: number
+			label: string
+			position: LabelPosition
+		}> = []
+
+		// Helper function to check if two labels would overlap.
+		const wouldLabelsOverlap = (
+			p1: { cost: number; score: number; position: LabelPosition },
+			p2: { cost: number; score: number; position: LabelPosition },
+		): boolean => {
+			// Approximate thresholds for overlap detection.
+			const horizontalThreshold = 4 // Cost units.
+			const verticalThreshold = 5 // Score units.
+
+			const costDiff = Math.abs(p1.cost - p2.cost)
+			const scoreDiff = Math.abs(p1.score - p2.score)
+
+			// If points are far apart, no overlap.
+			if (costDiff > horizontalThreshold * 2 || scoreDiff > verticalThreshold * 2) {
+				return false
+			}
+
+			// Check specific position combinations for overlap.
+			// Same position for nearby points definitely overlaps.
+			if (p1.position === p2.position && costDiff < horizontalThreshold && scoreDiff < verticalThreshold) {
+				return true
+			}
+
+			// Classify each label as top or bottom for the same-side check below.
+			const p1IsTop = p1.position === "top"
+			const p1IsBottom = p1.position === "bottom"
+			const p2IsTop = p2.position === "top"
+			const p2IsBottom = p2.position === "bottom"
+
+			// If both labels are on the same vertical side and points are close
+			// horizontally.
+			if ((p1IsTop && p2IsTop) || (p1IsBottom && p2IsBottom)) {
+				if (costDiff < horizontalThreshold && scoreDiff < verticalThreshold / 2) {
+					return true
+				}
+			}
+
+			return false
+		}
+
+		// Helper function to check if position would overlap with a data point.
+		const wouldOverlapPoint = (point: (typeof chartData)[0], position: LabelPosition): boolean => {
+			for (const other of chartData) {
+				if (other.label === point.label) {
+					continue
+				}
+
+				const costDiff = Math.abs(point.cost - other.cost)
+				const scoreDiff = Math.abs(point.score - other.score)
+
+				// Check if label would be placed on top of another point.
+				switch (position) {
+					case "top":
+						// Label is above, check if there's a point above.
+						if (costDiff < 3 && other.score > point.score && other.score - point.score < 6) {
+							return true
+						}
+						break
+					case "bottom":
+						// Label is below, check if there's a point below.
+						if (costDiff < 3 && other.score < point.score && point.score - other.score < 6) {
+							return true
+						}
+						break
+					case "left":
+						// Label is to the left, check if there's a point to the left.
+						if (scoreDiff < 3 && other.cost < point.cost && point.cost - other.cost < 4) {
+							return true
+						}
+						break
+					case "right":
+						// Label is to the right, check if there's a point to the right.
+						if (scoreDiff < 3 && other.cost > point.cost && other.cost - point.cost < 4) {
+							return true
+						}
+						break
+				}
+			}
+			return false
+		}
+
+		// Sort points to process them in a consistent order.
+		// Process from top-left to bottom-right.
+		const sortedData = [...chartData].sort((a, b) => {
+			// First by score (higher first).
+			const scoreDiff = b.score - a.score
+			if (Math.abs(scoreDiff) > 1) return scoreDiff
+			// Then by cost (lower first).
+			return a.cost - b.cost
+		})
+
+		// Process each point and find the best position.
+		sortedData.forEach((point) => {
+			// Try positions in order of preference.
+			const positionPreferences: LabelPosition[] = ["top", "bottom", "right", "left"]
+
+			let bestPosition: LabelPosition = "top"
+
+			for (const position of positionPreferences) {
+				// Check if this position would overlap with any placed labels.
+				let hasLabelOverlap = false
+
+				for (const placed of placedLabels) {
+					if (
+						wouldLabelsOverlap(
+							{ cost: point.cost, score: point.score, position },
+							{ cost: placed.cost, score: placed.score, position: placed.position },
+						)
+					) {
+						hasLabelOverlap = true
+						break
+					}
+				}
+
+				// Check if this position would overlap with any data points.
+				const hasPointOverlap = wouldOverlapPoint(point, position)
+
+				// If no overlaps, use this position.
+				if (!hasLabelOverlap && !hasPointOverlap) {
+					bestPosition = position
+					break
+				}
+			}
+
+			// Use the best position found.
+			positions[point.label] = bestPosition
+			placedLabels.push({
+				cost: point.cost,
+				score: point.score,
+				label: point.label,
+				position: bestPosition,
+			})
+		})
+
+		return positions
+	}, [chartData])
+
+	return (
+		<>
+			<div className="pt-4 pb-8 font-mono">Cost x Score</div>
+			<ChartContainer config={chartConfig} className="h-[500px] w-full">
+				<ScatterChart margin={{ top: 0, right: 0, bottom: 0, left: 20 }}>
+					<XAxis
+						type="number"
+						dataKey="cost"
+						name="Cost"
+						domain={[
+							(dataMin: number) => Math.round((dataMin - 5) / 5) * 5,
+							(dataMax: number) => Math.round((dataMax + 5) / 5) * 5,
+						]}
+						tickFormatter={(value) => formatCurrency(value)}
+					/>
+					<YAxis
+						type="number"
+						dataKey="score"
+						name="Score"
+						domain={[
+							(dataMin: number) => Math.max(0, Math.round((dataMin - 5) / 5) * 5),
+							(dataMax: number) => Math.min(100, Math.round((dataMax + 5) / 5) * 5),
+						]}
+						tickFormatter={(value) => `${value}%`}
+					/>
+					<ChartTooltip
+						content={({ active, payload }) => {
+							if (!active || !payload || !payload.length || !payload[0]) {
+								return null
+							}
+
+							const { label, cost, score } = payload[0].payload
+
+							return (
+								<div className="bg-background border rounded-sm p-2 shadow-sm text-left">
+									<div className="border-b pb-1">{label}</div>
+									<div className="pt-1">
+										<div>
+											Score: <span className="font-mono">{Math.round(score)}%</span>
+										</div>
+										<div>
+											Cost: <span className="font-mono">{formatCurrency(cost)}</span>
+										</div>
+									</div>
+								</div>
+							)
+						}}
+					/>
+					<Customized component={renderQuadrant} />
+					{chartData.map((d, index) => (
+						<Scatter
+							key={d.label}
+							name={d.label}
+							data={[d]}
+							fill={generateSpectrumColor(index, chartData.length)}>
+							<LabelList
+								dataKey="label"
+								content={(props) => renderCustomLabel(props, labelPositions[d.label] || "top")}
+							/>
+						</Scatter>
+					))}
+				</ScatterChart>
+			</ChartContainer>
+			<div className="py-4 text-xs opacity-50">
+				(Note: Models with a cost of $50 or more are excluded from the scatter plot.)
+			</div>
+		</>
+	)
+}
+
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+const renderQuadrant = (props: any) => (
+	<Cross
+		width={props.width}
+		height={props.height}
+		x={props.width / 2 + 35}
+		y={props.height / 2 - 15}
+		top={0}
+		left={0}
+		stroke="currentColor"
+		opacity={0.1}
+	/>
+)
+
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+const renderCustomLabel = (props: any, position: LabelPosition) => {
+	const { x, y, value } = props
+	const maxWidth = 80 // Maximum width in pixels - adjust as needed.
+
+	const truncateText = (text: string, maxChars: number = 20) => {
+		if (text.length <= maxChars) {
+			return text
+		}
+
+		return text.substring(0, maxChars - 1) + "…"
+	}
+
+	// Calculate position offsets based on label position.
+	let xOffset = 0
+	let yOffset = 0
+	let textAnchor: "middle" | "start" | "end" = "middle"
+	let dominantBaseline: "auto" | "hanging" | "middle" = "auto"
+
+	switch (position) {
+		case "top":
+			yOffset = -8
+			textAnchor = "middle"
+			dominantBaseline = "auto"
+			break
+		case "bottom":
+			yOffset = 15
+			textAnchor = "middle"
+			dominantBaseline = "hanging"
+			break
+		case "left":
+			xOffset = -8
+			yOffset = 5
+			textAnchor = "end"
+			dominantBaseline = "middle"
+			break
+		case "right":
+			xOffset = 15
+			yOffset = 5
+			textAnchor = "start"
+			dominantBaseline = "middle"
+			break
+	}
+
+	return (
+		<text
+			x={x + xOffset}
+			y={y + yOffset}
+			fontSize="11"
+			fontWeight="500"
+			fill="currentColor"
+			opacity="0.8"
+			textAnchor={textAnchor}
+			dominantBaseline={dominantBaseline}
+			style={{
+				pointerEvents: "none",
+				maxWidth: `${maxWidth}px`,
+				overflow: "hidden",
+				textOverflow: "ellipsis",
+				whiteSpace: "nowrap",
+			}}>
+			{truncateText(value)}
+		</text>
+	)
+}
+
+const generateSpectrumColor = (index: number, total: number): string => {
+	// Distribute hues evenly across the color wheel (0-360 degrees).
+	// Start at 0 (red) and advance by 360 / total degrees per index.
+	const hue = (index * 360) / total
+
+	// Use high saturation for vibrant colors.
+	const saturation = 70
+
+	// Use medium lightness for good visibility on both light and dark backgrounds.
+	const lightness = 50
+
+	return `hsl(${Math.round(hue)}, ${saturation}%, ${lightness}%)`
+}

+ 9 - 0
apps/web-roo-code/src/app/evals/types.ts

@@ -0,0 +1,9 @@
+import type { TaskMetrics, Run } from "@roo-code/evals"
+
+export type EvalRun = Run & {
+	label: string
+	score: number
+	languageScores?: Record<"go" | "java" | "javascript" | "python" | "rust", number>
+	taskMetrics: TaskMetrics
+	modelId?: string
+}

+ 1 - 0
apps/web-roo-code/src/lib/format-currency.ts

@@ -7,6 +7,7 @@ export const formatCurrency = (amount: number | null | undefined) => {
 	if (amount === null || amount === undefined) {
 		return "-"
 	}
+
 	return formatter.format(amount)
 }
 

+ 1 - 1
apps/web-roo-code/src/lib/hooks/use-open-router-models.ts

@@ -49,7 +49,7 @@ export const getOpenRouterModels = async (): Promise<OpenRouterModelRecord> => {
 
 	return result.data.data
 		.filter((rawModel) => {
-			// Skip image generation models (models that output images)
+			// Skip image generation models (models that output images).
 			return !rawModel.architecture?.output_modalities?.includes("image")
 		})
 		.sort((a, b) => a.name.localeCompare(b.name))

+ 2 - 0
packages/evals/package.json

@@ -15,6 +15,8 @@
 		"drizzle-kit:production": "dotenvx run -f .env.production -- tsx node_modules/drizzle-kit/bin.cjs",
 		"db:generate": "pnpm drizzle-kit generate",
 		"db:migrate": "pnpm drizzle-kit migrate",
+		"db:test:migrate": "pnpm drizzle-kit:test migrate",
+		"db:production:migrate": "pnpm drizzle-kit:production migrate",
 		"db:push": "pnpm drizzle-kit push",
 		"db:test:push": "pnpm drizzle-kit:test push",
 		"db:production:push": "pnpm drizzle-kit:production push",

+ 6 - 0
packages/evals/src/db/migrations/0002_bouncy_blazing_skull.sql

@@ -0,0 +1,6 @@
+ALTER TABLE "runs" ADD COLUMN "name" text;--> statement-breakpoint
+ALTER TABLE "runs" ADD COLUMN "contextWindow" integer;--> statement-breakpoint
+ALTER TABLE "runs" ADD COLUMN "inputPrice" real;--> statement-breakpoint
+ALTER TABLE "runs" ADD COLUMN "outputPrice" real;--> statement-breakpoint
+ALTER TABLE "runs" ADD COLUMN "cacheWritesPrice" real;--> statement-breakpoint
+ALTER TABLE "runs" ADD COLUMN "cacheReadsPrice" real;

+ 453 - 0
packages/evals/src/db/migrations/meta/0002_snapshot.json

@@ -0,0 +1,453 @@
+{
+	"id": "3d2b8423-6170-4cb2-9f62-1c86756da97a",
+	"prevId": "43b197c4-ff4f-48c1-908b-a330e66a162d",
+	"version": "7",
+	"dialect": "postgresql",
+	"tables": {
+		"public.runs": {
+			"name": "runs",
+			"schema": "",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"identity": {
+						"type": "always",
+						"name": "runs_id_seq",
+						"schema": "public",
+						"increment": "1",
+						"startWith": "1",
+						"minValue": "1",
+						"maxValue": "2147483647",
+						"cache": "1",
+						"cycle": false
+					}
+				},
+				"task_metrics_id": {
+					"name": "task_metrics_id",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"model": {
+					"name": "model",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"name": {
+					"name": "name",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"description": {
+					"name": "description",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"contextWindow": {
+					"name": "contextWindow",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"inputPrice": {
+					"name": "inputPrice",
+					"type": "real",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"outputPrice": {
+					"name": "outputPrice",
+					"type": "real",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"cacheWritesPrice": {
+					"name": "cacheWritesPrice",
+					"type": "real",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"cacheReadsPrice": {
+					"name": "cacheReadsPrice",
+					"type": "real",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"settings": {
+					"name": "settings",
+					"type": "jsonb",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"pid": {
+					"name": "pid",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"socket_path": {
+					"name": "socket_path",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"concurrency": {
+					"name": "concurrency",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"default": 2
+				},
+				"timeout": {
+					"name": "timeout",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"default": 5
+				},
+				"passed": {
+					"name": "passed",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"default": 0
+				},
+				"failed": {
+					"name": "failed",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"default": 0
+				},
+				"created_at": {
+					"name": "created_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": true
+				}
+			},
+			"indexes": {},
+			"foreignKeys": {
+				"runs_task_metrics_id_taskMetrics_id_fk": {
+					"name": "runs_task_metrics_id_taskMetrics_id_fk",
+					"tableFrom": "runs",
+					"tableTo": "taskMetrics",
+					"columnsFrom": ["task_metrics_id"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				}
+			},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"policies": {},
+			"checkConstraints": {},
+			"isRLSEnabled": false
+		},
+		"public.taskMetrics": {
+			"name": "taskMetrics",
+			"schema": "",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"identity": {
+						"type": "always",
+						"name": "taskMetrics_id_seq",
+						"schema": "public",
+						"increment": "1",
+						"startWith": "1",
+						"minValue": "1",
+						"maxValue": "2147483647",
+						"cache": "1",
+						"cycle": false
+					}
+				},
+				"tokens_in": {
+					"name": "tokens_in",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"tokens_out": {
+					"name": "tokens_out",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"tokens_context": {
+					"name": "tokens_context",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"cache_writes": {
+					"name": "cache_writes",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"cache_reads": {
+					"name": "cache_reads",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"cost": {
+					"name": "cost",
+					"type": "real",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"duration": {
+					"name": "duration",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"tool_usage": {
+					"name": "tool_usage",
+					"type": "jsonb",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"created_at": {
+					"name": "created_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": true
+				}
+			},
+			"indexes": {},
+			"foreignKeys": {},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"policies": {},
+			"checkConstraints": {},
+			"isRLSEnabled": false
+		},
+		"public.tasks": {
+			"name": "tasks",
+			"schema": "",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"identity": {
+						"type": "always",
+						"name": "tasks_id_seq",
+						"schema": "public",
+						"increment": "1",
+						"startWith": "1",
+						"minValue": "1",
+						"maxValue": "2147483647",
+						"cache": "1",
+						"cycle": false
+					}
+				},
+				"run_id": {
+					"name": "run_id",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"task_metrics_id": {
+					"name": "task_metrics_id",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"language": {
+					"name": "language",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"exercise": {
+					"name": "exercise",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"passed": {
+					"name": "passed",
+					"type": "boolean",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"started_at": {
+					"name": "started_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"finished_at": {
+					"name": "finished_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"created_at": {
+					"name": "created_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": true
+				}
+			},
+			"indexes": {
+				"tasks_language_exercise_idx": {
+					"name": "tasks_language_exercise_idx",
+					"columns": [
+						{
+							"expression": "run_id",
+							"isExpression": false,
+							"asc": true,
+							"nulls": "last"
+						},
+						{
+							"expression": "language",
+							"isExpression": false,
+							"asc": true,
+							"nulls": "last"
+						},
+						{
+							"expression": "exercise",
+							"isExpression": false,
+							"asc": true,
+							"nulls": "last"
+						}
+					],
+					"isUnique": true,
+					"concurrently": false,
+					"method": "btree",
+					"with": {}
+				}
+			},
+			"foreignKeys": {
+				"tasks_run_id_runs_id_fk": {
+					"name": "tasks_run_id_runs_id_fk",
+					"tableFrom": "tasks",
+					"tableTo": "runs",
+					"columnsFrom": ["run_id"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				},
+				"tasks_task_metrics_id_taskMetrics_id_fk": {
+					"name": "tasks_task_metrics_id_taskMetrics_id_fk",
+					"tableFrom": "tasks",
+					"tableTo": "taskMetrics",
+					"columnsFrom": ["task_metrics_id"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				}
+			},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"policies": {},
+			"checkConstraints": {},
+			"isRLSEnabled": false
+		},
+		"public.toolErrors": {
+			"name": "toolErrors",
+			"schema": "",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"identity": {
+						"type": "always",
+						"name": "toolErrors_id_seq",
+						"schema": "public",
+						"increment": "1",
+						"startWith": "1",
+						"minValue": "1",
+						"maxValue": "2147483647",
+						"cache": "1",
+						"cycle": false
+					}
+				},
+				"run_id": {
+					"name": "run_id",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"task_id": {
+					"name": "task_id",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"tool_name": {
+					"name": "tool_name",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"error": {
+					"name": "error",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"created_at": {
+					"name": "created_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": true
+				}
+			},
+			"indexes": {},
+			"foreignKeys": {
+				"toolErrors_run_id_runs_id_fk": {
+					"name": "toolErrors_run_id_runs_id_fk",
+					"tableFrom": "toolErrors",
+					"tableTo": "runs",
+					"columnsFrom": ["run_id"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				},
+				"toolErrors_task_id_tasks_id_fk": {
+					"name": "toolErrors_task_id_tasks_id_fk",
+					"tableFrom": "toolErrors",
+					"tableTo": "tasks",
+					"columnsFrom": ["task_id"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				}
+			},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"policies": {},
+			"checkConstraints": {},
+			"isRLSEnabled": false
+		}
+	},
+	"enums": {},
+	"schemas": {},
+	"sequences": {},
+	"roles": {},
+	"policies": {},
+	"views": {},
+	"_meta": {
+		"columns": {},
+		"schemas": {},
+		"tables": {}
+	}
+}

+ 7 - 0
packages/evals/src/db/migrations/meta/_journal.json

@@ -15,6 +15,13 @@
 			"when": 1753198630651,
 			"tag": "0001_lowly_captain_flint",
 			"breakpoints": true
+		},
+		{
+			"idx": 2,
+			"version": "7",
+			"when": 1757191027855,
+			"tag": "0002_bouncy_blazing_skull",
+			"breakpoints": true
 		}
 	]
 }

+ 6 - 0
packages/evals/src/db/schema.ts

@@ -13,7 +13,13 @@ export const runs = pgTable("runs", {
 	id: integer().primaryKey().generatedAlwaysAsIdentity(),
 	taskMetricsId: integer("task_metrics_id").references(() => taskMetrics.id),
 	model: text().notNull(),
+	name: text(),
 	description: text(),
+	contextWindow: integer(),
+	inputPrice: real(),
+	outputPrice: real(),
+	cacheWritesPrice: real(),
+	cacheReadsPrice: real(),
 	settings: jsonb().$type<RooCodeSettings>(),
 	pid: integer(),
 	socketPath: text("socket_path").notNull(),