Răsfoiți Sursa

Move to Postgres for evals on roocode.com (#4520)

Chris Estreich, acum 6 luni
părinte
commit
ab01fb3bdb
48 de fișiere modificate, cu 1037 de adăugiri și 1233 de ștergeri
  1. 16 1
      .github/workflows/website-deploy.yml
  2. 16 1
      .github/workflows/website-preview.yml
  3. 1 1
      apps/vscode-e2e/package.json
  4. 2 2
      apps/web-evals/package.json
  5. 0 158
      apps/web-evals/src/app/home.tsx
  6. 2 2
      apps/web-evals/src/app/page.tsx
  7. 5 5
      apps/web-evals/src/app/runs/new/new-run.tsx
  8. 149 0
      apps/web-evals/src/components/home/run.tsx
  9. 56 0
      apps/web-evals/src/components/home/runs.tsx
  10. 28 0
      apps/web-evals/src/hooks/use-copy-run.ts
  11. 19 0
      apps/web-evals/src/lib/actions.ts
  12. 1 1
      apps/web-evals/src/lib/schemas.ts
  13. 0 3
      apps/web-roo-code/.env.example
  14. 0 14
      apps/web-roo-code/drizzle.config.ts
  15. 4 15
      apps/web-roo-code/package.json
  16. 29 0
      apps/web-roo-code/src/actions/evals.ts
  17. 6 4
      apps/web-roo-code/src/app/evals/evals.tsx
  18. 3 23
      apps/web-roo-code/src/app/evals/page.tsx
  19. 0 13
      apps/web-roo-code/src/db/db.ts
  20. 0 6
      apps/web-roo-code/src/db/index.ts
  21. 0 3
      apps/web-roo-code/src/db/queries/errors.ts
  22. 0 19
      apps/web-roo-code/src/db/queries/runs.ts
  23. 0 17
      apps/web-roo-code/src/db/queries/taskMetrics.ts
  24. 0 29
      apps/web-roo-code/src/db/queries/tasks.ts
  25. 0 78
      apps/web-roo-code/src/db/schema.ts
  26. 0 40
      apps/web-roo-code/src/drizzle/0000_elite_raza.sql
  27. 0 1
      apps/web-roo-code/src/drizzle/0001_lush_reavers.sql
  28. 0 274
      apps/web-roo-code/src/drizzle/meta/0000_snapshot.json
  29. 0 281
      apps/web-roo-code/src/drizzle/meta/0001_snapshot.json
  30. 0 20
      apps/web-roo-code/src/drizzle/meta/_journal.json
  31. 2 0
      apps/web-roo-code/src/lib/hooks/index.ts
  32. 1 0
      apps/web-roo-code/src/lib/hooks/use-open-router-models.ts
  33. 0 29
      apps/web-roo-code/src/lib/server/get-language-scores.ts
  34. 0 1
      apps/web-roo-code/src/lib/server/index.ts
  35. 2 2
      packages/build/package.json
  36. 2 2
      packages/cloud/package.json
  37. 4 5
      packages/evals/package.json
  38. 21 2
      packages/evals/src/db/db.ts
  39. 3 0
      packages/evals/src/db/index.ts
  40. 287 0
      packages/evals/src/db/queries/__tests__/copyRun.spec.ts
  41. 183 0
      packages/evals/src/db/queries/copyRun.ts
  42. 24 1
      packages/evals/src/db/queries/tasks.ts
  43. 1 1
      packages/ipc/package.json
  44. 2 2
      packages/telemetry/package.json
  45. 2 2
      packages/types/package.json
  46. 163 172
      pnpm-lock.yaml
  47. 1 1
      src/package.json
  48. 2 2
      webview-ui/package.json

+ 16 - 1
.github/workflows/website-deploy.yml

@@ -13,9 +13,24 @@ env:
     VERCEL_PROJECT_ID: ${{ secrets.VERCEL_PROJECT_ID }}
 
 jobs:
+    check-secrets:
+        runs-on: ubuntu-latest
+        outputs:
+            has-vercel-token: ${{ steps.check.outputs.has-vercel-token }}
+        steps:
+            - name: Check if VERCEL_TOKEN exists
+              id: check
+              run: |
+                if [ -n "${{ secrets.VERCEL_TOKEN }}" ]; then
+                  echo "has-vercel-token=true" >> $GITHUB_OUTPUT
+                else
+                  echo "has-vercel-token=false" >> $GITHUB_OUTPUT
+                fi
+
     deploy:
         runs-on: ubuntu-latest
-        if: ${{ secrets.VERCEL_TOKEN != '' }}
+        needs: check-secrets
+        if: ${{ needs.check-secrets.outputs.has-vercel-token == 'true' }}
         steps:
             - name: Checkout code
               uses: actions/checkout@v4

+ 16 - 1
.github/workflows/website-preview.yml

@@ -13,9 +13,24 @@ env:
     VERCEL_PROJECT_ID: ${{ secrets.VERCEL_PROJECT_ID }}
 
 jobs:
+    check-secrets:
+        runs-on: ubuntu-latest
+        outputs:
+            has-vercel-token: ${{ steps.check.outputs.has-vercel-token }}
+        steps:
+            - name: Check if VERCEL_TOKEN exists
+              id: check
+              run: |
+                if [ -n "${{ secrets.VERCEL_TOKEN }}" ]; then
+                  echo "has-vercel-token=true" >> $GITHUB_OUTPUT
+                else
+                  echo "has-vercel-token=false" >> $GITHUB_OUTPUT
+                fi
+
     preview:
         runs-on: ubuntu-latest
-        if: ${{ secrets.VERCEL_TOKEN != '' }}
+        needs: check-secrets
+        if: ${{ needs.check-secrets.outputs.has-vercel-token == 'true' }}
         steps:
             - name: Checkout code
               uses: actions/checkout@v4

+ 1 - 1
apps/vscode-e2e/package.json

@@ -14,7 +14,7 @@
 		"@roo-code/config-typescript": "workspace:^",
 		"@roo-code/types": "workspace:^",
 		"@types/mocha": "^10.0.10",
-		"@types/node": "^22.14.1",
+		"@types/node": "20.x",
 		"@types/vscode": "^1.95.0",
 		"@vscode/test-cli": "^0.0.11",
 		"@vscode/test-electron": "^2.4.0",

+ 2 - 2
apps/web-evals/package.json

@@ -11,7 +11,7 @@
 		"start": "next start"
 	},
 	"dependencies": {
-		"@hookform/resolvers": "^4.1.3",
+		"@hookform/resolvers": "^5.1.1",
 		"@radix-ui/react-alert-dialog": "^1.1.7",
 		"@radix-ui/react-dialog": "^1.1.6",
 		"@radix-ui/react-dropdown-menu": "^2.1.7",
@@ -44,7 +44,7 @@
 		"tailwind-merge": "^3.3.0",
 		"tailwindcss-animate": "^1.0.7",
 		"vaul": "^1.1.2",
-		"zod": "^3.24.2"
+		"zod": "^3.25.61"
 	},
 	"devDependencies": {
 		"@roo-code/config-eslint": "workspace:^",

+ 0 - 158
apps/web-evals/src/app/home.tsx

@@ -1,158 +0,0 @@
-"use client"
-
-import { useCallback, useState, useRef } from "react"
-import { useRouter } from "next/navigation"
-import Link from "next/link"
-import { Ellipsis, Rocket } from "lucide-react"
-
-import type { Run, TaskMetrics } from "@roo-code/evals"
-
-import { deleteRun } from "@/actions/runs"
-import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters"
-import {
-	Button,
-	Table,
-	TableBody,
-	TableCell,
-	TableHead,
-	TableHeader,
-	TableRow,
-	DropdownMenu,
-	DropdownMenuContent,
-	DropdownMenuItem,
-	DropdownMenuTrigger,
-	AlertDialog,
-	AlertDialogAction,
-	AlertDialogCancel,
-	AlertDialogContent,
-	AlertDialogDescription,
-	AlertDialogFooter,
-	AlertDialogHeader,
-	AlertDialogTitle,
-} from "@/components/ui"
-
-export function Home({ runs }: { runs: (Run & { taskMetrics: TaskMetrics | null })[] }) {
-	const router = useRouter()
-
-	const [deleteRunId, setDeleteRunId] = useState<number>()
-	const continueRef = useRef<HTMLButtonElement>(null)
-
-	const onConfirmDelete = useCallback(async () => {
-		if (!deleteRunId) {
-			return
-		}
-
-		try {
-			await deleteRun(deleteRunId)
-			setDeleteRunId(undefined)
-		} catch (error) {
-			console.error(error)
-		}
-	}, [deleteRunId])
-
-	return (
-		<>
-			<Table className="border border-t-0">
-				<TableHeader>
-					<TableRow>
-						<TableHead>Model</TableHead>
-						<TableHead>Passed</TableHead>
-						<TableHead>Failed</TableHead>
-						<TableHead>% Correct</TableHead>
-						<TableHead>Tokens In / Out</TableHead>
-						<TableHead>Diff Edits</TableHead>
-						<TableHead>Cost</TableHead>
-						<TableHead>Duration</TableHead>
-						<TableHead />
-					</TableRow>
-				</TableHeader>
-				<TableBody>
-					{runs.length ? (
-						runs.map(({ taskMetrics, ...run }) => (
-							<TableRow key={run.id}>
-								<TableCell>{run.model}</TableCell>
-								<TableCell>{run.passed}</TableCell>
-								<TableCell>{run.failed}</TableCell>
-								<TableCell>
-									{run.passed + run.failed > 0 && (
-										<span>{((run.passed / (run.passed + run.failed)) * 100).toFixed(1)}%</span>
-									)}
-								</TableCell>
-								<TableCell>
-									{taskMetrics && (
-										<div className="flex items-center gap-1.5">
-											<div>{formatTokens(taskMetrics.tokensIn)}</div>/
-											<div>{formatTokens(taskMetrics.tokensOut)}</div>
-										</div>
-									)}
-								</TableCell>
-								<TableCell>
-									{taskMetrics?.toolUsage?.apply_diff && (
-										<div className="flex flex-row items-center gap-1.5">
-											<div>{taskMetrics.toolUsage.apply_diff.attempts}</div>
-											<div>/</div>
-											<div>{formatToolUsageSuccessRate(taskMetrics.toolUsage.apply_diff)}</div>
-										</div>
-									)}
-								</TableCell>
-								<TableCell>{taskMetrics && formatCurrency(taskMetrics.cost)}</TableCell>
-								<TableCell>{taskMetrics && formatDuration(taskMetrics.duration)}</TableCell>
-								<TableCell>
-									<DropdownMenu>
-										<Button variant="ghost" size="icon" asChild>
-											<DropdownMenuTrigger>
-												<Ellipsis />
-											</DropdownMenuTrigger>
-										</Button>
-										<DropdownMenuContent align="end">
-											<DropdownMenuItem asChild>
-												<Link href={`/runs/${run.id}`}>View Tasks</Link>
-											</DropdownMenuItem>
-											<DropdownMenuItem
-												onClick={() => {
-													setDeleteRunId(run.id)
-													setTimeout(() => continueRef.current?.focus(), 0)
-												}}>
-												Delete
-											</DropdownMenuItem>
-										</DropdownMenuContent>
-									</DropdownMenu>
-								</TableCell>
-							</TableRow>
-						))
-					) : (
-						<TableRow>
-							<TableCell colSpan={8} className="text-center">
-								No eval runs yet.
-								<Button variant="link" onClick={() => router.push("/runs/new")}>
-									Launch
-								</Button>
-								one now.
-							</TableCell>
-						</TableRow>
-					)}
-				</TableBody>
-			</Table>
-			<Button
-				variant="default"
-				className="absolute top-4 right-12 size-12 rounded-full"
-				onClick={() => router.push("/runs/new")}>
-				<Rocket className="size-6" />
-			</Button>
-			<AlertDialog open={!!deleteRunId} onOpenChange={() => setDeleteRunId(undefined)}>
-				<AlertDialogContent>
-					<AlertDialogHeader>
-						<AlertDialogTitle>Are you sure?</AlertDialogTitle>
-						<AlertDialogDescription>This action cannot be undone.</AlertDialogDescription>
-					</AlertDialogHeader>
-					<AlertDialogFooter>
-						<AlertDialogCancel>Cancel</AlertDialogCancel>
-						<AlertDialogAction ref={continueRef} onClick={onConfirmDelete}>
-							Continue
-						</AlertDialogAction>
-					</AlertDialogFooter>
-				</AlertDialogContent>
-			</AlertDialog>
-		</>
-	)
-}

+ 2 - 2
apps/web-evals/src/app/page.tsx

@@ -1,10 +1,10 @@
 import { getRuns } from "@roo-code/evals"
 
-import { Home } from "./home"
+import { Runs } from "@/components/home/runs"
 
 export const dynamic = "force-dynamic"
 
 export default async function Page() {
 	const runs = await getRuns()
-	return <Home runs={runs} />
+	return <Runs runs={runs} />
 }

+ 5 - 5
apps/web-evals/src/app/runs/new/new-run.tsx

@@ -15,8 +15,8 @@ import { globalSettingsSchema, providerSettingsSchema, EVALS_SETTINGS, getModelI
 import { createRun } from "@/actions/runs"
 import { getExercises } from "@/actions/exercises"
 import {
-	createRunSchema as formSchema,
-	type CreateRun as FormValues,
+	createRunSchema,
+	type CreateRun,
 	MODEL_DEFAULT,
 	CONCURRENCY_MIN,
 	CONCURRENCY_MAX,
@@ -68,8 +68,8 @@ export function NewRun() {
 	const models = useOpenRouterModels()
 	const exercises = useQuery({ queryKey: ["getExercises"], queryFn: () => getExercises() })
 
-	const form = useForm<FormValues>({
-		resolver: zodResolver(formSchema),
+	const form = useForm<CreateRun>({
+		resolver: zodResolver(createRunSchema),
 		defaultValues: {
 			model: MODEL_DEFAULT,
 			description: "",
@@ -94,7 +94,7 @@ export function NewRun() {
 	const systemPromptRef = useRef<HTMLTextAreaElement>(null)
 
 	const onSubmit = useCallback(
-		async (values: FormValues) => {
+		async (values: CreateRun) => {
 			try {
 				if (mode === "openrouter") {
 					values.settings = { ...(values.settings || {}), openRouterModelId: model }

+ 149 - 0
apps/web-evals/src/components/home/run.tsx

@@ -0,0 +1,149 @@
+import { useCallback, useState, useRef } from "react"
+import Link from "next/link"
+import { Ellipsis, ClipboardList, Copy, Check, LoaderCircle, Trash } from "lucide-react"
+
+import type { Run as EvalsRun, TaskMetrics as EvalsTaskMetrics } from "@roo-code/evals"
+
+import { deleteRun } from "@/actions/runs"
+import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters"
+import { useCopyRun } from "@/hooks/use-copy-run"
+import {
+	Button,
+	TableCell,
+	TableRow,
+	DropdownMenu,
+	DropdownMenuContent,
+	DropdownMenuItem,
+	DropdownMenuTrigger,
+	AlertDialog,
+	AlertDialogAction,
+	AlertDialogCancel,
+	AlertDialogContent,
+	AlertDialogDescription,
+	AlertDialogFooter,
+	AlertDialogHeader,
+	AlertDialogTitle,
+} from "@/components/ui"
+
+type RunProps = {
+	run: EvalsRun
+	taskMetrics: EvalsTaskMetrics | null
+}
+
+export function Run({ run, taskMetrics }: RunProps) {
+	const [deleteRunId, setDeleteRunId] = useState<number>()
+	const continueRef = useRef<HTMLButtonElement>(null)
+	const { isPending, copyRun, copied } = useCopyRun(run.id)
+
+	const onConfirmDelete = useCallback(async () => {
+		if (!deleteRunId) {
+			return
+		}
+
+		try {
+			await deleteRun(deleteRunId)
+			setDeleteRunId(undefined)
+		} catch (error) {
+			console.error(error)
+		}
+	}, [deleteRunId])
+
+	return (
+		<>
+			<TableRow>
+				<TableCell>{run.model}</TableCell>
+				<TableCell>{run.passed}</TableCell>
+				<TableCell>{run.failed}</TableCell>
+				<TableCell>
+					{run.passed + run.failed > 0 && (
+						<span>{((run.passed / (run.passed + run.failed)) * 100).toFixed(1)}%</span>
+					)}
+				</TableCell>
+				<TableCell>
+					{taskMetrics && (
+						<div className="flex items-center gap-1.5">
+							<div>{formatTokens(taskMetrics.tokensIn)}</div>/
+							<div>{formatTokens(taskMetrics.tokensOut)}</div>
+						</div>
+					)}
+				</TableCell>
+				<TableCell>
+					{taskMetrics?.toolUsage?.apply_diff && (
+						<div className="flex flex-row items-center gap-1.5">
+							<div>{taskMetrics.toolUsage.apply_diff.attempts}</div>
+							<div>/</div>
+							<div>{formatToolUsageSuccessRate(taskMetrics.toolUsage.apply_diff)}</div>
+						</div>
+					)}
+				</TableCell>
+				<TableCell>{taskMetrics && formatCurrency(taskMetrics.cost)}</TableCell>
+				<TableCell>{taskMetrics && formatDuration(taskMetrics.duration)}</TableCell>
+				<TableCell>
+					<DropdownMenu>
+						<Button variant="ghost" size="icon" asChild>
+							<DropdownMenuTrigger>
+								<Ellipsis />
+							</DropdownMenuTrigger>
+						</Button>
+						<DropdownMenuContent align="end">
+							<DropdownMenuItem asChild>
+								<Link href={`/runs/${run.id}`}>
+									<div className="flex items-center gap-1">
+										<ClipboardList />
+										<div>View Tasks</div>
+									</div>
+								</Link>
+							</DropdownMenuItem>
+							{run.taskMetricsId && (
+								<DropdownMenuItem onClick={() => copyRun()} disabled={isPending || copied}>
+									<div className="flex items-center gap-1">
+										{isPending ? (
+											<>
+												<LoaderCircle className="animate-spin" />
+												Copying...
+											</>
+										) : copied ? (
+											<>
+												<Check />
+												Copied!
+											</>
+										) : (
+											<>
+												<Copy />
+												Copy to Production
+											</>
+										)}
+									</div>
+								</DropdownMenuItem>
+							)}
+							<DropdownMenuItem
+								onClick={() => {
+									setDeleteRunId(run.id)
+									setTimeout(() => continueRef.current?.focus(), 0)
+								}}>
+								<div className="flex items-center gap-1">
+									<Trash />
+									<div>Delete</div>
+								</div>
+							</DropdownMenuItem>
+						</DropdownMenuContent>
+					</DropdownMenu>
+				</TableCell>
+			</TableRow>
+			<AlertDialog open={!!deleteRunId} onOpenChange={() => setDeleteRunId(undefined)}>
+				<AlertDialogContent>
+					<AlertDialogHeader>
+						<AlertDialogTitle>Are you sure?</AlertDialogTitle>
+						<AlertDialogDescription>This action cannot be undone.</AlertDialogDescription>
+					</AlertDialogHeader>
+					<AlertDialogFooter>
+						<AlertDialogCancel>Cancel</AlertDialogCancel>
+						<AlertDialogAction ref={continueRef} onClick={onConfirmDelete}>
+							Continue
+						</AlertDialogAction>
+					</AlertDialogFooter>
+				</AlertDialogContent>
+			</AlertDialog>
+		</>
+	)
+}

+ 56 - 0
apps/web-evals/src/components/home/runs.tsx

@@ -0,0 +1,56 @@
+"use client"
+
+import { useRouter } from "next/navigation"
+import { Rocket } from "lucide-react"
+
+import type { Run, TaskMetrics } from "@roo-code/evals"
+
+import { Button, Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui"
+import { Run as Row } from "@/components/home/run"
+
+type RunWithTaskMetrics = Run & { taskMetrics: TaskMetrics | null }
+
+export function Runs({ runs }: { runs: RunWithTaskMetrics[] }) {
+	const router = useRouter()
+
+	return (
+		<>
+			<Table className="border border-t-0">
+				<TableHeader>
+					<TableRow>
+						<TableHead>Model</TableHead>
+						<TableHead>Passed</TableHead>
+						<TableHead>Failed</TableHead>
+						<TableHead>% Correct</TableHead>
+						<TableHead>Tokens In / Out</TableHead>
+						<TableHead>Diff Edits</TableHead>
+						<TableHead>Cost</TableHead>
+						<TableHead>Duration</TableHead>
+						<TableHead />
+					</TableRow>
+				</TableHeader>
+				<TableBody>
+					{runs.length ? (
+						runs.map(({ taskMetrics, ...run }) => <Row key={run.id} run={run} taskMetrics={taskMetrics} />)
+					) : (
+						<TableRow>
+							<TableCell colSpan={9} className="text-center">
+								No eval runs yet.
+								<Button variant="link" onClick={() => router.push("/runs/new")}>
+									Launch
+								</Button>
+								one now.
+							</TableCell>
+						</TableRow>
+					)}
+				</TableBody>
+			</Table>
+			<Button
+				variant="default"
+				className="absolute top-4 right-12 size-12 rounded-full"
+				onClick={() => router.push("/runs/new")}>
+				<Rocket className="size-6" />
+			</Button>
+		</>
+	)
+}

+ 28 - 0
apps/web-evals/src/hooks/use-copy-run.ts

@@ -0,0 +1,28 @@
+import { useState } from "react"
+import { useMutation } from "@tanstack/react-query"
+import { toast } from "sonner"
+
+import { copyRunToProduction } from "@/lib/actions"
+
+export function useCopyRun(runId: number) {
+	const [copied, setCopied] = useState(false)
+
+	const { isPending, mutate: copyRun } = useMutation({
+		mutationFn: () => copyRunToProduction(runId),
+		onSuccess: (result) => {
+			if (result.success) {
+				toast.success(result.message)
+				setCopied(true)
+				setTimeout(() => setCopied(false), 3000)
+			} else {
+				toast.error(result.error)
+			}
+		},
+		onError: (error) => {
+			console.error("Copy to production failed:", error)
+			toast.error("Failed to copy run to production")
+		},
+	})
+
+	return { isPending, copyRun, copied }
+}

+ 19 - 0
apps/web-evals/src/lib/actions.ts

@@ -0,0 +1,19 @@
+"use server"
+
+import { client, getProductionClient, copyRun } from "@roo-code/evals"
+
+export async function copyRunToProduction(runId: number) {
+	try {
+		await copyRun({ sourceDb: client, targetDb: getProductionClient(), runId })
+
+		return {
+			success: true,
+			message: `Run ${runId} successfully copied to production.`,
+		}
+	} catch (error) {
+		return {
+			success: false,
+			error: `Failed to copy run ${runId} to production: ${error instanceof Error ? error.message : "Unknown error"}.`,
+		}
+	}
+}

+ 1 - 1
apps/web-evals/src/lib/schemas.ts

@@ -19,7 +19,7 @@ export const createRunSchema = z
 		suite: z.enum(["full", "partial"]),
 		exercises: z.array(z.string()).optional(),
 		settings: rooCodeSettingsSchema.optional(),
-		concurrency: z.number().int().min(CONCURRENCY_MIN).max(CONCURRENCY_MAX).default(CONCURRENCY_DEFAULT),
+		concurrency: z.number().int().min(CONCURRENCY_MIN).max(CONCURRENCY_MAX),
 		systemPrompt: z.string().optional(),
 	})
 	.refine((data) => data.suite === "full" || (data.exercises || []).length > 0, {

+ 0 - 3
apps/web-roo-code/.env.example

@@ -6,6 +6,3 @@ NEXT_PUBLIC_POSTHOG_HOST=https://us.i.posthog.com
 # Basin Form Endpoint for Static Form Submissions
 # Replace this with your actual Basin form endpoint (e.g., https://usebasin.com/f/your-form-id)
 NEXT_PUBLIC_BASIN_ENDPOINT=https://usebasin.com/f/your-form-id-here
-
-TURSO_CONNECTION_URL=libsql://development-roo-code.aws-us-east-1.turso.io
-TURSO_AUTH_TOKEN=your-auth-token-here

+ 0 - 14
apps/web-roo-code/drizzle.config.ts

@@ -1,14 +0,0 @@
-import { defineConfig } from "drizzle-kit"
-
-const dialect = process.env.BENCHMARKS_DB_PATH ? "sqlite" : "turso"
-
-const dbCredentials = process.env.BENCHMARKS_DB_PATH
-	? { url: process.env.BENCHMARKS_DB_PATH }
-	: { url: process.env.TURSO_CONNECTION_URL!, authToken: process.env.TURSO_AUTH_TOKEN! }
-
-export default defineConfig({
-	out: "./drizzle",
-	schema: "./src/db/schema.ts",
-	dialect,
-	dbCredentials,
-})

+ 4 - 15
apps/web-roo-code/package.json

@@ -7,26 +7,16 @@
 		"check-types": "tsc --noEmit",
 		"dev": "next dev",
 		"build": "next build",
-		"start": "next start",
-		"drizzle-kit": "dotenvx run -f .env -- tsx node_modules/drizzle-kit/bin.cjs",
-		"db:generate": "pnpm drizzle-kit generate",
-		"db:migrate": "pnpm drizzle-kit migrate",
-		"db:push": "pnpm drizzle-kit push",
-		"db:pull": "pnpm drizzle-kit pull",
-		"db:check": "pnpm drizzle-kit check",
-		"db:up": "pnpm drizzle-kit up",
-		"db:studio": "pnpm drizzle-kit studio"
+		"start": "next start"
 	},
 	"dependencies": {
-		"@libsql/client": "^0.15.7",
 		"@radix-ui/react-dialog": "^1.1.14",
 		"@radix-ui/react-slot": "^1.2.3",
+		"@roo-code/evals": "workspace:^",
 		"@roo-code/types": "workspace:^",
 		"@tanstack/react-query": "^5.79.0",
 		"class-variance-authority": "^0.7.1",
 		"clsx": "^2.1.1",
-		"drizzle-orm": "^0.44.0",
-		"drizzle-zod": "^0.8.0",
 		"embla-carousel-auto-scroll": "^8.6.0",
 		"embla-carousel-autoplay": "^8.6.0",
 		"embla-carousel-react": "^8.6.0",
@@ -41,17 +31,16 @@
 		"recharts": "^2.15.3",
 		"tailwind-merge": "^3.3.0",
 		"tailwindcss-animate": "^1.0.7",
-		"zod": "^3.25.41"
+		"zod": "^3.25.61"
 	},
 	"devDependencies": {
 		"@roo-code/config-eslint": "workspace:^",
 		"@roo-code/config-typescript": "workspace:^",
 		"@tailwindcss/typography": "^0.5.16",
-		"@types/node": "^20.17.54",
+		"@types/node": "20.x",
 		"@types/react": "^18.3.23",
 		"@types/react-dom": "^18.3.7",
 		"autoprefixer": "^10.4.21",
-		"drizzle-kit": "^0.31.0",
 		"postcss": "^8.5.4",
 		"tailwindcss": "^3.4.17"
 	}

+ 29 - 0
apps/web-roo-code/src/actions/evals.ts

@@ -0,0 +1,29 @@
+"use server"
+
+import { getModelId, rooCodeSettingsSchema } from "@roo-code/types"
+import { getRuns, getLanguageScores } from "@roo-code/evals"
+
+import { formatScore } from "@/lib"
+
+export async function getEvalRuns() {
+	const languageScores = await getLanguageScores()
+
+	const runs = (await getRuns())
+		.filter((run) => !!run.taskMetrics)
+		.filter(({ settings }) => rooCodeSettingsSchema.safeParse(settings).success)
+		.sort((a, b) => b.passed - a.passed)
+		.map((run) => {
+			const settings = rooCodeSettingsSchema.parse(run.settings)
+
+			return {
+				...run,
+				label: run.description || run.model,
+				score: formatScore(run.passed / (run.passed + run.failed)),
+				languageScores: languageScores[run.id],
+				taskMetrics: run.taskMetrics!,
+				modelId: getModelId(settings),
+			}
+		})
+
+	return runs
+}

+ 6 - 4
apps/web-roo-code/src/app/evals/evals.tsx

@@ -3,14 +3,17 @@
 import { useMemo } from "react"
 import { ScatterChart, Scatter, XAxis, YAxis, Label, Customized, Cross } from "recharts"
 
-import { TaskMetrics, type Run } from "@/db"
+import type { TaskMetrics, Run } from "@roo-code/evals"
 
-import { ChartConfig, ChartLegend, ChartLegendContent } from "@/components/ui/chart"
 import { formatTokens, formatCurrency, formatDuration, formatScore } from "@/lib"
+import { useOpenRouterModels } from "@/lib/hooks"
 import {
 	ChartContainer,
 	ChartTooltip,
 	ChartTooltipContent,
+	ChartConfig,
+	ChartLegend,
+	ChartLegendContent,
 	Table,
 	TableBody,
 	TableCaption,
@@ -19,7 +22,6 @@ import {
 	TableHeader,
 	TableRow,
 } from "@/components/ui"
-import { useOpenRouterModels } from "@/lib/hooks/use-open-router-models"
 
 export function Evals({
 	runs,
@@ -126,7 +128,7 @@ export function Evals({
 					{tableData.map((run) => (
 						<TableRow key={run.id}>
 							<TableCell title={run.model?.description}>
-								<div className="font-sans">{run.model?.name || run.label}</div>
+								<div className="font-sans">{run.label}</div>
 								<div className="text-xs opacity-50">
 									{formatTokens(run.modelInfo?.contextWindow ?? 0)}
 								</div>

+ 3 - 23
apps/web-roo-code/src/app/evals/page.tsx

@@ -1,14 +1,11 @@
 import type { Metadata } from "next"
 
-import { rooCodeSettingsSchema, getModelId } from "@roo-code/types"
-
-import { getRuns } from "@/db"
-import { getLanguageScores } from "@/lib/server"
-import { formatScore } from "@/lib"
+import { getEvalRuns } from "@/actions/evals"
 
 import { Evals } from "./evals"
 
 export const revalidate = 300
+export const dynamic = "force-dynamic"
 
 export const metadata: Metadata = {
 	title: "Roo Code Evals",
@@ -26,24 +23,7 @@ export const metadata: Metadata = {
 }
 
 export default async function Page() {
-	const languageScores = await getLanguageScores()
-
-	const runs = (await getRuns())
-		.filter((run) => !!run.taskMetrics)
-		.filter(({ settings }) => rooCodeSettingsSchema.safeParse(settings).success)
-		.sort((a, b) => b.passed - a.passed)
-		.map((run) => {
-			const settings = rooCodeSettingsSchema.parse(run.settings)
-
-			return {
-				...run,
-				label: run.description || run.model,
-				score: formatScore(run.passed / (run.passed + run.failed)),
-				languageScores: languageScores[run.id],
-				taskMetrics: run.taskMetrics!,
-				modelId: getModelId(settings),
-			}
-		})
+	const runs = await getEvalRuns()
 
 	return <Evals runs={runs} />
 }

+ 0 - 13
apps/web-roo-code/src/db/db.ts

@@ -1,13 +0,0 @@
-import { drizzle } from "drizzle-orm/libsql"
-
-import { schema } from "./schema"
-
-if ((!process.env.TURSO_CONNECTION_URL || !process.env.TURSO_AUTH_TOKEN) && !process.env.BENCHMARKS_DB_PATH) {
-	throw new Error("TURSO_CONNECTION_URL and TURSO_AUTH_TOKEN or BENCHMARKS_DB_PATH must be set")
-}
-
-const connection = process.env.BENCHMARKS_DB_PATH
-	? { url: process.env.BENCHMARKS_DB_PATH, concurrency: 50 }
-	: { url: process.env.TURSO_CONNECTION_URL!, authToken: process.env.TURSO_AUTH_TOKEN! }
-
-export const db = drizzle({ schema, connection })

+ 0 - 6
apps/web-roo-code/src/db/index.ts

@@ -1,6 +0,0 @@
-export { db } from "./db"
-export * from "./schema"
-
-export * from "./queries/runs"
-export * from "./queries/tasks"
-export * from "./queries/taskMetrics"

+ 0 - 3
apps/web-roo-code/src/db/queries/errors.ts

@@ -1,3 +0,0 @@
-export class RecordNotFoundError extends Error {}
-
-export class RecordNotCreatedError extends Error {}

+ 0 - 19
apps/web-roo-code/src/db/queries/runs.ts

@@ -1,19 +0,0 @@
-import { desc, eq } from "drizzle-orm"
-
-import { RecordNotFoundError } from "./errors"
-import { schema } from "../schema"
-import { db } from "../db"
-
-const table = schema.runs
-
-export const findRun = async (id: number) => {
-	const run = await db.query.runs.findFirst({ where: eq(table.id, id) })
-
-	if (!run) {
-		throw new RecordNotFoundError()
-	}
-
-	return run
-}
-
-export const getRuns = async () => db.query.runs.findMany({ orderBy: desc(table.id), with: { taskMetrics: true } })

+ 0 - 17
apps/web-roo-code/src/db/queries/taskMetrics.ts

@@ -1,17 +0,0 @@
-import { eq } from "drizzle-orm"
-
-import { RecordNotFoundError } from "./errors"
-import { taskMetrics } from "../schema"
-import { db } from "../db"
-
-const table = taskMetrics
-
-export const findTaskMetrics = async (id: number) => {
-	const run = await db.query.taskMetrics.findFirst({ where: eq(table.id, id) })
-
-	if (!run) {
-		throw new RecordNotFoundError()
-	}
-
-	return run
-}

+ 0 - 29
apps/web-roo-code/src/db/queries/tasks.ts

@@ -1,29 +0,0 @@
-import { and, eq } from "drizzle-orm"
-
-import { RecordNotFoundError } from "./errors"
-import { tasks } from "../schema"
-import { db } from "../db"
-
-export const findTask = async (id: number) => {
-	const run = await db.query.tasks.findFirst({ where: eq(tasks.id, id) })
-
-	if (!run) {
-		throw new RecordNotFoundError()
-	}
-
-	return run
-}
-
-type GetTask = {
-	runId: number
-	language: string
-	exercise: string
-}
-
-export const getTask = async ({ runId, language, exercise }: GetTask) =>
-	db.query.tasks.findFirst({
-		where: and(eq(tasks.runId, runId), eq(tasks.language, language), eq(tasks.exercise, exercise)),
-	})
-
-export const getTasks = async (runId: number) =>
-	db.query.tasks.findMany({ where: eq(tasks.runId, runId), with: { taskMetrics: true } })

+ 0 - 78
apps/web-roo-code/src/db/schema.ts

@@ -1,78 +0,0 @@
-import { sqliteTable, text, real, integer, blob, uniqueIndex } from "drizzle-orm/sqlite-core"
-import { relations } from "drizzle-orm"
-
-/**
- * runs
- */
-
-export const runs = sqliteTable("runs", {
-	id: integer({ mode: "number" }).primaryKey({ autoIncrement: true }),
-	taskMetricsId: integer({ mode: "number" }).references(() => taskMetrics.id),
-	model: text().notNull(),
-	description: text(),
-	settings: blob({ mode: "json" }).$type<unknown>(),
-	pid: integer({ mode: "number" }),
-	socketPath: text().notNull(),
-	passed: integer({ mode: "number" }).default(0).notNull(),
-	failed: integer({ mode: "number" }).default(0).notNull(),
-	createdAt: integer({ mode: "timestamp" }).notNull(),
-})
-
-export const runsRelations = relations(runs, ({ one }) => ({
-	taskMetrics: one(taskMetrics, { fields: [runs.taskMetricsId], references: [taskMetrics.id] }),
-}))
-
-export type Run = typeof runs.$inferSelect
-
-/**
- * tasks
- */
-
-export const tasks = sqliteTable(
-	"tasks",
-	{
-		id: integer({ mode: "number" }).primaryKey({ autoIncrement: true }),
-		runId: integer({ mode: "number" })
-			.references(() => runs.id)
-			.notNull(),
-		taskMetricsId: integer({ mode: "number" }).references(() => taskMetrics.id),
-		language: text().notNull(),
-		exercise: text().notNull(),
-		passed: integer({ mode: "boolean" }),
-		startedAt: integer({ mode: "timestamp" }),
-		finishedAt: integer({ mode: "timestamp" }),
-		createdAt: integer({ mode: "timestamp" }).notNull(),
-	},
-	(table) => [uniqueIndex("tasks_language_exercise_idx").on(table.runId, table.language, table.exercise)],
-)
-
-export const tasksRelations = relations(tasks, ({ one }) => ({
-	run: one(runs, { fields: [tasks.runId], references: [runs.id] }),
-	taskMetrics: one(taskMetrics, { fields: [tasks.taskMetricsId], references: [taskMetrics.id] }),
-}))
-
-export type Task = typeof tasks.$inferSelect
-
-/**
- * taskMetrics
- */
-
-export const taskMetrics = sqliteTable("taskMetrics", {
-	id: integer({ mode: "number" }).primaryKey({ autoIncrement: true }),
-	tokensIn: integer({ mode: "number" }).notNull(),
-	tokensOut: integer({ mode: "number" }).notNull(),
-	tokensContext: integer({ mode: "number" }).notNull(),
-	cacheWrites: integer({ mode: "number" }).notNull(),
-	cacheReads: integer({ mode: "number" }).notNull(),
-	cost: real().notNull(),
-	duration: integer({ mode: "number" }).notNull(),
-	createdAt: integer({ mode: "timestamp" }).notNull(),
-})
-
-export type TaskMetrics = typeof taskMetrics.$inferSelect
-
-/**
- * schema
- */
-
-export const schema = { runs, runsRelations, tasks, tasksRelations, taskMetrics }

+ 0 - 40
apps/web-roo-code/src/drizzle/0000_elite_raza.sql

@@ -1,40 +0,0 @@
-CREATE TABLE `runs` (
-	`id` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
-	`taskMetricsId` integer,
-	`model` text NOT NULL,
-	`description` text,
-	`pid` integer,
-	`socketPath` text NOT NULL,
-	`passed` integer DEFAULT 0 NOT NULL,
-	`failed` integer DEFAULT 0 NOT NULL,
-	`createdAt` integer NOT NULL,
-	FOREIGN KEY (`taskMetricsId`) REFERENCES `taskMetrics`(`id`) ON UPDATE no action ON DELETE no action
-);
---> statement-breakpoint
-CREATE TABLE `taskMetrics` (
-	`id` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
-	`tokensIn` integer NOT NULL,
-	`tokensOut` integer NOT NULL,
-	`tokensContext` integer NOT NULL,
-	`cacheWrites` integer NOT NULL,
-	`cacheReads` integer NOT NULL,
-	`cost` real NOT NULL,
-	`duration` integer NOT NULL,
-	`createdAt` integer NOT NULL
-);
---> statement-breakpoint
-CREATE TABLE `tasks` (
-	`id` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
-	`runId` integer NOT NULL,
-	`taskMetricsId` integer,
-	`language` text NOT NULL,
-	`exercise` text NOT NULL,
-	`passed` integer,
-	`startedAt` integer,
-	`finishedAt` integer,
-	`createdAt` integer NOT NULL,
-	FOREIGN KEY (`runId`) REFERENCES `runs`(`id`) ON UPDATE no action ON DELETE no action,
-	FOREIGN KEY (`taskMetricsId`) REFERENCES `taskMetrics`(`id`) ON UPDATE no action ON DELETE no action
-);
---> statement-breakpoint
-CREATE UNIQUE INDEX `tasks_language_exercise_idx` ON `tasks` (`runId`,`language`,`exercise`);

+ 0 - 1
apps/web-roo-code/src/drizzle/0001_lush_reavers.sql

@@ -1 +0,0 @@
-ALTER TABLE `runs` ADD `settings` blob;

+ 0 - 274
apps/web-roo-code/src/drizzle/meta/0000_snapshot.json

@@ -1,274 +0,0 @@
-{
-	"version": "6",
-	"dialect": "sqlite",
-	"id": "c0fa8491-b5c0-493d-aa32-ddf280259c30",
-	"prevId": "00000000-0000-0000-0000-000000000000",
-	"tables": {
-		"runs": {
-			"name": "runs",
-			"columns": {
-				"id": {
-					"name": "id",
-					"type": "integer",
-					"primaryKey": true,
-					"notNull": true,
-					"autoincrement": true
-				},
-				"taskMetricsId": {
-					"name": "taskMetricsId",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": false,
-					"autoincrement": false
-				},
-				"model": {
-					"name": "model",
-					"type": "text",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"description": {
-					"name": "description",
-					"type": "text",
-					"primaryKey": false,
-					"notNull": false,
-					"autoincrement": false
-				},
-				"pid": {
-					"name": "pid",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": false,
-					"autoincrement": false
-				},
-				"socketPath": {
-					"name": "socketPath",
-					"type": "text",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"passed": {
-					"name": "passed",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false,
-					"default": 0
-				},
-				"failed": {
-					"name": "failed",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false,
-					"default": 0
-				},
-				"createdAt": {
-					"name": "createdAt",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				}
-			},
-			"indexes": {},
-			"foreignKeys": {
-				"runs_taskMetricsId_taskMetrics_id_fk": {
-					"name": "runs_taskMetricsId_taskMetrics_id_fk",
-					"tableFrom": "runs",
-					"tableTo": "taskMetrics",
-					"columnsFrom": ["taskMetricsId"],
-					"columnsTo": ["id"],
-					"onDelete": "no action",
-					"onUpdate": "no action"
-				}
-			},
-			"compositePrimaryKeys": {},
-			"uniqueConstraints": {},
-			"checkConstraints": {}
-		},
-		"taskMetrics": {
-			"name": "taskMetrics",
-			"columns": {
-				"id": {
-					"name": "id",
-					"type": "integer",
-					"primaryKey": true,
-					"notNull": true,
-					"autoincrement": true
-				},
-				"tokensIn": {
-					"name": "tokensIn",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"tokensOut": {
-					"name": "tokensOut",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"tokensContext": {
-					"name": "tokensContext",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"cacheWrites": {
-					"name": "cacheWrites",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"cacheReads": {
-					"name": "cacheReads",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"cost": {
-					"name": "cost",
-					"type": "real",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"duration": {
-					"name": "duration",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"createdAt": {
-					"name": "createdAt",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				}
-			},
-			"indexes": {},
-			"foreignKeys": {},
-			"compositePrimaryKeys": {},
-			"uniqueConstraints": {},
-			"checkConstraints": {}
-		},
-		"tasks": {
-			"name": "tasks",
-			"columns": {
-				"id": {
-					"name": "id",
-					"type": "integer",
-					"primaryKey": true,
-					"notNull": true,
-					"autoincrement": true
-				},
-				"runId": {
-					"name": "runId",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"taskMetricsId": {
-					"name": "taskMetricsId",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": false,
-					"autoincrement": false
-				},
-				"language": {
-					"name": "language",
-					"type": "text",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"exercise": {
-					"name": "exercise",
-					"type": "text",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"passed": {
-					"name": "passed",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": false,
-					"autoincrement": false
-				},
-				"startedAt": {
-					"name": "startedAt",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": false,
-					"autoincrement": false
-				},
-				"finishedAt": {
-					"name": "finishedAt",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": false,
-					"autoincrement": false
-				},
-				"createdAt": {
-					"name": "createdAt",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				}
-			},
-			"indexes": {
-				"tasks_language_exercise_idx": {
-					"name": "tasks_language_exercise_idx",
-					"columns": ["runId", "language", "exercise"],
-					"isUnique": true
-				}
-			},
-			"foreignKeys": {
-				"tasks_runId_runs_id_fk": {
-					"name": "tasks_runId_runs_id_fk",
-					"tableFrom": "tasks",
-					"tableTo": "runs",
-					"columnsFrom": ["runId"],
-					"columnsTo": ["id"],
-					"onDelete": "no action",
-					"onUpdate": "no action"
-				},
-				"tasks_taskMetricsId_taskMetrics_id_fk": {
-					"name": "tasks_taskMetricsId_taskMetrics_id_fk",
-					"tableFrom": "tasks",
-					"tableTo": "taskMetrics",
-					"columnsFrom": ["taskMetricsId"],
-					"columnsTo": ["id"],
-					"onDelete": "no action",
-					"onUpdate": "no action"
-				}
-			},
-			"compositePrimaryKeys": {},
-			"uniqueConstraints": {},
-			"checkConstraints": {}
-		}
-	},
-	"views": {},
-	"enums": {},
-	"_meta": {
-		"schemas": {},
-		"tables": {},
-		"columns": {}
-	},
-	"internal": {
-		"indexes": {}
-	}
-}

+ 0 - 281
apps/web-roo-code/src/drizzle/meta/0001_snapshot.json

@@ -1,281 +0,0 @@
-{
-	"version": "6",
-	"dialect": "sqlite",
-	"id": "8906647f-81d6-498a-897c-b1638c04c69a",
-	"prevId": "c0fa8491-b5c0-493d-aa32-ddf280259c30",
-	"tables": {
-		"runs": {
-			"name": "runs",
-			"columns": {
-				"id": {
-					"name": "id",
-					"type": "integer",
-					"primaryKey": true,
-					"notNull": true,
-					"autoincrement": true
-				},
-				"taskMetricsId": {
-					"name": "taskMetricsId",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": false,
-					"autoincrement": false
-				},
-				"model": {
-					"name": "model",
-					"type": "text",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"description": {
-					"name": "description",
-					"type": "text",
-					"primaryKey": false,
-					"notNull": false,
-					"autoincrement": false
-				},
-				"settings": {
-					"name": "settings",
-					"type": "blob",
-					"primaryKey": false,
-					"notNull": false,
-					"autoincrement": false
-				},
-				"pid": {
-					"name": "pid",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": false,
-					"autoincrement": false
-				},
-				"socketPath": {
-					"name": "socketPath",
-					"type": "text",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"passed": {
-					"name": "passed",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false,
-					"default": 0
-				},
-				"failed": {
-					"name": "failed",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false,
-					"default": 0
-				},
-				"createdAt": {
-					"name": "createdAt",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				}
-			},
-			"indexes": {},
-			"foreignKeys": {
-				"runs_taskMetricsId_taskMetrics_id_fk": {
-					"name": "runs_taskMetricsId_taskMetrics_id_fk",
-					"tableFrom": "runs",
-					"tableTo": "taskMetrics",
-					"columnsFrom": ["taskMetricsId"],
-					"columnsTo": ["id"],
-					"onDelete": "no action",
-					"onUpdate": "no action"
-				}
-			},
-			"compositePrimaryKeys": {},
-			"uniqueConstraints": {},
-			"checkConstraints": {}
-		},
-		"taskMetrics": {
-			"name": "taskMetrics",
-			"columns": {
-				"id": {
-					"name": "id",
-					"type": "integer",
-					"primaryKey": true,
-					"notNull": true,
-					"autoincrement": true
-				},
-				"tokensIn": {
-					"name": "tokensIn",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"tokensOut": {
-					"name": "tokensOut",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"tokensContext": {
-					"name": "tokensContext",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"cacheWrites": {
-					"name": "cacheWrites",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"cacheReads": {
-					"name": "cacheReads",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"cost": {
-					"name": "cost",
-					"type": "real",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"duration": {
-					"name": "duration",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"createdAt": {
-					"name": "createdAt",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				}
-			},
-			"indexes": {},
-			"foreignKeys": {},
-			"compositePrimaryKeys": {},
-			"uniqueConstraints": {},
-			"checkConstraints": {}
-		},
-		"tasks": {
-			"name": "tasks",
-			"columns": {
-				"id": {
-					"name": "id",
-					"type": "integer",
-					"primaryKey": true,
-					"notNull": true,
-					"autoincrement": true
-				},
-				"runId": {
-					"name": "runId",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"taskMetricsId": {
-					"name": "taskMetricsId",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": false,
-					"autoincrement": false
-				},
-				"language": {
-					"name": "language",
-					"type": "text",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"exercise": {
-					"name": "exercise",
-					"type": "text",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				},
-				"passed": {
-					"name": "passed",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": false,
-					"autoincrement": false
-				},
-				"startedAt": {
-					"name": "startedAt",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": false,
-					"autoincrement": false
-				},
-				"finishedAt": {
-					"name": "finishedAt",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": false,
-					"autoincrement": false
-				},
-				"createdAt": {
-					"name": "createdAt",
-					"type": "integer",
-					"primaryKey": false,
-					"notNull": true,
-					"autoincrement": false
-				}
-			},
-			"indexes": {
-				"tasks_language_exercise_idx": {
-					"name": "tasks_language_exercise_idx",
-					"columns": ["runId", "language", "exercise"],
-					"isUnique": true
-				}
-			},
-			"foreignKeys": {
-				"tasks_runId_runs_id_fk": {
-					"name": "tasks_runId_runs_id_fk",
-					"tableFrom": "tasks",
-					"tableTo": "runs",
-					"columnsFrom": ["runId"],
-					"columnsTo": ["id"],
-					"onDelete": "no action",
-					"onUpdate": "no action"
-				},
-				"tasks_taskMetricsId_taskMetrics_id_fk": {
-					"name": "tasks_taskMetricsId_taskMetrics_id_fk",
-					"tableFrom": "tasks",
-					"tableTo": "taskMetrics",
-					"columnsFrom": ["taskMetricsId"],
-					"columnsTo": ["id"],
-					"onDelete": "no action",
-					"onUpdate": "no action"
-				}
-			},
-			"compositePrimaryKeys": {},
-			"uniqueConstraints": {},
-			"checkConstraints": {}
-		}
-	},
-	"views": {},
-	"enums": {},
-	"_meta": {
-		"schemas": {},
-		"tables": {},
-		"columns": {}
-	},
-	"internal": {
-		"indexes": {}
-	}
-}

+ 0 - 20
apps/web-roo-code/src/drizzle/meta/_journal.json

@@ -1,20 +0,0 @@
-{
-	"version": "7",
-	"dialect": "sqlite",
-	"entries": [
-		{
-			"idx": 0,
-			"version": "6",
-			"when": 1742599919625,
-			"tag": "0000_elite_raza",
-			"breakpoints": true
-		},
-		{
-			"idx": 1,
-			"version": "6",
-			"when": 1743089501047,
-			"tag": "0001_lush_reavers",
-			"breakpoints": true
-		}
-	]
-}

+ 2 - 0
apps/web-roo-code/src/lib/hooks/index.ts

@@ -0,0 +1,2 @@
+export * from "./use-logo-src"
+export * from "./use-open-router-models"

+ 1 - 0
apps/web-roo-code/src/lib/hooks/use-open-router-models.ts

@@ -1,5 +1,6 @@
 import { z } from "zod"
 import { useQuery } from "@tanstack/react-query"
+
 import { ModelInfo } from "@roo-code/types"
 
 const parsePrice = (price?: string) => (price ? parseFloat(price) * 1_000_000 : undefined)

+ 0 - 29
apps/web-roo-code/src/lib/server/get-language-scores.ts

@@ -1,29 +0,0 @@
-"use server"
-
-import { sql } from "drizzle-orm"
-import { db, tasks } from "@/db"
-
-export type Language = "go" | "java" | "javascript" | "python" | "rust"
-
-export const getLanguageScores = async () => {
-	const records = await db
-		.select({
-			runId: tasks.runId,
-			language: sql<Language>`language`,
-			score: sql<number>`cast(sum(case when ${tasks.passed} = 1 then 1 else 0 end) as float) / count(*)`,
-		})
-		.from(tasks)
-		.groupBy(tasks.runId, tasks.language)
-
-	const results: Record<number, Record<Language, number>> = {}
-
-	for (const { runId, language, score } of records) {
-		if (!results[runId]) {
-			results[runId] = { go: 0, java: 0, javascript: 0, python: 0, rust: 0 }
-		}
-
-		results[runId][language] = score
-	}
-
-	return results
-}

+ 0 - 1
apps/web-roo-code/src/lib/server/index.ts

@@ -1 +0,0 @@
-export * from "./get-language-scores"

+ 2 - 2
packages/build/package.json

@@ -13,12 +13,12 @@
 		"clean": "rimraf dist .turbo"
 	},
 	"dependencies": {
-		"zod": "^3.24.2"
+		"zod": "^3.25.61"
 	},
 	"devDependencies": {
 		"@roo-code/config-eslint": "workspace:^",
 		"@roo-code/config-typescript": "workspace:^",
-		"@types/node": "^22.15.20",
+		"@types/node": "20.x",
 		"vitest": "^3.1.3"
 	}
 }

+ 2 - 2
packages/cloud/package.json

@@ -14,12 +14,12 @@
 		"@roo-code/telemetry": "workspace:^",
 		"@roo-code/types": "workspace:^",
 		"axios": "^1.7.4",
-		"zod": "^3.24.2"
+		"zod": "^3.25.61"
 	},
 	"devDependencies": {
 		"@roo-code/config-eslint": "workspace:^",
 		"@roo-code/config-typescript": "workspace:^",
-		"@types/node": "^22.15.20",
+		"@types/node": "20.x",
 		"@types/vscode": "^1.84.0",
 		"vitest": "^3.1.3"
 	}

+ 4 - 5
packages/evals/package.json

@@ -12,12 +12,12 @@
 		"cli": "dotenvx run -f .env.development .env.local -- tsx src/cli/index.ts",
 		"drizzle-kit": "dotenvx run -f .env.development -- tsx node_modules/drizzle-kit/bin.cjs",
 		"drizzle-kit:test": "dotenvx run -f .env.test -- tsx node_modules/drizzle-kit/bin.cjs",
+		"drizzle-kit:production": "dotenvx run -f .env.production -- tsx node_modules/drizzle-kit/bin.cjs",
 		"db:generate": "pnpm drizzle-kit generate",
 		"db:migrate": "pnpm drizzle-kit migrate",
 		"db:push": "pnpm drizzle-kit push",
-		"db:check": "pnpm drizzle-kit check",
 		"db:test:push": "pnpm drizzle-kit:test push",
-		"db:test:check": "pnpm drizzle-kit:test check",
+		"db:production:push": "pnpm drizzle-kit:production push",
 		"db:start": "docker compose up -d db",
 		"db:stop": "docker compose down db",
 		"redis:start": "docker compose up -d redis",
@@ -27,7 +27,6 @@
 	"dependencies": {
 		"@roo-code/ipc": "workspace:^",
 		"@roo-code/types": "workspace:^",
-		"better-sqlite3": "^11.10.0",
 		"cmd-ts": "^0.13.0",
 		"drizzle-orm": "^0.44.1",
 		"execa": "^9.6.0",
@@ -38,12 +37,12 @@
 		"postgres": "^3.4.7",
 		"ps-tree": "^1.2.0",
 		"redis": "^5.5.5",
-		"zod": "^3.24.2"
+		"zod": "^3.25.61"
 	},
 	"devDependencies": {
 		"@roo-code/config-eslint": "workspace:^",
 		"@roo-code/config-typescript": "workspace:^",
-		"@types/node": "^22.15.20",
+		"@types/node": "20.x",
 		"@types/node-ipc": "^9.2.3",
 		"@types/ps-tree": "^1.1.6",
 		"drizzle-kit": "^0.31.1",

+ 21 - 2
packages/evals/src/db/db.ts

@@ -4,7 +4,6 @@ import postgres from "postgres"
 import * as schema from "./schema.js"
 
 const pgClient = postgres(process.env.DATABASE_URL!, { prepare: false })
-
 const client = drizzle({ client: pgClient, schema })
 
 let testDb: typeof client | undefined = undefined
@@ -17,10 +16,30 @@ if (process.env.NODE_ENV === "test") {
 	testDb = client
 }
 
+let _productionPgClient: ReturnType<typeof postgres> | undefined = undefined
+let _productionClient: typeof client | undefined = undefined
+
+/**
+ * Returns the Drizzle client for the production database, creating it on
+ * first use and reusing the cached instance afterwards.
+ *
+ * @throws Error when `PRODUCTION_DATABASE_URL` is not set (checked on every call).
+ */
+const getProductionClient = () => {
+	const productionUrl = process.env.PRODUCTION_DATABASE_URL
+
+	if (!productionUrl) {
+		throw new Error("PRODUCTION_DATABASE_URL is not set")
+	}
+
+	if (_productionClient) {
+		return _productionClient
+	}
+
+	// First call: open the connection and remember both handles so that
+	// `disconnect` can close the underlying postgres client later.
+	_productionPgClient = postgres(productionUrl, { prepare: false })
+	_productionClient = drizzle({ client: _productionPgClient, schema })
+
+	return _productionClient
+}
+
 const disconnect = async () => {
 	await pgClient.end()
+
+	if (_productionPgClient) {
+		await _productionPgClient.end()
+	}
 }
 
 type DatabaseOrTransaction = typeof client | Parameters<Parameters<typeof client.transaction>[0]>[0]
 
-export { client, testDb, disconnect, type DatabaseOrTransaction }
+export { client, testDb, getProductionClient, disconnect, type DatabaseOrTransaction }

+ 3 - 0
packages/evals/src/db/index.ts

@@ -4,3 +4,6 @@ export * from "./queries/runs.js"
 export * from "./queries/tasks.js"
 export * from "./queries/taskMetrics.js"
 export * from "./queries/toolErrors.js"
+export * from "./queries/copyRun.js"
+
+export * from "./db.js"

+ 287 - 0
packages/evals/src/db/queries/__tests__/copyRun.spec.ts

@@ -0,0 +1,287 @@
+// npx vitest run src/db/queries/__tests__/copyRun.spec.ts
+
+import { eq } from "drizzle-orm"
+
+import { copyRun } from "../copyRun.js"
+import { createRun } from "../runs.js"
+import { createTask } from "../tasks.js"
+import { createTaskMetrics } from "../taskMetrics.js"
+import { createToolError } from "../toolErrors.js"
+import { RecordNotFoundError } from "../errors.js"
+import { schema } from "../../schema.js"
+import { client as db } from "../../db.js"
+
+describe("copyRun", () => {
+	let sourceRunId: number
+	let sourceTaskIds: number[] = []
+	let sourceTaskMetricsIds: number[] = []
+	let sourceToolErrorIds: number[] = []
+
+	// Seeds the source run used by every test: one run with aggregate metrics,
+	// three tasks (two with their own metrics, one without) and three tool
+	// errors (two attached to tasks, one attached to the run only). The ids of
+	// everything created are recorded in the `source*` arrays.
+	beforeEach(async () => {
+		const run = await createRun({
+			model: "gpt-4.1-mini",
+			socketPath: "/tmp/roo.sock",
+			description: "Test run for copying",
+			concurrency: 4,
+		})
+
+		sourceRunId = run.id
+
+		// Run-level (aggregate) metrics.
+		const runTaskMetrics = await createTaskMetrics({
+			duration: 120_000,
+			tokensIn: 200_000,
+			tokensOut: 5_000,
+			tokensContext: 205_000,
+			cacheWrites: 10,
+			cacheReads: 5,
+			cost: 0.15,
+			toolUsage: {
+				read_file: { attempts: 10, failures: 1 },
+				apply_diff: { attempts: 8, failures: 2 },
+			},
+		})
+
+		sourceTaskMetricsIds.push(runTaskMetrics.id)
+
+		// Link the aggregate metrics to the run and set its pass/fail counters.
+		await db
+			.update(schema.runs)
+			.set({ taskMetricsId: runTaskMetrics.id, passed: 2, failed: 1 })
+			.where(eq(schema.runs.id, sourceRunId))
+
+		// Task 1: passed, has metrics.
+		const task1TaskMetrics = await createTaskMetrics({
+			duration: 45_000,
+			tokensIn: 100_000,
+			tokensOut: 2_000,
+			tokensContext: 102_000,
+			cacheWrites: 0,
+			cacheReads: 0,
+			cost: 0.05,
+			toolUsage: {
+				read_file: { attempts: 3, failures: 0 },
+				apply_diff: { attempts: 3, failures: 1 },
+			},
+		})
+
+		sourceTaskMetricsIds.push(task1TaskMetrics.id)
+
+		const task1 = await createTask({
+			runId: sourceRunId,
+			taskMetricsId: task1TaskMetrics.id,
+			language: "go",
+			exercise: "go/say",
+			passed: true,
+			startedAt: new Date("2023-01-01T10:00:00Z"),
+			finishedAt: new Date("2023-01-01T10:45:00Z"),
+		})
+
+		sourceTaskIds.push(task1.id)
+
+		// Task 2: failed, has metrics.
+		const task2TaskMetrics = await createTaskMetrics({
+			duration: 30_000,
+			tokensIn: 75_000,
+			tokensOut: 1_000,
+			tokensContext: 76_000,
+			cacheWrites: 0,
+			cacheReads: 0,
+			cost: 0.04,
+			toolUsage: {
+				read_file: { attempts: 3, failures: 0 },
+				apply_diff: { attempts: 2, failures: 0 },
+			},
+		})
+
+		sourceTaskMetricsIds.push(task2TaskMetrics.id)
+
+		const task2 = await createTask({
+			runId: sourceRunId,
+			taskMetricsId: task2TaskMetrics.id,
+			language: "python",
+			exercise: "python/hello-world",
+			passed: false,
+			startedAt: new Date("2023-01-01T11:00:00Z"),
+			finishedAt: new Date("2023-01-01T11:30:00Z"),
+		})
+
+		sourceTaskIds.push(task2.id)
+
+		// Task 3: passed, deliberately without metrics.
+		const task3 = await createTask({
+			runId: sourceRunId,
+			taskMetricsId: null,
+			language: "rust",
+			exercise: "rust/hello-world",
+			passed: true,
+			startedAt: new Date("2023-01-01T12:00:00Z"),
+			finishedAt: new Date("2023-01-01T12:15:00Z"),
+		})
+
+		sourceTaskIds.push(task3.id)
+
+		// Tool errors attached to task 1 and task 2.
+		const toolError1 = await createToolError({
+			runId: sourceRunId,
+			taskId: task1.id,
+			toolName: "apply_diff",
+			error: "Syntax error in diff",
+		})
+
+		sourceToolErrorIds.push(toolError1.id)
+
+		const toolError2 = await createToolError({
+			runId: sourceRunId,
+			taskId: task2.id,
+			toolName: "execute_command",
+			error: "Command failed with exit code 1",
+		})
+
+		sourceToolErrorIds.push(toolError2.id)
+
+		// Tool error attached to the run only (no task).
+		const toolError3 = await createToolError({
+			runId: sourceRunId,
+			taskId: null,
+			toolName: "browser_action",
+			error: "Browser connection timeout",
+		})
+
+		sourceToolErrorIds.push(toolError3.id)
+	})
+
+	// Removes everything seeded in `beforeEach`. Rows are deleted in the order
+	// tool errors, tasks, run, metrics, then the id trackers are reset.
+	afterEach(async () => {
+		const hasToolErrors = sourceToolErrorIds.length > 0
+		const hasTasks = sourceTaskIds.length > 0
+
+		if (hasToolErrors) {
+			await db.delete(schema.toolErrors).where(eq(schema.toolErrors.runId, sourceRunId))
+		}
+
+		if (hasTasks) {
+			await db.delete(schema.tasks).where(eq(schema.tasks.runId, sourceRunId))
+		}
+
+		await db.delete(schema.runs).where(eq(schema.runs.id, sourceRunId))
+
+		// Iterating an empty list is a no-op, so no length guard is needed here.
+		for (const taskMetricsId of sourceTaskMetricsIds) {
+			await db.delete(schema.taskMetrics).where(eq(schema.taskMetrics.id, taskMetricsId))
+		}
+
+		sourceToolErrorIds = []
+		sourceTaskMetricsIds = []
+		sourceTaskIds = []
+	})
+
+	// Copies the seeded run within the same database and checks that the run,
+	// its metrics, its tasks (with and without metrics) and its tool errors
+	// were all duplicated, then removes the copy.
+	it("should copy a complete run with all related data", async () => {
+		const newRunId = await copyRun({ sourceDb: db, targetDb: db, runId: sourceRunId })
+
+		expect(newRunId).toBeDefined()
+		expect(newRunId).not.toBe(sourceRunId)
+
+		const copiedRun = await db.query.runs.findFirst({
+			where: eq(schema.runs.id, newRunId),
+			with: { taskMetrics: true },
+		})
+
+		expect(copiedRun).toBeDefined()
+		expect(copiedRun!.model).toBe("gpt-4.1-mini")
+		expect(copiedRun!.description).toBe("Test run for copying")
+		expect(copiedRun!.concurrency).toBe(4)
+		expect(copiedRun!.passed).toBe(2)
+		expect(copiedRun!.failed).toBe(1)
+		expect(copiedRun!.taskMetrics).toBeDefined()
+
+		expect(copiedRun!.taskMetrics!.duration).toBe(120_000)
+		expect(copiedRun!.taskMetrics!.tokensIn).toBe(200_000)
+		expect(copiedRun!.taskMetrics!.toolUsage).toEqual({
+			read_file: { attempts: 10, failures: 1 },
+			apply_diff: { attempts: 8, failures: 2 },
+		})
+
+		const copiedTasks = await db.query.tasks.findMany({
+			where: eq(schema.tasks.runId, newRunId),
+			with: { taskMetrics: true },
+			orderBy: (tasks, { asc }) => [asc(tasks.language)],
+		})
+
+		expect(copiedTasks).toHaveLength(3)
+
+		const goTask = copiedTasks.find((t) => t.language === "go")!
+		expect(goTask.exercise).toBe("go/say")
+		expect(goTask.passed).toBe(true)
+		expect(goTask.taskMetrics).toBeDefined()
+		expect(goTask.taskMetrics!.duration).toBe(45_000)
+		expect(goTask.taskMetrics!.toolUsage).toEqual({
+			read_file: { attempts: 3, failures: 0 },
+			apply_diff: { attempts: 3, failures: 1 },
+		})
+
+		const pythonTask = copiedTasks.find((t) => t.language === "python")!
+		expect(pythonTask.exercise).toBe("python/hello-world")
+		expect(pythonTask.passed).toBe(false)
+		expect(pythonTask.taskMetrics).toBeDefined()
+		expect(pythonTask.taskMetrics!.duration).toBe(30_000)
+
+		const rustTask = copiedTasks.find((t) => t.language === "rust")!
+		expect(rustTask.exercise).toBe("rust/hello-world")
+		expect(rustTask.passed).toBe(true)
+		expect(rustTask.taskMetrics).toBeNull()
+
+		const copiedToolErrors = await db.query.toolErrors.findMany({
+			where: eq(schema.toolErrors.runId, newRunId),
+		})
+
+		expect(copiedToolErrors).toHaveLength(3)
+
+		const taskToolErrors = copiedToolErrors.filter((te) => te.taskId !== null)
+		const runToolErrors = copiedToolErrors.filter((te) => te.taskId === null)
+
+		expect(taskToolErrors).toHaveLength(2)
+		expect(runToolErrors).toHaveLength(1)
+
+		const browserError = runToolErrors.find((te) => te.toolName === "browser_action")!
+		expect(browserError.error).toBe("Browser connection timeout")
+
+		// Cleanup. The metrics ids must be collected from the rows fetched above
+		// BEFORE the tasks and the run are deleted: querying the tasks again
+		// after deleting them returns nothing and would leave the copied
+		// per-task metrics behind.
+		const taskMetricsToDelete = copiedTasks
+			.map((t) => t.taskMetricsId)
+			.filter((id): id is number => id !== null)
+
+		if (copiedRun?.taskMetricsId) {
+			taskMetricsToDelete.push(copiedRun.taskMetricsId)
+		}
+
+		await db.delete(schema.toolErrors).where(eq(schema.toolErrors.runId, newRunId))
+		await db.delete(schema.tasks).where(eq(schema.tasks.runId, newRunId))
+		await db.delete(schema.runs).where(eq(schema.runs.id, newRunId))
+
+		for (const id of taskMetricsToDelete) {
+			await db.delete(schema.taskMetrics).where(eq(schema.taskMetrics.id, id))
+		}
+	})
+
+	// Copying a run id that was never created must reject with RecordNotFoundError.
+	it("should throw RecordNotFoundError for non-existent run", async () => {
+		const missingRunId = 999999
+		const attempt = copyRun({ sourceDb: db, targetDb: db, runId: missingRunId })
+
+		await expect(attempt).rejects.toThrow(RecordNotFoundError)
+	})
+
+	// A run that has no metrics, tasks or tool errors is still copied, and the
+	// copy keeps `taskMetricsId` null.
+	it("should copy run without task metrics", async () => {
+		const { id: minimalRunId } = await createRun({ model: "gpt-3.5-turbo", socketPath: "/tmp/minimal.sock" })
+		const copiedRunId = await copyRun({ sourceDb: db, targetDb: db, runId: minimalRunId })
+
+		const copy = await db.query.runs.findFirst({ where: eq(schema.runs.id, copiedRunId) })
+
+		expect(copy).toBeDefined()
+		expect(copy!.model).toBe("gpt-3.5-turbo")
+		expect(copy!.taskMetricsId).toBeNull()
+
+		// Remove the original first, then the copy.
+		for (const id of [minimalRunId, copiedRunId]) {
+			await db.delete(schema.runs).where(eq(schema.runs.id, id))
+		}
+	})
+})

+ 183 - 0
packages/evals/src/db/queries/copyRun.ts

@@ -0,0 +1,183 @@
+import { eq } from "drizzle-orm"
+import type { NodePgDatabase } from "drizzle-orm/node-postgres"
+
+import type { InsertRun, InsertTask, InsertTaskMetrics, InsertToolError } from "../schema.js"
+import { schema } from "../schema.js"
+
+import { RecordNotFoundError, RecordNotCreatedError } from "./errors.js"
+
+/** The taskMetrics columns carried over to a copy; `createdAt` is set at insert time and no id is passed. */
+type CopiedTaskMetricsFields = Pick<
+	InsertTaskMetrics,
+	"tokensIn" | "tokensOut" | "tokensContext" | "cacheWrites" | "cacheReads" | "cost" | "duration" | "toolUsage"
+>
+
+/** The transaction handle that `copyRun` performs all of its target writes through. */
+type CopyTarget = Parameters<Parameters<NodePgDatabase<typeof schema>["transaction"]>[0]>[0]
+
+/**
+ * Inserts a copy of `source` into the target's taskMetrics table and returns the new row's id.
+ *
+ * @param owner Only used to build the error message ("run" or "task").
+ * @throws RecordNotCreatedError when the insert returns no row.
+ */
+const insertTaskMetricsCopy = async (
+	target: CopyTarget,
+	source: CopiedTaskMetricsFields,
+	owner: "run" | "task",
+): Promise<number> => {
+	const taskMetricsData: InsertTaskMetrics = {
+		tokensIn: source.tokensIn,
+		tokensOut: source.tokensOut,
+		tokensContext: source.tokensContext,
+		cacheWrites: source.cacheWrites,
+		cacheReads: source.cacheReads,
+		cost: source.cost,
+		duration: source.duration,
+		toolUsage: source.toolUsage,
+	}
+
+	const [created] = await target
+		.insert(schema.taskMetrics)
+		.values({ ...taskMetricsData, createdAt: new Date() })
+		.returning()
+
+	if (!created) {
+		throw new RecordNotCreatedError(`Failed to create ${owner} taskMetrics`)
+	}
+
+	return created.id
+}
+
+/**
+ * Copies a run from `sourceDb` into `targetDb`, together with its run-level
+ * metrics, its tasks (and their metrics) and its tool errors. Every copied row
+ * gets a fresh `createdAt`.
+ *
+ * All source rows are read first; all target rows are then written inside a
+ * single transaction, so a failure part-way through does not leave a
+ * partially copied run in the target database.
+ *
+ * @param sourceDb Database to read the run from.
+ * @param targetDb Database to write the copy to (may be the same as `sourceDb`).
+ * @param runId Id of the run in `sourceDb`.
+ * @returns The id of the newly created run in `targetDb`.
+ * @throws RecordNotFoundError when `runId` does not exist in `sourceDb`.
+ * @throws RecordNotCreatedError when an insert into `targetDb` returns no row.
+ */
+export const copyRun = async ({
+	sourceDb,
+	targetDb,
+	runId,
+}: {
+	sourceDb: NodePgDatabase<typeof schema>
+	targetDb: NodePgDatabase<typeof schema>
+	runId: number
+}) => {
+	const sourceRun = await sourceDb.query.runs.findFirst({
+		where: eq(schema.runs.id, runId),
+		with: { taskMetrics: true },
+	})
+
+	if (!sourceRun) {
+		throw new RecordNotFoundError(`Run with ID ${runId} not found`)
+	}
+
+	const sourceTasks = await sourceDb.query.tasks.findMany({
+		where: eq(schema.tasks.runId, runId),
+		with: { taskMetrics: true },
+	})
+
+	// Pair every source task with the tool errors recorded against it.
+	const sourceTasksWithToolErrors = await Promise.all(
+		sourceTasks.map(async (sourceTask) => ({
+			sourceTask,
+			toolErrors: await sourceDb.query.toolErrors.findMany({
+				where: eq(schema.toolErrors.taskId, sourceTask.id),
+			}),
+		})),
+	)
+
+	// Tool errors of the run that are NOT attached to one of its tasks; the
+	// task-attached ones are already covered by the per-task lists above.
+	const sourceTaskIds = new Set(sourceTasks.map(({ id }) => id))
+
+	const allSourceRunToolErrors = await sourceDb.query.toolErrors.findMany({
+		where: eq(schema.toolErrors.runId, runId),
+	})
+
+	const sourceRunToolErrors = allSourceRunToolErrors.filter(
+		({ taskId }) => !(taskId && sourceTaskIds.has(taskId)),
+	)
+
+	return targetDb.transaction(async (tx) => {
+		const newRunTaskMetricsId = sourceRun.taskMetrics
+			? await insertTaskMetricsCopy(tx, sourceRun.taskMetrics, "run")
+			: null
+
+		const runData: InsertRun = {
+			taskMetricsId: newRunTaskMetricsId,
+			model: sourceRun.model,
+			description: sourceRun.description,
+			settings: sourceRun.settings,
+			pid: sourceRun.pid,
+			socketPath: sourceRun.socketPath,
+			concurrency: sourceRun.concurrency,
+			passed: sourceRun.passed,
+			failed: sourceRun.failed,
+		}
+
+		const [newRun] = await tx
+			.insert(schema.runs)
+			.values({ ...runData, createdAt: new Date() })
+			.returning()
+
+		if (!newRun) {
+			throw new RecordNotCreatedError("Failed to create run")
+		}
+
+		const newRunId = newRun.id
+
+		for (const { sourceTask, toolErrors } of sourceTasksWithToolErrors) {
+			const newTaskMetricsId = sourceTask.taskMetrics
+				? await insertTaskMetricsCopy(tx, sourceTask.taskMetrics, "task")
+				: null
+
+			const taskData: InsertTask = {
+				runId: newRunId,
+				taskMetricsId: newTaskMetricsId,
+				language: sourceTask.language,
+				exercise: sourceTask.exercise,
+				passed: sourceTask.passed,
+				startedAt: sourceTask.startedAt,
+				finishedAt: sourceTask.finishedAt,
+			}
+
+			const [newTask] = await tx
+				.insert(schema.tasks)
+				.values({ ...taskData, createdAt: new Date() })
+				.returning()
+
+			if (!newTask) {
+				throw new RecordNotCreatedError("Failed to create task")
+			}
+
+			// Re-point the task's tool errors at the new run and the new task.
+			for (const sourceToolError of toolErrors) {
+				const toolErrorData: InsertToolError = {
+					runId: newRunId,
+					taskId: newTask.id,
+					toolName: sourceToolError.toolName,
+					error: sourceToolError.error,
+				}
+
+				await tx.insert(schema.toolErrors).values({ ...toolErrorData, createdAt: new Date() })
+			}
+		}
+
+		for (const sourceToolError of sourceRunToolErrors) {
+			// These errors either had no task or referenced a task outside
+			// this run, so there is no copied task to attach them to.
+			const toolErrorData: InsertToolError = {
+				runId: newRunId,
+				taskId: null,
+				toolName: sourceToolError.toolName,
+				error: sourceToolError.error,
+			}
+
+			await tx.insert(schema.toolErrors).values({ ...toolErrorData, createdAt: new Date() })
+		}
+
+		return newRunId
+	})
+}

+ 24 - 1
packages/evals/src/db/queries/tasks.ts

@@ -1,4 +1,4 @@
-import { and, asc, eq } from "drizzle-orm"
+import { and, asc, eq, sql } from "drizzle-orm"
 
 import type { ExerciseLanguage } from "../../exercises/index.js"
 
@@ -63,3 +63,26 @@ export const getTasks = async (runId: number) =>
 		with: { taskMetrics: true },
 		orderBy: asc(tasks.id),
 	})
+
+/**
+ * Computes, for every run, the fraction of tasks that passed in each exercise
+ * language.
+ *
+ * @returns A map from run id to per-language score, where a score is
+ * `passed tasks / total tasks` for that run and language. Languages with no
+ * rows for a run keep the default score of 0.
+ */
+export const getLanguageScores = async () => {
+	// Pass rate per group: passed rows divided by all rows, cast to float first.
+	const passRate = sql<number>`cast(sum(case when ${tasks.passed} = true then 1 else 0 end) as float) / count(*)`
+
+	const rows = await db
+		.select({ runId: tasks.runId, language: tasks.language, score: passRate })
+		.from(tasks)
+		.groupBy(tasks.runId, tasks.language)
+
+	const scoresByRun: Record<number, Record<ExerciseLanguage, number>> = {}
+
+	for (const row of rows) {
+		// Start every run with all languages at 0, then overwrite the ones present.
+		const scores: Record<ExerciseLanguage, number> = scoresByRun[row.runId] ?? {
+			go: 0,
+			java: 0,
+			javascript: 0,
+			python: 0,
+			rust: 0,
+		}
+
+		scores[row.language] = row.score
+		scoresByRun[row.runId] = scores
+	}
+
+	return scoresByRun
+}

+ 1 - 1
packages/ipc/package.json

@@ -16,7 +16,7 @@
 	"devDependencies": {
 		"@roo-code/config-eslint": "workspace:^",
 		"@roo-code/config-typescript": "workspace:^",
-		"@types/node": "^22.15.20",
+		"@types/node": "20.x",
 		"@types/node-ipc": "^9.2.3",
 		"vitest": "^3.1.3"
 	}

+ 2 - 2
packages/telemetry/package.json

@@ -13,12 +13,12 @@
 	"dependencies": {
 		"@roo-code/types": "workspace:^",
 		"posthog-node": "^4.7.0",
-		"zod": "^3.24.2"
+		"zod": "^3.25.61"
 	},
 	"devDependencies": {
 		"@roo-code/config-eslint": "workspace:^",
 		"@roo-code/config-typescript": "workspace:^",
-		"@types/node": "^22.15.20",
+		"@types/node": "20.x",
 		"@types/vscode": "^1.84.0",
 		"vitest": "^3.1.3"
 	}

+ 2 - 2
packages/types/package.json

@@ -23,12 +23,12 @@
 		"clean": "rimraf dist npm/dist .turbo"
 	},
 	"dependencies": {
-		"zod": "^3.24.2"
+		"zod": "^3.25.61"
 	},
 	"devDependencies": {
 		"@roo-code/config-eslint": "workspace:^",
 		"@roo-code/config-typescript": "workspace:^",
-		"@types/node": "^22.15.20",
+		"@types/node": "20.x",
 		"tsup": "^8.3.5",
 		"vitest": "^3.1.3"
 	}

Fișier diff suprimat deoarece este prea mare
+ 163 - 172
pnpm-lock.yaml


+ 1 - 1
src/package.json

@@ -419,7 +419,7 @@
 		"web-tree-sitter": "^0.22.6",
 		"workerpool": "^9.2.0",
 		"yaml": "^2.8.0",
-		"zod": "^3.24.2"
+		"zod": "^3.25.61"
 	},
 	"devDependencies": {
 		"@jest/globals": "^29.7.0",

+ 2 - 2
webview-ui/package.json

@@ -72,7 +72,7 @@
 		"use-sound": "^5.0.0",
 		"vscode-material-icons": "^0.1.1",
 		"vscrui": "^0.2.2",
-		"zod": "^3.24.2"
+		"zod": "^3.25.61"
 	},
 	"devDependencies": {
 		"@jest/globals": "^29.7.0",
@@ -82,7 +82,7 @@
 		"@testing-library/react": "^16.2.0",
 		"@testing-library/user-event": "^14.6.1",
 		"@types/jest": "^29.0.0",
-		"@types/node": "^18.0.0",
+		"@types/node": "20.x",
 		"@types/react": "^18.3.23",
 		"@types/react-dom": "^18.3.5",
 		"@types/shell-quote": "^1.7.5",

Unele fișiere nu au fost afișate deoarece prea multe fișiere au fost modificate în acest diff