Parcourir la source

Fix evals; broken by #5865 (#6065)

Chris Estreich il y a 5 mois
Parent
commit
984d368f7a

+ 2 - 2
apps/web-evals/scripts/check-services.sh

@@ -7,13 +7,13 @@ fi
 
 if ! nc -z localhost 5432 2>/dev/null; then
   echo "❌ PostgreSQL is not running on port 5432"
-  echo "💡 Start it with: pnpm --filter @roo-code/evals db:start"
+  echo "💡 Start it with: pnpm --filter @roo-code/evals db:up"
   exit 1
 fi
 
 if ! nc -z localhost 6379 2>/dev/null; then
   echo "❌ Redis is not running on port 6379"
-  echo "💡 Start it with: pnpm --filter @roo-code/evals redis:start"
+  echo "💡 Start it with: pnpm --filter @roo-code/evals redis:up"
   exit 1
 fi
 

+ 2 - 2
apps/web-evals/src/app/runs/new/new-run.tsx

@@ -350,7 +350,7 @@ export function NewRun() {
 						name="timeout"
 						render={({ field }) => (
 							<FormItem>
-								<FormLabel>Timeout (minutes)</FormLabel>
+								<FormLabel>Timeout (Minutes)</FormLabel>
 								<FormControl>
 									<div className="flex flex-row items-center gap-2">
 										<Slider
@@ -360,7 +360,7 @@ export function NewRun() {
 											step={1}
 											onValueChange={(value) => field.onChange(value[0])}
 										/>
-										<div>{field.value} min</div>
+										<div>{field.value}</div>
 									</div>
 								</FormControl>
 								<FormMessage />

+ 1 - 1
package.json

@@ -23,7 +23,7 @@
 		"changeset:version": "cp CHANGELOG.md src/CHANGELOG.md && changeset version && cp -vf src/CHANGELOG.md .",
 		"knip": "knip --include files",
 		"update-contributors": "node scripts/update-contributors.js",
-		"evals": "docker compose -f packages/evals/docker-compose.yml --profile server --profile runner up --build --scale runner=0"
+		"evals": "dotenvx run -f packages/evals/.env.development packages/evals/.env.local -- docker compose -f packages/evals/docker-compose.yml --profile server --profile runner up --build --scale runner=0"
 	},
 	"devDependencies": {
 		"@changesets/cli": "^2.27.10",

+ 6 - 5
packages/evals/package.json

@@ -18,11 +18,12 @@
 		"db:push": "pnpm drizzle-kit push",
 		"db:test:push": "pnpm drizzle-kit:test push",
 		"db:production:push": "pnpm drizzle-kit:production push",
-		"db:start": "docker compose up -d db",
-		"db:stop": "docker compose down db",
-		"redis:start": "docker compose up -d redis",
-		"redis:stop": "docker compose down redis",
-		"services:start": "docker compose up -d db redis"
+		"db:up": "dotenvx run -f .env.development .env.local -- docker compose up -d db",
+		"db:down": "dotenvx run -f .env.development .env.local -- docker compose down db",
+		"redis:up": "dotenvx run -f .env.development .env.local -- docker compose up -d redis",
+		"redis:down": "dotenvx run -f .env.development .env.local -- docker compose down redis",
+		"services:up": "dotenvx run -f .env.development .env.local -- docker compose up -d db redis",
+		"services:down": "dotenvx run -f .env.development .env.local -- docker compose down db redis"
 	},
 	"dependencies": {
 		"@roo-code/ipc": "workspace:^",

+ 1 - 0
packages/evals/src/db/migrations/0001_lowly_captain_flint.sql

@@ -0,0 +1 @@
+ALTER TABLE "runs" ADD COLUMN "timeout" integer DEFAULT 5 NOT NULL;

+ 417 - 0
packages/evals/src/db/migrations/meta/0001_snapshot.json

@@ -0,0 +1,417 @@
+{
+	"id": "43b197c4-ff4f-48c1-908b-a330e66a162d",
+	"prevId": "b50d5e6a-0f3f-4605-a5e7-9351711fc5e4",
+	"version": "7",
+	"dialect": "postgresql",
+	"tables": {
+		"public.runs": {
+			"name": "runs",
+			"schema": "",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"identity": {
+						"type": "always",
+						"name": "runs_id_seq",
+						"schema": "public",
+						"increment": "1",
+						"startWith": "1",
+						"minValue": "1",
+						"maxValue": "2147483647",
+						"cache": "1",
+						"cycle": false
+					}
+				},
+				"task_metrics_id": {
+					"name": "task_metrics_id",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"model": {
+					"name": "model",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"description": {
+					"name": "description",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"settings": {
+					"name": "settings",
+					"type": "jsonb",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"pid": {
+					"name": "pid",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"socket_path": {
+					"name": "socket_path",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"concurrency": {
+					"name": "concurrency",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"default": 2
+				},
+				"timeout": {
+					"name": "timeout",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"default": 5
+				},
+				"passed": {
+					"name": "passed",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"default": 0
+				},
+				"failed": {
+					"name": "failed",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"default": 0
+				},
+				"created_at": {
+					"name": "created_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": true
+				}
+			},
+			"indexes": {},
+			"foreignKeys": {
+				"runs_task_metrics_id_taskMetrics_id_fk": {
+					"name": "runs_task_metrics_id_taskMetrics_id_fk",
+					"tableFrom": "runs",
+					"tableTo": "taskMetrics",
+					"columnsFrom": ["task_metrics_id"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				}
+			},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"policies": {},
+			"checkConstraints": {},
+			"isRLSEnabled": false
+		},
+		"public.taskMetrics": {
+			"name": "taskMetrics",
+			"schema": "",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"identity": {
+						"type": "always",
+						"name": "taskMetrics_id_seq",
+						"schema": "public",
+						"increment": "1",
+						"startWith": "1",
+						"minValue": "1",
+						"maxValue": "2147483647",
+						"cache": "1",
+						"cycle": false
+					}
+				},
+				"tokens_in": {
+					"name": "tokens_in",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"tokens_out": {
+					"name": "tokens_out",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"tokens_context": {
+					"name": "tokens_context",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"cache_writes": {
+					"name": "cache_writes",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"cache_reads": {
+					"name": "cache_reads",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"cost": {
+					"name": "cost",
+					"type": "real",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"duration": {
+					"name": "duration",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"tool_usage": {
+					"name": "tool_usage",
+					"type": "jsonb",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"created_at": {
+					"name": "created_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": true
+				}
+			},
+			"indexes": {},
+			"foreignKeys": {},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"policies": {},
+			"checkConstraints": {},
+			"isRLSEnabled": false
+		},
+		"public.tasks": {
+			"name": "tasks",
+			"schema": "",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"identity": {
+						"type": "always",
+						"name": "tasks_id_seq",
+						"schema": "public",
+						"increment": "1",
+						"startWith": "1",
+						"minValue": "1",
+						"maxValue": "2147483647",
+						"cache": "1",
+						"cycle": false
+					}
+				},
+				"run_id": {
+					"name": "run_id",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"task_metrics_id": {
+					"name": "task_metrics_id",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"language": {
+					"name": "language",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"exercise": {
+					"name": "exercise",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"passed": {
+					"name": "passed",
+					"type": "boolean",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"started_at": {
+					"name": "started_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"finished_at": {
+					"name": "finished_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"created_at": {
+					"name": "created_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": true
+				}
+			},
+			"indexes": {
+				"tasks_language_exercise_idx": {
+					"name": "tasks_language_exercise_idx",
+					"columns": [
+						{
+							"expression": "run_id",
+							"isExpression": false,
+							"asc": true,
+							"nulls": "last"
+						},
+						{
+							"expression": "language",
+							"isExpression": false,
+							"asc": true,
+							"nulls": "last"
+						},
+						{
+							"expression": "exercise",
+							"isExpression": false,
+							"asc": true,
+							"nulls": "last"
+						}
+					],
+					"isUnique": true,
+					"concurrently": false,
+					"method": "btree",
+					"with": {}
+				}
+			},
+			"foreignKeys": {
+				"tasks_run_id_runs_id_fk": {
+					"name": "tasks_run_id_runs_id_fk",
+					"tableFrom": "tasks",
+					"tableTo": "runs",
+					"columnsFrom": ["run_id"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				},
+				"tasks_task_metrics_id_taskMetrics_id_fk": {
+					"name": "tasks_task_metrics_id_taskMetrics_id_fk",
+					"tableFrom": "tasks",
+					"tableTo": "taskMetrics",
+					"columnsFrom": ["task_metrics_id"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				}
+			},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"policies": {},
+			"checkConstraints": {},
+			"isRLSEnabled": false
+		},
+		"public.toolErrors": {
+			"name": "toolErrors",
+			"schema": "",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"identity": {
+						"type": "always",
+						"name": "toolErrors_id_seq",
+						"schema": "public",
+						"increment": "1",
+						"startWith": "1",
+						"minValue": "1",
+						"maxValue": "2147483647",
+						"cache": "1",
+						"cycle": false
+					}
+				},
+				"run_id": {
+					"name": "run_id",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"task_id": {
+					"name": "task_id",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false
+				},
+				"tool_name": {
+					"name": "tool_name",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"error": {
+					"name": "error",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true
+				},
+				"created_at": {
+					"name": "created_at",
+					"type": "timestamp",
+					"primaryKey": false,
+					"notNull": true
+				}
+			},
+			"indexes": {},
+			"foreignKeys": {
+				"toolErrors_run_id_runs_id_fk": {
+					"name": "toolErrors_run_id_runs_id_fk",
+					"tableFrom": "toolErrors",
+					"tableTo": "runs",
+					"columnsFrom": ["run_id"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				},
+				"toolErrors_task_id_tasks_id_fk": {
+					"name": "toolErrors_task_id_tasks_id_fk",
+					"tableFrom": "toolErrors",
+					"tableTo": "tasks",
+					"columnsFrom": ["task_id"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				}
+			},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"policies": {},
+			"checkConstraints": {},
+			"isRLSEnabled": false
+		}
+	},
+	"enums": {},
+	"schemas": {},
+	"sequences": {},
+	"roles": {},
+	"policies": {},
+	"views": {},
+	"_meta": {
+		"columns": {},
+		"schemas": {},
+		"tables": {}
+	}
+}

+ 7 - 0
packages/evals/src/db/migrations/meta/_journal.json

@@ -8,6 +8,13 @@
 			"when": 1748937674449,
 			"tag": "0000_young_trauma",
 			"breakpoints": true
+		},
+		{
+			"idx": 1,
+			"version": "7",
+			"when": 1753198630651,
+			"tag": "0001_lowly_captain_flint",
+			"breakpoints": true
 		}
 	]
 }