Переглянути джерело

Control evals concurrency in web app (#2265)

Chris Estreich 8 місяців тому
батько
коміт
6ab9aa9f15

+ 7 - 5
evals/apps/cli/src/index.ts

@@ -36,7 +36,6 @@ import { getExercises } from "./exercises.js"
 type TaskResult = { success: boolean; retry: boolean }
 type TaskPromise = Promise<TaskResult>
 
-const MAX_CONCURRENCY = 5
 const TASK_TIMEOUT = 10 * 60 * 1_000
 const UNIT_TEST_TIMEOUT = 60 * 1_000
 
@@ -78,12 +77,14 @@ const run = async (toolbox: GluegunToolbox) => {
 				const exercises = getExercises()[language as ExerciseLanguage]
 
 				await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), {
-					concurrency: 10,
+					concurrency: run.concurrency,
 				})
 			}
 		} else if (exercise === "all") {
 			const exercises = getExercises()[language as ExerciseLanguage]
-			await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), { concurrency: 10 })
+			await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), {
+				concurrency: run.concurrency,
+			})
 		} else {
 			language = language || (await askLanguage(prompt))
 			exercise = exercise || (await askExercise(prompt, language))
@@ -145,13 +146,14 @@ const run = async (toolbox: GluegunToolbox) => {
 	}
 
 	let delay = 0
+
 	for (const task of tasks) {
 		const promise = processTask(task, delay)
 		delay = delay + 5_000
 		runningPromises.push(promise)
 		promise.then(() => processTaskResult(task, promise))
 
-		if (runningPromises.length >= MAX_CONCURRENCY) {
+		if (runningPromises.length >= run.concurrency) {
 			delay = 0
 			await Promise.race(runningPromises)
 		}
@@ -179,7 +181,7 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
 	// subprocess.stdout.pipe(process.stdout)
 
 	// Sleep for a random amount of time before opening a new VSCode window.
-	await new Promise((resolve) => setTimeout(resolve, 1_000 + Math.random() * MAX_CONCURRENCY * 1_000))
+	await new Promise((resolve) => setTimeout(resolve, 1_000 + Math.random() * 5_000))
 	console.log(`Opening new VS Code window at ${workspacePath}`)
 
 	await execa({

+ 1 - 0
evals/apps/web/package.json

@@ -20,6 +20,7 @@
 		"@radix-ui/react-scroll-area": "^1.2.3",
 		"@radix-ui/react-select": "^2.1.6",
 		"@radix-ui/react-separator": "^1.1.2",
+		"@radix-ui/react-slider": "^1.2.4",
 		"@radix-ui/react-slot": "^1.1.2",
 		"@radix-ui/react-tabs": "^1.1.3",
 		"@radix-ui/react-tooltip": "^1.1.8",

+ 33 - 2
evals/apps/web/src/app/runs/new/new-run.tsx

@@ -12,7 +12,13 @@ import { X, Rocket, Check, ChevronsUpDown, HardDriveUpload, CircleCheck } from "
 import { globalSettingsSchema, providerSettingsSchema, rooCodeDefaults } from "@evals/types"
 
 import { createRun } from "@/lib/server/runs"
-import { createRunSchema as formSchema, type CreateRun as FormValues } from "@/lib/schemas"
+import {
+	createRunSchema as formSchema,
+	type CreateRun as FormValues,
+	CONCURRENCY_MIN,
+	CONCURRENCY_MAX,
+	CONCURRENCY_DEFAULT,
+} from "@/lib/schemas"
 import { cn } from "@/lib/utils"
 import { useOpenRouterModels } from "@/hooks/use-open-router-models"
 import { useExercises } from "@/hooks/use-exercises"
@@ -38,6 +44,7 @@ import {
 	PopoverContent,
 	PopoverTrigger,
 	ScrollArea,
+	Slider,
 } from "@/components/ui"
 
 import { SettingsDiff } from "./settings-diff"
@@ -63,6 +70,7 @@ export function NewRun() {
 			suite: "full",
 			exercises: [],
 			settings: undefined,
+			concurrency: CONCURRENCY_DEFAULT,
 		},
 	})
 
@@ -73,7 +81,7 @@ export function NewRun() {
 		formState: { isSubmitting },
 	} = form
 
-	const [model, suite, settings] = watch(["model", "suite", "settings"])
+	const [model, suite, settings] = watch(["model", "suite", "settings", "concurrency"])
 
 	const onSubmit = useCallback(
 		async (values: FormValues) => {
@@ -288,6 +296,29 @@ export function NewRun() {
 						)}
 					/>
 
+					<FormField
+						control={form.control}
+						name="concurrency"
+						render={({ field }) => (
+							<FormItem>
+								<FormLabel>Concurrency</FormLabel>
+								<FormControl>
+									<div className="flex flex-row items-center gap-2">
+										<Slider
+											defaultValue={[field.value]}
+											min={CONCURRENCY_MIN}
+											max={CONCURRENCY_MAX}
+											step={1}
+											onValueChange={(value) => field.onChange(value[0])}
+										/>
+										<div>{field.value}</div>
+									</div>
+								</FormControl>
+								<FormMessage />
+							</FormItem>
+						)}
+					/>
+
 					<FormField
 						control={form.control}
 						name="description"

+ 1 - 0
evals/apps/web/src/components/ui/index.ts

@@ -11,6 +11,7 @@ export * from "./popover"
 export * from "./scroll-area"
 export * from "./select"
 export * from "./separator"
+export * from "./slider"
 export * from "./sonner"
 export * from "./table"
 export * from "./tabs"

+ 56 - 0
evals/apps/web/src/components/ui/slider.tsx

@@ -0,0 +1,56 @@
+"use client"
+
+import * as React from "react"
+import * as SliderPrimitive from "@radix-ui/react-slider"
+
+import { cn } from "@/lib/utils"
+
+function Slider({ // Styled wrapper around the Radix slider Root: renders a track, a filled range, and one thumb per value.
+	className,
+	defaultValue,
+	value,
+	min = 0, // forwarded to the Root; defaults to 0 when the caller omits it
+	max = 100, // forwarded to the Root; defaults to 100 when the caller omits it
+	...props // every remaining Root prop is spread onto the Root unchanged
+}: React.ComponentProps<typeof SliderPrimitive.Root>) {
+	const _values = React.useMemo( // only the length of this array is used below, to decide how many thumbs to render
+		() => (Array.isArray(value) ? value : Array.isArray(defaultValue) ? defaultValue : [min, max]), // value wins, then defaultValue, else [min, max] (two thumbs)
+		[value, defaultValue, min, max],
+	)
+
+	return (
+		<SliderPrimitive.Root
+			data-slot="slider"
+			defaultValue={defaultValue}
+			value={value}
+			min={min}
+			max={max}
+			className={cn(
+				"relative flex w-full touch-none items-center select-none data-[disabled]:opacity-50 data-[orientation=vertical]:h-full data-[orientation=vertical]:min-h-44 data-[orientation=vertical]:w-auto data-[orientation=vertical]:flex-col",
+				className,
+			)}
+			{...props}>
+			<SliderPrimitive.Track
+				data-slot="slider-track"
+				className={cn(
+					"bg-muted relative grow overflow-hidden rounded-full data-[orientation=horizontal]:h-1.5 data-[orientation=horizontal]:w-full data-[orientation=vertical]:h-full data-[orientation=vertical]:w-1.5",
+				)}>
+				<SliderPrimitive.Range
+					data-slot="slider-range"
+					className={cn(
+						"bg-primary absolute data-[orientation=horizontal]:h-full data-[orientation=vertical]:w-full",
+					)}
+				/>
+			</SliderPrimitive.Track>
+			{Array.from({ length: _values.length }, (_, index) => (
+				<SliderPrimitive.Thumb
+					data-slot="slider-thumb"
+					key={index}
+					className="border-primary bg-accent block size-4 shrink-0 rounded-full border shadow-sm transition-[color,box-shadow] focus-visible:outline-hidden disabled:pointer-events-none disabled:opacity-50 cursor-pointer"
+				/>
+			))}
+		</SliderPrimitive.Root>
+	)
+}
+
+export { Slider }

+ 5 - 0
evals/apps/web/src/lib/schemas.ts

@@ -6,6 +6,10 @@ import { rooCodeSettingsSchema } from "@evals/types"
  * CreateRun
  */
 
+export const CONCURRENCY_MIN = 1
+export const CONCURRENCY_MAX = 25
+export const CONCURRENCY_DEFAULT = 2
+
 export const createRunSchema = z
 	.object({
 		model: z.string().min(1, { message: "Model is required." }),
@@ -13,6 +17,7 @@ export const createRunSchema = z
 		suite: z.enum(["full", "partial"]),
 		exercises: z.array(z.string()).optional(),
 		settings: rooCodeSettingsSchema.optional(),
+		concurrency: z.number().int().min(CONCURRENCY_MIN).max(CONCURRENCY_MAX).default(CONCURRENCY_DEFAULT),
 	})
 	.refine((data) => data.suite === "full" || (data.exercises || []).length > 0, {
 		message: "Exercises are required when running a partial suite.",

+ 6 - 6
evals/package.json

@@ -13,14 +13,14 @@
 		"drizzle:studio": "pnpm --filter @evals/db db:studio"
 	},
 	"devDependencies": {
-		"@dotenvx/dotenvx": "^1.39.0",
-		"@eslint/js": "^9.22.0",
-		"eslint": "^9.22.0",
+		"@dotenvx/dotenvx": "^1.39.1",
+		"@eslint/js": "^9.24.0",
+		"eslint": "^9.24.0",
 		"globals": "^16.0.0",
 		"prettier": "^3.5.3",
 		"tsx": "^4.19.3",
-		"turbo": "^2.4.4",
-		"typescript": "^5",
-		"typescript-eslint": "^8.26.0"
+		"turbo": "^2.5.0",
+		"typescript": "^5.8.3",
+		"typescript-eslint": "^8.29.1"
 	}
 }

+ 1 - 0
evals/packages/db/drizzle/0002_white_flatman.sql

@@ -0,0 +1 @@
+ALTER TABLE `runs` ADD `concurrency` integer DEFAULT 2 NOT NULL;

+ 289 - 0
evals/packages/db/drizzle/meta/0002_snapshot.json

@@ -0,0 +1,289 @@
+{
+	"version": "6",
+	"dialect": "sqlite",
+	"id": "f49d9b0b-fda9-467a-9adb-c941d6cbf7ce",
+	"prevId": "8906647f-81d6-498a-897c-b1638c04c69a",
+	"tables": {
+		"runs": {
+			"name": "runs",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"autoincrement": true
+				},
+				"taskMetricsId": {
+					"name": "taskMetricsId",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"model": {
+					"name": "model",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"description": {
+					"name": "description",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"settings": {
+					"name": "settings",
+					"type": "blob",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"pid": {
+					"name": "pid",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"socketPath": {
+					"name": "socketPath",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"concurrency": {
+					"name": "concurrency",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false,
+					"default": 2
+				},
+				"passed": {
+					"name": "passed",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false,
+					"default": 0
+				},
+				"failed": {
+					"name": "failed",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false,
+					"default": 0
+				},
+				"createdAt": {
+					"name": "createdAt",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				}
+			},
+			"indexes": {},
+			"foreignKeys": {
+				"runs_taskMetricsId_taskMetrics_id_fk": {
+					"name": "runs_taskMetricsId_taskMetrics_id_fk",
+					"tableFrom": "runs",
+					"tableTo": "taskMetrics",
+					"columnsFrom": ["taskMetricsId"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				}
+			},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"checkConstraints": {}
+		},
+		"taskMetrics": {
+			"name": "taskMetrics",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"autoincrement": true
+				},
+				"tokensIn": {
+					"name": "tokensIn",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"tokensOut": {
+					"name": "tokensOut",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"tokensContext": {
+					"name": "tokensContext",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"cacheWrites": {
+					"name": "cacheWrites",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"cacheReads": {
+					"name": "cacheReads",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"cost": {
+					"name": "cost",
+					"type": "real",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"duration": {
+					"name": "duration",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"createdAt": {
+					"name": "createdAt",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				}
+			},
+			"indexes": {},
+			"foreignKeys": {},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"checkConstraints": {}
+		},
+		"tasks": {
+			"name": "tasks",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"autoincrement": true
+				},
+				"runId": {
+					"name": "runId",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"taskMetricsId": {
+					"name": "taskMetricsId",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"language": {
+					"name": "language",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"exercise": {
+					"name": "exercise",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"passed": {
+					"name": "passed",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"startedAt": {
+					"name": "startedAt",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"finishedAt": {
+					"name": "finishedAt",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"createdAt": {
+					"name": "createdAt",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				}
+			},
+			"indexes": {
+				"tasks_language_exercise_idx": {
+					"name": "tasks_language_exercise_idx",
+					"columns": ["runId", "language", "exercise"],
+					"isUnique": true
+				}
+			},
+			"foreignKeys": {
+				"tasks_runId_runs_id_fk": {
+					"name": "tasks_runId_runs_id_fk",
+					"tableFrom": "tasks",
+					"tableTo": "runs",
+					"columnsFrom": ["runId"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				},
+				"tasks_taskMetricsId_taskMetrics_id_fk": {
+					"name": "tasks_taskMetricsId_taskMetrics_id_fk",
+					"tableFrom": "tasks",
+					"tableTo": "taskMetrics",
+					"columnsFrom": ["taskMetricsId"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				}
+			},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"checkConstraints": {}
+		}
+	},
+	"views": {},
+	"enums": {},
+	"_meta": {
+		"schemas": {},
+		"tables": {},
+		"columns": {}
+	},
+	"internal": {
+		"indexes": {}
+	}
+}

+ 7 - 0
evals/packages/db/drizzle/meta/_journal.json

@@ -15,6 +15,13 @@
 			"when": 1743089501047,
 			"tag": "0001_lush_reavers",
 			"breakpoints": true
+		},
+		{
+			"idx": 2,
+			"version": "6",
+			"when": 1743698195142,
+			"tag": "0002_white_flatman",
+			"breakpoints": true
 		}
 	]
 }

+ 1 - 0
evals/packages/db/src/schema.ts

@@ -16,6 +16,7 @@ export const runs = sqliteTable("runs", {
 	settings: blob({ mode: "json" }).$type<GlobalSettings>(),
 	pid: integer({ mode: "number" }),
 	socketPath: text().notNull(),
+	concurrency: integer({ mode: "number" }).default(2).notNull(),
 	passed: integer({ mode: "number" }).default(0).notNull(),
 	failed: integer({ mode: "number" }).default(0).notNull(),
 	createdAt: integer({ mode: "timestamp" }).notNull(),

Різницю між файлами не показано, бо вона завелика
+ 520 - 45
evals/pnpm-lock.yaml


+ 3 - 5
evals/scripts/setup.sh

@@ -293,11 +293,9 @@ if [[ ! -s .env ]]; then
   cp .env.sample .env || exit 1
 fi
 
-if [[ ! -s /tmp/evals.db ]]; then
-  echo "🗄️ Creating database..."
-  pnpm --filter @evals/db db:push || exit 1
-  pnpm --filter @evals/db db:enable-wal || exit 1
-fi
+echo "🗄️ Syncing database..."
+pnpm --filter @evals/db db:push || exit 1
+pnpm --filter @evals/db db:enable-wal || exit 1
 
 if ! grep -q "OPENROUTER_API_KEY" .env; then
   read -p "🔐 Enter your OpenRouter API key (sk-or-v1-...): " openrouter_api_key

+ 1 - 1
src/core/context-tracking/FileContextTracker.ts

@@ -111,7 +111,7 @@ export class FileContextTracker {
 
 	// Gets task metadata from storage
 	async getTaskMetadata(taskId: string): Promise<TaskMetadata> {
-		const globalStoragePath = this.getContextProxy()?.globalStorageUri.fsPath ?? ''
+		const globalStoragePath = this.getContextProxy()?.globalStorageUri.fsPath ?? ""
 		const taskDir = await getTaskDirectoryPath(globalStoragePath, taskId)
 		const filePath = path.join(taskDir, GlobalFileNames.taskMetadata)
 		try {

Деякі файли не було показано, через те що забагато файлів було змінено