2
0
Эх сурвалжийг харах

feat: add configurable timeout for evals (5-10 min) (#5865)

* feat: add configurable timeout for evals (5-10 min)

- Add timeout field (integer minutes, min 5, max 10) to CreateRun schema; the new-run form defaults it to 5
- Add timeout slider UI component to /runs/new page
- Update database schema to include timeout column in runs table
- Create migration to add timeout column with default value of 5
- Update runTask.ts to use configurable timeout from run settings
- Pass timeout parameter through the createRun action

* fix: remove unused EVALS_TIMEOUT import

* fix: add timeout field to createRun calls in copyRun test

- Added timeout: 5 to both createRun calls in copyRun.spec.ts
- This fixes the test failure caused by the new required timeout field in the runs schema
- The timeout field was added in the configurable timeout feature but the test was not updated

* fix: use configurable timeout for Redis key expiration in registerRunner

- Updated registerRunner function to accept timeoutSeconds parameter
- Modified call in runTask.ts to pass configurable timeout instead of hardcoded EVALS_TIMEOUT
- Removed unused EVALS_TIMEOUT import from redis.ts
- Sets the Redis runners key TTL to the run's configured timeout (up to 10 minutes) instead of the fixed EVALS_TIMEOUT, so the key is intended to remain valid for the duration of task execution

---------

Co-authored-by: Roo Code <[email protected]>
Co-authored-by: hannesrudolph <[email protected]>
roomote[bot] 5 сар өмнө
parent
commit
c96b399dd7

+ 2 - 1
apps/web-evals/src/actions/runs.ts

@@ -22,9 +22,10 @@ import { CreateRun } from "@/lib/schemas"
 const EVALS_REPO_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals")
 
 // eslint-disable-next-line @typescript-eslint/no-unused-vars
-export async function createRun({ suite, exercises = [], systemPrompt, ...values }: CreateRun) {
+export async function createRun({ suite, exercises = [], systemPrompt, timeout, ...values }: CreateRun) {
 	const run = await _createRun({
 		...values,
+		timeout,
 		socketPath: "", // TODO: Get rid of this.
 	})
 

+ 27 - 0
apps/web-evals/src/app/runs/new/new-run.tsx

@@ -21,6 +21,9 @@ import {
 	CONCURRENCY_MIN,
 	CONCURRENCY_MAX,
 	CONCURRENCY_DEFAULT,
+	TIMEOUT_MIN,
+	TIMEOUT_MAX,
+	TIMEOUT_DEFAULT,
 } from "@/lib/schemas"
 import { cn } from "@/lib/utils"
 import { useOpenRouterModels } from "@/hooks/use-open-router-models"
@@ -77,6 +80,7 @@ export function NewRun() {
 			exercises: [],
 			settings: undefined,
 			concurrency: CONCURRENCY_DEFAULT,
+			timeout: TIMEOUT_DEFAULT,
 		},
 	})
 
@@ -341,6 +345,29 @@ export function NewRun() {
 						)}
 					/>
 
+					<FormField
+						control={form.control}
+						name="timeout"
+						render={({ field }) => (
+							<FormItem>
+								<FormLabel>Timeout (minutes)</FormLabel>
+								<FormControl>
+									<div className="flex flex-row items-center gap-2">
+										<Slider
+											defaultValue={[field.value]}
+											min={TIMEOUT_MIN}
+											max={TIMEOUT_MAX}
+											step={1}
+											onValueChange={(value) => field.onChange(value[0])}
+										/>
+										<div>{field.value} min</div>
+									</div>
+								</FormControl>
+								<FormMessage />
+							</FormItem>
+						)}
+					/>
+
 					<FormField
 						control={form.control}
 						name="description"

+ 5 - 0
apps/web-evals/src/lib/schemas.ts

@@ -12,6 +12,10 @@ export const CONCURRENCY_MIN = 1
 export const CONCURRENCY_MAX = 25
 export const CONCURRENCY_DEFAULT = 1
 
+export const TIMEOUT_MIN = 5
+export const TIMEOUT_MAX = 10
+export const TIMEOUT_DEFAULT = 5
+
 export const createRunSchema = z
 	.object({
 		model: z.string().min(1, { message: "Model is required." }),
@@ -20,6 +24,7 @@ export const createRunSchema = z
 		exercises: z.array(z.string()).optional(),
 		settings: rooCodeSettingsSchema.optional(),
 		concurrency: z.number().int().min(CONCURRENCY_MIN).max(CONCURRENCY_MAX),
+		timeout: z.number().int().min(TIMEOUT_MIN).max(TIMEOUT_MAX),
 		systemPrompt: z.string().optional(),
 	})
 	.refine((data) => data.suite === "full" || (data.exercises || []).length > 0, {

+ 10 - 4
packages/evals/src/cli/redis.ts

@@ -1,7 +1,5 @@
 import { createClient, type RedisClientType } from "redis"
 
-import { EVALS_TIMEOUT } from "@roo-code/types"
-
 let redis: RedisClientType | undefined
 
 export const redisClient = async () => {
@@ -18,11 +16,19 @@ export const getPubSubKey = (runId: number) => `evals:${runId}`
 export const getRunnersKey = (runId: number) => `runners:${runId}`
 export const getHeartbeatKey = (runId: number) => `heartbeat:${runId}`
 
-export const registerRunner = async ({ runId, taskId }: { runId: number; taskId: number }) => {
+export const registerRunner = async ({
+	runId,
+	taskId,
+	timeoutSeconds,
+}: {
+	runId: number
+	taskId: number
+	timeoutSeconds: number
+}) => {
 	const redis = await redisClient()
 	const runnersKey = getRunnersKey(runId)
 	await redis.sAdd(runnersKey, `task-${taskId}:${process.env.HOSTNAME ?? process.pid}`)
-	await redis.expire(runnersKey, EVALS_TIMEOUT / 1_000)
+	await redis.expire(runnersKey, timeoutSeconds)
 }
 
 export const deregisterRunner = async ({ runId, taskId }: { runId: number; taskId: number }) => {

+ 4 - 10
packages/evals/src/cli/runTask.ts

@@ -5,14 +5,7 @@ import * as os from "node:os"
 import pWaitFor from "p-wait-for"
 import { execa } from "execa"
 
-import {
-	type TaskEvent,
-	TaskCommandName,
-	RooCodeEventName,
-	IpcMessageType,
-	EVALS_SETTINGS,
-	EVALS_TIMEOUT,
-} from "@roo-code/types"
+import { type TaskEvent, TaskCommandName, RooCodeEventName, IpcMessageType, EVALS_SETTINGS } from "@roo-code/types"
 import { IpcClient } from "@roo-code/ipc"
 
 import {
@@ -42,7 +35,7 @@ export const processTask = async ({ taskId, logger }: { taskId: number; logger?:
 	const task = await findTask(taskId)
 	const { language, exercise } = task
 	const run = await findRun(task.runId)
-	await registerRunner({ runId: run.id, taskId })
+	await registerRunner({ runId: run.id, taskId, timeoutSeconds: (run.timeout || 5) * 60 })
 
 	const containerized = isDockerContainer()
 
@@ -304,9 +297,10 @@ export const runTask = async ({ run, task, publish, logger }: RunTaskOptions) =>
 	})
 
 	try {
+		const timeoutMs = (run.timeout || 5) * 60 * 1_000 // Convert minutes to milliseconds
 		await pWaitFor(() => !!taskFinishedAt || !!taskAbortedAt || isClientDisconnected, {
 			interval: 1_000,
-			timeout: EVALS_TIMEOUT,
+			timeout: timeoutMs,
 		})
 	} catch (_error) {
 		taskTimedOut = true

+ 1 - 0
packages/evals/src/db/migrations/0001_add_timeout_to_runs.sql

@@ -0,0 +1 @@
+ALTER TABLE "runs" ADD COLUMN "timeout" integer DEFAULT 5 NOT NULL;

+ 2 - 1
packages/evals/src/db/queries/__tests__/copyRun.spec.ts

@@ -23,6 +23,7 @@ describe("copyRun", () => {
 			socketPath: "/tmp/roo.sock",
 			description: "Test run for copying",
 			concurrency: 4,
+			timeout: 5,
 		})
 
 		sourceRunId = run.id
@@ -271,7 +272,7 @@ describe("copyRun", () => {
 	})
 
 	it("should copy run without task metrics", async () => {
-		const minimalRun = await createRun({ model: "gpt-3.5-turbo", socketPath: "/tmp/minimal.sock" })
+		const minimalRun = await createRun({ model: "gpt-3.5-turbo", socketPath: "/tmp/minimal.sock", timeout: 5 })
 
 		const newRunId = await copyRun({ sourceDb: db, targetDb: db, runId: minimalRun.id })
 

+ 1 - 0
packages/evals/src/db/schema.ts

@@ -18,6 +18,7 @@ export const runs = pgTable("runs", {
 	pid: integer(),
 	socketPath: text("socket_path").notNull(),
 	concurrency: integer().default(2).notNull(),
+	timeout: integer().default(5).notNull(),
 	passed: integer().default(0).notNull(),
 	failed: integer().default(0).notNull(),
 	createdAt: timestamp("created_at").notNull(),