|
@@ -8,14 +8,21 @@ import fs from "fs"
|
|
|
import { revalidatePath } from "next/cache"
|
|
import { revalidatePath } from "next/cache"
|
|
|
import pMap from "p-map"
|
|
import pMap from "p-map"
|
|
|
|
|
|
|
|
-import { ExerciseLanguage, exerciseLanguages } from "@evals/types"
|
|
|
|
|
-import * as db from "@evals/db"
|
|
|
|
|
|
|
+import {
|
|
|
|
|
+ type ExerciseLanguage,
|
|
|
|
|
+ exerciseLanguages,
|
|
|
|
|
+ createRun as _createRun,
|
|
|
|
|
+ updateRun as _updateRun,
|
|
|
|
|
+ deleteRun as _deleteRun,
|
|
|
|
|
+ createTask,
|
|
|
|
|
+} from "@roo-code/evals"
|
|
|
|
|
|
|
|
import { CreateRun } from "@/lib/schemas"
|
|
import { CreateRun } from "@/lib/schemas"
|
|
|
|
|
+
|
|
|
import { getExercisesForLanguage } from "./exercises"
|
|
import { getExercisesForLanguage } from "./exercises"
|
|
|
|
|
|
|
|
export async function createRun({ suite, exercises = [], systemPrompt, ...values }: CreateRun) {
|
|
export async function createRun({ suite, exercises = [], systemPrompt, ...values }: CreateRun) {
|
|
|
- const run = await db.createRun({
|
|
|
|
|
|
|
+ const run = await _createRun({
|
|
|
...values,
|
|
...values,
|
|
|
socketPath: path.join(os.tmpdir(), `roo-code-evals-${crypto.randomUUID()}.sock`),
|
|
socketPath: path.join(os.tmpdir(), `roo-code-evals-${crypto.randomUUID()}.sock`),
|
|
|
})
|
|
})
|
|
@@ -28,13 +35,13 @@ export async function createRun({ suite, exercises = [], systemPrompt, ...values
|
|
|
throw new Error("Invalid exercise path: " + path)
|
|
throw new Error("Invalid exercise path: " + path)
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- await db.createTask({ ...values, runId: run.id, language: language as ExerciseLanguage, exercise })
|
|
|
|
|
|
|
+ await createTask({ ...values, runId: run.id, language: language as ExerciseLanguage, exercise })
|
|
|
}
|
|
}
|
|
|
} else {
|
|
} else {
|
|
|
for (const language of exerciseLanguages) {
|
|
for (const language of exerciseLanguages) {
|
|
|
const exercises = await getExercisesForLanguage(language)
|
|
const exercises = await getExercisesForLanguage(language)
|
|
|
|
|
|
|
|
- await pMap(exercises, (exercise) => db.createTask({ ...values, runId: run.id, language, exercise }), {
|
|
|
|
|
|
|
+ await pMap(exercises, (exercise) => createTask({ ...values, runId: run.id, language, exercise }), {
|
|
|
concurrency: 10,
|
|
concurrency: 10,
|
|
|
})
|
|
})
|
|
|
}
|
|
}
|
|
@@ -49,18 +56,14 @@ export async function createRun({ suite, exercises = [], systemPrompt, ...values
|
|
|
? { ...process.env, FOOTGUN_SYSTEM_PROMPT: systemPrompt }
|
|
? { ...process.env, FOOTGUN_SYSTEM_PROMPT: systemPrompt }
|
|
|
: process.env
|
|
: process.env
|
|
|
|
|
|
|
|
- const childProcess = spawn(
|
|
|
|
|
- "pnpm",
|
|
|
|
|
- ["--filter", "@evals/cli", "dev", "run", "all", "--runId", run.id.toString()],
|
|
|
|
|
- {
|
|
|
|
|
- detached: true,
|
|
|
|
|
- stdio: ["ignore", logFile, logFile],
|
|
|
|
|
- env,
|
|
|
|
|
- },
|
|
|
|
|
- )
|
|
|
|
|
|
|
+ const childProcess = spawn("pnpm", ["--filter", "@roo-code/evals", "cli", run.id.toString()], {
|
|
|
|
|
+ detached: true,
|
|
|
|
|
+ stdio: ["ignore", logFile, logFile],
|
|
|
|
|
+ env,
|
|
|
|
|
+ })
|
|
|
|
|
|
|
|
childProcess.unref()
|
|
childProcess.unref()
|
|
|
- await db.updateRun(run.id, { pid: childProcess.pid })
|
|
|
|
|
|
|
+ await _updateRun(run.id, { pid: childProcess.pid })
|
|
|
} catch (error) {
|
|
} catch (error) {
|
|
|
console.error(error)
|
|
console.error(error)
|
|
|
}
|
|
}
|
|
@@ -69,6 +72,6 @@ export async function createRun({ suite, exercises = [], systemPrompt, ...values
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
export async function deleteRun(runId: number) {
|
|
export async function deleteRun(runId: number) {
|
|
|
- await db.deleteRun(runId)
|
|
|
|
|
|
|
+ await _deleteRun(runId)
|
|
|
revalidatePath("/runs")
|
|
revalidatePath("/runs")
|
|
|
}
|
|
}
|