| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150 |
- import { execa } from "execa"
- import { type TaskEvent, RooCodeEventName } from "@roo-code/types"
- import { findRun, findTask, updateTask } from "../db/index"
- import { Logger, getTag, isDockerContainer } from "./utils"
- import { redisClient, getPubSubKey, registerRunner, deregisterRunner } from "./redis"
- import { runUnitTest } from "./runUnitTest"
- import { runTaskWithCli } from "./runTaskInCli"
- import { runTaskInVscode } from "./runTaskInVscode"
/**
 * Runs a single eval task end to end and records whether it passed.
 *
 * Steps, in order:
 *  1. Load the task and its run, then register this runner for the run/task pair.
 *  2. Execute the task through the CLI when `run.executionMethod` is `"cli"`,
 *     otherwise through VS Code (also the fallback when the field is unset).
 *  3. Run the unit tests, persist the `passed` flag on the task and publish an
 *     `EvalPass` / `EvalFail` event on the run's pub/sub key.
 *  4. Always deregister the runner, whether the steps above succeeded or threw.
 *
 * @param taskId - Id of the task to load and execute.
 * @param jobToken - Token forwarded unchanged to the task runner; may be `null`.
 * @param logger - Optional logger. When omitted, a file logger is created under
 *   the run's log directory.
 * @throws Re-throws any error raised by the lookup, execution, test, update or
 *   publish steps; nothing is swallowed here.
 */
export const processTask = async ({
	taskId,
	jobToken,
	logger,
}: {
	taskId: number
	jobToken: string | null
	logger?: Logger
}): Promise<void> => {
	const task = await findTask(taskId)
	const { language, exercise } = task
	const run = await findRun(task.runId)

	// `||` rather than `??` is kept on purpose: a timeout of 0 also falls back to 5.
	// The value is multiplied by 60 to obtain seconds (so it is presumably in minutes).
	await registerRunner({ runId: run.id, taskId, timeoutSeconds: (run.timeout || 5) * 60 })

	// Everything after a successful registration lives inside the try block so
	// that a failure while building the logger cannot leave the runner registered.
	try {
		const containerized = isDockerContainer()

		const log =
			logger ??
			new Logger({
				logDir: containerized ? `/var/log/evals/runs/${run.id}` : `/tmp/evals/runs/${run.id}`,
				filename: `${language}-${exercise}.log`,
				tag: getTag("runTask", { run, task }),
			})

		// Serialises an event as JSON and publishes it on this run's pub/sub key.
		const publish = async (e: TaskEvent) => {
			const redis = await redisClient()
			await redis.publish(getPubSubKey(run.id), JSON.stringify(e))
		}

		const executionMethod = run.executionMethod || "vscode"
		log.info(`running task ${task.id} (${language}/${exercise}) via ${executionMethod}...`)

		if (executionMethod === "cli") {
			await runTaskWithCli({ run, task, jobToken, publish, logger: log })
		} else {
			await runTaskInVscode({ run, task, jobToken, publish, logger: log })
		}

		log.info(`testing task ${task.id} (${language}/${exercise})...`)
		const passed = await runUnitTest({ task, logger: log })
		log.info(`task ${task.id} (${language}/${exercise}) -> ${passed}`)

		await updateTask(task.id, { passed })

		await publish({
			eventName: passed ? RooCodeEventName.EvalPass : RooCodeEventName.EvalFail,
			taskId: task.id,
		})
	} finally {
		await deregisterRunner({ runId: run.id, taskId })
	}
}
/**
 * Runs the evals CLI for one task inside a fresh `evals-runner` Docker
 * container, retrying with exponential backoff when the container fails.
 *
 * The container is started with `docker run --rm` on the `evals_default`
 * network, with the Docker socket and `/tmp/evals` mounted. `jobToken` (when
 * set) and any of the known provider API keys present in `process.env` are
 * forwarded as `-e` environment variables.
 *
 * The command is handed to `execa` as an argument array, not a shell string,
 * so token and key values reach Docker verbatim even if they contain
 * whitespace or shell metacharacters.
 *
 * @param taskId - Id of the task; passed to the CLI as `--taskId` and used in
 *   the container name.
 * @param jobToken - When truthy, exported to the container as
 *   `ROO_CODE_CLOUD_TOKEN`.
 * @param logger - Logger used for progress and failure messages.
 * @param maxRetries - Number of retries after the first attempt (default 10),
 *   i.e. at most `maxRetries + 1` container runs.
 * @returns Resolves once an attempt succeeds, or after every attempt has
 *   failed. Failure is only logged; this function does not throw for it.
 */
export const processTaskInContainer = async ({
	taskId,
	jobToken,
	logger,
	maxRetries = 10,
}: {
	taskId: number
	jobToken: string | null
	logger: Logger
	maxRetries?: number
}): Promise<void> => {
	// Every flag and every flag value is its own argv element; no shell parses them.
	const baseArgs = [
		"--rm",
		"--network",
		"evals_default",
		"-v",
		"/var/run/docker.sock:/var/run/docker.sock",
		"-v",
		"/tmp/evals:/var/log/evals",
		"-e",
		"HOST_EXECUTION_METHOD=docker",
	]

	if (jobToken) {
		baseArgs.push("-e", `ROO_CODE_CLOUD_TOKEN=${jobToken}`)
	}

	// Pass API keys to the container so the CLI can authenticate
	const apiKeyEnvVars = [
		"OPENROUTER_API_KEY",
		"ANTHROPIC_API_KEY",
		"OPENAI_API_KEY",
		"GOOGLE_API_KEY",
		"DEEPSEEK_API_KEY",
		"MISTRAL_API_KEY",
	]

	for (const envVar of apiKeyEnvVars) {
		const value = process.env[envVar]

		// Unset or empty variables are not forwarded.
		if (value) {
			baseArgs.push("-e", `${envVar}=${value}`)
		}
	}

	const command = `pnpm --filter @roo-code/evals cli --taskId ${taskId}`
	logger.info(command)

	for (let attempt = 0; attempt <= maxRetries; attempt++) {
		const containerName = `evals-task-${taskId}.${attempt}`
		const args = ["--name", containerName, "-e", `EVALS_ATTEMPT=${attempt}`, ...baseArgs]
		const isRetry = attempt > 0

		if (isRetry) {
			// Exponential backoff (1s, 2s, 4s, ...) scaled by a random factor in [0.5, 1.5).
			const delayMs = Math.pow(2, attempt - 1) * 1000 * (0.5 + Math.random())
			logger.info(`retrying in ${delayMs}ms (attempt ${attempt + 1}/${maxRetries + 1})`)
			await new Promise((resolve) => setTimeout(resolve, delayMs))
		}

		logger.info(
			`${isRetry ? "retrying" : "executing"} container command (attempt ${attempt + 1}/${maxRetries + 1})`,
		)

		// `command` is a single argument to `sh -c`; the in-container shell parses it.
		const subprocess = execa("docker", ["run", ...args, "evals-runner", "sh", "-c", command])
		// subprocess.stdout?.on("data", (data) => console.log(data.toString()))
		// subprocess.stderr?.on("data", (data) => console.error(data.toString()))

		try {
			const result = await subprocess
			logger.info(`container process completed with exit code: ${result.exitCode}`)
			return
		} catch (error) {
			// A failed attempt is logged and the loop moves on to the next attempt (if any).
			if (error && typeof error === "object" && "exitCode" in error) {
				logger.error(
					`container process failed with exit code: ${error.exitCode} (attempt ${attempt + 1}/${maxRetries + 1})`,
				)
			} else {
				logger.error(`container process failed with error: ${error} (attempt ${attempt + 1}/${maxRetries + 1})`)
			}
		}
	}

	logger.error(`all ${maxRetries + 1} attempts failed, giving up`)
	// TODO: Mark task as failed.
}
|