Просмотр исходного кода

Improve the evals "run status" section (#4400)

Chris Estreich 6 месяцев назад
Родитель
Коммит
24851b96db

+ 1 - 1
apps/web-evals/package.json

@@ -5,7 +5,7 @@
 	"scripts": {
 		"lint": "next lint",
 		"check-types": "tsc -b",
-		"dev": "scripts/check-services.sh && next dev --turbopack",
+		"dev": "scripts/check-services.sh && next dev",
 		"format": "prettier --write src",
 		"build": "next build",
 		"start": "next start"

+ 3 - 4
apps/web-evals/src/lib/server/exercises.ts → apps/web-evals/src/actions/exercises.ts

@@ -6,7 +6,9 @@ import { fileURLToPath } from "url"
 
 import { type ExerciseLanguage, exerciseLanguages } from "@roo-code/evals"
 
-const __dirname = path.dirname(fileURLToPath(import.meta.url))
+const __dirname = path.dirname(fileURLToPath(import.meta.url)) // <repo>/apps/web-evals/src/actions
+
+const EXERCISES_BASE_PATH = path.resolve(__dirname, "../../../../../evals")
 
 export const listDirectories = async (relativePath: string) => {
 	try {
@@ -19,9 +21,6 @@ export const listDirectories = async (relativePath: string) => {
 	}
 }
 
-// __dirname = <repo>/evals/apps/web/src/lib/server
-const EXERCISES_BASE_PATH = path.resolve(__dirname, "../../../../../../evals")
-
 export const getExercises = async () => {
 	const result = await Promise.all(
 		exerciseLanguages.map(async (language) => {

+ 8 - 0
apps/web-evals/src/actions/heartbeat.ts

@@ -0,0 +1,8 @@
+"use server"
+
+import { redisClient } from "@/lib/server/redis"
+
+export const getHeartbeat = async (runId: number) => { // Server action: reads the heartbeat entry for the given run from Redis.
+	const redis = await redisClient()
+	return redis.get(`heartbeat:${runId}`) // Same "heartbeat:<runId>" key shape that getHeartbeatKey() builds in packages/evals/src/cli/redis.ts.
+}

+ 1 - 1
apps/web-evals/src/lib/server/runners.ts → apps/web-evals/src/actions/runners.ts

@@ -1,6 +1,6 @@
 "use server"
 
-import { redisClient } from "./redis"
+import { redisClient } from "@/lib/server/redis"
 
 export const getRunners = async (runId: number) => {
 	const redis = await redisClient()

+ 0 - 0
apps/web-evals/src/lib/server/runs.ts → apps/web-evals/src/actions/runs.ts


+ 0 - 0
apps/web-evals/src/lib/server/tasks.ts → apps/web-evals/src/actions/tasks.ts


+ 1 - 1
apps/web-evals/src/app/home.tsx

@@ -7,7 +7,7 @@ import { Ellipsis, Rocket } from "lucide-react"
 
 import type { Run, TaskMetrics } from "@roo-code/evals"
 
-import { deleteRun } from "@/lib/server/runs"
+import { deleteRun } from "@/actions/runs"
 import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters"
 import {
 	Button,

+ 0 - 50
apps/web-evals/src/app/runs/[id]/connection-status.tsx

@@ -1,50 +0,0 @@
-"use client"
-
-import type { EventSourceStatus } from "@/hooks/use-event-source"
-import { useRunners } from "@/hooks/use-runners"
-import { cn } from "@/lib/utils"
-
-type ConnectionStatusProps = {
-	status: EventSourceStatus
-	runId: number
-}
-
-export const ConnectionStatus = (connectionStatus: ConnectionStatusProps) => {
-	const { data: runners, isLoading } = useRunners(connectionStatus.runId)
-	const status = isLoading ? "loading" : runners === null ? "dead" : connectionStatus.status
-
-	return (
-		<div>
-			<div className="flex items-center gap-2">
-				<div className="flex items-center gap-2">
-					<div>Status:</div>
-					<div className="capitalize">{status}</div>
-				</div>
-				<div className="relative">
-					<div
-						className={cn("absolute size-2.5 rounded-full opacity-50 animate-ping", {
-							"bg-gray-500": status === "loading",
-							"bg-green-500": status === "connected",
-							"bg-amber-500": status === "waiting",
-							"bg-rose-500": status === "error" || status === "dead",
-						})}
-					/>
-					<div
-						className={cn("size-2.5 rounded-full", {
-							"bg-gray-500": status === "loading",
-							"bg-green-500": status === "connected",
-							"bg-amber-500": status === "waiting",
-							"bg-rose-500": status === "error" || status === "dead",
-						})}
-					/>
-				</div>
-			</div>
-			<div className="flex items-center gap-2">
-				<div>Runners:</div>
-				{runners && runners.length > 0 && (
-					<div className="font-mono text-sm text-muted-foreground">{runners?.join(", ")}</div>
-				)}
-			</div>
-		</div>
-	)
-}

+ 55 - 0
apps/web-evals/src/app/runs/[id]/run-status.tsx

@@ -0,0 +1,55 @@
+"use client"
+
+import type { RunStatus as _RunStatus } from "@/hooks/use-run-status"
+import { cn } from "@/lib/utils"
+
+export const RunStatus = ({ runStatus: { sseStatus, heartbeat, runners = [] } }: { runStatus: _RunStatus }) => ( // Renders three rows: SSE task stream state, controller heartbeat, and registered runners (an undefined runners list is treated as empty).
+	<div>
+		<div className="flex items-center gap-2">
+			<div className="flex items-center gap-2">
+				<div>Task Stream:</div>
+				<div className="font-mono text-sm text-muted-foreground">{sseStatus}</div>
+			</div>
+			<div className="relative">
+				<div
+					className={cn("absolute size-2.5 rounded-full opacity-50 animate-ping", { // Pulsing halo positioned over the solid dot that follows.
+						"bg-green-500": sseStatus === "connected",
+						"bg-amber-500": sseStatus === "waiting",
+						"bg-rose-500": sseStatus === "error", // Any other sseStatus value leaves the halo without a background class.
+					})}
+				/>
+				<div
+					className={cn("size-2.5 rounded-full", { // Solid dot; same colour mapping as the halo.
+						"bg-green-500": sseStatus === "connected",
+						"bg-amber-500": sseStatus === "waiting",
+						"bg-rose-500": sseStatus === "error",
+					})}
+				/>
+			</div>
+		</div>
+		<div className="flex items-center gap-2">
+			<div className="flex items-center gap-2">
+				<div>Task Controller:</div>
+				<div className="font-mono text-sm text-muted-foreground">{heartbeat ?? "dead"}</div>
+			</div>
+			<div className="relative">
+				<div
+					className={cn("absolute size-2.5 rounded-full opacity-50 animate-ping", {
+						"bg-green-500": !!heartbeat, // Any non-empty heartbeat value counts as alive.
+						"bg-rose-500": !heartbeat, // null, undefined (still loading) and "" all show as red.
+					})}
+				/>
+				<div
+					className={cn("size-2.5 rounded-full", {
+						"bg-green-500": !!heartbeat,
+						"bg-rose-500": !heartbeat,
+					})}
+				/>
+			</div>
+		</div>
+		<div className="flex items-center gap-2">
+			<div>Task Runners:</div>
+			{runners.length > 0 && <div className="font-mono text-sm text-muted-foreground">{runners?.join(", ")}</div>}
+		</div>
+	</div>
+)

+ 5 - 4
apps/web-evals/src/app/runs/[id]/run.tsx

@@ -10,12 +10,13 @@ import { useRunStatus } from "@/hooks/use-run-status"
 import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui"
 
 import { TaskStatus } from "./task-status"
-import { ConnectionStatus } from "./connection-status"
+import { RunStatus } from "./run-status"
 
 type TaskMetrics = Pick<_TaskMetrics, "tokensIn" | "tokensOut" | "tokensContext" | "duration" | "cost">
 
 export function Run({ run }: { run: Run }) {
-	const { tasks, status, tokenUsage, usageUpdatedAt } = useRunStatus(run)
+	const runStatus = useRunStatus(run)
+	const { tasks, tokenUsage, usageUpdatedAt } = runStatus
 
 	const taskMetrics: Record<number, TaskMetrics> = useMemo(() => {
 		const metrics: Record<number, TaskMetrics> = {}
@@ -45,10 +46,10 @@ export function Run({ run }: { run: Run }) {
 			<div>
 				<div className="mb-2">
 					<div>
-						<div>{run.model}</div>
+						<div className="font-mono">{run.model}</div>
 						{run.description && <div className="text-sm text-muted-foreground">{run.description}</div>}
 					</div>
-					{!run.taskMetricsId && <ConnectionStatus status={status} runId={run.id} />}
+					{!run.taskMetricsId && <RunStatus runStatus={runStatus} />}
 				</div>
 				{!tasks ? (
 					<LoaderCircle className="size-4 animate-spin" />

+ 4 - 3
apps/web-evals/src/app/runs/new/new-run.tsx

@@ -3,6 +3,7 @@
 import { useCallback, useRef, useState } from "react"
 import { useRouter } from "next/navigation"
 import { z } from "zod"
+import { useQuery } from "@tanstack/react-query"
 import { useForm, FormProvider } from "react-hook-form"
 import { zodResolver } from "@hookform/resolvers/zod"
 import fuzzysort from "fuzzysort"
@@ -11,7 +12,8 @@ import { X, Rocket, Check, ChevronsUpDown, SlidersHorizontal, Book, CircleCheck
 
 import { globalSettingsSchema, providerSettingsSchema, EVALS_SETTINGS, getModelId } from "@roo-code/types"
 
-import { createRun } from "@/lib/server/runs"
+import { createRun } from "@/actions/runs"
+import { getExercises } from "@/actions/exercises"
 import {
 	createRunSchema as formSchema,
 	type CreateRun as FormValues,
@@ -22,7 +24,6 @@ import {
 } from "@/lib/schemas"
 import { cn } from "@/lib/utils"
 import { useOpenRouterModels } from "@/hooks/use-open-router-models"
-import { useExercises } from "@/hooks/use-exercises"
 import {
 	Button,
 	FormControl,
@@ -65,7 +66,7 @@ export function NewRun() {
 	const modelSearchValueRef = useRef("")
 
 	const models = useOpenRouterModels()
-	const exercises = useExercises()
+	const exercises = useQuery({ queryKey: ["getExercises"], queryFn: () => getExercises() })
 
 	const form = useForm<FormValues>({
 		resolver: zodResolver(formSchema),

+ 0 - 5
apps/web-evals/src/hooks/use-exercises.ts

@@ -1,5 +0,0 @@
-import { useQuery } from "@tanstack/react-query"
-
-import { getExercises } from "@/lib/server/exercises"
-
-export const useExercises = () => useQuery({ queryKey: ["exercises"], queryFn: () => getExercises() })

+ 32 - 7
apps/web-evals/src/hooks/use-run-status.ts

@@ -2,20 +2,43 @@ import { useState, useCallback, useRef } from "react"
 import { useQuery, keepPreviousData } from "@tanstack/react-query"
 
 import { type TokenUsage, RooCodeEventName, taskEventSchema } from "@roo-code/types"
-import type { Run } from "@roo-code/evals"
+import type { Run, Task, TaskMetrics } from "@roo-code/evals"
 
-import { getTasks } from "@/lib/server/tasks"
-import { useEventSource } from "@/hooks/use-event-source"
+import { getHeartbeat } from "@/actions/heartbeat"
+import { getRunners } from "@/actions/runners"
+import { getTasks } from "@/actions/tasks"
+import { type EventSourceStatus, useEventSource } from "@/hooks/use-event-source"
 
-export const useRunStatus = (run: Run) => {
+export type RunStatus = { // Shape of the object returned by useRunStatus.
+	sseStatus: EventSourceStatus // Connection state reported by useEventSource.
+	heartbeat: string | null | undefined // Result of getHeartbeat; undefined until the query has data.
+	runners: string[] | undefined // Result of getRunners; undefined until the query has data.
+	tasks: (Task & { taskMetrics: TaskMetrics | null })[] | undefined // Result of getTasks; each task carries its metrics or null.
+	tokenUsage: Map<number, TokenUsage & { duration?: number }> // Current contents of a ref-held map; presumably keyed by task id and filled by the stream handler — confirm in onMessage.
+	usageUpdatedAt: number | undefined // State value; presumably bumped whenever tokenUsage is mutated — confirm in onMessage.
+}
+
+export const useRunStatus = (run: Run): RunStatus => {
 	const [tasksUpdatedAt, setTasksUpdatedAt] = useState<number>()
 	const [usageUpdatedAt, setUsageUpdatedAt] = useState<number>()
 
 	const tokenUsage = useRef<Map<number, TokenUsage & { duration?: number }>>(new Map())
 	const startTimes = useRef<Map<number, number>>(new Map())
 
+	const { data: heartbeat } = useQuery({
+		queryKey: ["getHeartbeat", run.id],
+		queryFn: () => getHeartbeat(run.id),
+		refetchInterval: 10_000,
+	})
+
+	const { data: runners } = useQuery({
+		queryKey: ["getRunners", run.id],
+		queryFn: () => getRunners(run.id),
+		refetchInterval: 10_000,
+	})
+
 	const { data: tasks } = useQuery({
-		queryKey: ["run", run.id, tasksUpdatedAt],
+		queryKey: ["getTasks", run.id, tasksUpdatedAt],
 		queryFn: async () => getTasks(run.id),
 		placeholderData: keepPreviousData,
 		refetchInterval: 30_000,
@@ -65,10 +88,12 @@ export const useRunStatus = (run: Run) => {
 		}
 	}, [])
 
-	const status = useEventSource({ url, onMessage })
+	const sseStatus = useEventSource({ url, onMessage })
 
 	return {
-		status,
+		sseStatus,
+		heartbeat,
+		runners,
 		tasks,
 		tokenUsage: tokenUsage.current,
 		usageUpdatedAt,

+ 0 - 10
apps/web-evals/src/hooks/use-runners.ts

@@ -1,10 +0,0 @@
-import { useQuery } from "@tanstack/react-query"
-
-import { getRunners } from "@/lib/server/runners"
-
-export const useRunners = (runId: number) =>
-	useQuery({
-		queryKey: ["runners", runId],
-		queryFn: () => getRunners(runId),
-		refetchInterval: 10_000,
-	})

+ 1 - 2
apps/web-evals/src/lib/server/sse-stream.ts

@@ -36,9 +36,8 @@ export class SSEStream {
 
 		try {
 			await this._writer.close()
-		} catch (error) {
+		} catch (_error) {
 			// Writer might already be closed, ignore the error.
-			console.debug("[SSEStream#close] Writer already closed:", error)
 		}
 	}
 

+ 10 - 6
packages/evals/src/cli/redis.ts

@@ -1,5 +1,7 @@
 import { createClient, type RedisClientType } from "redis"
 
+import { EVALS_TIMEOUT } from "@roo-code/types"
+
 let redis: RedisClientType | undefined
 
 export const redisClient = async () => {
@@ -18,26 +20,28 @@ export const getHeartbeatKey = (runId: number) => `heartbeat:${runId}`
 
 export const registerRunner = async ({ runId, taskId }: { runId: number; taskId: number }) => {
 	const redis = await redisClient()
-	await redis.sAdd(getRunnersKey(runId), `task-${taskId}:${process.env.HOSTNAME}`)
+	const runnersKey = getRunnersKey(runId)
+	await redis.sAdd(runnersKey, `task-${taskId}:${process.env.HOSTNAME ?? process.pid}`) // Member is "task-<taskId>:<host>"; falls back to the pid when HOSTNAME is unset.
+	await redis.expire(runnersKey, EVALS_TIMEOUT / 1_000) // TTL covers the whole set and is reset by every registration. NOTE(review): assumes EVALS_TIMEOUT is in milliseconds and a whole number of seconds — confirm.
 }
 
 export const deregisterRunner = async ({ runId, taskId }: { runId: number; taskId: number }) => {
 	const redis = await redisClient()
-	await redis.sRem(getRunnersKey(runId), `task-${taskId}:${process.env.HOSTNAME}`)
+	await redis.sRem(getRunnersKey(runId), `task-${taskId}:${process.env.HOSTNAME ?? process.pid}`) // Must rebuild exactly the member string registerRunner added, otherwise nothing is removed.
 }
 
-export const startHeartbeat = async (runId: number, interval: number = 10) => {
+export const startHeartbeat = async (runId: number, seconds: number = 10) => { // Writes the run's heartbeat key with a TTL of `seconds` and returns the timer that keeps refreshing it.
 	const pid = process.pid.toString()
 	const redis = await redisClient()
 	const heartbeatKey = getHeartbeatKey(runId)
-	await redis.setEx(heartbeatKey, interval, pid)
+	await redis.setEx(heartbeatKey, seconds, pid) // Stored value is this process's pid.
 
 	return setInterval(
 		() =>
-			redis.expire(heartbeatKey, interval).catch((error) => {
+			redis.expire(heartbeatKey, seconds).catch((error) => { // NOTE(review): EXPIRE does not recreate a key that has already lapsed, so a tick delayed past the TTL ends the heartbeat permanently; re-issuing setEx here would self-heal.
 				console.error("heartbeat error:", error)
 			}),
-		(interval * 1_000) / 2,
+		(seconds * 1_000) / 2, // Refresh at half the TTL.
 	)
 }