|
@@ -1,8 +1,8 @@
|
|
|
"use client"
|
|
"use client"
|
|
|
|
|
|
|
|
-import { useMemo, useState, useCallback, useEffect } from "react"
|
|
|
|
|
|
|
+import { useMemo, useState, useCallback, useEffect, Fragment } from "react"
|
|
|
import { toast } from "sonner"
|
|
import { toast } from "sonner"
|
|
|
-import { LoaderCircle, FileText, Copy, Check, StopCircle } from "lucide-react"
|
|
|
|
|
|
|
+import { LoaderCircle, FileText, Copy, Check, StopCircle, List, Layers } from "lucide-react"
|
|
|
|
|
|
|
|
import type { Run, TaskMetrics as _TaskMetrics, Task } from "@roo-code/evals"
|
|
import type { Run, TaskMetrics as _TaskMetrics, Task } from "@roo-code/evals"
|
|
|
import type { ToolName } from "@roo-code/types"
|
|
import type { ToolName } from "@roo-code/types"
|
|
@@ -41,6 +41,9 @@ import { RunStatus } from "./run-status"
|
|
|
|
|
|
|
|
type TaskMetrics = Pick<_TaskMetrics, "tokensIn" | "tokensOut" | "tokensContext" | "duration" | "cost">
|
|
type TaskMetrics = Pick<_TaskMetrics, "tokensIn" | "tokensOut" | "tokensContext" | "duration" | "cost">
|
|
|
|
|
|
|
|
|
|
+// Extended Task type with taskMetrics from useRunStatus
|
|
|
|
|
+type TaskWithMetrics = Task & { taskMetrics: _TaskMetrics | null }
|
|
|
|
|
+
|
|
|
type ToolUsageEntry = { attempts: number; failures: number }
|
|
type ToolUsageEntry = { attempts: number; failures: number }
|
|
|
type ToolUsage = Record<string, ToolUsageEntry>
|
|
type ToolUsage = Record<string, ToolUsageEntry>
|
|
|
|
|
|
|
@@ -250,6 +253,19 @@ export function Run({ run }: { run: Run }) {
|
|
|
const [copied, setCopied] = useState(false)
|
|
const [copied, setCopied] = useState(false)
|
|
|
const [showKillDialog, setShowKillDialog] = useState(false)
|
|
const [showKillDialog, setShowKillDialog] = useState(false)
|
|
|
const [isKilling, setIsKilling] = useState(false)
|
|
const [isKilling, setIsKilling] = useState(false)
|
|
|
|
|
+ const [groupByStatus, setGroupByStatus] = useState(() => {
|
|
|
|
|
+ // Initialize from localStorage if available (client-side only)
|
|
|
|
|
+ if (typeof window !== "undefined") {
|
|
|
|
|
+ const stored = localStorage.getItem("evals-group-by-status")
|
|
|
|
|
+ return stored === "true"
|
|
|
|
|
+ }
|
|
|
|
|
+ return false
|
|
|
|
|
+ })
|
|
|
|
|
+
|
|
|
|
|
+ // Persist groupByStatus to localStorage
|
|
|
|
|
+ useEffect(() => {
|
|
|
|
|
+ localStorage.setItem("evals-group-by-status", String(groupByStatus))
|
|
|
|
|
+ }, [groupByStatus])
|
|
|
|
|
|
|
|
// Determine if run is still active (has heartbeat or runners)
|
|
// Determine if run is still active (has heartbeat or runners)
|
|
|
const isRunActive = !run.taskMetricsId && (!!heartbeat || (runners && runners.length > 0))
|
|
const isRunActive = !run.taskMetricsId && (!!heartbeat || (runners && runners.length > 0))
|
|
@@ -300,41 +316,6 @@ export function Run({ run }: { run: Run }) {
|
|
|
return () => document.removeEventListener("keydown", handleKeyDown)
|
|
return () => document.removeEventListener("keydown", handleKeyDown)
|
|
|
}, [selectedTask])
|
|
}, [selectedTask])
|
|
|
|
|
|
|
|
- const onViewTaskLog = useCallback(
|
|
|
|
|
- async (task: Task) => {
|
|
|
|
|
- // Only allow viewing logs for tasks that have started
|
|
|
|
|
- if (!task.startedAt && !tokenUsage.get(task.id)) {
|
|
|
|
|
- toast.error("Task has not started yet")
|
|
|
|
|
- return
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- setSelectedTask(task)
|
|
|
|
|
- setIsLoadingLog(true)
|
|
|
|
|
- setTaskLog(null)
|
|
|
|
|
-
|
|
|
|
|
- try {
|
|
|
|
|
- const response = await fetch(`/api/runs/${run.id}/logs/${task.id}`)
|
|
|
|
|
-
|
|
|
|
|
- if (!response.ok) {
|
|
|
|
|
- const error = await response.json()
|
|
|
|
|
- toast.error(error.error || "Failed to load log")
|
|
|
|
|
- setSelectedTask(null)
|
|
|
|
|
- return
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- const data = await response.json()
|
|
|
|
|
- setTaskLog(data.logContent)
|
|
|
|
|
- } catch (error) {
|
|
|
|
|
- console.error("Error loading task log:", error)
|
|
|
|
|
- toast.error("Failed to load log")
|
|
|
|
|
- setSelectedTask(null)
|
|
|
|
|
- } finally {
|
|
|
|
|
- setIsLoadingLog(false)
|
|
|
|
|
- }
|
|
|
|
|
- },
|
|
|
|
|
- [run.id, tokenUsage],
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
const taskMetrics: Record<number, TaskMetrics> = useMemo(() => {
|
|
const taskMetrics: Record<number, TaskMetrics> = useMemo(() => {
|
|
|
// Reference usageUpdatedAt to trigger recomputation when Map contents change
|
|
// Reference usageUpdatedAt to trigger recomputation when Map contents change
|
|
|
void usageUpdatedAt
|
|
void usageUpdatedAt
|
|
@@ -376,6 +357,44 @@ export function Run({ run }: { run: Run }) {
|
|
|
return metrics
|
|
return metrics
|
|
|
}, [tasks, tokenUsage, usageUpdatedAt])
|
|
}, [tasks, tokenUsage, usageUpdatedAt])
|
|
|
|
|
|
|
|
|
|
+ const onViewTaskLog = useCallback(
|
|
|
|
|
+ async (task: Task) => {
|
|
|
|
|
+ // Only allow viewing logs for tasks that have started.
|
|
|
|
|
+ // Note: we treat presence of derived metrics as evidence of a started task,
|
|
|
|
|
+ // since this page may be rendered without streaming `tokenUsage` populated.
|
|
|
|
|
+ const hasStarted = !!task.startedAt || !!tokenUsage.get(task.id) || !!taskMetrics[task.id]
|
|
|
|
|
+ if (!hasStarted) {
|
|
|
|
|
+ toast.error("Task has not started yet")
|
|
|
|
|
+ return
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ setSelectedTask(task)
|
|
|
|
|
+ setIsLoadingLog(true)
|
|
|
|
|
+ setTaskLog(null)
|
|
|
|
|
+
|
|
|
|
|
+ try {
|
|
|
|
|
+ const response = await fetch(`/api/runs/${run.id}/logs/${task.id}`)
|
|
|
|
|
+
|
|
|
|
|
+ if (!response.ok) {
|
|
|
|
|
+ const error = await response.json()
|
|
|
|
|
+ toast.error(error.error || "Failed to load log")
|
|
|
|
|
+ setSelectedTask(null)
|
|
|
|
|
+ return
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ const data = await response.json()
|
|
|
|
|
+ setTaskLog(data.logContent)
|
|
|
|
|
+ } catch (error) {
|
|
|
|
|
+ console.error("Error loading task log:", error)
|
|
|
|
|
+ toast.error("Failed to load log")
|
|
|
|
|
+ setSelectedTask(null)
|
|
|
|
|
+ } finally {
|
|
|
|
|
+ setIsLoadingLog(false)
|
|
|
|
|
+ }
|
|
|
|
|
+ },
|
|
|
|
|
+ [run.id, tokenUsage, taskMetrics],
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
// Collect all unique tool names from all tasks and sort by total attempts
|
|
// Collect all unique tool names from all tasks and sort by total attempts
|
|
|
const toolColumns = useMemo<ToolName[]>(() => {
|
|
const toolColumns = useMemo<ToolName[]>(() => {
|
|
|
// Reference usageUpdatedAt to trigger recomputation when Map contents change
|
|
// Reference usageUpdatedAt to trigger recomputation when Map contents change
|
|
@@ -463,10 +482,13 @@ export function Run({ run }: { run: Run }) {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ const remaining = tasks.length - completed
|
|
|
|
|
+
|
|
|
return {
|
|
return {
|
|
|
passed,
|
|
passed,
|
|
|
failed,
|
|
failed,
|
|
|
completed,
|
|
completed,
|
|
|
|
|
+ remaining,
|
|
|
passRate: completed > 0 ? ((passed / completed) * 100).toFixed(1) : null,
|
|
passRate: completed > 0 ? ((passed / completed) * 100).toFixed(1) : null,
|
|
|
totalTokensIn,
|
|
totalTokensIn,
|
|
|
totalTokensOut,
|
|
totalTokensOut,
|
|
@@ -501,258 +523,399 @@ export function Run({ run }: { run: Run }) {
|
|
|
return Date.now() - startTime
|
|
return Date.now() - startTime
|
|
|
}, [tasks, run.createdAt, run.taskMetricsId, usageUpdatedAt])
|
|
}, [tasks, run.createdAt, run.taskMetricsId, usageUpdatedAt])
|
|
|
|
|
|
|
|
- return (
|
|
|
|
|
- <>
|
|
|
|
|
- <div>
|
|
|
|
|
- {stats && (
|
|
|
|
|
- <div className="mb-4 p-4 border rounded-lg bg-muted sticky top-0 z-10">
|
|
|
|
|
- {/* Provider, Model title and status */}
|
|
|
|
|
- <div className="flex items-center justify-center gap-3 mb-3 relative">
|
|
|
|
|
- {run.settings?.apiProvider && (
|
|
|
|
|
- <span className="text-sm text-muted-foreground">{run.settings.apiProvider}</span>
|
|
|
|
|
- )}
|
|
|
|
|
- <div className="font-mono">{run.model}</div>
|
|
|
|
|
- <RunStatus runStatus={runStatus} isComplete={!!run.taskMetricsId} />
|
|
|
|
|
- {run.description && (
|
|
|
|
|
- <span className="text-sm text-muted-foreground">- {run.description}</span>
|
|
|
|
|
- )}
|
|
|
|
|
- {isRunActive && (
|
|
|
|
|
- <Tooltip>
|
|
|
|
|
- <TooltipTrigger asChild>
|
|
|
|
|
- <Button
|
|
|
|
|
- variant="ghost"
|
|
|
|
|
- size="sm"
|
|
|
|
|
- onClick={() => setShowKillDialog(true)}
|
|
|
|
|
- disabled={isKilling}
|
|
|
|
|
- className="absolute right-0 flex items-center gap-1 text-muted-foreground hover:text-destructive">
|
|
|
|
|
- {isKilling ? (
|
|
|
|
|
- <LoaderCircle className="size-4 animate-spin" />
|
|
|
|
|
- ) : (
|
|
|
|
|
- <StopCircle className="size-4" />
|
|
|
|
|
- )}
|
|
|
|
|
- Kill
|
|
|
|
|
- </Button>
|
|
|
|
|
- </TooltipTrigger>
|
|
|
|
|
- <TooltipContent>Stop all containers for this run</TooltipContent>
|
|
|
|
|
- </Tooltip>
|
|
|
|
|
- )}
|
|
|
|
|
- </div>
|
|
|
|
|
- {/* Main Stats Row */}
|
|
|
|
|
- <div className="flex items-start justify-center gap-x-8 gap-y-3">
|
|
|
|
|
- {/* Passed/Failed */}
|
|
|
|
|
- <div className="text-center min-w-[80px]">
|
|
|
|
|
- <div className="text-2xl font-bold whitespace-nowrap">
|
|
|
|
|
- <span className="text-green-600">{stats.passed}</span>
|
|
|
|
|
- <span className="text-muted-foreground mx-1">/</span>
|
|
|
|
|
- <span className="text-red-600">{stats.failed}</span>
|
|
|
|
|
- </div>
|
|
|
|
|
- <div className="text-xs text-muted-foreground">Passed / Failed</div>
|
|
|
|
|
- </div>
|
|
|
|
|
|
|
+ // Task status categories
|
|
|
|
|
+ type TaskStatusCategory = "failed" | "in_progress" | "passed" | "not_started"
|
|
|
|
|
+
|
|
|
|
|
+ const getTaskStatusCategory = useCallback(
|
|
|
|
|
+ (task: TaskWithMetrics): TaskStatusCategory => {
|
|
|
|
|
+ if (task.passed === false) return "failed"
|
|
|
|
|
+ if (task.passed === true) return "passed"
|
|
|
|
|
+ // Check streaming data, DB metrics, or startedAt timestamp
|
|
|
|
|
+ const hasStarted = !!task.startedAt || !!tokenUsage.get(task.id) || !!taskMetrics[task.id]
|
|
|
|
|
+ if (hasStarted) return "in_progress"
|
|
|
|
|
+ return "not_started"
|
|
|
|
|
+ },
|
|
|
|
|
+ [tokenUsage, taskMetrics],
|
|
|
|
|
+ )
|
|
|
|
|
|
|
|
- {/* Pass Rate */}
|
|
|
|
|
- <div className="text-center min-w-[80px]">
|
|
|
|
|
- <div
|
|
|
|
|
- className={`text-2xl font-bold ${
|
|
|
|
|
- stats.passRate === null
|
|
|
|
|
- ? ""
|
|
|
|
|
- : parseFloat(stats.passRate) === 100
|
|
|
|
|
- ? ""
|
|
|
|
|
- : parseFloat(stats.passRate) >= 80
|
|
|
|
|
- ? "text-yellow-500"
|
|
|
|
|
- : "text-red-500"
|
|
|
|
|
- }`}>
|
|
|
|
|
- {stats.passRate ? `${stats.passRate}%` : "-"}
|
|
|
|
|
- </div>
|
|
|
|
|
- <div className="text-xs text-muted-foreground">Pass Rate</div>
|
|
|
|
|
- </div>
|
|
|
|
|
|
|
+ // Group tasks by status while preserving original index
|
|
|
|
|
+ const groupedTasks = useMemo(() => {
|
|
|
|
|
+ if (!tasks || !groupByStatus) return null
|
|
|
|
|
|
|
|
- {/* Tokens */}
|
|
|
|
|
- <div className="text-center min-w-[140px]">
|
|
|
|
|
- <div className="text-xl font-bold font-mono whitespace-nowrap">
|
|
|
|
|
- {formatTokens(stats.totalTokensIn)}
|
|
|
|
|
- <span className="text-muted-foreground mx-1">/</span>
|
|
|
|
|
- {formatTokens(stats.totalTokensOut)}
|
|
|
|
|
- </div>
|
|
|
|
|
- <div className="text-xs text-muted-foreground">Tokens In / Out</div>
|
|
|
|
|
- </div>
|
|
|
|
|
|
|
+ const groups: Record<TaskStatusCategory, Array<{ task: TaskWithMetrics; originalIndex: number }>> = {
|
|
|
|
|
+ failed: [],
|
|
|
|
|
+ in_progress: [],
|
|
|
|
|
+ passed: [],
|
|
|
|
|
+ not_started: [],
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- {/* Cost */}
|
|
|
|
|
- <div className="text-center min-w-[70px]">
|
|
|
|
|
- <div className="text-2xl font-bold font-mono">{formatCurrency(stats.totalCost)}</div>
|
|
|
|
|
- <div className="text-xs text-muted-foreground">Cost</div>
|
|
|
|
|
- </div>
|
|
|
|
|
|
|
+ tasks.forEach((task, index) => {
|
|
|
|
|
+ const status = getTaskStatusCategory(task)
|
|
|
|
|
+ groups[status].push({ task, originalIndex: index })
|
|
|
|
|
+ })
|
|
|
|
|
|
|
|
- {/* Duration */}
|
|
|
|
|
- <div className="text-center min-w-[90px]">
|
|
|
|
|
- <div className="text-2xl font-bold font-mono whitespace-nowrap">
|
|
|
|
|
- {stats.totalDuration > 0 ? formatDuration(stats.totalDuration) : "-"}
|
|
|
|
|
- </div>
|
|
|
|
|
- <div className="text-xs text-muted-foreground">Duration</div>
|
|
|
|
|
- </div>
|
|
|
|
|
|
|
+ return groups
|
|
|
|
|
+ }, [tasks, groupByStatus, getTaskStatusCategory])
|
|
|
|
|
+
|
|
|
|
|
+ const statusLabels = useMemo(
|
|
|
|
|
+ (): Record<TaskStatusCategory, { label: string; className: string; count: number }> => ({
|
|
|
|
|
+ failed: { label: "Failed", className: "text-red-500", count: groupedTasks?.failed.length ?? 0 },
|
|
|
|
|
+ in_progress: {
|
|
|
|
|
+ label: "In Progress",
|
|
|
|
|
+ className: "text-yellow-500",
|
|
|
|
|
+ count: groupedTasks?.in_progress.length ?? 0,
|
|
|
|
|
+ },
|
|
|
|
|
+ passed: { label: "Passed", className: "text-green-500", count: groupedTasks?.passed.length ?? 0 },
|
|
|
|
|
+ not_started: {
|
|
|
|
|
+ label: "Not Started",
|
|
|
|
|
+ className: "text-muted-foreground",
|
|
|
|
|
+ count: groupedTasks?.not_started.length ?? 0,
|
|
|
|
|
+ },
|
|
|
|
|
+ }),
|
|
|
|
|
+ [groupedTasks],
|
|
|
|
|
+ )
|
|
|
|
|
|
|
|
- {/* Elapsed Time */}
|
|
|
|
|
- <div className="text-center min-w-[90px]">
|
|
|
|
|
- <div className="text-2xl font-bold font-mono whitespace-nowrap">
|
|
|
|
|
- {elapsedTime !== null ? formatDuration(elapsedTime) : "-"}
|
|
|
|
|
- </div>
|
|
|
|
|
- <div className="text-xs text-muted-foreground">Elapsed</div>
|
|
|
|
|
- </div>
|
|
|
|
|
- </div>
|
|
|
|
|
|
|
+ const statusOrder: TaskStatusCategory[] = ["failed", "in_progress", "passed", "not_started"]
|
|
|
|
|
|
|
|
- {/* Tool Usage Row */}
|
|
|
|
|
- {Object.keys(stats.toolUsage).length > 0 && (
|
|
|
|
|
- <div className="flex items-center justify-center gap-2 flex-wrap mt-3">
|
|
|
|
|
- {Object.entries(stats.toolUsage)
|
|
|
|
|
- .sort(([, a], [, b]) => b.attempts - a.attempts)
|
|
|
|
|
- .map(([toolName, usage]) => {
|
|
|
|
|
- const abbr = getToolAbbreviation(toolName)
|
|
|
|
|
- const successRate =
|
|
|
|
|
- usage.attempts > 0
|
|
|
|
|
- ? ((usage.attempts - usage.failures) / usage.attempts) * 100
|
|
|
|
|
- : 100
|
|
|
|
|
- const rateColor =
|
|
|
|
|
- successRate === 100
|
|
|
|
|
- ? "text-green-500"
|
|
|
|
|
- : successRate >= 80
|
|
|
|
|
- ? "text-yellow-500"
|
|
|
|
|
- : "text-red-500"
|
|
|
|
|
- return (
|
|
|
|
|
- <Tooltip key={toolName}>
|
|
|
|
|
- <TooltipTrigger asChild>
|
|
|
|
|
- <div className="flex items-center gap-1 px-2 py-1 rounded bg-background/50 border border-border/50 hover:border-border transition-colors cursor-default text-xs">
|
|
|
|
|
- <span className="font-medium text-muted-foreground">
|
|
|
|
|
- {abbr}
|
|
|
|
|
- </span>
|
|
|
|
|
- <span className="font-bold tabular-nums">{usage.attempts}</span>
|
|
|
|
|
- <span className={`${rateColor}`}>
|
|
|
|
|
- {formatToolUsageSuccessRate(usage)}
|
|
|
|
|
- </span>
|
|
|
|
|
- </div>
|
|
|
|
|
- </TooltipTrigger>
|
|
|
|
|
- <TooltipContent side="bottom">{toolName}</TooltipContent>
|
|
|
|
|
- </Tooltip>
|
|
|
|
|
- )
|
|
|
|
|
- })}
|
|
|
|
|
- </div>
|
|
|
|
|
- )}
|
|
|
|
|
|
|
+ // Helper to render a task row
|
|
|
|
|
+ const renderTaskRow = (task: TaskWithMetrics, originalIndex: number) => {
|
|
|
|
|
+ const hasStarted = !!task.startedAt || !!tokenUsage.get(task.id) || !!taskMetrics[task.id]
|
|
|
|
|
+ return (
|
|
|
|
|
+ <TableRow
|
|
|
|
|
+ key={task.id}
|
|
|
|
|
+ className={`${hasStarted ? "cursor-pointer hover:bg-muted/50" : ""} ${task.passed === false ? "bg-red-950/30 border-l-2 border-l-red-500" : ""}`}
|
|
|
|
|
+ onClick={() => hasStarted && onViewTaskLog(task)}>
|
|
|
|
|
+ <TableCell className="text-center text-muted-foreground font-mono text-xs">
|
|
|
|
|
+ {originalIndex + 1}
|
|
|
|
|
+ </TableCell>
|
|
|
|
|
+ <TableCell>
|
|
|
|
|
+ <div className="flex items-center gap-2">
|
|
|
|
|
+ <TaskStatus task={task} running={hasStarted} />
|
|
|
|
|
+ <div className="flex items-center gap-2">
|
|
|
|
|
+ <span>
|
|
|
|
|
+ {task.language}/{task.exercise}
|
|
|
|
|
+ {task.iteration > 1 && (
|
|
|
|
|
+ <span className="text-muted-foreground ml-1">(#{task.iteration})</span>
|
|
|
|
|
+ )}
|
|
|
|
|
+ </span>
|
|
|
|
|
+ {hasStarted && (
|
|
|
|
|
+ <Tooltip>
|
|
|
|
|
+ <TooltipTrigger asChild>
|
|
|
|
|
+ <FileText className="size-3 text-muted-foreground" />
|
|
|
|
|
+ </TooltipTrigger>
|
|
|
|
|
+ <TooltipContent>Click to view log</TooltipContent>
|
|
|
|
|
+ </Tooltip>
|
|
|
|
|
+ )}
|
|
|
|
|
+ </div>
|
|
|
</div>
|
|
</div>
|
|
|
|
|
+ </TableCell>
|
|
|
|
|
+ {taskMetrics[task.id] ? (
|
|
|
|
|
+ <>
|
|
|
|
|
+ <TableCell className="font-mono text-xs">
|
|
|
|
|
+ <div className="flex items-center justify-evenly">
|
|
|
|
|
+ <div>{formatTokens(taskMetrics[task.id]!.tokensIn)}</div>/
|
|
|
|
|
+ <div>{formatTokens(taskMetrics[task.id]!.tokensOut)}</div>
|
|
|
|
|
+ </div>
|
|
|
|
|
+ </TableCell>
|
|
|
|
|
+ <TableCell className="font-mono text-xs">
|
|
|
|
|
+ {formatTokens(taskMetrics[task.id]!.tokensContext)}
|
|
|
|
|
+ </TableCell>
|
|
|
|
|
+ {toolColumns.map((toolName) => {
|
|
|
|
|
+ const dbUsage = task.taskMetrics?.toolUsage?.[toolName]
|
|
|
|
|
+ const streamingUsage = toolUsage.get(task.id)?.[toolName]
|
|
|
|
|
+ const usage = task.finishedAt ? (dbUsage ?? streamingUsage) : streamingUsage
|
|
|
|
|
+
|
|
|
|
|
+ const successRate =
|
|
|
|
|
+ usage && usage.attempts > 0
|
|
|
|
|
+ ? ((usage.attempts - usage.failures) / usage.attempts) * 100
|
|
|
|
|
+ : 100
|
|
|
|
|
+ const rateColor =
|
|
|
|
|
+ successRate === 100
|
|
|
|
|
+ ? "text-muted-foreground"
|
|
|
|
|
+ : successRate >= 80
|
|
|
|
|
+ ? "text-yellow-500"
|
|
|
|
|
+ : "text-red-500"
|
|
|
|
|
+ return (
|
|
|
|
|
+ <TableCell key={toolName} className="text-xs text-center">
|
|
|
|
|
+ {usage ? (
|
|
|
|
|
+ <div className="flex flex-col items-center">
|
|
|
|
|
+ <span className="font-medium">{usage.attempts}</span>
|
|
|
|
|
+ <span className={rateColor}>{formatToolUsageSuccessRate(usage)}</span>
|
|
|
|
|
+ </div>
|
|
|
|
|
+ ) : (
|
|
|
|
|
+ <span className="text-muted-foreground">-</span>
|
|
|
|
|
+ )}
|
|
|
|
|
+ </TableCell>
|
|
|
|
|
+ )
|
|
|
|
|
+ })}
|
|
|
|
|
+ <TableCell className="font-mono text-xs">
|
|
|
|
|
+ {taskMetrics[task.id]!.duration ? formatDuration(taskMetrics[task.id]!.duration) : "-"}
|
|
|
|
|
+ </TableCell>
|
|
|
|
|
+ <TableCell className="font-mono text-xs">
|
|
|
|
|
+ {formatCurrency(taskMetrics[task.id]!.cost)}
|
|
|
|
|
+ </TableCell>
|
|
|
|
|
+ </>
|
|
|
|
|
+ ) : (
|
|
|
|
|
+ <TableCell colSpan={5 + toolColumns.length} />
|
|
|
)}
|
|
)}
|
|
|
|
|
+ </TableRow>
|
|
|
|
|
+ )
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return (
|
|
|
|
|
+ <>
|
|
|
|
|
+ <div>
|
|
|
{!tasks ? (
|
|
{!tasks ? (
|
|
|
<LoaderCircle className="size-4 animate-spin" />
|
|
<LoaderCircle className="size-4 animate-spin" />
|
|
|
) : (
|
|
) : (
|
|
|
- <Table className="border">
|
|
|
|
|
- <TableHeader>
|
|
|
|
|
- <TableRow>
|
|
|
|
|
- <TableHead>Exercise</TableHead>
|
|
|
|
|
- <TableHead className="text-center">Tokens In / Out</TableHead>
|
|
|
|
|
- <TableHead>Context</TableHead>
|
|
|
|
|
- {toolColumns.map((toolName) => (
|
|
|
|
|
- <TableHead key={toolName} className="text-xs text-center">
|
|
|
|
|
- <Tooltip>
|
|
|
|
|
- <TooltipTrigger>{getToolAbbreviation(toolName)}</TooltipTrigger>
|
|
|
|
|
- <TooltipContent>{toolName}</TooltipContent>
|
|
|
|
|
- </Tooltip>
|
|
|
|
|
- </TableHead>
|
|
|
|
|
- ))}
|
|
|
|
|
- <TableHead>Duration</TableHead>
|
|
|
|
|
- <TableHead>Cost</TableHead>
|
|
|
|
|
- </TableRow>
|
|
|
|
|
- </TableHeader>
|
|
|
|
|
- <TableBody>
|
|
|
|
|
- {tasks.map((task) => {
|
|
|
|
|
- const hasStarted = !!task.startedAt || !!tokenUsage.get(task.id)
|
|
|
|
|
- return (
|
|
|
|
|
- <TableRow
|
|
|
|
|
- key={task.id}
|
|
|
|
|
- className={`${hasStarted ? "cursor-pointer hover:bg-muted/50" : ""} ${task.passed === false ? "bg-red-950/30 border-l-2 border-l-red-500" : ""}`}
|
|
|
|
|
- onClick={() => hasStarted && onViewTaskLog(task)}>
|
|
|
|
|
- <TableCell>
|
|
|
|
|
- <div className="flex items-center gap-2">
|
|
|
|
|
- <TaskStatus task={task} running={hasStarted} />
|
|
|
|
|
- <div className="flex items-center gap-2">
|
|
|
|
|
- <span>
|
|
|
|
|
- {task.language}/{task.exercise}
|
|
|
|
|
- {task.iteration > 1 && (
|
|
|
|
|
- <span className="text-muted-foreground ml-1">
|
|
|
|
|
- (#{task.iteration})
|
|
|
|
|
- </span>
|
|
|
|
|
- )}
|
|
|
|
|
- </span>
|
|
|
|
|
- {hasStarted && (
|
|
|
|
|
- <Tooltip>
|
|
|
|
|
- <TooltipTrigger asChild>
|
|
|
|
|
- <FileText className="size-3 text-muted-foreground" />
|
|
|
|
|
- </TooltipTrigger>
|
|
|
|
|
- <TooltipContent>Click to view log</TooltipContent>
|
|
|
|
|
- </Tooltip>
|
|
|
|
|
- )}
|
|
|
|
|
- </div>
|
|
|
|
|
- </div>
|
|
|
|
|
- </TableCell>
|
|
|
|
|
- {taskMetrics[task.id] ? (
|
|
|
|
|
|
|
+ <>
|
|
|
|
|
+ {/* View Toggle */}
|
|
|
|
|
+ <div className="flex justify-end mb-2">
|
|
|
|
|
+ <Tooltip>
|
|
|
|
|
+ <TooltipTrigger asChild>
|
|
|
|
|
+ <Button
|
|
|
|
|
+ variant="outline"
|
|
|
|
|
+ size="sm"
|
|
|
|
|
+ onClick={() => setGroupByStatus(!groupByStatus)}
|
|
|
|
|
+ className="flex items-center gap-2">
|
|
|
|
|
+ {groupByStatus ? (
|
|
|
<>
|
|
<>
|
|
|
- <TableCell className="font-mono text-xs">
|
|
|
|
|
- <div className="flex items-center justify-evenly">
|
|
|
|
|
- <div>{formatTokens(taskMetrics[task.id]!.tokensIn)}</div>/
|
|
|
|
|
- <div>{formatTokens(taskMetrics[task.id]!.tokensOut)}</div>
|
|
|
|
|
- </div>
|
|
|
|
|
- </TableCell>
|
|
|
|
|
- <TableCell className="font-mono text-xs">
|
|
|
|
|
- {formatTokens(taskMetrics[task.id]!.tokensContext)}
|
|
|
|
|
- </TableCell>
|
|
|
|
|
- {toolColumns.map((toolName) => {
|
|
|
|
|
- // Use DB values for finished tasks, but fall back to streaming values
|
|
|
|
|
- // if DB values are missing (handles race condition during timeout)
|
|
|
|
|
- const dbUsage = task.taskMetrics?.toolUsage?.[toolName]
|
|
|
|
|
- const streamingUsage = toolUsage.get(task.id)?.[toolName]
|
|
|
|
|
- const usage = task.finishedAt
|
|
|
|
|
- ? (dbUsage ?? streamingUsage)
|
|
|
|
|
- : streamingUsage
|
|
|
|
|
-
|
|
|
|
|
- const successRate =
|
|
|
|
|
- usage && usage.attempts > 0
|
|
|
|
|
- ? ((usage.attempts - usage.failures) / usage.attempts) * 100
|
|
|
|
|
- : 100
|
|
|
|
|
- const rateColor =
|
|
|
|
|
- successRate === 100
|
|
|
|
|
- ? "text-muted-foreground"
|
|
|
|
|
- : successRate >= 80
|
|
|
|
|
- ? "text-yellow-500"
|
|
|
|
|
- : "text-red-500"
|
|
|
|
|
- return (
|
|
|
|
|
- <TableCell key={toolName} className="text-xs text-center">
|
|
|
|
|
- {usage ? (
|
|
|
|
|
- <div className="flex flex-col items-center">
|
|
|
|
|
- <span className="font-medium">
|
|
|
|
|
- {usage.attempts}
|
|
|
|
|
- </span>
|
|
|
|
|
- <span className={rateColor}>
|
|
|
|
|
- {formatToolUsageSuccessRate(usage)}
|
|
|
|
|
- </span>
|
|
|
|
|
- </div>
|
|
|
|
|
- ) : (
|
|
|
|
|
- <span className="text-muted-foreground">-</span>
|
|
|
|
|
- )}
|
|
|
|
|
- </TableCell>
|
|
|
|
|
- )
|
|
|
|
|
- })}
|
|
|
|
|
- <TableCell className="font-mono text-xs">
|
|
|
|
|
- {taskMetrics[task.id]!.duration
|
|
|
|
|
- ? formatDuration(taskMetrics[task.id]!.duration)
|
|
|
|
|
- : "-"}
|
|
|
|
|
- </TableCell>
|
|
|
|
|
- <TableCell className="font-mono text-xs">
|
|
|
|
|
- {formatCurrency(taskMetrics[task.id]!.cost)}
|
|
|
|
|
- </TableCell>
|
|
|
|
|
|
|
+ <List className="size-4" />
|
|
|
|
|
+ <span>Show Order</span>
|
|
|
</>
|
|
</>
|
|
|
) : (
|
|
) : (
|
|
|
- <TableCell colSpan={4 + toolColumns.length} />
|
|
|
|
|
|
|
+ <>
|
|
|
|
|
+ <Layers className="size-4" />
|
|
|
|
|
+ <span>Group by Status</span>
|
|
|
|
|
+ </>
|
|
|
)}
|
|
)}
|
|
|
|
|
+ </Button>
|
|
|
|
|
+ </TooltipTrigger>
|
|
|
|
|
+ <TooltipContent>
|
|
|
|
|
+ {groupByStatus ? "Show tasks in run order" : "Group tasks by status"}
|
|
|
|
|
+ </TooltipContent>
|
|
|
|
|
+ </Tooltip>
|
|
|
|
|
+ </div>
|
|
|
|
|
+ <Table className="border">
|
|
|
|
|
+ <TableHeader className="sticky top-0 z-10">
|
|
|
|
|
+ {stats && (
|
|
|
|
|
+ <TableRow>
|
|
|
|
|
+ <TableHead colSpan={6 + toolColumns.length} className="bg-muted p-4">
|
|
|
|
|
+ {/* Provider, Model title and status */}
|
|
|
|
|
+ <div className="flex items-center justify-center gap-3 mb-3 relative">
|
|
|
|
|
+ {run.settings?.apiProvider && (
|
|
|
|
|
+ <span className="text-sm text-muted-foreground">
|
|
|
|
|
+ {run.settings.apiProvider}
|
|
|
|
|
+ </span>
|
|
|
|
|
+ )}
|
|
|
|
|
+ <div className="font-mono">{run.model}</div>
|
|
|
|
|
+ <RunStatus runStatus={runStatus} isComplete={!!run.taskMetricsId} />
|
|
|
|
|
+ {run.description && (
|
|
|
|
|
+ <span className="text-sm text-muted-foreground">
|
|
|
|
|
+ - {run.description}
|
|
|
|
|
+ </span>
|
|
|
|
|
+ )}
|
|
|
|
|
+ {isRunActive && (
|
|
|
|
|
+ <Tooltip>
|
|
|
|
|
+ <TooltipTrigger asChild>
|
|
|
|
|
+ <Button
|
|
|
|
|
+ variant="ghost"
|
|
|
|
|
+ size="sm"
|
|
|
|
|
+ onClick={() => setShowKillDialog(true)}
|
|
|
|
|
+ disabled={isKilling}
|
|
|
|
|
+ className="absolute right-0 flex items-center gap-1 text-muted-foreground hover:text-destructive">
|
|
|
|
|
+ {isKilling ? (
|
|
|
|
|
+ <LoaderCircle className="size-4 animate-spin" />
|
|
|
|
|
+ ) : (
|
|
|
|
|
+ <StopCircle className="size-4" />
|
|
|
|
|
+ )}
|
|
|
|
|
+ Kill
|
|
|
|
|
+ </Button>
|
|
|
|
|
+ </TooltipTrigger>
|
|
|
|
|
+ <TooltipContent>
|
|
|
|
|
+ Stop all containers for this run
|
|
|
|
|
+ </TooltipContent>
|
|
|
|
|
+ </Tooltip>
|
|
|
|
|
+ )}
|
|
|
|
|
+ </div>
|
|
|
|
|
+ {/* Main Stats Row */}
|
|
|
|
|
+ <div className="flex items-start justify-center gap-x-8 gap-y-3">
|
|
|
|
|
+ {/* Pass Rate / Fail Rate / Remaining % */}
|
|
|
|
|
+ <div className="text-center min-w-[160px]">
|
|
|
|
|
+ <div className="text-2xl font-bold whitespace-nowrap">
|
|
|
|
|
+ <span className="text-green-600">
|
|
|
|
|
+ {stats.completed > 0
|
|
|
|
|
+ ? `${((stats.passed / stats.completed) * 100).toFixed(1)}%`
|
|
|
|
|
+ : "-"}
|
|
|
|
|
+ </span>
|
|
|
|
|
+ <span className="text-muted-foreground mx-1">/</span>
|
|
|
|
|
+ <span className="text-red-600">
|
|
|
|
|
+ {stats.completed > 0
|
|
|
|
|
+ ? `${((stats.failed / stats.completed) * 100).toFixed(1)}%`
|
|
|
|
|
+ : "-"}
|
|
|
|
|
+ </span>
|
|
|
|
|
+ <span className="text-muted-foreground mx-1">/</span>
|
|
|
|
|
+ <span className="text-muted-foreground">
|
|
|
|
|
+ {tasks.length > 0
|
|
|
|
|
+ ? `${((stats.remaining / tasks.length) * 100).toFixed(1)}%`
|
|
|
|
|
+ : "-"}
|
|
|
|
|
+ </span>
|
|
|
|
|
+ </div>
|
|
|
|
|
+ <div className="text-xs text-muted-foreground">
|
|
|
|
|
+ <span className="text-green-600">{stats.passed}</span>
|
|
|
|
|
+ {" / "}
|
|
|
|
|
+ <span className="text-red-600">{stats.failed}</span>
|
|
|
|
|
+ {" / "}
|
|
|
|
|
+ <span>{stats.remaining}</span>
|
|
|
|
|
+ {" of "}
|
|
|
|
|
+ {tasks.length}
|
|
|
|
|
+ </div>
|
|
|
|
|
+ </div>
|
|
|
|
|
+
|
|
|
|
|
+ {/* Tokens */}
|
|
|
|
|
+ <div className="text-center min-w-[140px]">
|
|
|
|
|
+ <div className="text-xl font-bold font-mono whitespace-nowrap">
|
|
|
|
|
+ {formatTokens(stats.totalTokensIn)}
|
|
|
|
|
+ <span className="text-muted-foreground mx-1">/</span>
|
|
|
|
|
+ {formatTokens(stats.totalTokensOut)}
|
|
|
|
|
+ </div>
|
|
|
|
|
+ <div className="text-xs text-muted-foreground">Tokens In / Out</div>
|
|
|
|
|
+ </div>
|
|
|
|
|
+
|
|
|
|
|
+ {/* Cost */}
|
|
|
|
|
+ <div className="text-center min-w-[70px]">
|
|
|
|
|
+ <div className="text-2xl font-bold font-mono">
|
|
|
|
|
+ {formatCurrency(stats.totalCost)}
|
|
|
|
|
+ </div>
|
|
|
|
|
+ <div className="text-xs text-muted-foreground">Cost</div>
|
|
|
|
|
+ </div>
|
|
|
|
|
+
|
|
|
|
|
+ {/* Duration */}
|
|
|
|
|
+ <div className="text-center min-w-[90px]">
|
|
|
|
|
+ <div className="text-2xl font-bold font-mono whitespace-nowrap">
|
|
|
|
|
+ {stats.totalDuration > 0
|
|
|
|
|
+ ? formatDuration(stats.totalDuration)
|
|
|
|
|
+ : "-"}
|
|
|
|
|
+ </div>
|
|
|
|
|
+ <div className="text-xs text-muted-foreground">Duration</div>
|
|
|
|
|
+ </div>
|
|
|
|
|
+
|
|
|
|
|
+ {/* Elapsed Time */}
|
|
|
|
|
+ <div className="text-center min-w-[90px]">
|
|
|
|
|
+ <div className="text-2xl font-bold font-mono whitespace-nowrap">
|
|
|
|
|
+ {elapsedTime !== null ? formatDuration(elapsedTime) : "-"}
|
|
|
|
|
+ </div>
|
|
|
|
|
+ <div className="text-xs text-muted-foreground">Elapsed</div>
|
|
|
|
|
+ </div>
|
|
|
|
|
+
|
|
|
|
|
+ {/* Estimated Time Remaining - only show if run is active and we have data */}
|
|
|
|
|
+ {!run.taskMetricsId &&
|
|
|
|
|
+ elapsedTime !== null &&
|
|
|
|
|
+ stats.completed > 0 &&
|
|
|
|
|
+ stats.remaining > 0 && (
|
|
|
|
|
+ <div className="text-center min-w-[90px]">
|
|
|
|
|
+ <div className="text-2xl font-bold font-mono whitespace-nowrap text-muted-foreground">
|
|
|
|
|
+ ~
|
|
|
|
|
+ {formatDuration(
|
|
|
|
|
+ (elapsedTime / stats.completed) * stats.remaining,
|
|
|
|
|
+ )}
|
|
|
|
|
+ </div>
|
|
|
|
|
+ <div className="text-xs text-muted-foreground">
|
|
|
|
|
+ Est. Remaining
|
|
|
|
|
+ </div>
|
|
|
|
|
+ </div>
|
|
|
|
|
+ )}
|
|
|
|
|
+ </div>
|
|
|
|
|
+
|
|
|
|
|
+ {/* Tool Usage Row */}
|
|
|
|
|
+ {Object.keys(stats.toolUsage).length > 0 && (
|
|
|
|
|
+ <div className="flex items-center justify-center gap-2 flex-wrap mt-3">
|
|
|
|
|
+ {Object.entries(stats.toolUsage)
|
|
|
|
|
+ .sort(([, a], [, b]) => b.attempts - a.attempts)
|
|
|
|
|
+ .map(([toolName, usage]) => {
|
|
|
|
|
+ const abbr = getToolAbbreviation(toolName)
|
|
|
|
|
+ const successRate =
|
|
|
|
|
+ usage.attempts > 0
|
|
|
|
|
+ ? ((usage.attempts - usage.failures) /
|
|
|
|
|
+ usage.attempts) *
|
|
|
|
|
+ 100
|
|
|
|
|
+ : 100
|
|
|
|
|
+ const rateColor =
|
|
|
|
|
+ successRate === 100
|
|
|
|
|
+ ? "text-green-500"
|
|
|
|
|
+ : successRate >= 80
|
|
|
|
|
+ ? "text-yellow-500"
|
|
|
|
|
+ : "text-red-500"
|
|
|
|
|
+ return (
|
|
|
|
|
+ <Tooltip key={toolName}>
|
|
|
|
|
+ <TooltipTrigger asChild>
|
|
|
|
|
+ <div className="flex items-center gap-1 px-2 py-1 rounded bg-background/50 border border-border/50 hover:border-border transition-colors cursor-default text-xs">
|
|
|
|
|
+ <span className="font-medium text-muted-foreground">
|
|
|
|
|
+ {abbr}
|
|
|
|
|
+ </span>
|
|
|
|
|
+ <span className="font-bold tabular-nums">
|
|
|
|
|
+ {usage.attempts}
|
|
|
|
|
+ </span>
|
|
|
|
|
+ <span className={`${rateColor}`}>
|
|
|
|
|
+ {formatToolUsageSuccessRate(usage)}
|
|
|
|
|
+ </span>
|
|
|
|
|
+ </div>
|
|
|
|
|
+ </TooltipTrigger>
|
|
|
|
|
+ <TooltipContent side="bottom">
|
|
|
|
|
+ {toolName}
|
|
|
|
|
+ </TooltipContent>
|
|
|
|
|
+ </Tooltip>
|
|
|
|
|
+ )
|
|
|
|
|
+ })}
|
|
|
|
|
+ </div>
|
|
|
|
|
+ )}
|
|
|
|
|
+ </TableHead>
|
|
|
</TableRow>
|
|
</TableRow>
|
|
|
- )
|
|
|
|
|
- })}
|
|
|
|
|
- </TableBody>
|
|
|
|
|
- </Table>
|
|
|
|
|
|
|
+ )}
|
|
|
|
|
+ <TableRow>
|
|
|
|
|
+ <TableHead className="w-12 text-center">#</TableHead>
|
|
|
|
|
+ <TableHead>Exercise</TableHead>
|
|
|
|
|
+ <TableHead className="text-center">Tokens In / Out</TableHead>
|
|
|
|
|
+ <TableHead>Context</TableHead>
|
|
|
|
|
+ {toolColumns.map((toolName) => (
|
|
|
|
|
+ <TableHead key={toolName} className="text-xs text-center">
|
|
|
|
|
+ <Tooltip>
|
|
|
|
|
+ <TooltipTrigger>{getToolAbbreviation(toolName)}</TooltipTrigger>
|
|
|
|
|
+ <TooltipContent>{toolName}</TooltipContent>
|
|
|
|
|
+ </Tooltip>
|
|
|
|
|
+ </TableHead>
|
|
|
|
|
+ ))}
|
|
|
|
|
+ <TableHead>Duration</TableHead>
|
|
|
|
|
+ <TableHead>Cost</TableHead>
|
|
|
|
|
+ </TableRow>
|
|
|
|
|
+ </TableHeader>
|
|
|
|
|
+ <TableBody>
|
|
|
|
|
+ {groupByStatus && groupedTasks
|
|
|
|
|
+ ? // Grouped view
|
|
|
|
|
+ statusOrder.map((status) => {
|
|
|
|
|
+ const group = groupedTasks[status]
|
|
|
|
|
+ if (group.length === 0) return null
|
|
|
|
|
+ const { label, className } = statusLabels[status]
|
|
|
|
|
+ return (
|
|
|
|
|
+ <Fragment key={status}>
|
|
|
|
|
+ <TableRow className="bg-muted/50 hover:bg-muted/50">
|
|
|
|
|
+ <TableCell colSpan={6 + toolColumns.length} className="py-2">
|
|
|
|
|
+ <span className={`font-semibold ${className}`}>
|
|
|
|
|
+ {label} ({group.length})
|
|
|
|
|
+ </span>
|
|
|
|
|
+ </TableCell>
|
|
|
|
|
+ </TableRow>
|
|
|
|
|
+ {group.map(({ task, originalIndex }) =>
|
|
|
|
|
+ renderTaskRow(task, originalIndex),
|
|
|
|
|
+ )}
|
|
|
|
|
+ </Fragment>
|
|
|
|
|
+ )
|
|
|
|
|
+ })
|
|
|
|
|
+ : // Default order view
|
|
|
|
|
+ tasks.map((task, index) => renderTaskRow(task, index))}
|
|
|
|
|
+ </TableBody>
|
|
|
|
|
+ </Table>
|
|
|
|
|
+ </>
|
|
|
)}
|
|
)}
|
|
|
</div>
|
|
</div>
|
|
|
|
|
|