Просмотр исходного кода

feat(evals): add UI and backend support for importing and injecting foot gun system prompts (#3606)

Shariq Riaz 7 месяцев назад
Родитель
Коммит
ce3e4e883d

+ 9 - 0
evals/apps/cli/src/index.ts

@@ -178,6 +178,15 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
 	const workspacePath = path.resolve(exercisesPath, language, exercise)
 	const taskSocketPath = path.resolve(dirname, `${dirname}/task-${task.id}.sock`)
 
+	// Inject foot gun system prompt if present
+	if (process.env.FOOTGUN_SYSTEM_PROMPT) {
+		const rooDir = path.join(workspacePath, ".roo")
+		if (!fs.existsSync(rooDir)) {
+			fs.mkdirSync(rooDir, { recursive: true })
+		}
+		fs.writeFileSync(path.join(rooDir, "system-prompt-code"), process.env.FOOTGUN_SYSTEM_PROMPT)
+	}
+
 	// If debugging:
 	// Use --wait --log trace or --verbose.
 	// Don't await execa and store result as subprocess.

+ 26 - 2
evals/apps/web/src/app/runs/new/new-run.tsx

@@ -8,6 +8,7 @@ import { zodResolver } from "@hookform/resolvers/zod"
 import fuzzysort from "fuzzysort"
 import { toast } from "sonner"
 import { X, Rocket, Check, ChevronsUpDown, HardDriveUpload, CircleCheck } from "lucide-react"
+import { Dialog, DialogContent, DialogTitle, DialogFooter } from "@/components/ui/dialog"
 
 import { globalSettingsSchema, providerSettingsSchema, rooCodeDefaults } from "@evals/types"
 
@@ -83,6 +84,10 @@ export function NewRun() {
 
 	const [model, suite, settings] = watch(["model", "suite", "settings", "concurrency"])
 
+	const [systemPromptDialogOpen, setSystemPromptDialogOpen] = useState(false)
+	const [systemPrompt, setSystemPrompt] = useState("")
+	const systemPromptRef = useRef<HTMLTextAreaElement>(null)
+
 	const onSubmit = useCallback(
 		async (values: FormValues) => {
 			try {
@@ -97,13 +102,13 @@ export function NewRun() {
 					values.settings = { ...(values.settings || {}), openRouterModelId }
 				}
 
-				const { id } = await createRun(values)
+				const { id } = await createRun({ ...values, systemPrompt })
 				router.push(`/runs/${id}`)
 			} catch (e) {
 				toast.error(e instanceof Error ? e.message : "An unknown error occurred.")
 			}
 		},
-		[mode, model, models.data, router],
+		[mode, model, models.data, router, systemPrompt],
 	)
 
 	const onFilterModels = useCallback(
@@ -313,6 +318,10 @@ export function NewRun() {
 							)}
 							<FormMessage />
 						</FormItem>
+
+						<Button type="button" variant="secondary" onClick={() => setSystemPromptDialogOpen(true)}>
+							Import Foot Gun System Prompt
+						</Button>
 					</div>
 
 					<FormField
@@ -394,6 +403,21 @@ export function NewRun() {
 				onClick={() => router.push("/")}>
 				<X className="size-6" />
 			</Button>
+			<Dialog open={systemPromptDialogOpen} onOpenChange={setSystemPromptDialogOpen}>
+				<DialogContent>
+					<DialogTitle>Import Foot Gun System Prompt</DialogTitle>
+					<textarea
+						ref={systemPromptRef}
+						value={systemPrompt}
+						onChange={(e) => setSystemPrompt(e.target.value)}
+						placeholder="Paste or type your system prompt here..."
+						className="w-full min-h-[120px] border rounded p-2"
+					/>
+					<DialogFooter>
+						<Button onClick={() => setSystemPromptDialogOpen(false)}>Done</Button>
+					</DialogFooter>
+				</DialogContent>
+			</Dialog>
 		</>
 	)
 }

+ 1 - 0
evals/apps/web/src/lib/schemas.ts

@@ -18,6 +18,7 @@ export const createRunSchema = z
 		exercises: z.array(z.string()).optional(),
 		settings: rooCodeSettingsSchema.optional(),
 		concurrency: z.number().int().min(CONCURRENCY_MIN).max(CONCURRENCY_MAX).default(CONCURRENCY_DEFAULT),
+		systemPrompt: z.string().optional(),
 	})
 	.refine((data) => data.suite === "full" || (data.exercises || []).length > 0, {
 		message: "Exercises are required when running a partial suite.",

+ 16 - 7
evals/apps/web/src/lib/server/runs.ts

@@ -14,7 +14,7 @@ import * as db from "@evals/db"
 import { CreateRun } from "@/lib/schemas"
 import { getExercisesForLanguage } from "./exercises"
 
-export async function createRun({ suite, exercises = [], ...values }: CreateRun) {
+export async function createRun({ suite, exercises = [], systemPrompt, ...values }: CreateRun) {
 	const run = await db.createRun({
 		...values,
 		socketPath: path.join(os.tmpdir(), `roo-code-evals-${crypto.randomUUID()}.sock`),
@@ -45,13 +45,22 @@ export async function createRun({ suite, exercises = [], ...values }: CreateRun)
 	try {
 		const logFile = fs.openSync(`/tmp/roo-code-evals-${run.id}.log`, "a")
 
-		const process = spawn("pnpm", ["--filter", "@evals/cli", "dev", "run", "all", "--runId", run.id.toString()], {
-			detached: true,
-			stdio: ["ignore", logFile, logFile],
-		})
+		const env: NodeJS.ProcessEnv = systemPrompt
+			? { ...process.env, FOOTGUN_SYSTEM_PROMPT: systemPrompt }
+			: process.env
 
-		process.unref()
-		await db.updateRun(run.id, { pid: process.pid })
+		const childProcess = spawn(
+			"pnpm",
+			["--filter", "@evals/cli", "dev", "run", "all", "--runId", run.id.toString()],
+			{
+				detached: true,
+				stdio: ["ignore", logFile, logFile],
+				env,
+			},
+		)
+
+		childProcess.unref()
+		await db.updateRun(run.id, { pid: childProcess.pid })
 	} catch (error) {
 		console.error(error)
 	}