Przeglądaj źródła

Record tool use errors encountered during eval runs (#2816)

Chris Estreich 8 miesięcy temu
rodzic
commit
61e23cccb6

+ 7 - 0
evals/apps/cli/src/index.ts

@@ -29,6 +29,7 @@ import {
 	updateTask,
 	createTaskMetrics,
 	updateTaskMetrics,
+	createToolError,
 } from "@evals/db"
 import { IpcServer, IpcClient } from "@evals/ipc"
 
@@ -255,6 +256,12 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
 			rooTaskId = payload[0]
 		}
 
+		if (eventName === RooCodeEventName.TaskToolFailed) {
+			// Payload is [rooTaskId, toolName, error]. The leading Roo task id is skipped
+			// because rows are keyed by the eval database ids instead. The run id is stored
+			// next to the task id so failures can be filtered per run without joining
+			// through `tasks`.
+			const [, toolName, error] = payload
+			await createToolError({ runId: run.id, taskId: task.id, toolName, error })
+		}
+
 		if (
 			(eventName === RooCodeEventName.TaskTokenUsageUpdated || eventName === RooCodeEventName.TaskCompleted) &&
 			taskMetricsId

+ 10 - 0
evals/packages/db/drizzle/0004_absent_slapstick.sql

@@ -0,0 +1,10 @@
+CREATE TABLE `toolErrors` (
+	`id` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
+	`runId` integer,
+	`taskId` integer,
+	`toolName` text NOT NULL,
+	`error` text NOT NULL,
+	`createdAt` integer NOT NULL,
+	FOREIGN KEY (`runId`) REFERENCES `runs`(`id`) ON UPDATE no action ON DELETE no action,
+	FOREIGN KEY (`taskId`) REFERENCES `tasks`(`id`) ON UPDATE no action ON DELETE no action
+);

+ 367 - 0
evals/packages/db/drizzle/meta/0004_snapshot.json

@@ -0,0 +1,367 @@
+{
+	"version": "6",
+	"dialect": "sqlite",
+	"id": "ae766c54-aff4-4ce6-b492-24813790c279",
+	"prevId": "61d48d20-f662-445d-9962-cf9cb165cbe7",
+	"tables": {
+		"runs": {
+			"name": "runs",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"autoincrement": true
+				},
+				"taskMetricsId": {
+					"name": "taskMetricsId",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"model": {
+					"name": "model",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"description": {
+					"name": "description",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"settings": {
+					"name": "settings",
+					"type": "blob",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"pid": {
+					"name": "pid",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"socketPath": {
+					"name": "socketPath",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"concurrency": {
+					"name": "concurrency",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false,
+					"default": 2
+				},
+				"passed": {
+					"name": "passed",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false,
+					"default": 0
+				},
+				"failed": {
+					"name": "failed",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false,
+					"default": 0
+				},
+				"createdAt": {
+					"name": "createdAt",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				}
+			},
+			"indexes": {},
+			"foreignKeys": {
+				"runs_taskMetricsId_taskMetrics_id_fk": {
+					"name": "runs_taskMetricsId_taskMetrics_id_fk",
+					"tableFrom": "runs",
+					"tableTo": "taskMetrics",
+					"columnsFrom": ["taskMetricsId"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				}
+			},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"checkConstraints": {}
+		},
+		"taskMetrics": {
+			"name": "taskMetrics",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"autoincrement": true
+				},
+				"tokensIn": {
+					"name": "tokensIn",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"tokensOut": {
+					"name": "tokensOut",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"tokensContext": {
+					"name": "tokensContext",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"cacheWrites": {
+					"name": "cacheWrites",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"cacheReads": {
+					"name": "cacheReads",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"cost": {
+					"name": "cost",
+					"type": "real",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"duration": {
+					"name": "duration",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"toolUsage": {
+					"name": "toolUsage",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"createdAt": {
+					"name": "createdAt",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				}
+			},
+			"indexes": {},
+			"foreignKeys": {},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"checkConstraints": {}
+		},
+		"tasks": {
+			"name": "tasks",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"autoincrement": true
+				},
+				"runId": {
+					"name": "runId",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"taskMetricsId": {
+					"name": "taskMetricsId",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"language": {
+					"name": "language",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"exercise": {
+					"name": "exercise",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"passed": {
+					"name": "passed",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"startedAt": {
+					"name": "startedAt",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"finishedAt": {
+					"name": "finishedAt",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"createdAt": {
+					"name": "createdAt",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				}
+			},
+			"indexes": {
+				"tasks_language_exercise_idx": {
+					"name": "tasks_language_exercise_idx",
+					"columns": ["runId", "language", "exercise"],
+					"isUnique": true
+				}
+			},
+			"foreignKeys": {
+				"tasks_runId_runs_id_fk": {
+					"name": "tasks_runId_runs_id_fk",
+					"tableFrom": "tasks",
+					"tableTo": "runs",
+					"columnsFrom": ["runId"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				},
+				"tasks_taskMetricsId_taskMetrics_id_fk": {
+					"name": "tasks_taskMetricsId_taskMetrics_id_fk",
+					"tableFrom": "tasks",
+					"tableTo": "taskMetrics",
+					"columnsFrom": ["taskMetricsId"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				}
+			},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"checkConstraints": {}
+		},
+		"toolErrors": {
+			"name": "toolErrors",
+			"columns": {
+				"id": {
+					"name": "id",
+					"type": "integer",
+					"primaryKey": true,
+					"notNull": true,
+					"autoincrement": true
+				},
+				"runId": {
+					"name": "runId",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"taskId": {
+					"name": "taskId",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": false,
+					"autoincrement": false
+				},
+				"toolName": {
+					"name": "toolName",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"error": {
+					"name": "error",
+					"type": "text",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				},
+				"createdAt": {
+					"name": "createdAt",
+					"type": "integer",
+					"primaryKey": false,
+					"notNull": true,
+					"autoincrement": false
+				}
+			},
+			"indexes": {},
+			"foreignKeys": {
+				"toolErrors_runId_runs_id_fk": {
+					"name": "toolErrors_runId_runs_id_fk",
+					"tableFrom": "toolErrors",
+					"tableTo": "runs",
+					"columnsFrom": ["runId"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				},
+				"toolErrors_taskId_tasks_id_fk": {
+					"name": "toolErrors_taskId_tasks_id_fk",
+					"tableFrom": "toolErrors",
+					"tableTo": "tasks",
+					"columnsFrom": ["taskId"],
+					"columnsTo": ["id"],
+					"onDelete": "no action",
+					"onUpdate": "no action"
+				}
+			},
+			"compositePrimaryKeys": {},
+			"uniqueConstraints": {},
+			"checkConstraints": {}
+		}
+	},
+	"views": {},
+	"enums": {},
+	"_meta": {
+		"schemas": {},
+		"tables": {},
+		"columns": {}
+	},
+	"internal": {
+		"indexes": {}
+	}
+}

+ 7 - 0
evals/packages/db/drizzle/meta/_journal.json

@@ -29,6 +29,13 @@
 			"when": 1744950664129,
 			"tag": "0003_sweet_chimera",
 			"breakpoints": true
+		},
+		{
+			"idx": 4,
+			"version": "6",
+			"when": 1745256393286,
+			"tag": "0004_absent_slapstick",
+			"breakpoints": true
 		}
 	]
 }

+ 1 - 0
evals/packages/db/src/index.ts

@@ -3,3 +3,4 @@ export * from "./schema.js"
 export * from "./queries/runs.js"
 export * from "./queries/tasks.js"
 export * from "./queries/taskMetrics.js"
+export * from "./queries/toolErrors.js"

+ 5 - 6
evals/packages/db/src/queries/runs.ts

@@ -9,10 +9,8 @@ import { db } from "../db.js"
 import { createTaskMetrics } from "./taskMetrics.js"
 import { getTasks } from "./tasks.js"
 
-const table = schema.runs
-
 export const findRun = async (id: number) => {
-	const run = await db.query.runs.findFirst({ where: eq(table.id, id) })
+	const run = await db.query.runs.findFirst({ where: eq(schema.runs.id, id) })
 
 	if (!run) {
 		throw new RecordNotFoundError()
@@ -23,7 +21,7 @@ export const findRun = async (id: number) => {
 
 export const createRun = async (args: InsertRun) => {
 	const records = await db
-		.insert(table)
+		.insert(schema.runs)
 		.values({
 			...insertRunSchema.parse(args),
 			createdAt: new Date(),
@@ -40,7 +38,7 @@ export const createRun = async (args: InsertRun) => {
 }
 
 export const updateRun = async (id: number, values: UpdateRun) => {
-	const records = await db.update(table).set(values).where(eq(table.id, id)).returning()
+	const records = await db.update(schema.runs).set(values).where(eq(schema.runs.id, id)).returning()
 	const record = records[0]
 
 	if (!record) {
@@ -50,7 +48,8 @@ export const updateRun = async (id: number, values: UpdateRun) => {
 	return record
 }
 
-export const getRuns = async () => db.query.runs.findMany({ orderBy: desc(table.id), with: { taskMetrics: true } })
+export const getRuns = async () =>
+	db.query.runs.findMany({ orderBy: desc(schema.runs.id), with: { taskMetrics: true } })
 
 export const finishRun = async (runId: number) => {
 	const [values] = await db

+ 5 - 22
evals/packages/db/src/queries/taskMetrics.ts

@@ -1,14 +1,12 @@
-import { eq, avg, min, max, and, isNotNull } from "drizzle-orm"
+import { eq } from "drizzle-orm"
 
 import { RecordNotFoundError, RecordNotCreatedError } from "./errors.js"
 import type { InsertTaskMetrics, UpdateTaskMetrics } from "../schema.js"
-import { insertTaskMetricsSchema, taskMetrics, tasks, runs } from "../schema.js"
+import { insertTaskMetricsSchema, taskMetrics } from "../schema.js"
 import { db } from "../db.js"
 
-const table = taskMetrics
-
 export const findTaskMetrics = async (id: number) => {
-	const run = await db.query.taskMetrics.findFirst({ where: eq(table.id, id) })
+	const run = await db.query.taskMetrics.findFirst({ where: eq(taskMetrics.id, id) })
 
 	if (!run) {
 		throw new RecordNotFoundError()
@@ -19,7 +17,7 @@ export const findTaskMetrics = async (id: number) => {
 
 export const createTaskMetrics = async (args: InsertTaskMetrics) => {
 	const records = await db
-		.insert(table)
+		.insert(taskMetrics)
 		.values({
 			...insertTaskMetricsSchema.parse(args),
 			createdAt: new Date(),
@@ -36,7 +34,7 @@ export const createTaskMetrics = async (args: InsertTaskMetrics) => {
 }
 
 export const updateTaskMetrics = async (id: number, values: UpdateTaskMetrics) => {
-	const records = await db.update(table).set(values).where(eq(table.id, id)).returning()
+	const records = await db.update(taskMetrics).set(values).where(eq(taskMetrics.id, id)).returning()
 	const record = records[0]
 
 	if (!record) {
@@ -45,18 +43,3 @@ export const updateTaskMetrics = async (id: number, values: UpdateTaskMetrics) =
 
 	return record
 }
-
-export const successfulTaskDurations = async () => {
-	return db
-		.select({
-			runId: tasks.runId,
-			avgDuration: avg(taskMetrics.duration).mapWith(Number),
-			minDuration: min(taskMetrics.duration).mapWith(Number),
-			maxDuration: max(taskMetrics.duration).mapWith(Number),
-		})
-		.from(tasks)
-		.innerJoin(taskMetrics, eq(tasks.taskMetricsId, taskMetrics.id))
-		.innerJoin(runs, eq(tasks.runId, runs.id))
-		.where(and(eq(tasks.passed, true), isNotNull(runs.taskMetricsId)))
-		.groupBy(tasks.runId)
-}

+ 5 - 7
evals/packages/db/src/queries/tasks.ts

@@ -7,10 +7,8 @@ import type { InsertTask, UpdateTask } from "../schema.js"
 import { insertTaskSchema, tasks } from "../schema.js"
 import { db } from "../db.js"
 
-const table = tasks
-
 export const findTask = async (id: number) => {
-	const run = await db.query.tasks.findFirst({ where: eq(table.id, id) })
+	const run = await db.query.tasks.findFirst({ where: eq(tasks.id, id) })
 
 	if (!run) {
 		throw new RecordNotFoundError()
@@ -21,7 +19,7 @@ export const findTask = async (id: number) => {
 
 export const createTask = async (args: InsertTask) => {
 	const records = await db
-		.insert(table)
+		.insert(tasks)
 		.values({
 			...insertTaskSchema.parse(args),
 			createdAt: new Date(),
@@ -38,7 +36,7 @@ export const createTask = async (args: InsertTask) => {
 }
 
 export const updateTask = async (id: number, values: UpdateTask) => {
-	const records = await db.update(table).set(values).where(eq(table.id, id)).returning()
+	const records = await db.update(tasks).set(values).where(eq(tasks.id, id)).returning()
 	const record = records[0]
 
 	if (!record) {
@@ -56,8 +54,8 @@ type GetTask = {
 
 export const getTask = async ({ runId, language, exercise }: GetTask) =>
 	db.query.tasks.findFirst({
-		where: and(eq(table.runId, runId), eq(table.language, language), eq(table.exercise, exercise)),
+		where: and(eq(tasks.runId, runId), eq(tasks.language, language), eq(tasks.exercise, exercise)),
 	})
 
 export const getTasks = async (runId: number) =>
-	db.query.tasks.findMany({ where: eq(table.runId, runId), with: { taskMetrics: true } })
+	db.query.tasks.findMany({ where: eq(tasks.runId, runId), with: { taskMetrics: true } })

+ 22 - 0
evals/packages/db/src/queries/toolErrors.ts

@@ -0,0 +1,22 @@
+import { RecordNotCreatedError } from "./errors.js"
+import type { InsertToolError } from "../schema.js"
+import { insertToolErrorSchema, toolErrors } from "../schema.js"
+import { db } from "../db.js"
+
+/**
+ * Inserts a single tool-error row, stamped with the current time.
+ *
+ * @param args - Insert payload; validated with `insertToolErrorSchema` before it is written.
+ * @returns The first row returned by the insert.
+ * @throws RecordNotCreatedError if the insert returns no row.
+ */
+export const createToolError = async (args: InsertToolError) => {
+	const values = { ...insertToolErrorSchema.parse(args), createdAt: new Date() }
+	const [created] = await db.insert(toolErrors).values(values).returning()
+
+	if (created) {
+		return created
+	}
+
+	throw new RecordNotCreatedError()
+}

+ 36 - 1
evals/packages/db/src/schema.ts

@@ -2,7 +2,14 @@ import { sqliteTable, text, real, integer, blob, uniqueIndex } from "drizzle-orm
 import { relations } from "drizzle-orm"
 import { createInsertSchema } from "drizzle-zod"
 
-import { RooCodeSettings, ToolUsage, exerciseLanguages, rooCodeSettingsSchema, toolUsageSchema } from "@evals/types"
+import {
+	RooCodeSettings,
+	ToolUsage,
+	exerciseLanguages,
+	rooCodeSettingsSchema,
+	toolNames,
+	toolUsageSchema,
+} from "@evals/types"
 
 /**
  * runs
@@ -98,6 +105,34 @@ export type InsertTaskMetrics = Omit<typeof taskMetrics.$inferInsert, "id" | "cr
 
 export type UpdateTaskMetrics = Partial<Omit<TaskMetrics, "id" | "createdAt">>
 
+/**
+ * toolErrors
+ */
+
+// One row per recorded tool failure.
+export const toolErrors = sqliteTable("toolErrors", {
+	id: integer({ mode: "number" }).primaryKey({ autoIncrement: true }),
+	// Both foreign keys are declared without `.notNull()`, so either may be left unset.
+	runId: integer({ mode: "number" }).references(() => runs.id),
+	taskId: integer({ mode: "number" }).references(() => tasks.id),
+	// Restricted to the `toolNames` list imported from "@evals/types".
+	toolName: text({ enum: toolNames }).notNull(),
+	error: text().notNull(),
+	createdAt: integer({ mode: "timestamp" }).notNull(),
+})
+
+export const toolErrorsRelations = relations(toolErrors, ({ one }) => ({
+	run: one(runs, { fields: [toolErrors.runId], references: [runs.id] }),
+	task: one(tasks, { fields: [toolErrors.taskId], references: [tasks.id] }),
+}))
+
+export type ToolError = typeof toolErrors.$inferSelect
+
+// Derived from the table columns only: `toolErrors` has no `toolUsage` column, so no `.extend()` is needed.
+export const insertToolErrorSchema = createInsertSchema(toolErrors)
+	.omit({ id: true, createdAt: true })
+
+export type InsertToolError = Omit<typeof toolErrors.$inferInsert, "id" | "createdAt">
+
+export type UpdateToolError = Partial<Omit<ToolError, "id" | "createdAt">>
+
 /**
  * schema
  */

+ 5 - 0
evals/packages/types/src/ipc.ts

@@ -111,6 +111,11 @@ export const taskEventSchema = z.discriminatedUnion("eventName", [
 		payload: rooCodeEventsSchema.shape[RooCodeEventName.TaskTokenUsageUpdated],
 		taskId: z.number().optional(),
 	}),
+	z.object({
+		eventName: z.literal(RooCodeEventName.TaskToolFailed),
+		payload: rooCodeEventsSchema.shape[RooCodeEventName.TaskToolFailed],
+		taskId: z.number().optional(),
+	}),
 	z.object({
 		eventName: z.literal(EvalEventName.Pass),
 		payload: z.undefined(),

+ 1 - 2
evals/packages/types/src/roo-code-defaults.ts

@@ -9,7 +9,7 @@ export const rooCodeDefaults: RooCodeSettings = {
 	rateLimitSeconds: 0,
 
 	pinnedApiConfigs: {},
-	lastShownAnnouncementId: "apr-16-2025-3-12",
+	lastShownAnnouncementId: "apr-18-2025-3-13",
 
 	autoApprovalEnabled: true,
 	alwaysAllowReadOnly: true,
@@ -59,7 +59,6 @@ export const rooCodeDefaults: RooCodeSettings = {
 		search_and_replace: false,
 		insert_content: false,
 		powerSteering: false,
-		append_to_file: false,
 	},
 
 	language: "en",

+ 3 - 2
evals/packages/types/src/roo-code.ts

@@ -271,7 +271,7 @@ export type CustomSupportPrompts = z.infer<typeof customSupportPromptsSchema>
  * ExperimentId
  */
 
-export const experimentIds = ["search_and_replace", "insert_content", "powerSteering", "append_to_file"] as const
+export const experimentIds = ["search_and_replace", "insert_content", "powerSteering"] as const
 
 export const experimentIdsSchema = z.enum(experimentIds)
 
@@ -285,7 +285,6 @@ const experimentsSchema = z.object({
 	search_and_replace: z.boolean(),
 	insert_content: z.boolean(),
 	powerSteering: z.boolean(),
-	append_to_file: z.boolean(),
 })
 
 export type Experiments = z.infer<typeof experimentsSchema>
@@ -863,6 +862,7 @@ export enum RooCodeEventName {
 	TaskSpawned = "taskSpawned",
 	TaskCompleted = "taskCompleted",
 	TaskTokenUsageUpdated = "taskTokenUsageUpdated",
+	TaskToolFailed = "taskToolFailed",
 }
 
 export const rooCodeEventsSchema = z.object({
@@ -883,6 +883,7 @@ export const rooCodeEventsSchema = z.object({
 	[RooCodeEventName.TaskSpawned]: z.tuple([z.string(), z.string()]),
 	[RooCodeEventName.TaskCompleted]: z.tuple([z.string(), tokenUsageSchema, toolUsageSchema]),
 	[RooCodeEventName.TaskTokenUsageUpdated]: z.tuple([z.string(), tokenUsageSchema]),
+	[RooCodeEventName.TaskToolFailed]: z.tuple([z.string(), toolNamesSchema, z.string()]),
 })
 
 export type RooCodeEvents = z.infer<typeof rooCodeEventsSchema>

+ 1 - 0
package-lock.json

@@ -16921,6 +16921,7 @@
 			"resolved": "https://registry.npmjs.org/npm-run-all/-/npm-run-all-4.1.5.tgz",
 			"integrity": "sha512-Oo82gJDAVcaMdi3nuoKFavkIHBRVqQ1qvMb+9LHk/cF4P6B2m8aP04hGf7oL6wZ9BuGwX1onlLhpuoofSyoQDQ==",
 			"dev": true,
+			"license": "MIT",
 			"dependencies": {
 				"ansi-styles": "^3.2.1",
 				"chalk": "^2.4.1",

+ 8 - 1
src/core/Cline.ts

@@ -107,6 +107,7 @@ export type ClineEvents = {
 	taskSpawned: [taskId: string]
 	taskCompleted: [taskId: string, tokenUsage: TokenUsage, toolUsage: ToolUsage]
 	taskTokenUsageUpdated: [taskId: string, tokenUsage: TokenUsage]
+	taskToolFailed: [taskId: string, tool: ToolName, error: string]
 }
 
 export type ClineOptions = {
@@ -340,6 +341,7 @@ export class Cline extends EventEmitter<ClineEvents> {
 				return data
 			}
 		}
+
 		return []
 	}
 
@@ -2541,12 +2543,17 @@ export class Cline extends EventEmitter<ClineEvents> {
 
 		this.toolUsage[toolName].attempts++
 	}
-	public recordToolError(toolName: ToolName) {
+
+	/**
+	 * Counts one failure for `toolName`, creating its usage entry first if none exists.
+	 *
+	 * @param toolName - Tool whose failure counter is incremented.
+	 * @param error - Optional failure text. When it is truthy, a "taskToolFailed" event
+	 *   is emitted with this task's id, the tool name, and the text; when it is omitted
+	 *   or empty, only the counter changes.
+	 */
+	public recordToolError(toolName: ToolName, error?: string) {
 		if (!this.toolUsage[toolName]) {
 			this.toolUsage[toolName] = { attempts: 0, failures: 0 }
 		}
 
 		this.toolUsage[toolName].failures++
+
+		if (error) {
+			this.emit("taskToolFailed", this.taskId, toolName, error)
+		}
 	}
 
 	public getToolUsage() {

+ 2 - 1
src/core/tools/applyDiffTool.ts

@@ -96,7 +96,6 @@ export async function applyDiffTool(
 
 			if (!diffResult.success) {
 				cline.consecutiveMistakeCount++
-				cline.recordToolError("apply_diff")
 				const currentCount = (cline.consecutiveMistakeCountForApplyDiff.get(relPath) || 0) + 1
 				cline.consecutiveMistakeCountForApplyDiff.set(relPath, currentCount)
 				let formattedError = ""
@@ -128,6 +127,8 @@ export async function applyDiffTool(
 					await cline.say("diff_error", formattedError)
 				}
 
+				cline.recordToolError("apply_diff", formattedError)
+
 				pushToolResult(formattedError)
 				return
 			}

+ 8 - 4
src/exports/api.ts

@@ -285,10 +285,6 @@ export class API extends EventEmitter<RooCodeEvents> implements RooCodeAPI {
 
 			cline.on("taskModeSwitched", (taskId, mode) => this.emit(RooCodeEventName.TaskModeSwitched, taskId, mode))
 
-			cline.on("taskTokenUsageUpdated", (_, usage) =>
-				this.emit(RooCodeEventName.TaskTokenUsageUpdated, cline.taskId, usage),
-			)
-
 			cline.on("taskAskResponded", () => this.emit(RooCodeEventName.TaskAskResponded, cline.taskId))
 
 			cline.on("taskAborted", () => {
@@ -309,6 +305,14 @@ export class API extends EventEmitter<RooCodeEvents> implements RooCodeAPI {
 			cline.on("taskPaused", () => this.emit(RooCodeEventName.TaskPaused, cline.taskId))
 			cline.on("taskUnpaused", () => this.emit(RooCodeEventName.TaskUnpaused, cline.taskId))
 
+			cline.on("taskTokenUsageUpdated", (_, usage) =>
+				this.emit(RooCodeEventName.TaskTokenUsageUpdated, cline.taskId, usage),
+			)
+
+			cline.on("taskToolFailed", (taskId, tool, error) =>
+				this.emit(RooCodeEventName.TaskToolFailed, taskId, tool, error),
+			)
+
 			this.emit(RooCodeEventName.TaskCreated, cline.taskId)
 		})
 	}

+ 25 - 0
src/exports/roo-code.d.ts

@@ -543,6 +543,30 @@ type RooCodeEvents = {
 			contextTokens: number
 		},
 	]
+	taskToolFailed: [
+		string,
+		(
+			| "execute_command"
+			| "read_file"
+			| "write_to_file"
+			| "append_to_file"
+			| "apply_diff"
+			| "insert_content"
+			| "search_and_replace"
+			| "search_files"
+			| "list_files"
+			| "list_code_definition_names"
+			| "browser_action"
+			| "use_mcp_tool"
+			| "access_mcp_resource"
+			| "ask_followup_question"
+			| "attempt_completion"
+			| "switch_mode"
+			| "new_task"
+			| "fetch_instructions"
+		),
+		string,
+	]
 }
 
 /**
@@ -560,6 +584,7 @@ declare enum RooCodeEventName {
 	TaskSpawned = "taskSpawned",
 	TaskCompleted = "taskCompleted",
 	TaskTokenUsageUpdated = "taskTokenUsageUpdated",
+	TaskToolFailed = "taskToolFailed",
 }
 
 type RooCodeSettings = GlobalSettings & ProviderSettings

+ 24 - 0
src/exports/types.ts

@@ -552,6 +552,30 @@ type RooCodeEvents = {
 			contextTokens: number
 		},
 	]
+	taskToolFailed: [
+		string,
+		(
+			| "execute_command"
+			| "read_file"
+			| "write_to_file"
+			| "append_to_file"
+			| "apply_diff"
+			| "insert_content"
+			| "search_and_replace"
+			| "search_files"
+			| "list_files"
+			| "list_code_definition_names"
+			| "browser_action"
+			| "use_mcp_tool"
+			| "access_mcp_resource"
+			| "ask_followup_question"
+			| "attempt_completion"
+			| "switch_mode"
+			| "new_task"
+			| "fetch_instructions"
+		),
+		string,
+	]
 }
 
 export type { RooCodeEvents }

+ 2 - 0
src/schemas/index.ts

@@ -873,6 +873,7 @@ export enum RooCodeEventName {
 	TaskSpawned = "taskSpawned",
 	TaskCompleted = "taskCompleted",
 	TaskTokenUsageUpdated = "taskTokenUsageUpdated",
+	TaskToolFailed = "taskToolFailed",
 }
 
 export const rooCodeEventsSchema = z.object({
@@ -893,6 +894,7 @@ export const rooCodeEventsSchema = z.object({
 	[RooCodeEventName.TaskSpawned]: z.tuple([z.string(), z.string()]),
 	[RooCodeEventName.TaskCompleted]: z.tuple([z.string(), tokenUsageSchema, toolUsageSchema]),
 	[RooCodeEventName.TaskTokenUsageUpdated]: z.tuple([z.string(), tokenUsageSchema]),
+	[RooCodeEventName.TaskToolFailed]: z.tuple([z.string(), toolNamesSchema, z.string()]),
 })
 
 export type RooCodeEvents = z.infer<typeof rooCodeEventsSchema>