Browse Source

feat: add experimental flag to disable command execution in attempt_completion tool (#4352)

* feat: add experimental flag to disable command execution in attempt_completion tool

* fix: remove deprecation phase comments from attemptCompletionTool

* feat: add translations for disable completion command experiment

* fix: revert unintended package.json change

* Rename attempt-completion.experiment.test.ts to attempt-completion.test.ts

* fix: address PR feedback - restore autoCondenseContext in tests and remove type assertion

---------

Co-authored-by: Daniel <[email protected]>
Hannes Rudolph 6 months ago
parent
commit
67d238afd0
29 changed files with 571 additions and 14 deletions
  1. 2 1
      packages/types/src/experiment.ts
  2. 10 2
      src/core/prompts/sections/objective.ts
  3. 1 1
      src/core/prompts/system.ts
  4. 129 0
      src/core/prompts/tools/__tests__/attempt-completion.test.ts
  5. 31 8
      src/core/prompts/tools/attempt-completion.ts
  6. 2 1
      src/core/prompts/tools/index.ts
  7. 1 0
      src/core/prompts/tools/types.ts
  8. 310 0
      src/core/tools/__tests__/attemptCompletionTool.experiment.test.ts
  9. 10 1
      src/core/tools/attemptCompletionTool.ts
  10. 3 0
      src/shared/__tests__/experiments.test.ts
  11. 2 0
      src/shared/experiments.ts
  12. 2 0
      webview-ui/src/context/__tests__/ExtensionStateContext.test.tsx
  13. 4 0
      webview-ui/src/i18n/locales/ca/settings.json
  14. 4 0
      webview-ui/src/i18n/locales/de/settings.json
  15. 4 0
      webview-ui/src/i18n/locales/en/settings.json
  16. 4 0
      webview-ui/src/i18n/locales/es/settings.json
  17. 4 0
      webview-ui/src/i18n/locales/fr/settings.json
  18. 4 0
      webview-ui/src/i18n/locales/hi/settings.json
  19. 4 0
      webview-ui/src/i18n/locales/it/settings.json
  20. 4 0
      webview-ui/src/i18n/locales/ja/settings.json
  21. 4 0
      webview-ui/src/i18n/locales/ko/settings.json
  22. 4 0
      webview-ui/src/i18n/locales/nl/settings.json
  23. 4 0
      webview-ui/src/i18n/locales/pl/settings.json
  24. 4 0
      webview-ui/src/i18n/locales/pt-BR/settings.json
  25. 4 0
      webview-ui/src/i18n/locales/ru/settings.json
  26. 4 0
      webview-ui/src/i18n/locales/tr/settings.json
  27. 4 0
      webview-ui/src/i18n/locales/vi/settings.json
  28. 4 0
      webview-ui/src/i18n/locales/zh-CN/settings.json
  29. 4 0
      webview-ui/src/i18n/locales/zh-TW/settings.json

+ 2 - 1
packages/types/src/experiment.ts

@@ -6,7 +6,7 @@ import type { Keys, Equals, AssertEqual } from "./type-fu.js"
  * ExperimentId
  */
 
-export const experimentIds = ["powerSteering", "concurrentFileReads"] as const
+export const experimentIds = ["powerSteering", "concurrentFileReads", "disableCompletionCommand"] as const
 
 export const experimentIdsSchema = z.enum(experimentIds)
 
@@ -19,6 +19,7 @@ export type ExperimentId = z.infer<typeof experimentIdsSchema>
 export const experimentsSchema = z.object({
 	powerSteering: z.boolean(),
 	concurrentFileReads: z.boolean(),
+	disableCompletionCommand: z.boolean(),
 })
 
 export type Experiments = z.infer<typeof experimentsSchema>

+ 10 - 2
src/core/prompts/sections/objective.ts

@@ -1,6 +1,7 @@
+import { EXPERIMENT_IDS, experiments } from "../../../shared/experiments"
 import { CodeIndexManager } from "../../../services/code-index/manager"
 
-export function getObjectiveSection(codeIndexManager?: CodeIndexManager): string {
+export function getObjectiveSection(codeIndexManager?: CodeIndexManager, experimentsConfig?: Record<string, boolean>): string {
 	const isCodebaseSearchAvailable = codeIndexManager &&
 		codeIndexManager.isFeatureEnabled &&
 		codeIndexManager.isFeatureConfigured &&
@@ -9,6 +10,13 @@ export function getObjectiveSection(codeIndexManager?: CodeIndexManager): string
 	const codebaseSearchInstruction = isCodebaseSearchAvailable
 		? "First, if the task involves understanding existing code or functionality, you MUST use the `codebase_search` tool to search for relevant code based on the task's intent BEFORE using any other search or file exploration tools. Then, "
 		: "First, "
+	
+	// Check if command execution is disabled via experiment
+	const isCommandDisabled = experimentsConfig && experimentsConfig[EXPERIMENT_IDS.DISABLE_COMPLETION_COMMAND]
+	
+	const commandInstruction = !isCommandDisabled
+		? " You may also provide a CLI command to showcase the result of your task; this can be particularly useful for web development tasks, where you can run e.g. \`open index.html\` to show the website you've built."
+		: ""
 
 	return `====
 
@@ -19,6 +27,6 @@ You accomplish a given task iteratively, breaking it down into clear steps and w
 1. Analyze the user's task and set clear, achievable goals to accomplish it. Prioritize these goals in a logical order.
 2. Work through these goals sequentially, utilizing available tools one at a time as necessary. Each goal should correspond to a distinct step in your problem-solving process. You will be informed on the work completed and what's remaining as you go.
 3. Remember, you have extensive capabilities with access to a wide range of tools that can be used in powerful and clever ways as necessary to accomplish each goal. Before calling a tool, do some analysis within <thinking></thinking> tags. ${codebaseSearchInstruction}analyze the file structure provided in environment_details to gain context and insights for proceeding effectively. Next, think about which of the provided tools is the most relevant tool to accomplish the user's task. Go through each of the required parameters of the relevant tool and determine if the user has directly provided or given enough information to infer a value. When deciding if the parameter can be inferred, carefully consider all the context to see if it supports a specific value. If all of the required parameters are present or can be reasonably inferred, close the thinking tag and proceed with the tool use. BUT, if one of the values for a required parameter is missing, DO NOT invoke the tool (not even with fillers for the missing params) and instead, ask the user to provide the missing parameters using the ask_followup_question tool. DO NOT ask for more information on optional parameters if it is not provided.
-4. Once you've completed the user's task, you must use the attempt_completion tool to present the result of the task to the user. You may also provide a CLI command to showcase the result of your task; this can be particularly useful for web development tasks, where you can run e.g. \`open index.html\` to show the website you've built.
+4. Once you've completed the user's task, you must use the attempt_completion tool to present the result of the task to the user.${commandInstruction}
 5. The user may provide feedback, which you can use to make improvements and try again. But DO NOT continue in pointless back and forth conversations, i.e. don't end your responses with questions or offers for further assistance.`
 }

+ 1 - 1
src/core/prompts/system.ts

@@ -97,7 +97,7 @@ ${getRulesSection(cwd, supportsComputerUse, effectiveDiffStrategy, codeIndexMana
 
 ${getSystemInfoSection(cwd)}
 
-${getObjectiveSection(codeIndexManager)}
+${getObjectiveSection(codeIndexManager, experiments)}
 
 ${await addCustomInstructions(baseInstructions, globalCustomInstructions || "", cwd, mode, { language: language ?? formatLanguage(vscode.env.language), rooIgnoreInstructions })}`
 

+ 129 - 0
src/core/prompts/tools/__tests__/attempt-completion.test.ts

@@ -0,0 +1,129 @@
+import { getAttemptCompletionDescription } from "../attempt-completion"
+import { EXPERIMENT_IDS } from "../../../../shared/experiments"
+
+describe("getAttemptCompletionDescription - DISABLE_COMPLETION_COMMAND experiment", () => {
+	describe("when experiment is disabled (default)", () => {
+		it("should include command parameter in the description", () => {
+			const args = {
+				cwd: "/test/path",
+				supportsComputerUse: false,
+				experiments: {
+					[EXPERIMENT_IDS.DISABLE_COMPLETION_COMMAND]: false,
+				},
+			}
+
+			const description = getAttemptCompletionDescription(args)
+
+			// Check that command parameter is included
+			expect(description).toContain("- command: (optional)")
+			expect(description).toContain("A CLI command to execute to show a live demo")
+			expect(description).toContain("<command>Command to demonstrate result (optional)</command>")
+			expect(description).toContain("<command>open index.html</command>")
+		})
+
+		it("should include command parameter when experiments is undefined", () => {
+			const args = {
+				cwd: "/test/path",
+				supportsComputerUse: false,
+			}
+
+			const description = getAttemptCompletionDescription(args)
+
+			// Check that command parameter is included
+			expect(description).toContain("- command: (optional)")
+			expect(description).toContain("A CLI command to execute to show a live demo")
+			expect(description).toContain("<command>Command to demonstrate result (optional)</command>")
+			expect(description).toContain("<command>open index.html</command>")
+		})
+
+		it("should include command parameter when no args provided", () => {
+			const description = getAttemptCompletionDescription()
+
+			// Check that command parameter is included
+			expect(description).toContain("- command: (optional)")
+			expect(description).toContain("A CLI command to execute to show a live demo")
+			expect(description).toContain("<command>Command to demonstrate result (optional)</command>")
+			expect(description).toContain("<command>open index.html</command>")
+		})
+	})
+
+	describe("when experiment is enabled", () => {
+		it("should NOT include command parameter in the description", () => {
+			const args = {
+				cwd: "/test/path",
+				supportsComputerUse: false,
+				experiments: {
+					[EXPERIMENT_IDS.DISABLE_COMPLETION_COMMAND]: true,
+				},
+			}
+
+			const description = getAttemptCompletionDescription(args)
+
+			// Check that command parameter is NOT included
+			expect(description).not.toContain("- command: (optional)")
+			expect(description).not.toContain("A CLI command to execute to show a live demo")
+			expect(description).not.toContain("<command>Command to demonstrate result (optional)</command>")
+			expect(description).not.toContain("<command>open index.html</command>")
+
+			// But should still have the basic structure
+			expect(description).toContain("## attempt_completion")
+			expect(description).toContain("- result: (required)")
+			expect(description).toContain("<attempt_completion>")
+			expect(description).toContain("</attempt_completion>")
+		})
+
+		it("should show example without command", () => {
+			const args = {
+				cwd: "/test/path",
+				supportsComputerUse: false,
+				experiments: {
+					[EXPERIMENT_IDS.DISABLE_COMPLETION_COMMAND]: true,
+				},
+			}
+
+			const description = getAttemptCompletionDescription(args)
+
+			// Check example format
+			expect(description).toContain("Example: Requesting to attempt completion with a result")
+			expect(description).toContain("I've updated the CSS")
+			expect(description).not.toContain("Example: Requesting to attempt completion with a result and command")
+		})
+	})
+
+	describe("description content", () => {
+		it("should maintain core functionality description regardless of experiment", () => {
+			const argsWithExperimentDisabled = {
+				cwd: "/test/path",
+				supportsComputerUse: false,
+				experiments: {
+					[EXPERIMENT_IDS.DISABLE_COMPLETION_COMMAND]: false,
+				},
+			}
+
+			const argsWithExperimentEnabled = {
+				cwd: "/test/path",
+				supportsComputerUse: false,
+				experiments: {
+					[EXPERIMENT_IDS.DISABLE_COMPLETION_COMMAND]: true,
+				},
+			}
+
+			const descriptionDisabled = getAttemptCompletionDescription(argsWithExperimentDisabled)
+			const descriptionEnabled = getAttemptCompletionDescription(argsWithExperimentEnabled)
+
+			// Both should contain core functionality
+			const coreText = "After each tool use, the user will respond with the result of that tool use"
+			expect(descriptionDisabled).toContain(coreText)
+			expect(descriptionEnabled).toContain(coreText)
+
+			// Both should contain the important note
+			const importantNote = "IMPORTANT NOTE: This tool CANNOT be used until you've confirmed"
+			expect(descriptionDisabled).toContain(importantNote)
+			expect(descriptionEnabled).toContain(importantNote)
+
+			// Both should contain result parameter
+			expect(descriptionDisabled).toContain("- result: (required)")
+			expect(descriptionEnabled).toContain("- result: (required)")
+		})
+	})
+})

+ 31 - 8
src/core/prompts/tools/attempt-completion.ts

@@ -1,17 +1,31 @@
-export function getAttemptCompletionDescription(): string {
-	return `## attempt_completion
-Description: After each tool use, the user will respond with the result of that tool use, i.e. if it succeeded or failed, along with any reasons for failure. Once you've received the results of tool uses and can confirm that the task is complete, use this tool to present the result of your work to the user. Optionally you may provide a CLI command to showcase the result of your work. The user may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again.
+import { EXPERIMENT_IDS, experiments } from "../../../shared/experiments"
+import { ToolArgs } from "./types"
+
+export function getAttemptCompletionDescription(args?: ToolArgs): string {
+	// Check if command execution is disabled via experiment
+	const isCommandDisabled = args?.experiments && experiments.isEnabled(
+		args.experiments,
+		EXPERIMENT_IDS.DISABLE_COMPLETION_COMMAND
+	)
+
+	const baseDescription = `## attempt_completion
+Description: After each tool use, the user will respond with the result of that tool use, i.e. if it succeeded or failed, along with any reasons for failure. Once you've received the results of tool uses and can confirm that the task is complete, use this tool to present the result of your work to the user.${!isCommandDisabled ? ' Optionally you may provide a CLI command to showcase the result of your work.' : ''} The user may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again.
 IMPORTANT NOTE: This tool CANNOT be used until you've confirmed from the user that any previous tool uses were successful. Failure to do so will result in code corruption and system failure. Before using this tool, you must ask yourself in <thinking></thinking> tags if you've confirmed from the user that any previous tool uses were successful. If not, then DO NOT use this tool.
 Parameters:
-- result: (required) The result of the task. Formulate this result in a way that is final and does not require further input from the user. Don't end your result with questions or offers for further assistance.
-- command: (optional) A CLI command to execute to show a live demo of the result to the user. For example, use \`open index.html\` to display a created html website, or \`open localhost:3000\` to display a locally running development server. But DO NOT use commands like \`echo\` or \`cat\` that merely print text. This command should be valid for the current operating system. Ensure the command is properly formatted and does not contain any harmful instructions.
+- result: (required) The result of the task. Formulate this result in a way that is final and does not require further input from the user. Don't end your result with questions or offers for further assistance.`
+
+	const commandParameter = !isCommandDisabled ? `
+- command: (optional) A CLI command to execute to show a live demo of the result to the user. For example, use \`open index.html\` to display a created html website, or \`open localhost:3000\` to display a locally running development server. But DO NOT use commands like \`echo\` or \`cat\` that merely print text. This command should be valid for the current operating system. Ensure the command is properly formatted and does not contain any harmful instructions.` : ''
+
+	const usage = `
 Usage:
 <attempt_completion>
 <result>
 Your final result description here
-</result>
-<command>Command to demonstrate result (optional)</command>
-</attempt_completion>
+</result>${!isCommandDisabled ? '\n<command>Command to demonstrate result (optional)</command>' : ''}
+</attempt_completion>`
+
+	const example = !isCommandDisabled ? `
 
 Example: Requesting to attempt completion with a result and command
 <attempt_completion>
@@ -19,5 +33,14 @@ Example: Requesting to attempt completion with a result and command
 I've updated the CSS
 </result>
 <command>open index.html</command>
+</attempt_completion>` : `
+
+Example: Requesting to attempt completion with a result
+<attempt_completion>
+<result>
+I've updated the CSS
+</result>
 </attempt_completion>`
+
+	return baseDescription + commandParameter + usage + example
 }

+ 2 - 1
src/core/prompts/tools/index.ts

@@ -35,7 +35,7 @@ const toolDescriptionMap: Record<string, (args: ToolArgs) => string | undefined>
 	list_code_definition_names: (args) => getListCodeDefinitionNamesDescription(args),
 	browser_action: (args) => getBrowserActionDescription(args),
 	ask_followup_question: () => getAskFollowupQuestionDescription(),
-	attempt_completion: () => getAttemptCompletionDescription(),
+	attempt_completion: (args) => getAttemptCompletionDescription(args),
 	use_mcp_tool: (args) => getUseMcpToolDescription(args),
 	access_mcp_resource: (args) => getAccessMcpResourceDescription(args),
 	codebase_search: () => getCodebaseSearchDescription(),
@@ -69,6 +69,7 @@ export function getToolDescriptionsForMode(
 		mcpHub,
 		partialReadsEnabled,
 		settings,
+		experiments,
 	}
 
 	const tools = new Set<string>()

+ 1 - 0
src/core/prompts/tools/types.ts

@@ -10,4 +10,5 @@ export type ToolArgs = {
 	toolOptions?: any
 	partialReadsEnabled?: boolean
 	settings?: Record<string, any>
+	experiments?: Record<string, boolean>
 }

+ 310 - 0
src/core/tools/__tests__/attemptCompletionTool.experiment.test.ts

@@ -0,0 +1,310 @@
+import { Task } from "../../task/Task"
+import { attemptCompletionTool } from "../attemptCompletionTool"
+import { EXPERIMENT_IDS } from "../../../shared/experiments"
+import { executeCommand } from "../executeCommandTool"
+
+// Mock dependencies
+jest.mock("../executeCommandTool", () => ({
+	executeCommand: jest.fn(),
+}))
+
+jest.mock("@roo-code/telemetry", () => ({
+	TelemetryService: {
+		instance: {
+			captureTaskCompleted: jest.fn(),
+		},
+	},
+}))
+
+describe("attemptCompletionTool - DISABLE_COMPLETION_COMMAND experiment", () => {
+	let mockCline: any
+	let mockAskApproval: jest.Mock
+	let mockHandleError: jest.Mock
+	let mockPushToolResult: jest.Mock
+	let mockRemoveClosingTag: jest.Mock
+	let mockToolDescription: jest.Mock
+	let mockAskFinishSubTaskApproval: jest.Mock
+
+	beforeEach(() => {
+		jest.clearAllMocks()
+
+		mockAskApproval = jest.fn().mockResolvedValue(true)
+		mockHandleError = jest.fn()
+		mockPushToolResult = jest.fn()
+		mockRemoveClosingTag = jest.fn((tag, content) => content)
+		mockToolDescription = jest.fn().mockReturnValue("attempt_completion")
+		mockAskFinishSubTaskApproval = jest.fn()
+
+		mockCline = {
+			say: jest.fn(),
+			ask: jest.fn().mockResolvedValue({ response: "yesButtonClicked", text: "", images: [] }),
+			clineMessages: [],
+			lastMessageTs: Date.now(),
+			consecutiveMistakeCount: 0,
+			sayAndCreateMissingParamError: jest.fn(),
+			recordToolError: jest.fn(),
+			emit: jest.fn(),
+			getTokenUsage: jest.fn().mockReturnValue({}),
+			toolUsage: {},
+			userMessageContent: [],
+			taskId: "test-task-id",
+			providerRef: {
+				deref: jest.fn().mockReturnValue({
+					getState: jest.fn().mockResolvedValue({
+						experiments: {},
+					}),
+				}),
+			},
+		}
+	})
+
+	describe("when experiment is disabled (default)", () => {
+		beforeEach(() => {
+			mockCline.providerRef.deref().getState.mockResolvedValue({
+				experiments: {
+					[EXPERIMENT_IDS.DISABLE_COMPLETION_COMMAND]: false,
+				},
+			})
+		})
+
+		it("should execute command when provided", async () => {
+			const mockExecuteCommand = executeCommand as jest.Mock
+			mockExecuteCommand.mockResolvedValue([false, "Command executed successfully"])
+
+			// Mock clineMessages with a previous message that's not a command ask
+			mockCline.clineMessages = [{ say: "previous_message", text: "Previous message" }]
+
+			const block = {
+				params: {
+					result: "Task completed successfully",
+					command: "npm test",
+				},
+				partial: false,
+			}
+
+			await attemptCompletionTool(
+				mockCline,
+				block as any,
+				mockAskApproval,
+				mockHandleError,
+				mockPushToolResult,
+				mockRemoveClosingTag,
+				mockToolDescription,
+				mockAskFinishSubTaskApproval,
+			)
+
+			// When there's a lastMessage that's not a command ask, it should say completion_result first
+			expect(mockCline.say).toHaveBeenCalledWith("completion_result", "Task completed successfully", undefined, false)
+			expect(mockCline.emit).toHaveBeenCalledWith("taskCompleted", mockCline.taskId, expect.any(Object), expect.any(Object))
+			expect(mockAskApproval).toHaveBeenCalledWith("command", "npm test")
+			expect(mockExecuteCommand).toHaveBeenCalled()
+		})
+
+		it("should not execute command when user rejects", async () => {
+			mockAskApproval.mockResolvedValue(false)
+			const mockExecuteCommand = executeCommand as jest.Mock
+
+			// Mock clineMessages with a previous message that's not a command ask
+			mockCline.clineMessages = [{ say: "previous_message", text: "Previous message" }]
+
+			const block = {
+				params: {
+					result: "Task completed successfully",
+					command: "npm test",
+				},
+				partial: false,
+			}
+
+			await attemptCompletionTool(
+				mockCline,
+				block as any,
+				mockAskApproval,
+				mockHandleError,
+				mockPushToolResult,
+				mockRemoveClosingTag,
+				mockToolDescription,
+				mockAskFinishSubTaskApproval,
+			)
+
+			// Should say completion_result and emit before asking for approval
+			expect(mockCline.say).toHaveBeenCalledWith("completion_result", "Task completed successfully", undefined, false)
+			expect(mockCline.emit).toHaveBeenCalledWith("taskCompleted", mockCline.taskId, expect.any(Object), expect.any(Object))
+			expect(mockAskApproval).toHaveBeenCalledWith("command", "npm test")
+			expect(mockExecuteCommand).not.toHaveBeenCalled()
+		})
+	})
+
+	describe("when experiment is enabled", () => {
+		beforeEach(() => {
+			mockCline.providerRef.deref().getState.mockResolvedValue({
+				experiments: {
+					[EXPERIMENT_IDS.DISABLE_COMPLETION_COMMAND]: true,
+				},
+			})
+		})
+
+		it("should NOT execute command even when provided", async () => {
+			const mockExecuteCommand = executeCommand as jest.Mock
+
+			const block = {
+				params: {
+					result: "Task completed successfully",
+					command: "npm test",
+				},
+				partial: false,
+			}
+
+			await attemptCompletionTool(
+				mockCline,
+				block as any,
+				mockAskApproval,
+				mockHandleError,
+				mockPushToolResult,
+				mockRemoveClosingTag,
+				mockToolDescription,
+				mockAskFinishSubTaskApproval,
+			)
+
+			expect(mockCline.say).toHaveBeenCalledWith("completion_result", "Task completed successfully", undefined, false)
+			expect(mockAskApproval).not.toHaveBeenCalled()
+			expect(mockExecuteCommand).not.toHaveBeenCalled()
+		})
+
+		it("should complete normally without command execution", async () => {
+			const block = {
+				params: {
+					result: "Task completed successfully",
+					command: "npm test",
+				},
+				partial: false,
+			}
+
+			await attemptCompletionTool(
+				mockCline,
+				block as any,
+				mockAskApproval,
+				mockHandleError,
+				mockPushToolResult,
+				mockRemoveClosingTag,
+				mockToolDescription,
+				mockAskFinishSubTaskApproval,
+			)
+
+			expect(mockCline.say).toHaveBeenCalledWith("completion_result", "Task completed successfully", undefined, false)
+			expect(mockCline.emit).toHaveBeenCalledWith("taskCompleted", mockCline.taskId, expect.any(Object), expect.any(Object))
+			expect(mockAskApproval).not.toHaveBeenCalled()
+		})
+	})
+
+	describe("when no command is provided", () => {
+		it("should work the same regardless of experiment state", async () => {
+			const block = {
+				params: {
+					result: "Task completed successfully",
+				},
+				partial: false,
+			}
+
+			// Test with experiment disabled
+			mockCline.providerRef.deref().getState.mockResolvedValue({
+				experiments: {
+					[EXPERIMENT_IDS.DISABLE_COMPLETION_COMMAND]: false,
+				},
+			})
+
+			await attemptCompletionTool(
+				mockCline,
+				block as any,
+				mockAskApproval,
+				mockHandleError,
+				mockPushToolResult,
+				mockRemoveClosingTag,
+				mockToolDescription,
+				mockAskFinishSubTaskApproval,
+			)
+
+			expect(mockCline.say).toHaveBeenCalledWith("completion_result", "Task completed successfully", undefined, false)
+			expect(mockAskApproval).not.toHaveBeenCalled()
+
+			// Reset mocks
+			jest.clearAllMocks()
+
+			// Test with experiment enabled
+			mockCline.providerRef.deref().getState.mockResolvedValue({
+				experiments: {
+					[EXPERIMENT_IDS.DISABLE_COMPLETION_COMMAND]: true,
+				},
+			})
+
+			await attemptCompletionTool(
+				mockCline,
+				block as any,
+				mockAskApproval,
+				mockHandleError,
+				mockPushToolResult,
+				mockRemoveClosingTag,
+				mockToolDescription,
+				mockAskFinishSubTaskApproval,
+			)
+
+			expect(mockCline.say).toHaveBeenCalledWith("completion_result", "Task completed successfully", undefined, false)
+			expect(mockAskApproval).not.toHaveBeenCalled()
+		})
+	})
+
+	describe("error handling", () => {
+		it("should handle missing result parameter", async () => {
+			const block = {
+				params: {},
+				partial: false,
+			}
+
+			await attemptCompletionTool(
+				mockCline,
+				block as any,
+				mockAskApproval,
+				mockHandleError,
+				mockPushToolResult,
+				mockRemoveClosingTag,
+				mockToolDescription,
+				mockAskFinishSubTaskApproval,
+			)
+
+			expect(mockCline.consecutiveMistakeCount).toBe(1)
+			expect(mockCline.recordToolError).toHaveBeenCalledWith("attempt_completion")
+			expect(mockCline.sayAndCreateMissingParamError).toHaveBeenCalledWith("attempt_completion", "result")
+		})
+
+		it("should handle state retrieval errors gracefully", async () => {
+			// Mock provider ref to return null
+			mockCline.providerRef.deref.mockReturnValue(null)
+
+			// Mock clineMessages to simulate no previous messages
+			mockCline.clineMessages = []
+
+			const block = {
+				params: {
+					result: "Task completed successfully",
+					command: "npm test",
+				},
+				partial: false,
+			}
+
+			await attemptCompletionTool(
+				mockCline,
+				block as any,
+				mockAskApproval,
+				mockHandleError,
+				mockPushToolResult,
+				mockRemoveClosingTag,
+				mockToolDescription,
+				mockAskFinishSubTaskApproval,
+			)
+
+			// When state retrieval fails, it defaults to not disabled (false), so it will try to execute command
+			// Since there's no lastMessage, it goes directly to askApproval
+			expect(mockAskApproval).toHaveBeenCalledWith("command", "npm test")
+			expect(mockCline.say).not.toHaveBeenCalled()
+		})
+	})
+})

+ 10 - 1
src/core/tools/attemptCompletionTool.ts

@@ -15,6 +15,7 @@ import {
 } from "../../shared/tools"
 import { formatResponse } from "../prompts/responses"
 import { type ExecuteCommandOptions, executeCommand } from "./executeCommandTool"
+import { EXPERIMENT_IDS, experiments, experimentDefault } from "../../shared/experiments"
 
 export async function attemptCompletionTool(
 	cline: Task,
@@ -68,7 +69,15 @@ export async function attemptCompletionTool(
 
 			let commandResult: ToolResponse | undefined
 
-			if (command) {
+			// Check if command execution is disabled via experiment
+			const state = await cline.providerRef.deref()?.getState()
+			const experimentsConfig = state?.experiments ?? experimentDefault
+			const isCommandDisabled = experiments.isEnabled(
+				experimentsConfig,
+				EXPERIMENT_IDS.DISABLE_COMPLETION_COMMAND,
+			)
+
+			if (command && !isCommandDisabled) {
 				if (lastMessage && lastMessage.ask !== "command") {
 					// Haven't sent a command message yet so first send completion_result then command.
 					await cline.say("completion_result", result, undefined, false)

+ 3 - 0
src/shared/__tests__/experiments.test.ts

@@ -19,6 +19,7 @@ describe("experiments", () => {
 			const experiments: Record<ExperimentId, boolean> = {
 				powerSteering: false,
 				concurrentFileReads: false,
+				disableCompletionCommand: false,
 			}
 			expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.POWER_STEERING)).toBe(false)
 		})
@@ -27,6 +28,7 @@ describe("experiments", () => {
 			const experiments: Record<ExperimentId, boolean> = {
 				powerSteering: true,
 				concurrentFileReads: false,
+				disableCompletionCommand: false,
 			}
 			expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.POWER_STEERING)).toBe(true)
 		})
@@ -35,6 +37,7 @@ describe("experiments", () => {
 			const experiments: Record<ExperimentId, boolean> = {
 				powerSteering: false,
 				concurrentFileReads: false,
+				disableCompletionCommand: false,
 			}
 			expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.POWER_STEERING)).toBe(false)
 		})

+ 2 - 0
src/shared/experiments.ts

@@ -3,6 +3,7 @@ import type { AssertEqual, Equals, Keys, Values, ExperimentId } from "@roo-code/
 export const EXPERIMENT_IDS = {
 	POWER_STEERING: "powerSteering",
 	CONCURRENT_FILE_READS: "concurrentFileReads",
+	DISABLE_COMPLETION_COMMAND: "disableCompletionCommand",
 } as const satisfies Record<string, ExperimentId>
 
 type _AssertExperimentIds = AssertEqual<Equals<ExperimentId, Values<typeof EXPERIMENT_IDS>>>
@@ -16,6 +17,7 @@ interface ExperimentConfig {
 export const experimentConfigsMap: Record<ExperimentKey, ExperimentConfig> = {
 	POWER_STEERING: { enabled: false },
 	CONCURRENT_FILE_READS: { enabled: false },
+	DISABLE_COMPLETION_COMMAND: { enabled: false },
 }
 
 export const experimentDefault = Object.fromEntries(

+ 2 - 0
webview-ui/src/context/__tests__/ExtensionStateContext.test.tsx

@@ -224,6 +224,7 @@ describe("mergeExtensionState", () => {
 				powerSteering: true,
 				autoCondenseContext: true,
 				concurrentFileReads: true,
+				disableCompletionCommand: false,
 			} as Record<ExperimentId, boolean>,
 		}
 
@@ -238,6 +239,7 @@ describe("mergeExtensionState", () => {
 			powerSteering: true,
 			autoCondenseContext: true,
 			concurrentFileReads: true,
+			disableCompletionCommand: false,
 		})
 	})
 })

+ 4 - 0
webview-ui/src/i18n/locales/ca/settings.json

@@ -492,6 +492,10 @@
 		"CONCURRENT_FILE_READS": {
 			"name": "Habilitar lectura concurrent de fitxers",
 			"description": "Quan està habilitat, Roo pot llegir múltiples fitxers en una sola sol·licitud (fins a 15 fitxers). Quan està deshabilitat, Roo ha de llegir fitxers un per un. Deshabilitar-ho pot ajudar quan es treballa amb models menys capaços o quan voleu més control sobre l'accés als fitxers."
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "Desactivar l'execució de comandes a attempt_completion",
+			"description": "Quan està activat, l'eina attempt_completion no executarà comandes. Aquesta és una característica experimental per preparar la futura eliminació de l'execució de comandes en la finalització de tasques."
 		}
 	},
 	"promptCaching": {

+ 4 - 0
webview-ui/src/i18n/locales/de/settings.json

@@ -492,6 +492,10 @@
 		"CONCURRENT_FILE_READS": {
 			"name": "Gleichzeitiges Lesen von Dateien aktivieren",
 			"description": "Wenn aktiviert, kann Roo mehrere Dateien in einer einzigen Anfrage lesen (bis zu 15 Dateien). Wenn deaktiviert, muss Roo Dateien nacheinander lesen. Das Deaktivieren kann helfen, wenn Sie mit weniger leistungsfähigen Modellen arbeiten oder mehr Kontrolle über den Dateizugriff wünschen."
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "Befehlsausführung in attempt_completion deaktivieren",
+			"description": "Wenn aktiviert, führt das Tool attempt_completion keine Befehle aus. Dies ist eine experimentelle Funktion, um die Abschaffung der Befehlsausführung bei Aufgabenabschluss vorzubereiten."
 		}
 	},
 	"promptCaching": {

+ 4 - 0
webview-ui/src/i18n/locales/en/settings.json

@@ -492,6 +492,10 @@
 		"MULTI_SEARCH_AND_REPLACE": {
 			"name": "Use experimental multi block diff tool",
 			"description": "When enabled, Roo will use multi block diff tool. This will try to update multiple code blocks in the file in one request."
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "Disable command execution in attempt_completion",
+			"description": "When enabled, the attempt_completion tool will not execute commands. This is an experimental feature to prepare for deprecating command execution in task completion."
 		}
 	},
 	"promptCaching": {

+ 4 - 0
webview-ui/src/i18n/locales/es/settings.json

@@ -492,6 +492,10 @@
 		"CONCURRENT_FILE_READS": {
 			"name": "Habilitar lectura concurrente de archivos",
 			"description": "Cuando está habilitado, Roo puede leer múltiples archivos en una sola solicitud (hasta 15 archivos). Cuando está deshabilitado, Roo debe leer archivos uno a la vez. Deshabilitarlo puede ayudar cuando se trabaja con modelos menos capaces o cuando desea más control sobre el acceso a archivos."
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "Desactivar la ejecución de comandos en attempt_completion",
+			"description": "Cuando está activado, la herramienta attempt_completion no ejecutará comandos. Esta es una función experimental para preparar la futura eliminación de la ejecución de comandos en la finalización de tareas."
 		}
 	},
 	"promptCaching": {

+ 4 - 0
webview-ui/src/i18n/locales/fr/settings.json

@@ -492,6 +492,10 @@
 		"CONCURRENT_FILE_READS": {
 			"name": "Activer la lecture simultanée de fichiers",
 			"description": "Lorsqu'activé, Roo peut lire plusieurs fichiers dans une seule requête (jusqu'à 15 fichiers). Lorsque désactivé, Roo doit lire les fichiers un par un. La désactivation peut aider lors du travail avec des modèles moins performants ou lorsque vous souhaitez plus de contrôle sur l'accès aux fichiers."
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "Désactiver l'exécution des commandes dans attempt_completion",
+			"description": "Lorsque cette option est activée, l'outil attempt_completion n'exécutera pas de commandes. Il s'agit d'une fonctionnalité expérimentale visant à préparer la dépréciation de l'exécution des commandes lors de la finalisation des tâches."
 		}
 	},
 	"promptCaching": {

+ 4 - 0
webview-ui/src/i18n/locales/hi/settings.json

@@ -492,6 +492,10 @@
 		"CONCURRENT_FILE_READS": {
 			"name": "समवर्ती फ़ाइल पढ़ना सक्षम करें",
 			"description": "सक्षम होने पर, Roo एक ही अनुरोध में कई फ़ाइलें (अधिकतम 15 फ़ाइलें) पढ़ सकता है। अक्षम होने पर, Roo को एक बार में एक फ़ाइल पढ़नी होगी। कम सक्षम मॉडल के साथ काम करते समय या जब आप फ़ाइल एक्सेस पर अधिक नियंत्रण चाहते हैं तो इसे अक्षम करना मददगार हो सकता है।"
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "attempt_completion में कमांड निष्पादन अक्षम करें",
+			"description": "जब सक्षम किया जाता है, तो attempt_completion टूल कमांड निष्पादित नहीं करेगा। यह कार्य पूर्ण होने पर कमांड निष्पादन को पदावनत करने की तैयारी के लिए एक प्रयोगात्मक सुविधा है।"
 		}
 	},
 	"promptCaching": {

+ 4 - 0
webview-ui/src/i18n/locales/it/settings.json

@@ -492,6 +492,10 @@
 		"CONCURRENT_FILE_READS": {
 			"name": "Abilita lettura simultanea dei file",
 			"description": "Quando abilitato, Roo può leggere più file in una singola richiesta (fino a 15 file). Quando disabilitato, Roo deve leggere i file uno alla volta. Disabilitarlo può aiutare quando si lavora con modelli meno capaci o quando si desidera maggiore controllo sull'accesso ai file."
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "Disabilita l'esecuzione dei comandi in attempt_completion",
+			"description": "Se abilitato, lo strumento attempt_completion non eseguirà comandi. Questa è una funzionalità sperimentale per preparare la futura deprecazione dell'esecuzione dei comandi al completamento dell'attività."
 		}
 	},
 	"promptCaching": {

+ 4 - 0
webview-ui/src/i18n/locales/ja/settings.json

@@ -492,6 +492,10 @@
 		"CONCURRENT_FILE_READS": {
 			"name": "並行ファイル読み取りを有効にする",
 			"description": "有効にすると、Rooは1回のリクエストで複数のファイル(最大15ファイル)を読み取ることができます。無効にすると、Rooはファイルを1つずつ読み取る必要があります。能力の低いモデルで作業する場合や、ファイルアクセスをより細かく制御したい場合は、無効にすると役立ちます。"
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "attempt_completionでのコマンド実行を無効にする",
+			"description": "有効にすると、attempt_completionツールはコマンドを実行しません。これは、タスク完了時のコマンド実行の非推奨化に備えるための実験的な機能です。"
 		}
 	},
 	"promptCaching": {

+ 4 - 0
webview-ui/src/i18n/locales/ko/settings.json

@@ -492,6 +492,10 @@
 		"CONCURRENT_FILE_READS": {
 			"name": "동시 파일 읽기 활성화",
 			"description": "활성화하면 Roo가 한 번의 요청으로 여러 파일(최대 15개)을 읽을 수 있습니다. 비활성화하면 Roo는 파일을 하나씩 읽어야 합니다. 성능이 낮은 모델로 작업하거나 파일 액세스를 더 제어하려는 경우 비활성화하면 도움이 될 수 있습니다."
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "attempt_completion에서 명령 실행 비활성화",
+			"description": "활성화하면 attempt_completion 도구가 명령을 실행하지 않습니다. 이는 작업 완료 시 명령 실행을 더 이상 사용하지 않도록 준비하기 위한 실험적 기능입니다."
 		}
 	},
 	"promptCaching": {

+ 4 - 0
webview-ui/src/i18n/locales/nl/settings.json

@@ -492,6 +492,10 @@
 		"CONCURRENT_FILE_READS": {
 			"name": "Gelijktijdig lezen van bestanden inschakelen",
 			"description": "Wanneer ingeschakeld, kan Roo meerdere bestanden in één verzoek lezen (tot 15 bestanden). Wanneer uitgeschakeld, moet Roo bestanden één voor één lezen. Uitschakelen kan helpen bij het werken met minder capabele modellen of wanneer u meer controle over bestandstoegang wilt."
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "Commando-uitvoering in attempt_completion uitschakelen",
+			"description": "Indien ingeschakeld, zal de attempt_completion tool geen commando's uitvoeren. Dit is een experimentele functie ter voorbereiding op het afschaffen van commando-uitvoering bij taakvoltooiing."
 		}
 	},
 	"promptCaching": {

+ 4 - 0
webview-ui/src/i18n/locales/pl/settings.json

@@ -492,6 +492,10 @@
 		"CONCURRENT_FILE_READS": {
 			"name": "Włącz jednoczesne odczytywanie plików",
 			"description": "Po włączeniu Roo może odczytać wiele plików w jednym żądaniu (do 15 plików). Po wyłączeniu Roo musi odczytywać pliki pojedynczo. Wyłączenie może pomóc podczas pracy z mniej wydajnymi modelami lub gdy chcesz mieć większą kontrolę nad dostępem do plików."
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "Wyłącz wykonywanie poleceń w attempt_completion",
+			"description": "Gdy włączone, narzędzie attempt_completion nie będzie wykonywać poleceń. Jest to funkcja eksperymentalna przygotowująca do przyszłego wycofania wykonywania poleceń po zakończeniu zadania."
 		}
 	},
 	"promptCaching": {

+ 4 - 0
webview-ui/src/i18n/locales/pt-BR/settings.json

@@ -492,6 +492,10 @@
 		"CONCURRENT_FILE_READS": {
 			"name": "Habilitar leitura simultânea de arquivos",
 			"description": "Quando habilitado, o Roo pode ler vários arquivos em uma única solicitação (até 15 arquivos). Quando desabilitado, o Roo deve ler arquivos um de cada vez. Desabilitar pode ajudar ao trabalhar com modelos menos capazes ou quando você deseja mais controle sobre o acesso aos arquivos."
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "Desativar execução de comando em attempt_completion",
+			"description": "Quando ativado, a ferramenta attempt_completion não executará comandos. Este é um recurso experimental para preparar a futura descontinuação da execução de comandos na conclusão da tarefa."
 		}
 	},
 	"promptCaching": {

+ 4 - 0
webview-ui/src/i18n/locales/ru/settings.json

@@ -492,6 +492,10 @@
 		"CONCURRENT_FILE_READS": {
 			"name": "Включить одновременное чтение файлов",
 			"description": "При включении Roo может читать несколько файлов в одном запросе (до 15 файлов). При отключении Roo должен читать файлы по одному. Отключение может помочь при работе с менее производительными моделями или когда вы хотите больше контроля над доступом к файлам."
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "Отключить выполнение команд в attempt_completion",
+			"description": "Если включено, инструмент attempt_completion не будет выполнять команды. Это экспериментальная функция для подготовки к будущему прекращению поддержки выполнения команд при завершении задачи."
 		}
 	},
 	"promptCaching": {

+ 4 - 0
webview-ui/src/i18n/locales/tr/settings.json

@@ -492,6 +492,10 @@
 		"CONCURRENT_FILE_READS": {
 			"name": "Eşzamanlı dosya okumayı etkinleştir",
 			"description": "Etkinleştirildiğinde, Roo tek bir istekte birden fazla dosya okuyabilir (en fazla 15 dosya). Devre dışı bırakıldığında, Roo dosyaları birer birer okumalıdır. Daha az yetenekli modellerle çalışırken veya dosya erişimi üzerinde daha fazla kontrol istediğinizde devre dışı bırakmak yardımcı olabilir."
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "attempt_completion'da komut yürütmeyi devre dışı bırak",
+			"description": "Etkinleştirildiğinde, attempt_completion aracı komutları yürütmez. Bu, görev tamamlandığında komut yürütmenin kullanımdan kaldırılmasına hazırlanmak için deneysel bir özelliktir."
 		}
 	},
 	"promptCaching": {

+ 4 - 0
webview-ui/src/i18n/locales/vi/settings.json

@@ -492,6 +492,10 @@
 		"CONCURRENT_FILE_READS": {
 			"name": "Bật đọc tệp đồng thời",
 			"description": "Khi bật, Roo có thể đọc nhiều tệp trong một yêu cầu duy nhất (tối đa 15 tệp). Khi tắt, Roo phải đọc từng tệp một. Việc tắt có thể hữu ích khi làm việc với các mô hình ít khả năng hơn hoặc khi bạn muốn kiểm soát nhiều hơn quyền truy cập tệp."
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "Tắt thực thi lệnh trong attempt_completion",
+			"description": "Khi được bật, công cụ attempt_completion sẽ không thực thi lệnh. Đây là một tính năng thử nghiệm để chuẩn bị cho việc ngừng hỗ trợ thực thi lệnh khi hoàn thành tác vụ trong tương lai."
 		}
 	},
 	"promptCaching": {

+ 4 - 0
webview-ui/src/i18n/locales/zh-CN/settings.json

@@ -492,6 +492,10 @@
 		"CONCURRENT_FILE_READS": {
 			"name": "启用并发文件读取",
 			"description": "启用后,Roo 可以在单个请求中读取多个文件(最多 15 个文件)。禁用后,Roo 必须逐个读取文件。在使用能力较弱的模型或希望对文件访问有更多控制时,禁用此功能可能会有所帮助。"
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "禁用 attempt_completion 中的命令执行",
+			"description": "启用后,attempt_completion 工具将不会执行命令。这是一项实验性功能,旨在为将来弃用任务完成时的命令执行做准备。"
 		}
 	},
 	"promptCaching": {

+ 4 - 0
webview-ui/src/i18n/locales/zh-TW/settings.json

@@ -492,6 +492,10 @@
 		"CONCURRENT_FILE_READS": {
 			"name": "啟用並行檔案讀取",
 			"description": "啟用後,Roo 可以在單一請求中讀取多個檔案(最多 15 個檔案)。停用後,Roo 必須逐一讀取檔案。在使用能力較弱的模型或希望對檔案存取有更多控制時,停用此功能可能會有所幫助。"
+		},
+		"DISABLE_COMPLETION_COMMAND": {
+			"name": "停用 attempt_completion 中的指令執行",
+			"description": "啟用後,attempt_completion 工具將不會執行指令。這是一項實驗性功能,旨在為未來停用工作完成時的指令執行做準備。"
 		}
 	},
 	"promptCaching": {