8 månader sedan · 6baf28c328
--- a/src/core/tools/__tests__/readFileTool.test.ts
+++ b/src/core/tools/__tests__/readFileTool.test.ts
@@ -44,16 +44,14 @@ const addLineNumbersMock = jest.fn().mockImplementation((text, startLine = 1) =>
 
				 	return lines.map((line, i) => `${startLine + i} | ${line}`).join("\n")
			
 
				 })
			
 
				 
			
 
				-const extractTextFromFileMock = jest.fn().mockImplementation((_filePath) => {
			
 
				-	// Call addLineNumbersMock to register the call
			
 
				-	addLineNumbersMock(mockInputContent)
			
 
				-	return Promise.resolve(addLineNumbersMock(mockInputContent))
			
 
				-})
			
 
				+const extractTextFromFileMock = jest.fn()
			
 
				+const getSupportedBinaryFormatsMock = jest.fn(() => [".pdf", ".docx", ".ipynb"])
			
 
				 
			
 
				 // Now assign the mocks to the module
			
 
				 const extractTextModule = jest.requireMock("../../../integrations/misc/extract-text")
			
 
				 extractTextModule.extractTextFromFile = extractTextFromFileMock
			
 
				 extractTextModule.addLineNumbers = addLineNumbersMock
			
 
				+extractTextModule.getSupportedBinaryFormats = getSupportedBinaryFormatsMock
			
 
				 
			
 
				 jest.mock("../../ignore/RooIgnoreController", () => ({
			
 
				 	RooIgnoreController: class {
			
@@ -128,6 +126,9 @@ describe("read_file tool with maxReadFileLine setting", () => {
 
				 		mockCline.say = jest.fn().mockResolvedValue(undefined)
			
 
				 		mockCline.ask = jest.fn().mockResolvedValue({ response: "yesButtonClicked" })
			
 
				 		mockCline.presentAssistantMessage = jest.fn()
			
 
				+		mockCline.handleError = jest.fn().mockResolvedValue(undefined)
			
 
				+		mockCline.pushToolResult = jest.fn()
			
 
				+		mockCline.removeClosingTag = jest.fn((tag, content) => content)
			
 
				 
			
 
				 		mockCline.fileContextTracker = {
			
 
				 			trackFileContext: jest.fn().mockResolvedValue(undefined),
			
@@ -410,6 +411,13 @@ describe("read_file tool XML output structure", () => {
 
				 		mockedPathResolve.mockReturnValue(absoluteFilePath)
			
 
				 		mockedIsBinaryFile.mockResolvedValue(false)
			
 
				 
			
 
				+		// Set default implementation for extractTextFromFile
			
 
				+		mockedExtractTextFromFile.mockImplementation((filePath) => {
			
 
				+			// Call addLineNumbersMock to register the call
			
 
				+			addLineNumbersMock(mockInputContent)
			
 
				+			return Promise.resolve(addLineNumbersMock(mockInputContent))
			
 
				+		})
			
 
				+
			
 
				 		mockInputContent = fileContent
			
 
				 
			
 
				 		// Setup mock provider with default maxReadFileLine
			
@@ -1126,34 +1134,101 @@ describe("read_file tool XML output structure", () => {
 
				 			const textPath = "test/text.txt"
			
 
				 			const binaryPath = "test/binary.pdf"
			
 
				 			const numberedContent = "1 | Text file content"
			
 
				+			const pdfContent = "1 | PDF content extracted"
			
 
				+
			
 
				+			// Mock path.resolve to return the expected paths
			
 
				+			mockedPathResolve.mockImplementation((cwd, relPath) => `/${relPath}`)
			
 
				 
			
 
				 			// Mock binary file detection
			
 
				-			mockedIsBinaryFile.mockImplementationOnce(() => Promise.resolve(false))
			
 
				-			mockedIsBinaryFile.mockImplementationOnce(() => Promise.resolve(true))
			
 
				+			mockedIsBinaryFile.mockImplementation((path) => {
			
 
				+				if (path.includes("text.txt")) return Promise.resolve(false)
			
 
				+				if (path.includes("binary.pdf")) return Promise.resolve(true)
			
 
				+				return Promise.resolve(false)
			
 
				+			})
			
 
				+
			
 
				+			mockedCountFileLines.mockImplementation((path) => {
			
 
				+				return Promise.resolve(1)
			
 
				+			})
			
 
				 
			
 
				-			// Mock content based on file type
			
 
				 			mockedExtractTextFromFile.mockImplementation((path) => {
			
 
				-				if (path.includes("binary")) {
			
 
				-					return Promise.resolve("")
			
 
				+				if (path.includes("binary.pdf")) {
			
 
				+					return Promise.resolve(pdfContent)
			
 
				 				}
			
 
				 				return Promise.resolve(numberedContent)
			
 
				 			})
			
 
				-			mockedCountFileLines.mockImplementation((path) => {
			
 
				-				return Promise.resolve(path.includes("binary") ? 0 : 1)
			
 
				-			})
			
 
				+
			
 
				+			// Configure mocks for the test
			
 
				 			mockProvider.getState.mockResolvedValue({ maxReadFileLine: -1 })
			
 
				 
			
 
				-			// Execute
			
 
				-			const result = await executeReadFileTool(
			
 
				-				{
			
 
				+			// Create standalone mock functions
			
 
				+			const mockAskApproval = jest.fn().mockResolvedValue({ response: "yesButtonClicked" })
			
 
				+			const mockHandleError = jest.fn().mockResolvedValue(undefined)
			
 
				+			const mockPushToolResult = jest.fn()
			
 
				+			const mockRemoveClosingTag = jest.fn((tag, content) => content)
			
 
				+
			
 
				+			// Create a tool use object directly
			
 
				+			const toolUse: ReadFileToolUse = {
			
 
				+				type: "tool_use",
			
 
				+				name: "read_file",
			
 
				+				params: {
			
 
				 					args: `<file><path>${textPath}</path></file><file><path>${binaryPath}</path></file>`,
			
 
				 				},
			
 
				-				{ totalLines: 1 },
			
 
				+				partial: false,
			
 
				+			}
			
 
				+
			
 
				+			// Call readFileTool directly
			
 
				+			await readFileTool(
			
 
				+				mockCline,
			
 
				+				toolUse,
			
 
				+				mockAskApproval,
			
 
				+				mockHandleError,
			
 
				+				mockPushToolResult,
			
 
				+				mockRemoveClosingTag,
			
 
				 			)
			
 
				 
			
 
				-			// Verify
			
 
				-			expect(result).toBe(
			
 
				-				`<files>\n<file><path>${textPath}</path>\n<content lines="1-1">\n${numberedContent}</content>\n</file>\n<file><path>${binaryPath}</path>\n<notice>Binary file</notice>\n</file>\n</files>`,
			
 
				+			// Check the result
			
 
				+			expect(mockPushToolResult).toHaveBeenCalledWith(
			
 
				+				`<files>\n<file><path>${textPath}</path>\n<content lines="1-1">\n${numberedContent}</content>\n</file>\n<file><path>${binaryPath}</path>\n<content lines="1-1">\n${pdfContent}</content>\n</file>\n</files>`,
			
 
				+			)
			
 
				+		})
			
 
				+
			
 
				+		it("should block unsupported binary files", async () => {
			
 
				+			// Setup
			
 
				+			const unsupportedBinaryPath = "test/binary.exe"
			
 
				+
			
 
				+			mockedIsBinaryFile.mockImplementation(() => Promise.resolve(true))
			
 
				+			mockedCountFileLines.mockImplementation(() => Promise.resolve(1))
			
 
				+			mockProvider.getState.mockResolvedValue({ maxReadFileLine: -1 })
			
 
				+
			
 
				+			// Create standalone mock functions
			
 
				+			const mockAskApproval = jest.fn().mockResolvedValue({ response: "yesButtonClicked" })
			
 
				+			const mockHandleError = jest.fn().mockResolvedValue(undefined)
			
 
				+			const mockPushToolResult = jest.fn()
			
 
				+			const mockRemoveClosingTag = jest.fn((tag, content) => content)
			
 
				+
			
 
				+			// Create a tool use object directly
			
 
				+			const toolUse: ReadFileToolUse = {
			
 
				+				type: "tool_use",
			
 
				+				name: "read_file",
			
 
				+				params: {
			
 
				+					args: `<file><path>${unsupportedBinaryPath}</path></file>`,
			
 
				+				},
			
 
				+				partial: false,
			
 
				+			}
			
 
				+
			
 
				+			// Call readFileTool directly
			
 
				+			await readFileTool(
			
 
				+				mockCline,
			
 
				+				toolUse,
			
 
				+				mockAskApproval,
			
 
				+				mockHandleError,
			
 
				+				mockPushToolResult,
			
 
				+				mockRemoveClosingTag,
			
 
				+			)
			
 
				+
			
 
				+			// Check the result
			
 
				+			expect(mockPushToolResult).toHaveBeenCalledWith(
			
 
				+				`<files>\n<file><path>${unsupportedBinaryPath}</path>\n<notice>Binary file</notice>\n</file>\n</files>`,
			
 
				 			)
			
 
				 		})
			
 
				 	})
			
@@ -1165,6 +1240,7 @@ describe("read_file tool XML output structure", () => {
 
				 			const maxReadFileLine = -1
			
 
				 			const totalLines = 0
			
 
				 			mockedCountFileLines.mockResolvedValue(totalLines)
			
 
				+			mockedIsBinaryFile.mockResolvedValue(false) // Ensure empty file is not detected as binary
			
 
				 
			
 
				 			// Execute
			
 
				 			const result = await executeReadFileTool({}, { maxReadFileLine, totalLines })
			
--- a/src/core/tools/readFileTool.ts
+++ b/src/core/tools/readFileTool.ts
@@ -11,7 +11,7 @@ import { isPathOutsideWorkspace } from "../../utils/pathUtils"
 
				 import { getReadablePath } from "../../utils/path"
			
 
				 import { countFileLines } from "../../integrations/misc/line-counter"
			
 
				 import { readLines } from "../../integrations/misc/read-lines"
			
 
				-import { extractTextFromFile, addLineNumbers } from "../../integrations/misc/extract-text"
			
 
				+import { extractTextFromFile, addLineNumbers, getSupportedBinaryFormats } from "../../integrations/misc/extract-text"
			
 
				 import { parseSourceCodeDefinitionsForFile } from "../../services/tree-sitter"
			
 
				 import { parseXml } from "../../utils/xml"
			
 
				 
			
@@ -435,13 +435,19 @@ export async function readFileTool(
 
				 			try {
			
 
				 				const [totalLines, isBinary] = await Promise.all([countFileLines(fullPath), isBinaryFile(fullPath)])
			
 
				 
			
 
				-				// Handle binary files
			
 
				+				// Handle binary files (but allow specific file types that extractTextFromFile can handle)
			
 
				 				if (isBinary) {
			
 
				-					updateFileResult(relPath, {
			
 
				-						notice: "Binary file",
			
 
				-						xmlContent: `<file><path>${relPath}</path>\n<notice>Binary file</notice>\n</file>`,
			
 
				-					})
			
 
				-					continue
			
 
				+					const fileExtension = path.extname(relPath).toLowerCase()
			
 
				+					const supportedBinaryFormats = getSupportedBinaryFormats()
			
 
				+
			
 
				+					if (!supportedBinaryFormats.includes(fileExtension)) {
			
 
				+						updateFileResult(relPath, {
			
 
				+							notice: "Binary file",
			
 
				+							xmlContent: `<file><path>${relPath}</path>\n<notice>Binary file</notice>\n</file>`,
			
 
				+						})
			
 
				+						continue
			
 
				+					}
			
 
				+					// For supported binary formats (.pdf, .docx, .ipynb), continue to extractTextFromFile
			
 
				 				}
			
 
				 
			
 
				 				// Handle range reads (bypass maxReadFileLine)
			
--- a/src/integrations/misc/extract-text.ts
+++ b/src/integrations/misc/extract-text.ts
@@ -5,34 +5,6 @@ import mammoth from "mammoth"
 
				 import fs from "fs/promises"
			
 
				 import { isBinaryFile } from "isbinaryfile"
			
 
				 
			
 
				-export async function extractTextFromFile(filePath: string): Promise<string> {
			
 
				-	try {
			
 
				-		await fs.access(filePath)
			
 
				-	} catch (error) {
			
 
				-		throw new Error(`File not found: ${filePath}`)
			
 
				-	}
			
 
				-
			
 
				-	const fileExtension = path.extname(filePath).toLowerCase()
			
 
				-
			
 
				-	switch (fileExtension) {
			
 
				-		case ".pdf":
			
 
				-			return extractTextFromPDF(filePath)
			
 
				-		case ".docx":
			
 
				-			return extractTextFromDOCX(filePath)
			
 
				-		case ".ipynb":
			
 
				-			return extractTextFromIPYNB(filePath)
			
 
				-		default: {
			
 
				-			const isBinary = await isBinaryFile(filePath).catch(() => false)
			
 
				-
			
 
				-			if (!isBinary) {
			
 
				-				return addLineNumbers(await fs.readFile(filePath, "utf8"))
			
 
				-			} else {
			
 
				-				throw new Error(`Cannot read text for file type: ${fileExtension}`)
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				 async function extractTextFromPDF(filePath: string): Promise<string> {
			
 
				 	const dataBuffer = await fs.readFile(filePath)
			
 
				 	const data = await pdf(dataBuffer)
			
@@ -58,6 +30,47 @@ async function extractTextFromIPYNB(filePath: string): Promise<string> {
 
				 	return addLineNumbers(extractedText)
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * Map of supported binary file formats to their extraction functions
			
 
				+ */
			
 
				+const SUPPORTED_BINARY_FORMATS = {
			
 
				+	".pdf": extractTextFromPDF,
			
 
				+	".docx": extractTextFromDOCX,
			
 
				+	".ipynb": extractTextFromIPYNB,
			
 
				+} as const
			
 
				+
			
 
				+/**
			
 
				+ * Returns the list of supported binary file formats that can be processed by extractTextFromFile
			
 
				+ */
			
 
				+export function getSupportedBinaryFormats(): string[] {
			
 
				+	return Object.keys(SUPPORTED_BINARY_FORMATS)
			
 
				+}
			
 
				+
			
 
				+export async function extractTextFromFile(filePath: string): Promise<string> {
			
 
				+	try {
			
 
				+		await fs.access(filePath)
			
 
				+	} catch (error) {
			
 
				+		throw new Error(`File not found: ${filePath}`)
			
 
				+	}
			
 
				+
			
 
				+	const fileExtension = path.extname(filePath).toLowerCase()
			
 
				+
			
 
				+	// Check if we have a specific extractor for this format
			
 
				+	const extractor = SUPPORTED_BINARY_FORMATS[fileExtension as keyof typeof SUPPORTED_BINARY_FORMATS]
			
 
				+	if (extractor) {
			
 
				+		return extractor(filePath)
			
 
				+	}
			
 
				+
			
 
				+	// Handle other files
			
 
				+	const isBinary = await isBinaryFile(filePath).catch(() => false)
			
 
				+
			
 
				+	if (!isBinary) {
			
 
				+		return addLineNumbers(await fs.readFile(filePath, "utf8"))
			
 
				+	} else {
			
 
				+		throw new Error(`Cannot read text for file type: ${fileExtension}`)
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 export function addLineNumbers(content: string, startLine: number = 1): string {
			
 
				 	// If content is empty, return empty string - empty files should not have line numbers
			
 
				 	// If content is empty but startLine > 1, return "startLine | " because we know the file is not empty