import z from "zod"
import * as fs from "fs"
import * as path from "path"
import { Tool } from "./tool"
import { LSP } from "../lsp"
import { FileTime } from "../file/time"
import DESCRIPTION from "./read.txt"
import { Filesystem } from "../util/filesystem"
import { Instance } from "../project/instance"
import { Identifier } from "../id/id"
import { Permission } from "../permission"
import { Agent } from "@/agent/agent"
import { iife } from "@/util/iife"

/** Number of lines returned when the caller does not pass `limit`. */
const DEFAULT_READ_LIMIT = 2000
/** Lines longer than this many characters are cut and suffixed with "...". */
const MAX_LINE_LENGTH = 2000

/**
 * The "read" tool: returns a numbered window of lines from a text file, or the
 * file itself as a base64 data-URL attachment when it is an image (other than
 * SVG) or a PDF.
 *
 * Steps performed by `execute`, in order:
 *  1. Resolve `filePath` against `process.cwd()` when it is relative.
 *  2. For paths outside `Instance.directory` (unless `ctx.extra.bypassCwdCheck`
 *     is set), apply the agent's `external_directory` permission: "ask" prompts
 *     through `Permission.ask`, "deny" throws `Permission.RejectedError`.
 *  3. Refuse any path containing ".env" unless it ends in ".env.sample" or
 *     ".example".
 *  4. Throw "File not found" (with up to three similarly named siblings) when
 *     the file does not exist.
 *  5. Return images/PDFs as attachments; throw on other binary files.
 *  6. Otherwise return lines `[offset, offset + limit)` prefixed with 1-based,
 *     zero-padded line numbers.
 */
export const ReadTool = Tool.define("read", {
  description: DESCRIPTION,
  parameters: z.object({
    filePath: z.string().describe("The path to the file to read"),
    offset: z.coerce.number().describe("The line number to start reading from (0-based)").optional(),
    limit: z.coerce.number().describe("The number of lines to read (defaults to 2000)").optional(),
  }),
  async execute(params, ctx) {
    let filepath = params.filePath
    if (!path.isAbsolute(filepath)) {
      filepath = path.join(process.cwd(), filepath)
    }
    const title = path.relative(Instance.worktree, filepath)
    const agent = await Agent.get(ctx.agent)

    // Paths outside the instance directory are gated by the agent's
    // `external_directory` permission; any value other than "ask"/"deny"
    // falls through and the read proceeds.
    if (!ctx.extra?.["bypassCwdCheck"] && !Filesystem.contains(Instance.directory, filepath)) {
      const parentDir = path.dirname(filepath)
      if (agent.permission.external_directory === "ask") {
        await Permission.ask({
          type: "external_directory",
          pattern: [parentDir, path.join(parentDir, "*")],
          sessionID: ctx.sessionID,
          messageID: ctx.messageID,
          callID: ctx.callID,
          title: `Access file outside working directory: ${filepath}`,
          metadata: {
            filepath,
            parentDir,
          },
        })
      } else if (agent.permission.external_directory === "deny") {
        throw new Permission.RejectedError(
          ctx.sessionID,
          "external_directory",
          ctx.callID,
          {
            filepath: filepath,
            parentDir,
          },
          `File ${filepath} is not in the current working directory`,
        )
      }
    }

    // Block anything that looks like an env file. The match is a substring
    // test on the whole path, so a directory or file name merely containing
    // ".env" is blocked as well; only the whitelisted suffixes are exempt.
    const block = iife(() => {
      const whitelist = [".env.sample", ".example"]
      if (whitelist.some((w) => filepath.endsWith(w))) return false
      if (filepath.includes(".env")) return true
      return false
    })
    if (block) {
      throw new Error(`The user has blocked you from reading ${filepath}, DO NOT make further attempts to read it`)
    }

    const file = Bun.file(filepath)
    if (!(await file.exists())) {
      const dir = path.dirname(filepath)
      const base = path.basename(filepath)

      // Suggestions are best-effort: if the parent directory is missing or
      // unreadable, readdirSync throws and would otherwise replace the
      // "File not found" error below with a raw fs error.
      let dirEntries: string[] = []
      try {
        dirEntries = fs.readdirSync(dir)
      } catch {
        dirEntries = []
      }

      const needle = base.toLowerCase()
      const suggestions = dirEntries
        .filter((entry) => {
          const candidate = entry.toLowerCase()
          return candidate.includes(needle) || needle.includes(candidate)
        })
        .map((entry) => path.join(dir, entry))
        .slice(0, 3)

      if (suggestions.length > 0) {
        throw new Error(`File not found: ${filepath}\n\nDid you mean one of these?\n${suggestions.join("\n")}`)
      }
      throw new Error(`File not found: ${filepath}`)
    }

    // SVG is excluded from the image branch, so it goes through the text path.
    const isImage = file.type.startsWith("image/") && file.type !== "image/svg+xml"
    const isPdf = file.type === "application/pdf"
    if (isImage || isPdf) {
      const mime = file.type
      const msg = `${isImage ? "Image" : "PDF"} read successfully`
      return {
        title,
        output: msg,
        metadata: {
          preview: msg,
        },
        attachments: [
          {
            id: Identifier.ascending("part"),
            sessionID: ctx.sessionID,
            messageID: ctx.messageID,
            type: "file",
            mime,
            url: `data:${mime};base64,${Buffer.from(await file.bytes()).toString("base64")}`,
          },
        ],
      }
    }

    const isBinary = await isBinaryFile(filepath, file)
    if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)

    // Clamp to non-negative integers: Array.prototype.slice treats negative
    // indices as relative to the end, which would yield a window whose printed
    // line numbers (and the "read beyond line" hint) do not match the file.
    const limit = Math.max(0, Math.trunc(params.limit ?? DEFAULT_READ_LIMIT))
    const offset = Math.max(0, Math.trunc(params.offset || 0))

    const lines = (await file.text()).split("\n")
    const raw = lines.slice(offset, offset + limit).map((line) => {
      return line.length > MAX_LINE_LENGTH ? line.substring(0, MAX_LINE_LENGTH) + "..." : line
    })
    // Line numbers shown to the caller are 1-based and zero-padded to 5 digits.
    const content = raw.map((line, index) => {
      return `${(index + offset + 1).toString().padStart(5, "0")}| ${line}`
    })
    // The preview is the first 20 lines of the window, without line numbers.
    const preview = raw.slice(0, 20).join("\n")

    let output = "\n"
    output += content.join("\n")

    const totalLines = lines.length
    const lastReadLine = offset + content.length
    const hasMoreLines = totalLines > lastReadLine

    if (hasMoreLines) {
      output += `\n\n(File has more lines. Use 'offset' parameter to read beyond line ${lastReadLine})`
    } else {
      output += `\n\n(End of file - total ${totalLines} lines)`
    }
    output += "\n"

    // just warms the lsp client
    LSP.touchFile(filepath, false)
    // NOTE(review): presumably records that this session has read the file — confirm in FileTime.
    FileTime.read(ctx.sessionID, filepath)

    return {
      title,
      output,
      metadata: {
        preview,
      },
    }
  },
})

/**
 * Heuristically decides whether a file is binary.
 *
 * A file is reported as binary when its extension is on a fixed list of
 * non-text formats, or when a sample of at most its first 4096 bytes contains
 * a NUL byte or more than 30% control bytes (values below 9, or 14 through 31).
 * Empty files are reported as text.
 *
 * @param filepath Path used only for the extension check.
 * @param file Handle to the same file; used for its size and content sample.
 * @returns `true` when the file should be treated as binary.
 */
async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
  const ext = path.extname(filepath).toLowerCase()
  // binary check for common non-text extensions
  switch (ext) {
    case ".zip":
    case ".tar":
    case ".gz":
    case ".exe":
    case ".dll":
    case ".so":
    case ".class":
    case ".jar":
    case ".war":
    case ".7z":
    case ".doc":
    case ".docx":
    case ".xls":
    case ".xlsx":
    case ".ppt":
    case ".pptx":
    case ".odt":
    case ".ods":
    case ".odp":
    case ".bin":
    case ".dat":
    case ".obj":
    case ".o":
    case ".a":
    case ".lib":
    case ".wasm":
    case ".pyc":
    case ".pyo":
      return true
    default:
      break
  }

  const stat = await file.stat()
  const fileSize = stat.size
  if (fileSize === 0) return false

  // Read only the sampled prefix rather than loading the whole file just to
  // inspect its first few KiB.
  const sampleSize = Math.min(4096, fileSize)
  const buffer = await file.slice(0, sampleSize).arrayBuffer()
  if (buffer.byteLength === 0) return false
  const bytes = new Uint8Array(buffer)

  let nonPrintableCount = 0
  for (const byte of bytes) {
    // A NUL byte is treated as conclusive evidence of binary content.
    if (byte === 0) return true
    // Bytes 9-13 (tab, LF, VT, FF, CR) are not counted as non-printable.
    if (byte < 9 || (byte > 13 && byte < 32)) {
      nonPrintableCount++
    }
  }
  // If >30% non-printable characters, consider it binary
  return nonPrintableCount / bytes.length > 0.3
}