read.ts 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. import z from "zod"
  2. import * as fs from "fs"
  3. import * as path from "path"
  4. import { Tool } from "./tool"
  5. import { LSP } from "../lsp"
  6. import { FileTime } from "../file/time"
  7. import DESCRIPTION from "./read.txt"
  8. import { Instance } from "../project/instance"
  9. import { Identifier } from "../id/id"
  10. import { assertExternalDirectory } from "./external-directory"
  11. import { InstructionPrompt } from "../session/instruction"
  12. const DEFAULT_READ_LIMIT = 2000
  13. const MAX_LINE_LENGTH = 2000
  14. const MAX_BYTES = 50 * 1024
  15. export const ReadTool = Tool.define("read", {
  16. description: DESCRIPTION,
  17. parameters: z.object({
  18. filePath: z.string().describe("The absolute path to the file or directory to read"),
  19. offset: z.coerce.number().describe("The line number to start reading from (1-indexed)").optional(),
  20. limit: z.coerce.number().describe("The maximum number of lines to read (defaults to 2000)").optional(),
  21. }),
  22. async execute(params, ctx) {
  23. if (params.offset !== undefined && params.offset < 1) {
  24. throw new Error("offset must be greater than or equal to 1")
  25. }
  26. let filepath = params.filePath
  27. if (!path.isAbsolute(filepath)) {
  28. filepath = path.resolve(Instance.directory, filepath)
  29. }
  30. const title = path.relative(Instance.worktree, filepath)
  31. const file = Bun.file(filepath)
  32. const stat = await file.stat().catch(() => undefined)
  33. await assertExternalDirectory(ctx, filepath, {
  34. bypass: Boolean(ctx.extra?.["bypassCwdCheck"]),
  35. kind: stat?.isDirectory() ? "directory" : "file",
  36. })
  37. await ctx.ask({
  38. permission: "read",
  39. patterns: [filepath],
  40. always: ["*"],
  41. metadata: {},
  42. })
  43. if (!stat) {
  44. const dir = path.dirname(filepath)
  45. const base = path.basename(filepath)
  46. const dirEntries = fs.readdirSync(dir)
  47. const suggestions = dirEntries
  48. .filter(
  49. (entry) =>
  50. entry.toLowerCase().includes(base.toLowerCase()) || base.toLowerCase().includes(entry.toLowerCase()),
  51. )
  52. .map((entry) => path.join(dir, entry))
  53. .slice(0, 3)
  54. if (suggestions.length > 0) {
  55. throw new Error(`File not found: ${filepath}\n\nDid you mean one of these?\n${suggestions.join("\n")}`)
  56. }
  57. throw new Error(`File not found: ${filepath}`)
  58. }
  59. if (stat.isDirectory()) {
  60. const dirents = await fs.promises.readdir(filepath, { withFileTypes: true })
  61. const entries = await Promise.all(
  62. dirents.map(async (dirent) => {
  63. if (dirent.isDirectory()) return dirent.name + "/"
  64. if (dirent.isSymbolicLink()) {
  65. const target = await fs.promises.stat(path.join(filepath, dirent.name)).catch(() => undefined)
  66. if (target?.isDirectory()) return dirent.name + "/"
  67. }
  68. return dirent.name
  69. }),
  70. )
  71. entries.sort((a, b) => a.localeCompare(b))
  72. const limit = params.limit ?? DEFAULT_READ_LIMIT
  73. const offset = params.offset ?? 1
  74. const start = offset - 1
  75. const sliced = entries.slice(start, start + limit)
  76. const truncated = start + sliced.length < entries.length
  77. const output = [
  78. `<path>${filepath}</path>`,
  79. `<type>directory</type>`,
  80. `<entries>`,
  81. sliced.join("\n"),
  82. truncated
  83. ? `\n(Showing ${sliced.length} of ${entries.length} entries. Use 'offset' parameter to read beyond entry ${offset + sliced.length})`
  84. : `\n(${entries.length} entries)`,
  85. `</entries>`,
  86. ].join("\n")
  87. return {
  88. title,
  89. output,
  90. metadata: {
  91. preview: sliced.slice(0, 20).join("\n"),
  92. truncated,
  93. loaded: [] as string[],
  94. },
  95. }
  96. }
  97. const instructions = await InstructionPrompt.resolve(ctx.messages, filepath, ctx.messageID)
  98. // Exclude SVG (XML-based) and vnd.fastbidsheet (.fbs extension, commonly FlatBuffers schema files)
  99. const isImage =
  100. file.type.startsWith("image/") && file.type !== "image/svg+xml" && file.type !== "image/vnd.fastbidsheet"
  101. const isPdf = file.type === "application/pdf"
  102. if (isImage || isPdf) {
  103. const mime = file.type
  104. const msg = `${isImage ? "Image" : "PDF"} read successfully`
  105. return {
  106. title,
  107. output: msg,
  108. metadata: {
  109. preview: msg,
  110. truncated: false,
  111. loaded: instructions.map((i) => i.filepath),
  112. },
  113. attachments: [
  114. {
  115. id: Identifier.ascending("part"),
  116. sessionID: ctx.sessionID,
  117. messageID: ctx.messageID,
  118. type: "file",
  119. mime,
  120. url: `data:${mime};base64,${Buffer.from(await file.bytes()).toString("base64")}`,
  121. },
  122. ],
  123. }
  124. }
  125. const isBinary = await isBinaryFile(filepath, file)
  126. if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)
  127. const limit = params.limit ?? DEFAULT_READ_LIMIT
  128. const offset = params.offset ?? 1
  129. const start = offset - 1
  130. const lines = await file.text().then((text) => text.split("\n"))
  131. if (start >= lines.length) throw new Error(`Offset ${offset} is out of range for this file (${lines.length} lines)`)
  132. const raw: string[] = []
  133. let bytes = 0
  134. let truncatedByBytes = false
  135. for (let i = start; i < Math.min(lines.length, start + limit); i++) {
  136. const line = lines[i].length > MAX_LINE_LENGTH ? lines[i].substring(0, MAX_LINE_LENGTH) + "..." : lines[i]
  137. const size = Buffer.byteLength(line, "utf-8") + (raw.length > 0 ? 1 : 0)
  138. if (bytes + size > MAX_BYTES) {
  139. truncatedByBytes = true
  140. break
  141. }
  142. raw.push(line)
  143. bytes += size
  144. }
  145. const content = raw.map((line, index) => {
  146. return `${index + offset}: ${line}`
  147. })
  148. const preview = raw.slice(0, 20).join("\n")
  149. let output = [`<path>${filepath}</path>`, `<type>file</type>`, "<content>"].join("\n")
  150. output += content.join("\n")
  151. const totalLines = lines.length
  152. const lastReadLine = offset + raw.length - 1
  153. const hasMoreLines = totalLines > lastReadLine
  154. const truncated = hasMoreLines || truncatedByBytes
  155. if (truncatedByBytes) {
  156. output += `\n\n(Output truncated at ${MAX_BYTES} bytes. Use 'offset' parameter to read beyond line ${lastReadLine})`
  157. } else if (hasMoreLines) {
  158. output += `\n\n(File has more lines. Use 'offset' parameter to read beyond line ${lastReadLine})`
  159. } else {
  160. output += `\n\n(End of file - total ${totalLines} lines)`
  161. }
  162. output += "\n</content>"
  163. // just warms the lsp client
  164. LSP.touchFile(filepath, false)
  165. FileTime.read(ctx.sessionID, filepath)
  166. if (instructions.length > 0) {
  167. output += `\n\n<system-reminder>\n${instructions.map((i) => i.content).join("\n\n")}\n</system-reminder>`
  168. }
  169. return {
  170. title,
  171. output,
  172. metadata: {
  173. preview,
  174. truncated,
  175. loaded: instructions.map((i) => i.filepath),
  176. },
  177. }
  178. },
  179. })
  180. async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
  181. const ext = path.extname(filepath).toLowerCase()
  182. // binary check for common non-text extensions
  183. switch (ext) {
  184. case ".zip":
  185. case ".tar":
  186. case ".gz":
  187. case ".exe":
  188. case ".dll":
  189. case ".so":
  190. case ".class":
  191. case ".jar":
  192. case ".war":
  193. case ".7z":
  194. case ".doc":
  195. case ".docx":
  196. case ".xls":
  197. case ".xlsx":
  198. case ".ppt":
  199. case ".pptx":
  200. case ".odt":
  201. case ".ods":
  202. case ".odp":
  203. case ".bin":
  204. case ".dat":
  205. case ".obj":
  206. case ".o":
  207. case ".a":
  208. case ".lib":
  209. case ".wasm":
  210. case ".pyc":
  211. case ".pyo":
  212. return true
  213. default:
  214. break
  215. }
  216. const stat = await file.stat()
  217. const fileSize = stat.size
  218. if (fileSize === 0) return false
  219. const bufferSize = Math.min(4096, fileSize)
  220. const buffer = await file.arrayBuffer()
  221. if (buffer.byteLength === 0) return false
  222. const bytes = new Uint8Array(buffer.slice(0, bufferSize))
  223. let nonPrintableCount = 0
  224. for (let i = 0; i < bytes.length; i++) {
  225. if (bytes[i] === 0) return true
  226. if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) {
  227. nonPrintableCount++
  228. }
  229. }
  230. // If >30% non-printable characters, consider it binary
  231. return nonPrintableCount / bytes.length > 0.3
  232. }