read.ts 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. import z from "zod"
  2. import * as fs from "fs"
  3. import * as path from "path"
  4. import { Tool } from "./tool"
  5. import { LSP } from "../lsp"
  6. import { FileTime } from "../file/time"
  7. import DESCRIPTION from "./read.txt"
  8. import { Filesystem } from "../util/filesystem"
  9. import { Instance } from "../project/instance"
  10. import { Identifier } from "../id/id"
  11. import { Permission } from "../permission"
  12. import { Agent } from "@/agent/agent"
  13. import { iife } from "@/util/iife"
  14. const DEFAULT_READ_LIMIT = 2000
  15. const MAX_LINE_LENGTH = 2000
  16. export const ReadTool = Tool.define("read", {
  17. description: DESCRIPTION,
  18. parameters: z.object({
  19. filePath: z.string().describe("The path to the file to read"),
  20. offset: z.coerce.number().describe("The line number to start reading from (0-based)").optional(),
  21. limit: z.coerce.number().describe("The number of lines to read (defaults to 2000)").optional(),
  22. }),
  23. async execute(params, ctx) {
  24. let filepath = params.filePath
  25. if (!path.isAbsolute(filepath)) {
  26. filepath = path.join(process.cwd(), filepath)
  27. }
  28. const title = path.relative(Instance.worktree, filepath)
  29. const agent = await Agent.get(ctx.agent)
  30. if (!ctx.extra?.["bypassCwdCheck"] && !Filesystem.contains(Instance.directory, filepath)) {
  31. const parentDir = path.dirname(filepath)
  32. if (agent.permission.external_directory === "ask") {
  33. await Permission.ask({
  34. type: "external_directory",
  35. pattern: [parentDir, path.join(parentDir, "*")],
  36. sessionID: ctx.sessionID,
  37. messageID: ctx.messageID,
  38. callID: ctx.callID,
  39. title: `Access file outside working directory: ${filepath}`,
  40. metadata: {
  41. filepath,
  42. parentDir,
  43. },
  44. })
  45. } else if (agent.permission.external_directory === "deny") {
  46. throw new Permission.RejectedError(
  47. ctx.sessionID,
  48. "external_directory",
  49. ctx.callID,
  50. {
  51. filepath: filepath,
  52. parentDir,
  53. },
  54. `File ${filepath} is not in the current working directory`,
  55. )
  56. }
  57. }
  58. const block = iife(() => {
  59. const whitelist = [".env.sample", ".example"]
  60. if (whitelist.some((w) => filepath.endsWith(w))) return false
  61. if (filepath.includes(".env")) return true
  62. return false
  63. })
  64. if (block) {
  65. throw new Error(`The user has blocked you from reading ${filepath}, DO NOT make further attempts to read it`)
  66. }
  67. const file = Bun.file(filepath)
  68. if (!(await file.exists())) {
  69. const dir = path.dirname(filepath)
  70. const base = path.basename(filepath)
  71. const dirEntries = fs.readdirSync(dir)
  72. const suggestions = dirEntries
  73. .filter(
  74. (entry) =>
  75. entry.toLowerCase().includes(base.toLowerCase()) || base.toLowerCase().includes(entry.toLowerCase()),
  76. )
  77. .map((entry) => path.join(dir, entry))
  78. .slice(0, 3)
  79. if (suggestions.length > 0) {
  80. throw new Error(`File not found: ${filepath}\n\nDid you mean one of these?\n${suggestions.join("\n")}`)
  81. }
  82. throw new Error(`File not found: ${filepath}`)
  83. }
  84. const isImage = file.type.startsWith("image/") && file.type !== "image/svg+xml"
  85. const isPdf = file.type === "application/pdf"
  86. if (isImage || isPdf) {
  87. const mime = file.type
  88. const msg = `${isImage ? "Image" : "PDF"} read successfully`
  89. return {
  90. title,
  91. output: msg,
  92. metadata: {
  93. preview: msg,
  94. },
  95. attachments: [
  96. {
  97. id: Identifier.ascending("part"),
  98. sessionID: ctx.sessionID,
  99. messageID: ctx.messageID,
  100. type: "file",
  101. mime,
  102. url: `data:${mime};base64,${Buffer.from(await file.bytes()).toString("base64")}`,
  103. },
  104. ],
  105. }
  106. }
  107. const isBinary = await isBinaryFile(filepath, file)
  108. if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)
  109. const limit = params.limit ?? DEFAULT_READ_LIMIT
  110. const offset = params.offset || 0
  111. const lines = await file.text().then((text) => text.split("\n"))
  112. const raw = lines.slice(offset, offset + limit).map((line) => {
  113. return line.length > MAX_LINE_LENGTH ? line.substring(0, MAX_LINE_LENGTH) + "..." : line
  114. })
  115. const content = raw.map((line, index) => {
  116. return `${(index + offset + 1).toString().padStart(5, "0")}| ${line}`
  117. })
  118. const preview = raw.slice(0, 20).join("\n")
  119. let output = "<file>\n"
  120. output += content.join("\n")
  121. const totalLines = lines.length
  122. const lastReadLine = offset + content.length
  123. const hasMoreLines = totalLines > lastReadLine
  124. if (hasMoreLines) {
  125. output += `\n\n(File has more lines. Use 'offset' parameter to read beyond line ${lastReadLine})`
  126. } else {
  127. output += `\n\n(End of file - total ${totalLines} lines)`
  128. }
  129. output += "\n</file>"
  130. // just warms the lsp client
  131. LSP.touchFile(filepath, false)
  132. FileTime.read(ctx.sessionID, filepath)
  133. return {
  134. title,
  135. output,
  136. metadata: {
  137. preview,
  138. },
  139. }
  140. },
  141. })
  142. async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
  143. const ext = path.extname(filepath).toLowerCase()
  144. // binary check for common non-text extensions
  145. switch (ext) {
  146. case ".zip":
  147. case ".tar":
  148. case ".gz":
  149. case ".exe":
  150. case ".dll":
  151. case ".so":
  152. case ".class":
  153. case ".jar":
  154. case ".war":
  155. case ".7z":
  156. case ".doc":
  157. case ".docx":
  158. case ".xls":
  159. case ".xlsx":
  160. case ".ppt":
  161. case ".pptx":
  162. case ".odt":
  163. case ".ods":
  164. case ".odp":
  165. case ".bin":
  166. case ".dat":
  167. case ".obj":
  168. case ".o":
  169. case ".a":
  170. case ".lib":
  171. case ".wasm":
  172. case ".pyc":
  173. case ".pyo":
  174. return true
  175. default:
  176. break
  177. }
  178. const stat = await file.stat()
  179. const fileSize = stat.size
  180. if (fileSize === 0) return false
  181. const bufferSize = Math.min(4096, fileSize)
  182. const buffer = await file.arrayBuffer()
  183. if (buffer.byteLength === 0) return false
  184. const bytes = new Uint8Array(buffer.slice(0, bufferSize))
  185. let nonPrintableCount = 0
  186. for (let i = 0; i < bytes.length; i++) {
  187. if (bytes[i] === 0) return true
  188. if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) {
  189. nonPrintableCount++
  190. }
  191. }
  192. // If >30% non-printable characters, consider it binary
  193. return nonPrintableCount / bytes.length > 0.3
  194. }