read.ts 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. import z from "zod"
  2. import * as fs from "fs"
  3. import * as path from "path"
  4. import { Tool } from "./tool"
  5. import { LSP } from "../lsp"
  6. import { FileTime } from "../file/time"
  7. import DESCRIPTION from "./read.txt"
  8. import { Filesystem } from "../util/filesystem"
  9. import { Instance } from "../project/instance"
  10. import { Provider } from "../provider/provider"
  11. import { Identifier } from "../id/id"
  12. import { Permission } from "../permission"
  13. import { Agent } from "@/agent/agent"
  14. import { iife } from "@/util/iife"
  15. const DEFAULT_READ_LIMIT = 2000
  16. const MAX_LINE_LENGTH = 2000
  17. export const ReadTool = Tool.define("read", {
  18. description: DESCRIPTION,
  19. parameters: z.object({
  20. filePath: z.string().describe("The path to the file to read"),
  21. offset: z.coerce.number().describe("The line number to start reading from (0-based)").optional(),
  22. limit: z.coerce.number().describe("The number of lines to read (defaults to 2000)").optional(),
  23. }),
  24. async execute(params, ctx) {
  25. let filepath = params.filePath
  26. if (!path.isAbsolute(filepath)) {
  27. filepath = path.join(process.cwd(), filepath)
  28. }
  29. const title = path.relative(Instance.worktree, filepath)
  30. const agent = await Agent.get(ctx.agent)
  31. if (!ctx.extra?.["bypassCwdCheck"] && !Filesystem.contains(Instance.directory, filepath)) {
  32. const parentDir = path.dirname(filepath)
  33. if (agent.permission.external_directory === "ask") {
  34. await Permission.ask({
  35. type: "external_directory",
  36. pattern: parentDir,
  37. sessionID: ctx.sessionID,
  38. messageID: ctx.messageID,
  39. callID: ctx.callID,
  40. title: `Access file outside working directory: ${filepath}`,
  41. metadata: {
  42. filepath,
  43. parentDir,
  44. },
  45. })
  46. } else if (agent.permission.external_directory === "deny") {
  47. throw new Permission.RejectedError(
  48. ctx.sessionID,
  49. "external_directory",
  50. ctx.callID,
  51. {
  52. filepath: filepath,
  53. parentDir,
  54. },
  55. `File ${filepath} is not in the current working directory`,
  56. )
  57. }
  58. }
  59. const block = iife(() => {
  60. const whitelist = [".env.sample", ".example"]
  61. if (whitelist.some((w) => filepath.endsWith(w))) return false
  62. if (filepath.includes(".env")) return true
  63. return false
  64. })
  65. if (block) {
  66. throw new Error(`The user has blocked you from reading ${filepath}, DO NOT make further attempts to read it`)
  67. }
  68. const file = Bun.file(filepath)
  69. if (!(await file.exists())) {
  70. const dir = path.dirname(filepath)
  71. const base = path.basename(filepath)
  72. const dirEntries = fs.readdirSync(dir)
  73. const suggestions = dirEntries
  74. .filter(
  75. (entry) =>
  76. entry.toLowerCase().includes(base.toLowerCase()) || base.toLowerCase().includes(entry.toLowerCase()),
  77. )
  78. .map((entry) => path.join(dir, entry))
  79. .slice(0, 3)
  80. if (suggestions.length > 0) {
  81. throw new Error(`File not found: ${filepath}\n\nDid you mean one of these?\n${suggestions.join("\n")}`)
  82. }
  83. throw new Error(`File not found: ${filepath}`)
  84. }
  85. const isImage = isImageFile(filepath)
  86. const supportsImages = await (async () => {
  87. if (!ctx.extra?.["providerID"] || !ctx.extra?.["modelID"]) return false
  88. const providerID = ctx.extra["providerID"] as string
  89. const modelID = ctx.extra["modelID"] as string
  90. const model = await Provider.getModel(providerID, modelID).catch(() => undefined)
  91. if (!model) return false
  92. return model.info.modalities?.input?.includes("image") ?? false
  93. })()
  94. if (isImage) {
  95. if (!supportsImages) {
  96. throw new Error(`Failed to read image: ${filepath}, model may not be able to read images`)
  97. }
  98. const mime = file.type
  99. const msg = "Image read successfully"
  100. return {
  101. title,
  102. output: msg,
  103. metadata: {
  104. preview: msg,
  105. },
  106. attachments: [
  107. {
  108. id: Identifier.ascending("part"),
  109. sessionID: ctx.sessionID,
  110. messageID: ctx.messageID,
  111. type: "file",
  112. mime,
  113. url: `data:${mime};base64,${Buffer.from(await file.bytes()).toString("base64")}`,
  114. },
  115. ],
  116. }
  117. }
  118. const isBinary = await isBinaryFile(filepath, file)
  119. if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)
  120. const limit = params.limit ?? DEFAULT_READ_LIMIT
  121. const offset = params.offset || 0
  122. const lines = await file.text().then((text) => text.split("\n"))
  123. const raw = lines.slice(offset, offset + limit).map((line) => {
  124. return line.length > MAX_LINE_LENGTH ? line.substring(0, MAX_LINE_LENGTH) + "..." : line
  125. })
  126. const content = raw.map((line, index) => {
  127. return `${(index + offset + 1).toString().padStart(5, "0")}| ${line}`
  128. })
  129. const preview = raw.slice(0, 20).join("\n")
  130. let output = "<file>\n"
  131. output += content.join("\n")
  132. const totalLines = lines.length
  133. const lastReadLine = offset + content.length
  134. const hasMoreLines = totalLines > lastReadLine
  135. if (hasMoreLines) {
  136. output += `\n\n(File has more lines. Use 'offset' parameter to read beyond line ${lastReadLine})`
  137. } else {
  138. output += `\n\n(End of file - total ${totalLines} lines)`
  139. }
  140. output += "\n</file>"
  141. // just warms the lsp client
  142. LSP.touchFile(filepath, false)
  143. FileTime.read(ctx.sessionID, filepath)
  144. return {
  145. title,
  146. output,
  147. metadata: {
  148. preview,
  149. },
  150. }
  151. },
  152. })
  153. function isImageFile(filePath: string): string | false {
  154. const ext = path.extname(filePath).toLowerCase()
  155. switch (ext) {
  156. case ".jpg":
  157. case ".jpeg":
  158. return "JPEG"
  159. case ".png":
  160. return "PNG"
  161. case ".gif":
  162. return "GIF"
  163. case ".bmp":
  164. return "BMP"
  165. case ".webp":
  166. return "WebP"
  167. default:
  168. return false
  169. }
  170. }
  171. async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
  172. const ext = path.extname(filepath).toLowerCase()
  173. // binary check for common non-text extensions
  174. switch (ext) {
  175. case ".zip":
  176. case ".tar":
  177. case ".gz":
  178. case ".exe":
  179. case ".dll":
  180. case ".so":
  181. case ".class":
  182. case ".jar":
  183. case ".war":
  184. case ".7z":
  185. case ".doc":
  186. case ".docx":
  187. case ".xls":
  188. case ".xlsx":
  189. case ".ppt":
  190. case ".pptx":
  191. case ".odt":
  192. case ".ods":
  193. case ".odp":
  194. case ".bin":
  195. case ".dat":
  196. case ".obj":
  197. case ".o":
  198. case ".a":
  199. case ".lib":
  200. case ".wasm":
  201. case ".pyc":
  202. case ".pyo":
  203. return true
  204. default:
  205. break
  206. }
  207. const stat = await file.stat()
  208. const fileSize = stat.size
  209. if (fileSize === 0) return false
  210. const bufferSize = Math.min(4096, fileSize)
  211. const buffer = await file.arrayBuffer()
  212. if (buffer.byteLength === 0) return false
  213. const bytes = new Uint8Array(buffer.slice(0, bufferSize))
  214. let nonPrintableCount = 0
  215. for (let i = 0; i < bytes.length; i++) {
  216. if (bytes[i] === 0) return true
  217. if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) {
  218. nonPrintableCount++
  219. }
  220. }
  221. // If >30% non-printable characters, consider it binary
  222. return nonPrintableCount / bytes.length > 0.3
  223. }