// read.ts (5.4 KB)

  import z from "zod"
  import * as fs from "fs"
  import * as path from "path"
  import { Tool } from "./tool"
  import { LSP } from "../lsp"
  import { FileTime } from "../file/time"
  import DESCRIPTION from "./read.txt"
  import { Filesystem } from "../util/filesystem"
  import { Instance } from "../project/instance"
  import { Identifier } from "../id/id"
  /** Number of lines returned by the read tool when the caller does not pass `limit`. */
  const DEFAULT_READ_LIMIT = 2000
  /** Longest line (in characters) emitted verbatim; longer lines are cut here and suffixed with "...". */
  const MAX_LINE_LENGTH = 2000
  /**
   * Collects up to three directory entries that resemble a missing file's name.
   *
   * An entry matches when its lower-cased name contains the lower-cased base name of
   * `filepath`, or the other way round. Matches are returned joined onto the directory.
   * If the directory cannot be listed (for example because it does not exist), an empty
   * list is returned so the caller can still report its own "File not found" error
   * instead of a raw directory-listing failure.
   */
  async function findSimilarEntries(filepath: string): Promise<string[]> {
    const dir = path.dirname(filepath)
    const base = path.basename(filepath).toLowerCase()

    let entries: string[]
    try {
      entries = await fs.promises.readdir(dir)
    } catch {
      // Missing or unreadable parent directory: nothing to suggest.
      return []
    }

    return entries
      .filter((entry) => {
        const name = entry.toLowerCase()
        return name.includes(base) || base.includes(name)
      })
      .slice(0, 3)
      .map((entry) => path.join(dir, entry))
  }

  /**
   * The "read" tool. Returns a file's text as numbered lines wrapped in `<file>` tags, or
   * returns images (other than SVG) and PDFs as a base64 `data:` URL attachment.
   *
   * Parameters:
   * - `filePath`: path to read; relative paths are joined onto `process.cwd()`.
   * - `offset`: 0-based index of the first line to return (default 0). Negative or
   *   fractional values are clamped/truncated to a non-negative integer.
   * - `limit`: maximum number of lines to return (default `DEFAULT_READ_LIMIT`), clamped
   *   the same way.
   *
   * Throws `Error` when the file does not exist (listing up to three similarly named
   * entries from the same directory, if any) or when `isBinaryFile` reports it as binary.
   */
  export const ReadTool = Tool.define("read", {
    description: DESCRIPTION,
    parameters: z.object({
      filePath: z.string().describe("The path to the file to read"),
      offset: z.coerce.number().describe("The line number to start reading from (0-based)").optional(),
      limit: z.coerce.number().describe("The number of lines to read (defaults to 2000)").optional(),
    }),
    async execute(params, ctx) {
      const filepath = path.isAbsolute(params.filePath)
        ? params.filePath
        : path.join(process.cwd(), params.filePath)
      const title = path.relative(Instance.worktree, filepath)

      // Paths not contained in Instance.directory (per Filesystem.contains) require an extra
      // "external_directory" approval, unless the caller set the bypass flag.
      if (!ctx.extra?.["bypassCwdCheck"] && !Filesystem.contains(Instance.directory, filepath)) {
        const parentDir = path.dirname(filepath)
        await ctx.ask({
          permission: "external_directory",
          patterns: [parentDir],
          always: [parentDir + "/*"],
          metadata: {
            filepath,
            parentDir,
          },
        })
      }

      await ctx.ask({
        permission: "read",
        patterns: [filepath],
        always: ["*"],
        metadata: {},
      })

      const file = Bun.file(filepath)
      if (!(await file.exists())) {
        const suggestions = await findSimilarEntries(filepath)
        if (suggestions.length > 0) {
          throw new Error(`File not found: ${filepath}\n\nDid you mean one of these?\n${suggestions.join("\n")}`)
        }
        throw new Error(`File not found: ${filepath}`)
      }

      // Images (except SVG, which is handled as text below) and PDFs are returned as attachments.
      const mime = file.type
      const isImage = mime.startsWith("image/") && mime !== "image/svg+xml"
      const isPdf = mime === "application/pdf"
      if (isImage || isPdf) {
        const msg = `${isImage ? "Image" : "PDF"} read successfully`
        return {
          title,
          output: msg,
          metadata: {
            preview: msg,
          },
          attachments: [
            {
              id: Identifier.ascending("part"),
              sessionID: ctx.sessionID,
              messageID: ctx.messageID,
              type: "file",
              mime,
              url: `data:${mime};base64,${Buffer.from(await file.bytes()).toString("base64")}`,
            },
          ],
        }
      }

      if (await isBinaryFile(filepath, file)) throw new Error(`Cannot read binary file: ${filepath}`)

      // Clamp to non-negative integers: a negative value would make slice() count from the
      // end of the file while the printed line numbers went negative, and a fractional one
      // would print fractional line numbers. `||` (not `??`) also maps NaN to 0.
      const offset = Math.max(0, Math.trunc(params.offset || 0))
      const limit = Math.max(0, Math.trunc(params.limit ?? DEFAULT_READ_LIMIT))

      const lines = (await file.text()).split("\n")
      const raw = lines
        .slice(offset, offset + limit)
        .map((line) => (line.length > MAX_LINE_LENGTH ? line.substring(0, MAX_LINE_LENGTH) + "..." : line))
      // Displayed line numbers are 1-based and zero-padded to five digits.
      const content = raw.map((line, index) => `${(index + offset + 1).toString().padStart(5, "0")}| ${line}`)
      const preview = raw.slice(0, 20).join("\n")

      const totalLines = lines.length
      const lastReadLine = offset + content.length
      const hasMoreLines = totalLines > lastReadLine

      let output = "<file>\n"
      output += content.join("\n")
      if (hasMoreLines) {
        output += `\n\n(File has more lines. Use 'offset' parameter to read beyond line ${lastReadLine})`
      } else {
        output += `\n\n(End of file - total ${totalLines} lines)`
      }
      output += "\n</file>"

      // just warms the lsp client
      LSP.touchFile(filepath, false)
      FileTime.read(ctx.sessionID, filepath)

      return {
        title,
        output,
        metadata: {
          preview,
        },
      }
    },
  })
  /** Lower-cased file extensions treated as binary without inspecting the file contents. */
  const BINARY_EXTENSIONS: ReadonlySet<string> = new Set([
    ".zip",
    ".tar",
    ".gz",
    ".exe",
    ".dll",
    ".so",
    ".class",
    ".jar",
    ".war",
    ".7z",
    ".doc",
    ".docx",
    ".xls",
    ".xlsx",
    ".ppt",
    ".pptx",
    ".odt",
    ".ods",
    ".odp",
    ".bin",
    ".dat",
    ".obj",
    ".o",
    ".a",
    ".lib",
    ".wasm",
    ".pyc",
    ".pyo",
  ])

  /** Number of leading bytes sampled when sniffing file contents. */
  const BINARY_SAMPLE_BYTES = 4096

  /**
   * Heuristically decides whether a file is binary.
   *
   * The check runs in two stages:
   * 1. If the extension of `filepath` (case-insensitive) is in `BINARY_EXTENSIONS`, the
   *    file is binary and its contents are never read.
   * 2. Otherwise the first `BINARY_SAMPLE_BYTES` bytes are inspected: any NUL byte means
   *    binary; failing that, the file is binary when more than 30% of the sampled bytes
   *    are control characters other than tab, LF, VT, FF and CR (bytes 9-13).
   *
   * @param filepath Path used only for its extension.
   * @param file Handle used to stat and read the leading bytes.
   * @returns `true` when the file is considered binary; `false` for empty files.
   */
  async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
    if (BINARY_EXTENSIONS.has(path.extname(filepath).toLowerCase())) return true

    const { size } = await file.stat()
    if (size === 0) return false

    // Read only the leading sample instead of loading the whole file into memory.
    const sampleLength = Math.min(BINARY_SAMPLE_BYTES, size)
    const sample = new Uint8Array(await file.slice(0, sampleLength).arrayBuffer())
    if (sample.length === 0) return false

    let nonPrintableCount = 0
    for (const byte of sample) {
      if (byte === 0) return true
      if (byte < 9 || (byte > 13 && byte < 32)) nonPrintableCount++
    }
    // If >30% non-printable characters, consider it binary
    return nonPrintableCount / sample.length > 0.3
  }