read.ts 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  1. import z from "zod"
  2. import * as fs from "fs"
  3. import * as path from "path"
  4. import { Tool } from "./tool"
  5. import { LSP } from "../lsp"
  6. import { FileTime } from "../file/time"
  7. import DESCRIPTION from "./read.txt"
  8. import { Instance } from "../project/instance"
  9. import { Identifier } from "../id/id"
  10. import { assertExternalDirectory } from "./external-directory"
  11. import { InstructionPrompt } from "../session/instruction"
  /** Upper bound, in UTF-8 bytes, on the text collected for a single read (50 KiB). */
  const MAX_BYTES = 1024 * 50
  /** Lines longer than this many characters are cut and suffixed with "...". */
  const MAX_LINE_LENGTH = 2000
  /** Number of lines read when the caller does not supply `limit`. */
  const DEFAULT_READ_LIMIT = 2000
  /**
   * The `read` tool: returns the contents of a file to the caller.
   *
   * Flow of `execute`:
   *  1. Resolve `filePath` (relative paths are resolved against `Instance.directory`).
   *  2. Run the external-directory assertion and ask for the "read" permission.
   *  3. If the file does not exist, throw `File not found`, listing up to three
   *     similarly named entries from the same directory when any can be found.
   *  4. Images (except SVG and `image/vnd.fastbidsheet`) and PDFs are returned as a
   *     base64 `data:` URL attachment instead of text.
   *  5. Files judged binary by `isBinaryFile` are rejected with an error.
   *  6. Otherwise the requested window of lines is returned, numbered from 1 and
   *     capped by `limit`, `MAX_LINE_LENGTH` per line and `MAX_BYTES` overall.
   *
   * Any instruction content resolved by `InstructionPrompt.resolve` is appended to
   * the text output inside a `<system-reminder>` element, and the instruction file
   * paths are reported as `metadata.loaded`.
   */
  export const ReadTool = Tool.define("read", {
    description: DESCRIPTION,
    parameters: z.object({
      filePath: z.string().describe("The path to the file to read"),
      offset: z.coerce.number().describe("The line number to start reading from (0-based)").optional(),
      limit: z.coerce.number().describe("The number of lines to read (defaults to 2000)").optional(),
    }),
    async execute(params, ctx) {
      let filepath = params.filePath
      if (!path.isAbsolute(filepath)) {
        filepath = path.resolve(Instance.directory, filepath)
      }
      const title = path.relative(Instance.worktree, filepath)

      await assertExternalDirectory(ctx, filepath, {
        bypass: Boolean(ctx.extra?.["bypassCwdCheck"]),
      })
      await ctx.ask({
        permission: "read",
        patterns: [filepath],
        always: ["*"],
        metadata: {},
      })

      const file = Bun.file(filepath)
      if (!(await file.exists())) {
        const dir = path.dirname(filepath)
        const base = path.basename(filepath).toLowerCase()
        // The parent directory may itself be missing or unreadable. Suggestions are
        // best-effort, so a failed listing must not mask the "File not found" error.
        let dirEntries: string[] = []
        try {
          dirEntries = fs.readdirSync(dir)
        } catch {
          dirEntries = []
        }
        const suggestions = dirEntries
          .filter((entry) => {
            const candidate = entry.toLowerCase()
            return candidate.includes(base) || base.includes(candidate)
          })
          .map((entry) => path.join(dir, entry))
          .slice(0, 3)
        if (suggestions.length > 0) {
          throw new Error(`File not found: ${filepath}\n\nDid you mean one of these?\n${suggestions.join("\n")}`)
        }
        throw new Error(`File not found: ${filepath}`)
      }

      const instructions = await InstructionPrompt.resolve(ctx.messages, filepath, ctx.messageID)

      // Exclude SVG (XML-based) and vnd.fastbidsheet (.fbs extension, commonly FlatBuffers schema files)
      const isImage =
        file.type.startsWith("image/") && file.type !== "image/svg+xml" && file.type !== "image/vnd.fastbidsheet"
      const isPdf = file.type === "application/pdf"
      if (isImage || isPdf) {
        const mime = file.type
        const msg = `${isImage ? "Image" : "PDF"} read successfully`
        return {
          title,
          output: msg,
          metadata: {
            preview: msg,
            truncated: false,
            ...(instructions.length > 0 && { loaded: instructions.map((i) => i.filepath) }),
          },
          attachments: [
            {
              id: Identifier.ascending("part"),
              sessionID: ctx.sessionID,
              messageID: ctx.messageID,
              type: "file",
              mime,
              url: `data:${mime};base64,${Buffer.from(await file.bytes()).toString("base64")}`,
            },
          ],
        }
      }

      const isBinary = await isBinaryFile(filepath, file)
      if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)

      const limit = params.limit ?? DEFAULT_READ_LIMIT
      // Used as an array index below: a negative or fractional offset would index
      // an undefined line and crash, so clamp to a non-negative integer.
      const offset = Math.max(0, Math.trunc(params.offset || 0))
      const lines = (await file.text()).split("\n")

      const raw: string[] = []
      let bytes = 0
      let truncatedByBytes = false
      const end = Math.min(lines.length, offset + limit)
      for (let i = offset; i < end; i++) {
        const line = lines[i].length > MAX_LINE_LENGTH ? lines[i].substring(0, MAX_LINE_LENGTH) + "..." : lines[i]
        // Every line after the first also costs one byte for the joining "\n".
        const size = Buffer.byteLength(line, "utf-8") + (raw.length > 0 ? 1 : 0)
        if (bytes + size > MAX_BYTES) {
          truncatedByBytes = true
          break
        }
        raw.push(line)
        bytes += size
      }

      // Displayed line numbers are 1-based and zero-padded to five digits.
      const content = raw.map((line, index) => {
        return `${(index + offset + 1).toString().padStart(5, "0")}| ${line}`
      })
      const preview = raw.slice(0, 20).join("\n")

      let output = "<file>\n"
      output += content.join("\n")

      const totalLines = lines.length
      const lastReadLine = offset + raw.length
      const hasMoreLines = totalLines > lastReadLine
      const truncated = hasMoreLines || truncatedByBytes

      if (truncatedByBytes) {
        output += `\n\n(Output truncated at ${MAX_BYTES} bytes. Use 'offset' parameter to read beyond line ${lastReadLine})`
      } else if (hasMoreLines) {
        output += `\n\n(File has more lines. Use 'offset' parameter to read beyond line ${lastReadLine})`
      } else {
        output += `\n\n(End of file - total ${totalLines} lines)`
      }
      output += "\n</file>"

      // just warms the lsp client
      LSP.touchFile(filepath, false)
      FileTime.read(ctx.sessionID, filepath)

      if (instructions.length > 0) {
        output += `\n\n<system-reminder>\n${instructions.map((i) => i.content).join("\n\n")}\n</system-reminder>`
      }

      return {
        title,
        output,
        metadata: {
          preview,
          truncated,
          ...(instructions.length > 0 && { loaded: instructions.map((i) => i.filepath) }),
        },
      }
    },
  })
  /**
   * Heuristically decides whether a file should be treated as binary.
   *
   * A file is considered binary when:
   *  - its extension (case-insensitive) is one of the known non-text extensions, or
   *  - the first 4096 bytes contain a NUL byte, or
   *  - more than 30% of those bytes are control characters other than
   *    tab, LF, VT, FF and CR (byte values 9-13).
   *
   * Empty files are reported as not binary.
   *
   * @param filepath Path used only for the extension check.
   * @param file Handle used to read the file's size and its leading bytes.
   * @returns `true` when the file looks binary, `false` otherwise.
   */
  async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
    const ext = path.extname(filepath).toLowerCase()
    // binary check for common non-text extensions
    switch (ext) {
      case ".zip":
      case ".tar":
      case ".gz":
      case ".exe":
      case ".dll":
      case ".so":
      case ".class":
      case ".jar":
      case ".war":
      case ".7z":
      case ".doc":
      case ".docx":
      case ".xls":
      case ".xlsx":
      case ".ppt":
      case ".pptx":
      case ".odt":
      case ".ods":
      case ".odp":
      case ".bin":
      case ".dat":
      case ".obj":
      case ".o":
      case ".a":
      case ".lib":
      case ".wasm":
      case ".pyc":
      case ".pyo":
        return true
      default:
        break
    }

    const stat = await file.stat()
    const fileSize = stat.size
    if (fileSize === 0) return false

    const bufferSize = Math.min(4096, fileSize)
    // Only the leading sample is inspected, so read just that slice rather than
    // loading the entire (possibly very large) file into memory.
    const buffer = await file.slice(0, bufferSize).arrayBuffer()
    if (buffer.byteLength === 0) return false

    const bytes = new Uint8Array(buffer)
    let nonPrintableCount = 0
    for (const byte of bytes) {
      // A NUL byte is treated as conclusive evidence of binary content.
      if (byte === 0) return true
      if (byte < 9 || (byte > 13 && byte < 32)) {
        nonPrintableCount++
      }
    }
    // If >30% non-printable characters, consider it binary
    return nonPrintableCount / bytes.length > 0.3
  }