read.ts 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. import z from "zod/v4"
  2. import * as fs from "fs"
  3. import * as path from "path"
  4. import { Tool } from "./tool"
  5. import { LSP } from "../lsp"
  6. import { FileTime } from "../file/time"
  7. import DESCRIPTION from "./read.txt"
  8. import { Filesystem } from "../util/filesystem"
  9. import { Instance } from "../project/instance"
// Number of lines returned by the read tool when the caller does not pass `limit`.
const DEFAULT_READ_LIMIT = 2000
// Lines longer than this many characters are cut at this length and suffixed with "..." in the output.
const MAX_LINE_LENGTH = 2000
  12. export const ReadTool = Tool.define("read", {
  13. description: DESCRIPTION,
  14. parameters: z.object({
  15. filePath: z.string().describe("The path to the file to read"),
  16. offset: z.coerce.number().describe("The line number to start reading from (0-based)").optional(),
  17. limit: z.coerce.number().describe("The number of lines to read (defaults to 2000)").optional(),
  18. }),
  19. async execute(params, ctx) {
  20. let filepath = params.filePath
  21. if (!path.isAbsolute(filepath)) {
  22. filepath = path.join(process.cwd(), filepath)
  23. }
  24. if (!ctx.extra?.["bypassCwdCheck"] && !Filesystem.contains(Instance.directory, filepath)) {
  25. throw new Error(`File ${filepath} is not in the current working directory`)
  26. }
  27. const file = Bun.file(filepath)
  28. if (!(await file.exists())) {
  29. const dir = path.dirname(filepath)
  30. const base = path.basename(filepath)
  31. const dirEntries = fs.readdirSync(dir)
  32. const suggestions = dirEntries
  33. .filter(
  34. (entry) =>
  35. entry.toLowerCase().includes(base.toLowerCase()) || base.toLowerCase().includes(entry.toLowerCase()),
  36. )
  37. .map((entry) => path.join(dir, entry))
  38. .slice(0, 3)
  39. if (suggestions.length > 0) {
  40. throw new Error(`File not found: ${filepath}\n\nDid you mean one of these?\n${suggestions.join("\n")}`)
  41. }
  42. throw new Error(`File not found: ${filepath}`)
  43. }
  44. const limit = params.limit ?? DEFAULT_READ_LIMIT
  45. const offset = params.offset || 0
  46. const isImage = isImageFile(filepath)
  47. if (isImage) throw new Error(`This is an image file of type: ${isImage}\nUse a different tool to process images`)
  48. const isBinary = await isBinaryFile(filepath, file)
  49. if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)
  50. const lines = await file.text().then((text) => text.split("\n"))
  51. const raw = lines.slice(offset, offset + limit).map((line) => {
  52. return line.length > MAX_LINE_LENGTH ? line.substring(0, MAX_LINE_LENGTH) + "..." : line
  53. })
  54. const content = raw.map((line, index) => {
  55. return `${(index + offset + 1).toString().padStart(5, "0")}| ${line}`
  56. })
  57. const preview = raw.slice(0, 20).join("\n")
  58. let output = "<file>\n"
  59. output += content.join("\n")
  60. if (lines.length > offset + content.length) {
  61. output += `\n\n(File has more lines. Use 'offset' parameter to read beyond line ${offset + content.length})`
  62. }
  63. output += "\n</file>"
  64. // just warms the lsp client
  65. LSP.touchFile(filepath, false)
  66. FileTime.read(ctx.sessionID, filepath)
  67. return {
  68. title: path.relative(Instance.worktree, filepath),
  69. output,
  70. metadata: {
  71. preview,
  72. },
  73. }
  74. },
  75. })
  76. function isImageFile(filePath: string): string | false {
  77. const ext = path.extname(filePath).toLowerCase()
  78. switch (ext) {
  79. case ".jpg":
  80. case ".jpeg":
  81. return "JPEG"
  82. case ".png":
  83. return "PNG"
  84. case ".gif":
  85. return "GIF"
  86. case ".bmp":
  87. return "BMP"
  88. case ".webp":
  89. return "WebP"
  90. default:
  91. return false
  92. }
  93. }
  94. async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
  95. const ext = path.extname(filepath).toLowerCase()
  96. // binary check for common non-text extensions
  97. switch (ext) {
  98. case ".zip":
  99. case ".tar":
  100. case ".gz":
  101. case ".exe":
  102. case ".dll":
  103. case ".so":
  104. case ".class":
  105. case ".jar":
  106. case ".war":
  107. case ".7z":
  108. case ".doc":
  109. case ".docx":
  110. case ".xls":
  111. case ".xlsx":
  112. case ".ppt":
  113. case ".pptx":
  114. case ".odt":
  115. case ".ods":
  116. case ".odp":
  117. case ".bin":
  118. case ".dat":
  119. case ".obj":
  120. case ".o":
  121. case ".a":
  122. case ".lib":
  123. case ".wasm":
  124. case ".pyc":
  125. case ".pyo":
  126. return true
  127. default:
  128. break
  129. }
  130. const stat = await file.stat()
  131. const fileSize = stat.size
  132. if (fileSize === 0) return false
  133. const bufferSize = Math.min(4096, fileSize)
  134. const buffer = await file.arrayBuffer()
  135. if (buffer.byteLength === 0) return false
  136. const bytes = new Uint8Array(buffer.slice(0, bufferSize))
  137. let nonPrintableCount = 0
  138. for (let i = 0; i < bytes.length; i++) {
  139. if (bytes[i] === 0) return true
  140. if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) {
  141. nonPrintableCount++
  142. }
  143. }
  144. // If >30% non-printable characters, consider it binary
  145. return nonPrintableCount / bytes.length > 0.3
  146. }