// read.ts (5.7 KB)
  1. import z from "zod/v4"
  2. import * as fs from "fs"
  3. import * as path from "path"
  4. import { Tool } from "./tool"
  5. import { LSP } from "../lsp"
  6. import { FileTime } from "../file/time"
  7. import DESCRIPTION from "./read.txt"
  8. import { Filesystem } from "../util/filesystem"
  9. import { Instance } from "../project/instance"
  10. import { Provider } from "../provider/provider"
  11. import { Identifier } from "../id/id"
  /** Number of lines returned when the caller does not supply `limit`. */
  const DEFAULT_READ_LIMIT = 2000
  /** Lines longer than this many characters are cut off and suffixed with "...". */
  const MAX_LINE_LENGTH = 2000

  /**
   * The `read` tool: returns the contents of a file as numbered lines, or as a
   * base64 data-URL attachment when the file has an image extension.
   *
   * Behaviour of `execute`:
   * - Relative `filePath` values are resolved against `process.cwd()`.
   * - Unless `ctx.extra.bypassCwdCheck` is set, the path must be contained in
   *   `Instance.directory`.
   * - A missing file produces a "File not found" error, with up to three
   *   similarly named entries from the same directory as suggestions.
   * - Image files are only returned when the model named by
   *   `ctx.extra.providerID` / `ctx.extra.modelID` lists "image" among its
   *   input modalities.
   * - Files detected as binary are rejected.
   * - Text is returned wrapped in `<file>` tags, one line per row, prefixed with
   *   a 1-based, zero-padded line number. `offset` is 0-based.
   *
   * @throws Error when the path is outside the working directory, the file does
   *   not exist, the file is an image the model cannot read, or the file is binary.
   */
  export const ReadTool = Tool.define("read", {
    description: DESCRIPTION,
    parameters: z.object({
      filePath: z.string().describe("The path to the file to read"),
      offset: z.coerce.number().describe("The line number to start reading from (0-based)").optional(),
      limit: z.coerce.number().describe("The number of lines to read (defaults to 2000)").optional(),
    }),
    async execute(params, ctx) {
      let filepath = params.filePath
      if (!path.isAbsolute(filepath)) {
        filepath = path.join(process.cwd(), filepath)
      }
      const title = path.relative(Instance.worktree, filepath)

      if (!ctx.extra?.["bypassCwdCheck"] && !Filesystem.contains(Instance.directory, filepath)) {
        throw new Error(`File ${filepath} is not in the current working directory`)
      }

      const file = Bun.file(filepath)
      if (!(await file.exists())) {
        const dir = path.dirname(filepath)
        const base = path.basename(filepath).toLowerCase()

        // The parent directory may itself be missing or unreadable. Suggestions
        // are best-effort, so fall back to none rather than letting a raw
        // ENOENT/EACCES replace the "File not found" error below.
        let dirEntries: string[] = []
        try {
          dirEntries = fs.readdirSync(dir)
        } catch {
          dirEntries = []
        }

        const suggestions = dirEntries
          .filter((entry) => {
            const name = entry.toLowerCase()
            return name.includes(base) || base.includes(name)
          })
          .map((entry) => path.join(dir, entry))
          .slice(0, 3)

        if (suggestions.length > 0) {
          throw new Error(`File not found: ${filepath}\n\nDid you mean one of these?\n${suggestions.join("\n")}`)
        }
        throw new Error(`File not found: ${filepath}`)
      }

      const isImage = isImageFile(filepath)
      if (isImage) {
        // Only image reads need the model's capabilities, so the provider
        // lookup is done here instead of on every text read.
        const supportsImages = await (async () => {
          if (!ctx.extra?.["providerID"] || !ctx.extra?.["modelID"]) return false
          const providerID = ctx.extra["providerID"] as string
          const modelID = ctx.extra["modelID"] as string
          const model = await Provider.getModel(providerID, modelID).catch(() => undefined)
          if (!model) return false
          return model.info.modalities?.input?.includes("image") ?? false
        })()
        if (!supportsImages) {
          throw new Error(`Failed to read image: ${filepath}, model may not be able to read images`)
        }

        const mime = file.type
        const msg = "Image read successfully"
        return {
          title,
          output: msg,
          metadata: {
            preview: msg,
          },
          attachments: [
            {
              id: Identifier.ascending("part"),
              sessionID: ctx.sessionID,
              messageID: ctx.messageID,
              type: "file",
              mime,
              url: `data:${mime};base64,${Buffer.from(await file.bytes()).toString("base64")}`,
            },
          ],
        }
      }

      const isBinary = await isBinaryFile(filepath, file)
      if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)

      // Truncate and clamp to non-negative integers: a negative value passed to
      // Array.prototype.slice would read from the end of the file and produce
      // wrong line numbers. `||` (not `??`) also maps a NaN offset to 0.
      const limit = Math.max(0, Math.trunc(params.limit ?? DEFAULT_READ_LIMIT))
      const offset = Math.max(0, Math.trunc(params.offset || 0))

      const text = await file.text()
      const lines = text.split("\n")
      const raw = lines
        .slice(offset, offset + limit)
        .map((line) => (line.length > MAX_LINE_LENGTH ? line.substring(0, MAX_LINE_LENGTH) + "..." : line))

      // Displayed line numbers are 1-based even though `offset` is 0-based.
      const content = raw.map((line, index) => `${(index + offset + 1).toString().padStart(5, "0")}| ${line}`)
      const preview = raw.slice(0, 20).join("\n")

      let output = "<file>\n"
      output += content.join("\n")
      if (lines.length > offset + content.length) {
        output += `\n\n(File has more lines. Use 'offset' parameter to read beyond line ${offset + content.length})`
      }
      output += "\n</file>"

      // just warms the lsp client
      LSP.touchFile(filepath, false)
      FileTime.read(ctx.sessionID, filepath)

      return {
        title,
        output,
        metadata: {
          preview,
        },
      }
    },
  })
  /** Recognised image extensions (lower-case, with leading dot) mapped to a format label. */
  const IMAGE_FORMAT_BY_EXTENSION = new Map<string, string>([
    [".jpg", "JPEG"],
    [".jpeg", "JPEG"],
    [".png", "PNG"],
    [".gif", "GIF"],
    [".bmp", "BMP"],
    [".webp", "WebP"],
  ])

  /**
   * Classifies a path as an image purely by its file extension (case-insensitive).
   *
   * @param filePath Path whose extension is inspected; the file is not opened.
   * @returns The format label ("JPEG", "PNG", "GIF", "BMP" or "WebP"), or `false`
   *   when the extension is not a recognised image type.
   */
  function isImageFile(filePath: string): string | false {
    const extension = path.extname(filePath).toLowerCase()
    return IMAGE_FORMAT_BY_EXTENSION.get(extension) ?? false
  }
  /** Extensions (lower-case, with leading dot) treated as binary without inspecting content. */
  const BINARY_EXTENSIONS: ReadonlySet<string> = new Set([
    ".zip",
    ".tar",
    ".gz",
    ".exe",
    ".dll",
    ".so",
    ".class",
    ".jar",
    ".war",
    ".7z",
    ".doc",
    ".docx",
    ".xls",
    ".xlsx",
    ".ppt",
    ".pptx",
    ".odt",
    ".ods",
    ".odp",
    ".bin",
    ".dat",
    ".obj",
    ".o",
    ".a",
    ".lib",
    ".wasm",
    ".pyc",
    ".pyo",
  ])

  /** Number of leading bytes sampled when sniffing a file's content. */
  const BINARY_SNIFF_BYTES = 4096

  /**
   * Heuristically decides whether a file is binary.
   *
   * A file is reported as binary when its extension is in `BINARY_EXTENSIONS`,
   * or when the first `BINARY_SNIFF_BYTES` bytes contain a NUL byte, or when
   * more than 30% of those bytes are control characters other than the
   * whitespace range 9-13. Empty files are reported as not binary.
   *
   * @param filepath Path used only for the extension check.
   * @param file Handle used to stat the file and read its leading bytes.
   * @returns `true` when the file looks binary, otherwise `false`.
   */
  async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
    if (BINARY_EXTENSIONS.has(path.extname(filepath).toLowerCase())) return true

    const { size } = await file.stat()
    if (size === 0) return false

    // Read only the sampled prefix; buffering the whole file just to look at
    // its first few KB is wasteful for large files.
    const sampleSize = Math.min(BINARY_SNIFF_BYTES, size)
    const sample = new Uint8Array(await file.slice(0, sampleSize).arrayBuffer())
    if (sample.length === 0) return false

    let nonPrintableCount = 0
    for (const byte of sample) {
      // A NUL byte is treated as conclusive evidence of binary content.
      if (byte === 0) return true
      if (byte < 9 || (byte > 13 && byte < 32)) nonPrintableCount++
    }

    // If >30% non-printable characters, consider it binary
    return nonPrintableCount / sample.length > 0.3
  }