// index.ts
  1. import { BusEvent } from "@/bus/bus-event"
  2. import z from "zod"
  3. import { $ } from "bun"
  4. import type { BunFile } from "bun"
  5. import { formatPatch, structuredPatch } from "diff"
  6. import path from "path"
  7. import fs from "fs"
  8. import ignore from "ignore"
  9. import { Log } from "../util/log"
  10. import { Filesystem } from "../util/filesystem"
  11. import { Instance } from "../project/instance"
  12. import { Ripgrep } from "./ripgrep"
  13. import fuzzysort from "fuzzysort"
  14. import { Global } from "../global"
  15. export namespace File {
  // Logger for this namespace, created with the `service: "file"` tag.
  const log = Log.create({ service: "file" })
  /**
   * Schema describing one changed file: its path, integer added/removed line
   * counts and a change status. Registered under the schema ref "File".
   */
  export const Info = z
    .object({
      // Path of the changed file.
      path: z.string(),
      // Line counts; both must be integers.
      added: z.number().int(),
      removed: z.number().int(),
      // Kind of change.
      status: z.enum(["added", "deleted", "modified"]),
    })
    .meta({ ref: "File" })
  /** Static type inferred from the `Info` schema. */
  export type Info = z.infer<typeof Info>
  /**
   * Schema for a single directory-listing entry, as built by `list()`.
   * Registered under the schema ref "FileNode".
   */
  export const Node = z
    .object({
      // Entry name (last path segment).
      name: z.string(),
      // Path relative to the instance directory.
      path: z.string(),
      // Full path of the entry.
      absolute: z.string(),
      type: z.enum(["file", "directory"]),
      // Whether the entry matched the loaded ignore rules.
      ignored: z.boolean(),
    })
    .meta({ ref: "FileNode" })
  /** Static type inferred from the `Node` schema. */
  export type Node = z.infer<typeof Node>
  /**
   * Schema for the result of `read()`. `type` is always "text"; when
   * `encoding` is "base64" the `content` field holds base64 data and
   * `mimeType` names its media type. `diff` and `patch` are optional and carry
   * the formatted and structured patch respectively.
   * Registered under the schema ref "FileContent".
   */
  export const Content = z
    .object({
      type: z.literal("text"),
      content: z.string(),
      // Formatted patch text.
      diff: z.string().optional(),
      // Structured patch: file names, optional headers and the list of hunks.
      patch: z
        .object({
          oldFileName: z.string(),
          newFileName: z.string(),
          oldHeader: z.string().optional(),
          newHeader: z.string().optional(),
          hunks: z.array(
            z.object({
              oldStart: z.number(),
              oldLines: z.number(),
              newStart: z.number(),
              newLines: z.number(),
              lines: z.array(z.string()),
            }),
          ),
          index: z.string().optional(),
        })
        .optional(),
      encoding: z.literal("base64").optional(),
      mimeType: z.string().optional(),
    })
    .meta({ ref: "FileContent" })
  /** Static type inferred from the `Content` schema. */
  export type Content = z.infer<typeof Content>
  /**
   * Decides, from the file's MIME type alone, whether its content should be
   * returned base64-encoded instead of as text.
   *
   * @param file Bun file handle whose `type` is inspected.
   * @returns `true` for well-known binary top-level types and for subtypes
   *   containing a known binary marker; `false` when the type is missing, is
   *   `text/*`, declares a charset, or matches nothing.
   */
  async function shouldEncode(file: BunFile): Promise<boolean> {
    const mime = file.type?.toLowerCase()
    log.info("shouldEncode", { type: mime })

    // No type, an explicit text type, or a declared charset all mean "text".
    if (!mime || mime.startsWith("text/") || mime.includes("charset=")) return false

    // Split "top/sub;params" into the top-level type and the bare subtype.
    const [top, rest = ""] = mime.split("/", 2)
    const [sub] = rest.split(";", 1)

    switch (top) {
      case "image":
      case "audio":
      case "video":
      case "font":
      case "model":
      case "multipart":
        return true
    }

    // Substrings that mark a subtype as binary (archives, documents, images of disks, ...).
    const binaryMarkers = [
      "zip",
      "gzip",
      "bzip",
      "compressed",
      "binary",
      "pdf",
      "msword",
      "powerpoint",
      "excel",
      "ogg",
      "exe",
      "dmg",
      "iso",
      "rar",
    ]
    return binaryMarkers.some((marker) => sub.includes(marker))
  }
  /** Bus event definitions for this namespace. */
  export const Event = {
    // "file.edited": payload is an object with the edited file as a string.
    Edited: BusEvent.define("file.edited", z.object({ file: z.string() })),
  }
  /**
   * Per-instance index of file and directory paths used by `search()`.
   *
   * A scan is started as soon as the state is created. `files()` returns the
   * current cache immediately and, when no scan is running, starts a new one
   * in the background so later calls see fresher data.
   *
   * Two scan modes exist:
   * - Global home (instance directory is the user's home and the project id is
   *   "global"): only directories are indexed, two levels deep, skipping
   *   dot-directories and a few well-known heavy folders.
   * - Otherwise: every file reported by `Ripgrep.files` is indexed together
   *   with each of its parent directories.
   */
  const state = Instance.state(async () => {
    type Entry = { files: string[]; dirs: string[] }
    let cache: Entry = { files: [], dirs: [] }
    let fetching = false
    const isGlobalHome = Instance.directory === Global.Path.home && Instance.project.id === "global"

    // Reads a directory's entries; unreadable directories count as empty.
    const listEntries = (dir: string) =>
      fs.promises.readdir(dir, { withFileTypes: true }).catch(() => [] as fs.Dirent[])

    // Indexes first- and second-level directories under the home directory.
    const scanHome = async (result: Entry) => {
      const skipTop = new Set<string>()
      if (process.platform === "darwin") skipTop.add("Library")
      if (process.platform === "win32") skipTop.add("AppData")
      const skipNested = new Set(["node_modules", "dist", "build", "target", "vendor"])

      const dirs = new Set<string>()
      for (const entry of await listEntries(Instance.directory)) {
        if (!entry.isDirectory()) continue
        if (entry.name.startsWith(".") || skipTop.has(entry.name)) continue
        dirs.add(entry.name + "/")
        for (const child of await listEntries(path.join(Instance.directory, entry.name))) {
          if (!child.isDirectory()) continue
          if (child.name.startsWith(".") || skipNested.has(child.name)) continue
          dirs.add(entry.name + "/" + child.name + "/")
        }
      }
      result.dirs = Array.from(dirs).toSorted()
    }

    // Indexes every file from ripgrep plus all of its ancestor directories.
    const scanProject = async (result: Entry) => {
      const seen = new Set<string>()
      for await (const file of Ripgrep.files({ cwd: Instance.directory })) {
        result.files.push(file)
        let current = file
        while (true) {
          const dir = path.dirname(current)
          if (dir === "." || dir === current) break
          // A directory is only ever recorded together with all of its
          // ancestors, so reaching a known one means the rest are known too.
          if (seen.has(dir)) break
          seen.add(dir)
          result.dirs.push(dir + "/")
          current = dir
        }
      }
    }

    const fn = async (result: Entry) => {
      // Disable scanning if in root of file system
      if (Instance.directory === path.parse(Instance.directory).root) return
      fetching = true
      try {
        if (isGlobalHome) await scanHome(result)
        else await scanProject(result)
        cache = result
      } finally {
        // Always clear the flag; otherwise one failed scan would block every
        // future refresh.
        fetching = false
      }
    }

    // Starts a scan without awaiting it; failures are logged instead of
    // surfacing as unhandled rejections.
    const refresh = (result: Entry) => {
      fn(result).catch((error) => {
        log.error("failed to scan files", { error })
      })
    }

    // The first scan fills the initial cache object in place, so partial
    // results are visible while it is still running.
    refresh(cache)

    return {
      async files() {
        if (!fetching) refresh({ files: [], dirs: [] })
        return cache
      },
    }
  })
  /** Creates the per-instance file index state without waiting for it. */
  export function init() {
    void state()
  }
  /**
   * Summarizes uncommitted changes in the instance directory using git.
   *
   * Combines three sources: `git diff --numstat HEAD` (tracked changes),
   * `git ls-files --others --exclude-standard` (untracked files, counted by
   * reading them) and `git diff --name-only --diff-filter=D HEAD` (deletions).
   * A deleted file also appears in the numstat output, so its existing entry
   * is re-labelled "deleted" rather than being reported twice.
   *
   * @returns One entry per changed file with paths relative to the instance
   *   directory; an empty array when the project is not a git project.
   */
  export async function status(): Promise<Info[]> {
    const project = Instance.project
    if (project.vcs !== "git") return []

    const changedFiles: Info[] = []
    // Entries from numstat keyed by the path git printed, so the deletion pass
    // can update them in place.
    const tracked = new Map<string, Info>()

    // numstat prints "-" for binary files; treat that (and anything else
    // non-numeric) as zero so counts are always integers.
    const toCount = (value: string) => {
      const parsed = parseInt(value, 10)
      return Number.isNaN(parsed) ? 0 : parsed
    }

    const diffOutput = await $`git diff --numstat HEAD`.cwd(Instance.directory).quiet().nothrow().text()
    for (const line of diffOutput.trim().split("\n")) {
      const [added, removed, filepath] = line.split("\t")
      // Skip blank or malformed lines instead of emitting NaN/undefined fields.
      if (added === undefined || removed === undefined || !filepath) continue
      const entry: Info = {
        path: filepath,
        added: toCount(added),
        removed: toCount(removed),
        status: "modified",
      }
      tracked.set(filepath, entry)
      changedFiles.push(entry)
    }

    const untrackedOutput = await $`git ls-files --others --exclude-standard`
      .cwd(Instance.directory)
      .quiet()
      .nothrow()
      .text()
    if (untrackedOutput.trim()) {
      for (const filepath of untrackedOutput.trim().split("\n")) {
        try {
          const content = await Bun.file(path.join(Instance.directory, filepath)).text()
          changedFiles.push({
            path: filepath,
            added: content.split("\n").length,
            removed: 0,
            status: "added",
          })
        } catch {
          // Unreadable untracked files are left out of the summary.
          continue
        }
      }
    }

    // Get deleted files
    const deletedOutput = await $`git diff --name-only --diff-filter=D HEAD`
      .cwd(Instance.directory)
      .quiet()
      .nothrow()
      .text()
    if (deletedOutput.trim()) {
      for (const filepath of deletedOutput.trim().split("\n")) {
        const existing = tracked.get(filepath)
        if (existing) {
          // Already listed by numstat (with its removed-line count); only the
          // status needs correcting.
          existing.status = "deleted"
          continue
        }
        changedFiles.push({
          path: filepath,
          added: 0,
          removed: 0, // Could get original line count but would require another git command
          status: "deleted",
        })
      }
    }

    return changedFiles.map((x) => {
      // Anchor relative paths at the instance directory; path.relative alone
      // would resolve them against process.cwd().
      const full = path.isAbsolute(x.path) ? x.path : path.join(Instance.directory, x.path)
      return { ...x, path: path.relative(Instance.directory, full) }
    })
  }
  /**
   * Reads a file inside the instance directory.
   *
   * - A missing file yields empty text content.
   * - When `shouldEncode` says so, the bytes are returned base64-encoded
   *   together with the MIME type.
   * - Otherwise the trimmed text is returned; for git projects with a
   *   non-empty unstaged (or, failing that, staged) diff for the file, a
   *   structured patch against the `HEAD` version and its formatted form are
   *   included as well.
   *
   * @param file Path relative to the instance directory.
   * @throws Error when `Instance.containsPath` rejects the resolved path.
   */
  export async function read(file: string): Promise<Content> {
    using _ = log.time("read", { file })
    const project = Instance.project
    const full = path.join(Instance.directory, file)

    // TODO: Filesystem.contains is lexical only - symlinks inside the project can escape.
    // TODO: On Windows, cross-drive paths bypass this check. Consider realpath canonicalization.
    if (!Instance.containsPath(full)) {
      throw new Error(`Access denied: path escapes project directory`)
    }

    const bunFile = Bun.file(full)
    if (!(await bunFile.exists())) {
      return { type: "text", content: "" }
    }

    if (await shouldEncode(bunFile)) {
      // A failed read degrades to empty content rather than an error.
      const buffer = await bunFile.arrayBuffer().catch(() => new ArrayBuffer(0))
      const content = Buffer.from(buffer).toString("base64")
      const mimeType = bunFile.type || "application/octet-stream"
      return { type: "text", content, mimeType, encoding: "base64" }
    }

    const content = (await bunFile.text().catch(() => "")).trim()

    if (project.vcs === "git") {
      // "--" marks everything after it as a path, so a file name that starts
      // with "-" or matches a revision name cannot be misread by git.
      let changes = await $`git diff -- ${file}`.cwd(Instance.directory).quiet().nothrow().text()
      if (!changes.trim()) {
        changes = await $`git diff --staged -- ${file}`.cwd(Instance.directory).quiet().nothrow().text()
      }
      if (changes.trim()) {
        // NOTE(review): `HEAD:<path>` looks like it is resolved from the
        // repository root, not the cwd - confirm for instances rooted in a
        // subdirectory of the worktree.
        const original = await $`git show HEAD:${file}`.cwd(Instance.directory).quiet().nothrow().text()
        const patch = structuredPatch(file, file, original, content, "old", "new", {
          context: Infinity,
          ignoreWhitespace: true,
        })
        const diff = formatPatch(patch)
        return { type: "text", content, patch, diff }
      }
    }

    return { type: "text", content }
  }
  /**
   * Lists the direct children of a directory inside the instance directory.
   *
   * `.git` and `.DS_Store` entries are always omitted. For git projects each
   * entry is additionally flagged as ignored according to the worktree's
   * `.gitignore` and `.ignore` files (when present). An unreadable directory
   * yields an empty list.
   *
   * @param dir Directory relative to the instance directory; defaults to the
   *   instance directory itself.
   * @returns Entries with directories first, each group ordered by name.
   * @throws Error when `Instance.containsPath` rejects the resolved path.
   */
  export async function list(dir?: string) {
    const skipped = new Set([".git", ".DS_Store"])
    const project = Instance.project

    let isIgnored = (_: string) => false
    if (project.vcs === "git") {
      const matcher = ignore()
      // Load rule files in a fixed order: .gitignore first, then .ignore.
      for (const name of [".gitignore", ".ignore"]) {
        const rules = Bun.file(path.join(Instance.worktree, name))
        if (await rules.exists()) {
          matcher.add(await rules.text())
        }
      }
      isIgnored = matcher.ignores.bind(matcher)
    }

    const resolved = dir ? path.join(Instance.directory, dir) : Instance.directory

    // TODO: Filesystem.contains is lexical only - symlinks inside the project can escape.
    // TODO: On Windows, cross-drive paths bypass this check. Consider realpath canonicalization.
    if (!Instance.containsPath(resolved)) {
      throw new Error(`Access denied: path escapes project directory`)
    }

    const entries = await fs.promises.readdir(resolved, { withFileTypes: true }).catch(() => [])

    const nodes: Node[] = []
    for (const entry of entries) {
      if (skipped.has(entry.name)) continue
      const absolute = path.join(resolved, entry.name)
      const relative = path.relative(Instance.directory, absolute)
      const isDir = entry.isDirectory()
      nodes.push({
        name: entry.name,
        path: relative,
        absolute,
        type: isDir ? "directory" : "file",
        // Directories are matched with a trailing slash.
        ignored: isIgnored(isDir ? relative + "/" : relative),
      })
    }

    return nodes.sort((a, b) => {
      if (a.type === b.type) return a.name.localeCompare(b.name)
      return a.type === "directory" ? -1 : 1
    })
  }
  /**
   * Fuzzy-searches the cached file and directory index.
   *
   * @param input.query Search text; surrounding whitespace is ignored.
   * @param input.limit Maximum number of results (default 100).
   * @param input.dirs When `false` (and `type` is unset) only files are searched.
   * @param input.type Restricts the search to files or directories; takes
   *   precedence over `dirs`.
   * @returns Matching paths. With an empty query, the first `limit` files are
   *   returned for file searches, otherwise the sorted directories. Directory
   *   results list hidden paths last unless the query itself targets hidden
   *   paths.
   */
  export async function search(input: { query: string; limit?: number; dirs?: boolean; type?: "file" | "directory" }) {
    const query = input.query.trim()
    const limit = input.limit ?? 100
    const kind = input.type ?? (input.dirs === false ? "file" : "all")
    log.info("search", { query, kind })

    const index = await (await state()).files()

    // A path is hidden when any segment is a dot-name longer than ".".
    const isHidden = (item: string) =>
      item
        .replaceAll("\\", "/")
        .replace(/\/+$/, "")
        .split("/")
        .some((segment) => segment.length > 1 && segment.startsWith("."))

    // Queries that start with "." or contain "/." are asking for hidden paths.
    const preferHidden = query.startsWith(".") || query.includes("/.")

    // Stable partition: visible paths first, hidden ones after.
    const sortHiddenLast = (items: string[]) =>
      preferHidden ? items : [...items.filter((item) => !isHidden(item)), ...items.filter(isHidden)]

    if (!query) {
      return kind === "file"
        ? index.files.slice(0, limit)
        : sortHiddenLast(index.dirs.toSorted()).slice(0, limit)
    }

    let candidates: string[]
    if (kind === "file") candidates = index.files
    else if (kind === "directory") candidates = index.dirs
    else candidates = [...index.files, ...index.dirs]

    // Over-fetch directory matches so that pushing hidden ones to the end
    // still leaves enough visible results to fill the limit.
    const searchLimit = kind === "directory" && !preferHidden ? limit * 20 : limit
    const matches = fuzzysort.go(query, candidates, { limit: searchLimit }).map((match) => match.target)
    const output = kind === "directory" ? sortHiddenLast(matches).slice(0, limit) : matches

    log.info("search", { query, kind, results: output.length })
    return output
  }
  360. }