ripgrep.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. // Ripgrep utility functions
  2. import path from "path"
  3. import { Global } from "../global"
  4. import fs from "fs/promises"
  5. import z from "zod"
  6. import { NamedError } from "@opencode-ai/util/error"
  7. import { lazy } from "../util/lazy"
  8. import { $ } from "bun"
  9. import { ZipReader, BlobReader, BlobWriter } from "@zip.js/zip.js"
  10. import { Log } from "@/util/log"
  11. export namespace Ripgrep {
  const log = Log.create({ service: "ripgrep" })

  /** `{ text }` wrapper shared by the path, line and submatch fields of the JSON messages below. */
  const TextField = z.object({ text: z.string() })

  /** Counters carried by `end` and `summary` messages. */
  const Stats = z.object({
    elapsed: z.object({
      secs: z.number(),
      nanos: z.number(),
      human: z.string(),
    }),
    searches: z.number(),
    searches_with_match: z.number(),
    bytes_searched: z.number(),
    bytes_printed: z.number(),
    matched_lines: z.number(),
    matches: z.number(),
  })

  /** Message tagged `begin`; carries only the file path. */
  const Begin = z.object({
    type: z.literal("begin"),
    data: z.object({ path: TextField }),
  })

  /** Message tagged `match`: the matching line, its position and the matched spans within it. */
  export const Match = z.object({
    type: z.literal("match"),
    data: z.object({
      path: TextField,
      lines: TextField,
      line_number: z.number(),
      absolute_offset: z.number(),
      submatches: z.array(
        z.object({
          match: TextField,
          start: z.number(),
          end: z.number(),
        }),
      ),
    }),
  })

  /** Message tagged `end`; carries the file path, a nullable binary offset and per-file stats. */
  const End = z.object({
    type: z.literal("end"),
    data: z.object({
      path: TextField,
      binary_offset: z.number().nullable(),
      stats: Stats,
    }),
  })

  /** Message tagged `summary`; carries the total elapsed time and aggregate stats. */
  const Summary = z.object({
    type: z.literal("summary"),
    data: z.object({
      // Key order intentionally differs from `Stats.elapsed`; kept so parsed output is unchanged.
      elapsed_total: z.object({
        human: z.string(),
        nanos: z.number(),
        secs: z.number(),
      }),
      stats: Stats,
    }),
  })

  /** Any single message parsed from a line of `rg --json` output. */
  const Result = z.union([Begin, Match, End, Summary])

  export type Result = z.infer<typeof Result>
  export type Match = z.infer<typeof Match>
  export type Begin = z.infer<typeof Begin>
  export type End = z.infer<typeof End>
  export type Summary = z.infer<typeof Summary>
  /**
   * Release-asset lookup keyed by `${process.arch}-${process.platform}`.
   *
   * `platform` is the target segment used in the archive file name and
   * `extension` is the archive format of that asset.
   */
  const PLATFORM = {
    "arm64-darwin": { platform: "aarch64-apple-darwin", extension: "tar.gz" },
    "arm64-linux": { platform: "aarch64-unknown-linux-gnu", extension: "tar.gz" },
    "x64-darwin": { platform: "x86_64-apple-darwin", extension: "tar.gz" },
    "x64-linux": { platform: "x86_64-unknown-linux-musl", extension: "tar.gz" },
    "x64-win32": { platform: "x86_64-pc-windows-msvc", extension: "zip" },
  } as const
  /** Raised when the ripgrep binary cannot be unpacked; `stderr` holds the extractor's diagnostic text. */
  export const ExtractionFailedError = NamedError.create(
    "RipgrepExtractionFailedError",
    z.object({ filepath: z.string(), stderr: z.string() }),
  )

  /** Raised when `PLATFORM` has no entry for the current `${arch}-${platform}` key. */
  export const UnsupportedPlatformError = NamedError.create(
    "RipgrepUnsupportedPlatformError",
    z.object({ platform: z.string() }),
  )

  /** Raised when the release download responds with a non-OK HTTP status. */
  export const DownloadFailedError = NamedError.create(
    "RipgrepDownloadFailedError",
    z.object({ url: z.string(), status: z.number() }),
  )
  /**
   * Resolves the ripgrep executable to use, installing it when necessary.
   *
   * Resolution order:
   * 1. an `rg` found by `Bun.which`;
   * 2. a binary already present in `Global.Path.bin`;
   * 3. the pinned GitHub release, downloaded and extracted into `Global.Path.bin`.
   *
   * Wrapped in `lazy` (presumably evaluated once and cached; confirm in ../util/lazy).
   *
   * @throws UnsupportedPlatformError when `PLATFORM` has no entry for this arch/platform.
   * @throws DownloadFailedError when the release download does not return an OK status.
   * @throws ExtractionFailedError when the binary cannot be extracted from the archive.
   */
  const state = lazy(async () => {
    const onPath = Bun.which("rg")
    if (onPath) return { filepath: onPath }

    const filepath = path.join(Global.Path.bin, "rg" + (process.platform === "win32" ? ".exe" : ""))
    if (await Bun.file(filepath).exists()) return { filepath }

    // The cast only satisfies the indexer; unknown keys still yield `undefined` at runtime.
    const platformKey = `${process.arch}-${process.platform}` as keyof typeof PLATFORM
    const config = PLATFORM[platformKey]
    if (!config) throw new UnsupportedPlatformError({ platform: platformKey })

    const version = "14.1.1"
    const filename = `ripgrep-${version}-${config.platform}.${config.extension}`
    const url = `https://github.com/BurntSushi/ripgrep/releases/download/${version}/${filename}`

    const response = await fetch(url)
    if (!response.ok) throw new DownloadFailedError({ url, status: response.status })
    const buffer = await response.arrayBuffer()

    const archivePath = path.join(Global.Path.bin, filename)
    await Bun.write(archivePath, buffer)

    try {
      if (config.extension === "tar.gz") {
        const args = ["tar", "-xzf", archivePath, "--strip-components=1"]
        // Restrict extraction to the `rg` binary; the flag differs between the two platforms' tar.
        if (platformKey.endsWith("-darwin")) args.push("--include=*/rg")
        if (platformKey.endsWith("-linux")) args.push("--wildcards", "*/rg")
        const proc = Bun.spawn(args, {
          cwd: Global.Path.bin,
          stderr: "pipe",
          // stdout is never consumed, so do not allocate a pipe for it.
          stdout: "ignore",
        })
        // Drain stderr while waiting so a chatty tar cannot stall on a full pipe.
        const [stderr, exitCode] = await Promise.all([Bun.readableStreamToText(proc.stderr), proc.exited])
        if (exitCode !== 0) throw new ExtractionFailedError({ filepath, stderr })
      }

      if (config.extension === "zip") {
        // The archive bytes are already in memory; no need to read the file back from disk.
        const zipFileReader = new ZipReader(new BlobReader(new Blob([buffer])))
        try {
          const entries = await zipFileReader.getEntries()
          const rgEntry = entries.find((entry) => entry.filename.endsWith("rg.exe"))
          if (!rgEntry) {
            throw new ExtractionFailedError({
              filepath: archivePath,
              stderr: "rg.exe not found in zip archive",
            })
          }
          const rgBlob =
            "getData" in rgEntry && rgEntry.getData ? await rgEntry.getData(new BlobWriter()) : undefined
          if (!rgBlob) {
            throw new ExtractionFailedError({
              filepath: archivePath,
              stderr: "Failed to extract rg.exe from zip archive",
            })
          }
          await Bun.write(filepath, await rgBlob.arrayBuffer())
        } finally {
          // Release the reader on both success and failure.
          await zipFileReader.close()
        }
      }
    } catch (error) {
      // Best-effort cleanup so a failed install does not leave the archive behind;
      // the extraction error is the one worth reporting.
      await fs.unlink(archivePath).catch(() => undefined)
      throw error
    }

    await fs.unlink(archivePath)
    if (!platformKey.endsWith("-win32")) await fs.chmod(filepath, 0o755)
    return { filepath }
  })
  /** Returns the path of the ripgrep executable resolved by `state`. */
  export async function filepath() {
    const resolved = await state()
    return resolved.filepath
  }
  /**
   * Streams the file paths printed by `rg --files` in `input.cwd`, one per yielded string.
   *
   * @param input.cwd Directory to run ripgrep in; must exist and be a directory.
   * @param input.glob Extra `--glob` filters, applied after the built-in `!.git/*` exclusion.
   * @param input.hidden Pass `--hidden` unless explicitly `false`.
   * @param input.follow Pass `--follow` unless explicitly `false`.
   * @param input.maxDepth Forwarded as `--max-depth` when defined.
   * @throws An `ENOENT`-tagged `Error` when `input.cwd` is not an existing directory.
   */
  export async function* files(input: {
    cwd: string
    glob?: string[]
    hidden?: boolean
    follow?: boolean
    maxDepth?: number
  }) {
    const args = [await filepath(), "--files", "--glob=!.git/*"]
    if (input.follow !== false) args.push("--follow")
    if (input.hidden !== false) args.push("--hidden")
    if (input.maxDepth !== undefined) args.push(`--max-depth=${input.maxDepth}`)
    for (const g of input.glob ?? []) args.push(`--glob=${g}`)

    // Bun.spawn should throw this, but it incorrectly reports that the executable does not exist.
    // See https://github.com/oven-sh/bun/issues/24012
    if (!(await fs.stat(input.cwd).catch(() => undefined))?.isDirectory()) {
      throw Object.assign(new Error(`No such file or directory: '${input.cwd}'`), {
        code: "ENOENT",
        errno: -2,
        path: input.cwd,
      })
    }

    const proc = Bun.spawn(args, {
      cwd: input.cwd,
      stdout: "pipe",
      stderr: "ignore",
      maxBuffer: 1024 * 1024 * 20,
    })
    const reader = proc.stdout.getReader()
    const decoder = new TextDecoder()
    let buffer = ""
    // Becomes true once stdout has been read to its end.
    let drained = false
    try {
      while (true) {
        const { done, value } = await reader.read()
        if (done) break
        buffer += decoder.decode(value, { stream: true })
        // Handle both Unix (\n) and Windows (\r\n) line endings
        const lines = buffer.split(/\r?\n/)
        // The last element is an incomplete line (or "") and is carried into the next chunk.
        buffer = lines.pop() ?? ""
        for (const line of lines) {
          if (line) yield line
        }
      }
      // Flush bytes the streaming decoder may still be holding.
      buffer += decoder.decode()
      drained = true
      if (buffer) yield buffer
    } finally {
      // If the consumer stopped early (or reading failed), rg is still running and nobody
      // reads its output any more; terminate it so waiting for exit cannot hang.
      if (!drained) proc.kill()
      reader.releaseLock()
      await proc.exited
    }
  }
  /**
   * Renders a tab-indented, breadth-first-truncated directory tree of the files under `input.cwd`.
   *
   * Files are listed via `Ripgrep.files`; any path containing `.opencode` is skipped.
   * Within a directory, entries with children sort before leaf entries, then by name.
   * At most `limit` entries are emitted, filled level by level; directories whose
   * children did not all fit get a `[N truncated]` marker entry.
   *
   * @param input.cwd Directory to list.
   * @param input.limit Maximum number of entries to emit (default 50).
   * @returns The tree as newline-joined lines; entries with children end in `/`.
   */
  export async function tree(input: { cwd: string; limit?: number }) {
    log.info("tree", input)
    const files = await Array.fromAsync(Ripgrep.files({ cwd: input.cwd }))

    interface Node {
      path: string[]
      children: Node[]
      // Child lookup by last path segment, maintained by `getPath`, so insertion
      // does not rescan `children` for every segment.
      index: Map<string, Node>
    }

    const makeNode = (path: string[]): Node => ({ path, children: [], index: new Map() })

    // Walks `parts` down from `node`; creates missing nodes when `create` is set,
    // otherwise returns undefined as soon as a segment is missing.
    function getPath(node: Node, parts: string[], create: boolean) {
      let current = node
      for (const part of parts) {
        let existing = current.index.get(part)
        if (!existing) {
          if (!create) return
          existing = makeNode(current.path.concat(part))
          current.children.push(existing)
          current.index.set(part, existing)
        }
        current = existing
      }
      return current
    }

    const root = makeNode([])
    for (const file of files) {
      if (file.includes(".opencode")) continue
      getPath(root, file.split(path.sep), true)
    }

    // Entries with children first, then alphabetical by last path segment.
    function sort(node: Node) {
      node.children.sort((a, b) => {
        if (!a.children.length && b.children.length) return 1
        if (!b.children.length && a.children.length) return -1
        return a.path.at(-1)!.localeCompare(b.path.at(-1)!)
      })
      for (const child of node.children) {
        sort(child)
      }
    }
    sort(root)

    const result = makeNode([])
    const limit = input.limit ?? 50
    let processed = 0
    let current = [root]
    while (current.length > 0) {
      // Collect the next level and the widest child count with plain loops:
      // spreading large arrays into call arguments can throw RangeError.
      const next: Node[] = []
      let max = 0
      for (const node of current) {
        for (const child of node.children) next.push(child)
        if (node.children.length > max) max = node.children.length
      }

      // Round-robin: take the i-th child of every node on this level before the (i+1)-th,
      // so the budget is spread across sibling directories.
      for (let i = 0; i < max && processed < limit; i++) {
        for (const node of current) {
          const child = node.children[i]
          if (!child) continue
          getPath(result, child.path, true)
          processed++
          if (processed >= limit) break
        }
      }

      if (processed >= limit) {
        // Mark every emitted directory whose children were not all emitted.
        for (const level of [current, next]) {
          for (const node of level) {
            const compare = getPath(result, node.path, false)
            if (!compare) continue
            const diff = node.children.length - compare.children.length
            if (diff !== 0) {
              compare.children.push(makeNode(compare.path.concat(`[${diff} truncated]`)))
            }
          }
        }
        break
      }
      current = next
    }

    const lines: string[] = []
    function render(node: Node, depth: number) {
      const indent = "\t".repeat(depth)
      lines.push(indent + node.path.at(-1) + (node.children.length ? "/" : ""))
      for (const child of node.children) {
        render(child, depth + 1)
      }
    }
    for (const child of result.children) {
      render(child, 0)
    }
    return lines.join("\n")
  }
  /**
   * Runs `rg --json` for `input.pattern` in `input.cwd` and returns the `data` payload of
   * every `match` message.
   *
   * Arguments are passed to ripgrep as an argv array, never through a shell, so patterns,
   * globs and an executable path containing spaces or shell metacharacters are taken literally.
   *
   * @param input.cwd Directory to search in.
   * @param input.pattern Pattern handed to ripgrep after `--`.
   * @param input.glob Extra `--glob` filters, applied after the built-in `!.git/*` exclusion.
   * @param input.limit Forwarded as `--max-count` when truthy
   *   (NOTE(review): ripgrep documents this as a per-file limit; confirm callers expect that).
   * @param input.follow Pass `--follow` unless explicitly `false`.
   * @returns Match payloads, or `[]` when ripgrep exits with a non-zero code.
   * @throws When an output line is not valid JSON or does not match the `Result` schema.
   */
  export async function search(input: {
    cwd: string
    pattern: string
    glob?: string[]
    limit?: number
    follow?: boolean
  }) {
    const args = [await filepath(), "--json", "--hidden", "--glob=!.git/*"]
    if (input.follow !== false) args.push("--follow")
    for (const g of input.glob ?? []) args.push(`--glob=${g}`)
    if (input.limit) args.push(`--max-count=${input.limit}`)
    // `--` ends option parsing so a pattern starting with `-` is not read as a flag.
    args.push("--", input.pattern)

    const proc = Bun.spawn(args, {
      cwd: input.cwd,
      stdout: "pipe",
      stderr: "ignore",
    })
    // Read stdout while waiting for exit so a large result set cannot stall on a full pipe.
    const [output, exitCode] = await Promise.all([new Response(proc.stdout).text(), proc.exited])
    if (exitCode !== 0) {
      return []
    }

    const matches: Match["data"][] = []
    // Handle both Unix (\n) and Windows (\r\n) line endings
    for (const line of output.trim().split(/\r?\n/)) {
      if (!line) continue
      // Each non-empty line is one JSON message; keep only the `match` payloads.
      const message = Result.parse(JSON.parse(line))
      if (message.type === "match") matches.push(message.data)
    }
    return matches
  }
  361. }