ripgrep.ts 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. // Ripgrep utility functions
  2. import path from "path"
  3. import { Global } from "../global"
  4. import fs from "fs/promises"
  5. import z from "zod"
  6. import { NamedError } from "@opencode-ai/util/error"
  7. import { lazy } from "../util/lazy"
  8. import { $ } from "bun"
  9. import { ZipReader, BlobReader, BlobWriter } from "@zip.js/zip.js"
  10. import { Log } from "@/util/log"
  11. export namespace Ripgrep {
  // Logger tagged with this namespace's service name.
  const log = Log.create({ service: "ripgrep" })

  /**
   * Builds a fresh `{ text: string }` schema. The JSON messages parsed below wrap
   * paths, line contents and submatch contents in this shape.
   */
  const textField = () => z.object({ text: z.string() })

  /** Timing and counter block carried by `end` and `summary` messages. */
  const Stats = z.object({
    elapsed: z.object({
      secs: z.number(),
      nanos: z.number(),
      human: z.string(),
    }),
    searches: z.number(),
    searches_with_match: z.number(),
    bytes_searched: z.number(),
    bytes_printed: z.number(),
    matched_lines: z.number(),
    matches: z.number(),
  })

  /** `begin` message: names the file a following group of messages refers to. */
  const Begin = z.object({
    type: z.literal("begin"),
    data: z.object({
      path: textField(),
    }),
  })

  /** `match` message: one matching line together with its submatch ranges. */
  export const Match = z.object({
    type: z.literal("match"),
    data: z.object({
      path: textField(),
      lines: textField(),
      line_number: z.number(),
      absolute_offset: z.number(),
      submatches: z.array(
        z.object({
          match: textField(),
          start: z.number(),
          end: z.number(),
        }),
      ),
    }),
  })

  /** `end` message: closes a file and reports its per-file statistics. */
  const End = z.object({
    type: z.literal("end"),
    data: z.object({
      path: textField(),
      binary_offset: z.number().nullable(),
      stats: Stats,
    }),
  })

  /** `summary` message: totals for the whole run. */
  const Summary = z.object({
    type: z.literal("summary"),
    data: z.object({
      elapsed_total: z.object({
        human: z.string(),
        nanos: z.number(),
        secs: z.number(),
      }),
      stats: Stats,
    }),
  })

  /** Any single message accepted when parsing `--json` output line by line. */
  const Result = z.union([Begin, Match, End, Summary])

  // Static types derived from the schemas above.
  export type Result = z.infer<typeof Result>
  export type Match = z.infer<typeof Match>
  export type Begin = z.infer<typeof Begin>
  export type End = z.infer<typeof End>
  export type Summary = z.infer<typeof Summary>
  /**
   * Release-asset lookup keyed by `${process.arch}-${process.platform}`.
   * `platform` is the target name embedded in the archive file name and
   * `extension` is the archive format, which also selects the extraction path.
   */
  const PLATFORM = {
    "arm64-darwin": {
      platform: "aarch64-apple-darwin",
      extension: "tar.gz",
    },
    "arm64-linux": {
      platform: "aarch64-unknown-linux-gnu",
      extension: "tar.gz",
    },
    "x64-darwin": {
      platform: "x86_64-apple-darwin",
      extension: "tar.gz",
    },
    "x64-linux": {
      platform: "x86_64-unknown-linux-musl",
      extension: "tar.gz",
    },
    "x64-win32": {
      platform: "x86_64-pc-windows-msvc",
      extension: "zip",
    },
  } as const
  /** Raised when the downloaded archive cannot be unpacked; `stderr` carries the reason. */
  export const ExtractionFailedError = NamedError.create(
    "RipgrepExtractionFailedError",
    z.object({
      filepath: z.string(),
      stderr: z.string(),
    }),
  )

  /** Raised when no release asset is configured for the current arch/platform pair. */
  export const UnsupportedPlatformError = NamedError.create(
    "RipgrepUnsupportedPlatformError",
    z.object({
      platform: z.string(),
    }),
  )

  /** Raised when the release download responds with a non-OK HTTP status. */
  export const DownloadFailedError = NamedError.create(
    "RipgrepDownloadFailedError",
    z.object({
      url: z.string(),
      status: z.number(),
    }),
  )
  /**
   * Unpacks `rg` from a `.tar.gz` release archive into `Global.Path.bin` by running `tar`.
   *
   * @param input.archivePath - Location of the downloaded archive.
   * @param input.filepath - Expected location of the extracted binary (reported on failure).
   * @param input.platformKey - `${arch}-${platform}` key; selects the member-filter flags.
   * @throws ExtractionFailedError when `tar` exits with a non-zero code.
   */
  async function extractTarGz(input: { archivePath: string; filepath: string; platformKey: string }) {
    const args = ["tar", "-xzf", input.archivePath, "--strip-components=1"]
    // darwin and linux get different member-selection flags, presumably because
    // their default tar implementations spell the filter differently.
    if (input.platformKey.endsWith("-darwin")) args.push("--include=*/rg")
    if (input.platformKey.endsWith("-linux")) args.push("--wildcards", "*/rg")
    const proc = Bun.spawn(args, {
      cwd: Global.Path.bin,
      stderr: "pipe",
      // stdout is never inspected, so do not leave an unread pipe behind.
      stdout: "ignore",
    })
    // Drain stderr while waiting so a chatty tar cannot stall on a full pipe.
    const [exitCode, stderr] = await Promise.all([proc.exited, Bun.readableStreamToText(proc.stderr)])
    if (exitCode !== 0) throw new ExtractionFailedError({ filepath: input.filepath, stderr })
  }

  /**
   * Writes the `rg.exe` member of a zip release archive to `input.filepath`.
   *
   * @param input.archive - Raw bytes of the downloaded archive.
   * @param input.archivePath - Location of the archive on disk (reported on failure).
   * @param input.filepath - Destination of the extracted binary.
   * @throws ExtractionFailedError when the member is missing or cannot be read.
   */
  async function extractZip(input: { archive: ArrayBuffer; archivePath: string; filepath: string }) {
    const zip = new ZipReader(new BlobReader(new Blob([input.archive])))
    try {
      const entries = await zip.getEntries()
      const entry = entries.find((candidate) => candidate.filename.endsWith("rg.exe"))
      if (!entry) {
        throw new ExtractionFailedError({
          filepath: input.archivePath,
          stderr: "rg.exe not found in zip archive",
        })
      }
      const blob = await entry.getData?.(new BlobWriter())
      if (!blob) {
        throw new ExtractionFailedError({
          filepath: input.archivePath,
          stderr: "Failed to extract rg.exe from zip archive",
        })
      }
      await Bun.write(input.filepath, await blob.arrayBuffer())
    } finally {
      // Close the reader on every path, including the failures above.
      await zip.close()
    }
  }

  /**
   * Locates the ripgrep executable, wrapped in `lazy`.
   *
   * Resolution order:
   * 1. an `rg` found by `Bun.which`;
   * 2. a binary already present in `Global.Path.bin`;
   * 3. a release archive downloaded from GitHub and unpacked into `Global.Path.bin`.
   *
   * @returns An object whose `filepath` is the executable's location.
   * @throws UnsupportedPlatformError when no release asset is configured for this host.
   * @throws DownloadFailedError when the download responds with a non-OK status.
   * @throws ExtractionFailedError when the archive cannot be unpacked.
   */
  const state = lazy(async () => {
    const installed = Bun.which("rg")
    if (installed) return { filepath: installed }

    const filepath = path.join(Global.Path.bin, "rg" + (process.platform === "win32" ? ".exe" : ""))
    if (await Bun.file(filepath).exists()) return { filepath }

    const platformKey = `${process.arch}-${process.platform}` as keyof typeof PLATFORM
    const config = PLATFORM[platformKey]
    if (!config) throw new UnsupportedPlatformError({ platform: platformKey })

    const version = "14.1.1"
    const filename = `ripgrep-${version}-${config.platform}.${config.extension}`
    const url = `https://github.com/BurntSushi/ripgrep/releases/download/${version}/${filename}`
    const response = await fetch(url)
    if (!response.ok) throw new DownloadFailedError({ url, status: response.status })

    const archive = await response.arrayBuffer()
    const archivePath = path.join(Global.Path.bin, filename)
    await Bun.write(archivePath, archive)

    try {
      if (config.extension === "tar.gz") await extractTarGz({ archivePath, filepath, platformKey })
      if (config.extension === "zip") await extractZip({ archive, archivePath, filepath })
    } catch (error) {
      // Best-effort cleanup so a failed install does not leave the archive behind;
      // the extraction error is the one worth reporting.
      await fs.unlink(archivePath).catch(() => undefined)
      throw error
    }

    await fs.unlink(archivePath)
    if (!platformKey.endsWith("-win32")) await fs.chmod(filepath, 0o755)
    return { filepath }
  })
  /**
   * Resolves the location of the ripgrep executable by awaiting `state`.
   *
   * @returns The path of the `rg` binary.
   */
  export async function filepath() {
    const resolved = await state()
    return resolved.filepath
  }
  /**
   * Streams the files under `input.cwd` as reported by `rg --files`, one path per yield.
   *
   * `.git` contents are always excluded. Hidden files are included unless
   * `input.hidden` is explicitly `false`.
   *
   * @param input.cwd - Directory to list; must exist and be a directory.
   * @param input.glob - Extra `--glob` filters, passed through verbatim.
   * @param input.hidden - Set to `false` to omit `--hidden`.
   * @param input.follow - Adds `--follow` when truthy.
   * @param input.maxDepth - Adds `--max-depth` when defined.
   * @param input.signal - Aborts the listing; the abort reason is thrown.
   * @throws An `ENOENT`-coded error when `input.cwd` is not a directory.
   */
  export async function* files(input: {
    cwd: string
    glob?: string[]
    hidden?: boolean
    follow?: boolean
    maxDepth?: number
    signal?: AbortSignal
  }) {
    input.signal?.throwIfAborted()

    const args = [await filepath(), "--files", "--glob=!.git/*"]
    if (input.follow) args.push("--follow")
    if (input.hidden !== false) args.push("--hidden")
    if (input.maxDepth !== undefined) args.push(`--max-depth=${input.maxDepth}`)
    for (const g of input.glob ?? []) {
      args.push(`--glob=${g}`)
    }

    // Bun.spawn should throw this, but it incorrectly reports that the executable does not exist.
    // See https://github.com/oven-sh/bun/issues/24012
    if (!(await fs.stat(input.cwd).catch(() => undefined))?.isDirectory()) {
      throw Object.assign(new Error(`No such file or directory: '${input.cwd}'`), {
        code: "ENOENT",
        errno: -2,
        path: input.cwd,
      })
    }

    const proc = Bun.spawn(args, {
      cwd: input.cwd,
      stdout: "pipe",
      stderr: "ignore",
      maxBuffer: 1024 * 1024 * 20,
      signal: input.signal,
    })
    const reader = proc.stdout.getReader()
    const decoder = new TextDecoder()
    let buffer = ""
    // Becomes true only once stdout has been read to its end.
    let drained = false

    try {
      while (true) {
        input.signal?.throwIfAborted()
        const { done, value } = await reader.read()
        if (done) break
        buffer += decoder.decode(value, { stream: true })
        // Handle both Unix (\n) and Windows (\r\n) line endings
        const lines = buffer.split(/\r?\n/)
        // The last element is an incomplete line (or ""); keep it for the next chunk.
        buffer = lines.pop() ?? ""
        for (const line of lines) {
          if (line) yield line
        }
      }
      drained = true
      if (buffer) yield buffer
    } finally {
      if (!drained) {
        // The consumer stopped early or a read failed. Nobody will drain stdout any
        // more, so rg could block on a full pipe and `proc.exited` would never
        // settle. Stop the stream and the process before waiting.
        await reader.cancel().catch(() => undefined)
        proc.kill()
      }
      reader.releaseLock()
      await proc.exited
    }

    input.signal?.throwIfAborted()
  }
  /**
   * Produces a newline-separated, breadth-first listing of the directories that
   * contain files under `input.cwd`.
   *
   * Files whose path contains `.opencode` are ignored, and files at the top level
   * contribute nothing because only directory segments are recorded. Siblings are
   * ordered with `localeCompare`. When `input.limit` cuts the listing short, a final
   * `[N truncated]` line reports how many directories were left out.
   *
   * @param input.cwd - Directory to scan.
   * @param input.limit - Maximum number of directory lines; defaults to all of them.
   * @param input.signal - Forwarded to the file listing.
   * @returns The rendered listing.
   */
  export async function tree(input: { cwd: string; limit?: number; signal?: AbortSignal }) {
    log.info("tree", input)
    const files = await Array.fromAsync(Ripgrep.files({ cwd: input.cwd, signal: input.signal }))

    interface Node {
      name: string
      children: Map<string, Node>
    }
    const makeNode = (name: string): Node => ({ name, children: new Map() })
    const sortedChildren = (node: Node) =>
      Array.from(node.children.values()).sort((a, b) => a.name.localeCompare(b.name))

    // Build the directory trie from every file's parent segments.
    const root = makeNode("")
    for (const file of files) {
      if (file.includes(".opencode")) continue
      const segments = file.split(path.sep)
      if (segments.length < 2) continue
      let cursor = root
      for (const segment of segments.slice(0, -1)) {
        let child = cursor.children.get(segment)
        if (!child) {
          child = makeNode(segment)
          cursor.children.set(segment, child)
        }
        cursor = child
      }
    }

    // Count every directory node below the root.
    let total = 0
    const pending: Node[] = [root]
    for (let node = pending.pop(); node; node = pending.pop()) {
      total += node.children.size
      for (const child of node.children.values()) pending.push(child)
    }

    // Breadth-first walk, emitting at most `limit` directory paths.
    const limit = input.limit ?? total
    const lines: string[] = []
    const queue = sortedChildren(root).map((node) => ({ node, label: node.name }))
    let index = 0
    while (index < queue.length && lines.length < limit) {
      const { node, label } = queue[index++]
      lines.push(label)
      for (const child of sortedChildren(node)) {
        queue.push({ node: child, label: `${label}/${child.name}` })
      }
    }

    const omitted = total - lines.length
    if (omitted > 0) lines.push(`[${omitted} truncated]`)
    return lines.join("\n")
  }
  /**
   * Runs `rg --json` for `input.pattern` inside `input.cwd` and returns the data of
   * every `match` message.
   *
   * The process is started with an argument vector rather than a shell command
   * line, so the pattern, the globs and the binary path reach ripgrep verbatim:
   * whitespace and shell metacharacters in them are neither split nor interpreted.
   *
   * @param input.cwd - Directory to search in.
   * @param input.pattern - Pattern handed to ripgrep after `--`.
   * @param input.glob - Extra `--glob` filters, passed through verbatim.
   * @param input.limit - When truthy, forwarded as `--max-count`.
   * @param input.follow - Adds `--follow` when truthy.
   * @returns The `data` payload of each match; an empty array when ripgrep cannot
   *   be started or exits with a non-zero code (which includes "no matches").
   * @throws When an output line is not valid JSON or does not fit the `Result` schema.
   */
  export async function search(input: {
    cwd: string
    pattern: string
    glob?: string[]
    limit?: number
    follow?: boolean
  }) {
    const args = [await filepath(), "--json", "--hidden", "--glob=!.git/*"]
    if (input.follow) args.push("--follow")
    for (const g of input.glob ?? []) {
      args.push(`--glob=${g}`)
    }
    if (input.limit) {
      args.push(`--max-count=${input.limit}`)
    }
    // Everything after "--" is positional, so a pattern starting with "-" is safe.
    args.push("--", input.pattern)

    let output: string
    try {
      const proc = Bun.spawn(args, {
        cwd: input.cwd,
        // No path argument is given; keep stdin closed so ripgrep searches the
        // working directory rather than waiting on input.
        stdin: "ignore",
        stdout: "pipe",
        stderr: "ignore",
      })
      const [text, exitCode] = await Promise.all([Bun.readableStreamToText(proc.stdout), proc.exited])
      if (exitCode !== 0) return []
      output = text
    } catch {
      // Search is best-effort: a process that cannot be started yields no matches,
      // the same as a failed run.
      return []
    }

    // Handle both Unix (\n) and Windows (\r\n) line endings
    const lines = output.trim().split(/\r?\n/).filter(Boolean)
    // Parse JSON lines from ripgrep output
    return lines
      .map((line) => Result.parse(JSON.parse(line)))
      .filter((message): message is Match => message.type === "match")
      .map((message) => message.data)
  }
  323. }