grep.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. package tools
  2. import (
  3. "bufio"
  4. "bytes"
  5. "cmp"
  6. "context"
  7. _ "embed"
  8. "encoding/json"
  9. "fmt"
  10. "io"
  11. "net/http"
  12. "os"
  13. "os/exec"
  14. "path/filepath"
  15. "regexp"
  16. "sort"
  17. "strings"
  18. "time"
  19. "charm.land/fantasy"
  20. "github.com/charmbracelet/crush/internal/config"
  21. "github.com/charmbracelet/crush/internal/csync"
  22. "github.com/charmbracelet/crush/internal/fsext"
  23. )
  24. // regexCache provides thread-safe caching of compiled regex patterns
  25. type regexCache struct {
  26. *csync.Map[string, *regexp.Regexp]
  27. }
  28. // newRegexCache creates a new regex cache
  29. func newRegexCache() *regexCache {
  30. return &regexCache{
  31. Map: csync.NewMap[string, *regexp.Regexp](),
  32. }
  33. }
  34. // get retrieves a compiled regex from cache or compiles and caches it
  35. func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
  36. var rerr error
  37. return rc.GetOrSet(pattern, func() *regexp.Regexp {
  38. regex, err := regexp.Compile(pattern)
  39. if err != nil {
  40. rerr = err
  41. }
  42. return regex
  43. }), rerr
  44. }
  45. // ResetCache clears compiled regex caches to prevent unbounded growth across sessions.
  46. func ResetCache() {
  47. searchRegexCache.Reset(map[string]*regexp.Regexp{})
  48. globRegexCache.Reset(map[string]*regexp.Regexp{})
  49. }
  50. // Global regex cache instances
  51. var (
  52. searchRegexCache = newRegexCache()
  53. globRegexCache = newRegexCache()
  54. // Pre-compiled regex for glob conversion (used frequently)
  55. globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
  56. )
  57. type GrepParams struct {
  58. Pattern string `json:"pattern" description:"The regex pattern to search for in file contents"`
  59. Path string `json:"path,omitempty" description:"The directory to search in. Defaults to the current working directory."`
  60. Include string `json:"include,omitempty" description:"File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")"`
  61. LiteralText bool `json:"literal_text,omitempty" description:"If true, the pattern will be treated as literal text with special regex characters escaped. Default is false."`
  62. }
  63. type grepMatch struct {
  64. path string
  65. modTime time.Time
  66. lineNum int
  67. charNum int
  68. lineText string
  69. }
  70. type GrepResponseMetadata struct {
  71. NumberOfMatches int `json:"number_of_matches"`
  72. Truncated bool `json:"truncated"`
  73. }
  74. const (
  75. GrepToolName = "grep"
  76. maxGrepContentWidth = 500
  77. )
  78. //go:embed grep.md
  79. var grepDescription []byte
  80. // escapeRegexPattern escapes special regex characters so they're treated as literal characters
  81. func escapeRegexPattern(pattern string) string {
  82. specialChars := []string{"\\", ".", "+", "*", "?", "(", ")", "[", "]", "{", "}", "^", "$", "|"}
  83. escaped := pattern
  84. for _, char := range specialChars {
  85. escaped = strings.ReplaceAll(escaped, char, "\\"+char)
  86. }
  87. return escaped
  88. }
  89. func NewGrepTool(workingDir string, config config.ToolGrep) fantasy.AgentTool {
  90. return fantasy.NewAgentTool(
  91. GrepToolName,
  92. FirstLineDescription(grepDescription),
  93. func(ctx context.Context, params GrepParams, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
  94. if params.Pattern == "" {
  95. return fantasy.NewTextErrorResponse("pattern is required"), nil
  96. }
  97. searchPattern := params.Pattern
  98. if params.LiteralText {
  99. searchPattern = escapeRegexPattern(params.Pattern)
  100. }
  101. searchPath := cmp.Or(params.Path, workingDir)
  102. searchCtx, cancel := context.WithTimeout(ctx, config.GetTimeout())
  103. defer cancel()
  104. matches, truncated, err := searchFiles(searchCtx, searchPattern, searchPath, params.Include, 100)
  105. if err != nil {
  106. return fantasy.NewTextErrorResponse(fmt.Sprintf("error searching files: %v", err)), nil
  107. }
  108. var output strings.Builder
  109. if len(matches) == 0 {
  110. output.WriteString("No files found")
  111. } else {
  112. fmt.Fprintf(&output, "Found %d matches\n", len(matches))
  113. currentFile := ""
  114. for _, match := range matches {
  115. if currentFile != match.path {
  116. if currentFile != "" {
  117. output.WriteString("\n")
  118. }
  119. currentFile = match.path
  120. fmt.Fprintf(&output, "%s:\n", filepath.ToSlash(match.path))
  121. }
  122. if match.lineNum > 0 {
  123. lineText := match.lineText
  124. if len(lineText) > maxGrepContentWidth {
  125. lineText = lineText[:maxGrepContentWidth] + "..."
  126. }
  127. if match.charNum > 0 {
  128. fmt.Fprintf(&output, " Line %d, Char %d: %s\n", match.lineNum, match.charNum, lineText)
  129. } else {
  130. fmt.Fprintf(&output, " Line %d: %s\n", match.lineNum, lineText)
  131. }
  132. } else {
  133. fmt.Fprintf(&output, " %s\n", match.path)
  134. }
  135. }
  136. if truncated {
  137. output.WriteString("\n(Results are truncated. Consider using a more specific path or pattern.)")
  138. }
  139. }
  140. return fantasy.WithResponseMetadata(
  141. fantasy.NewTextResponse(output.String()),
  142. GrepResponseMetadata{
  143. NumberOfMatches: len(matches),
  144. Truncated: truncated,
  145. },
  146. ), nil
  147. })
  148. }
  149. func searchFiles(ctx context.Context, pattern, rootPath, include string, limit int) ([]grepMatch, bool, error) {
  150. matches, err := searchWithRipgrep(ctx, pattern, rootPath, include)
  151. if err != nil {
  152. matches, err = searchFilesWithRegex(pattern, rootPath, include)
  153. if err != nil {
  154. return nil, false, err
  155. }
  156. }
  157. sort.Slice(matches, func(i, j int) bool {
  158. return matches[i].modTime.After(matches[j].modTime)
  159. })
  160. truncated := len(matches) > limit
  161. if truncated {
  162. matches = matches[:limit]
  163. }
  164. return matches, truncated, nil
  165. }
  166. func searchWithRipgrep(ctx context.Context, pattern, path, include string) ([]grepMatch, error) {
  167. cmd := getRgSearchCmd(ctx, pattern, path, include)
  168. if cmd == nil {
  169. return nil, fmt.Errorf("ripgrep not found in $PATH")
  170. }
  171. // Only add ignore files if they exist
  172. for _, ignoreFile := range []string{".gitignore", ".crushignore"} {
  173. ignorePath := filepath.Join(path, ignoreFile)
  174. if _, err := os.Stat(ignorePath); err == nil {
  175. cmd.Args = append(cmd.Args, "--ignore-file", ignorePath)
  176. }
  177. }
  178. output, err := cmd.Output()
  179. if err != nil {
  180. if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
  181. return []grepMatch{}, nil
  182. }
  183. return nil, err
  184. }
  185. var matches []grepMatch
  186. for line := range bytes.SplitSeq(bytes.TrimSpace(output), []byte{'\n'}) {
  187. if len(line) == 0 {
  188. continue
  189. }
  190. var match ripgrepMatch
  191. if err := json.Unmarshal(line, &match); err != nil {
  192. continue
  193. }
  194. if match.Type != "match" {
  195. continue
  196. }
  197. for _, m := range match.Data.Submatches {
  198. fi, err := os.Stat(match.Data.Path.Text)
  199. if err != nil {
  200. continue // Skip files we can't access
  201. }
  202. matches = append(matches, grepMatch{
  203. path: match.Data.Path.Text,
  204. modTime: fi.ModTime(),
  205. lineNum: match.Data.LineNumber,
  206. charNum: m.Start + 1, // ensure 1-based
  207. lineText: strings.TrimSpace(match.Data.Lines.Text),
  208. })
  209. // only get the first match of each line
  210. break
  211. }
  212. }
  213. return matches, nil
  214. }
  215. type ripgrepMatch struct {
  216. Type string `json:"type"`
  217. Data struct {
  218. Path struct {
  219. Text string `json:"text"`
  220. } `json:"path"`
  221. Lines struct {
  222. Text string `json:"text"`
  223. } `json:"lines"`
  224. LineNumber int `json:"line_number"`
  225. Submatches []struct {
  226. Start int `json:"start"`
  227. } `json:"submatches"`
  228. } `json:"data"`
  229. }
  230. func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
  231. matches := []grepMatch{}
  232. // Use cached regex compilation
  233. regex, err := searchRegexCache.get(pattern)
  234. if err != nil {
  235. return nil, fmt.Errorf("invalid regex pattern: %w", err)
  236. }
  237. var includePattern *regexp.Regexp
  238. if include != "" {
  239. regexPattern := globToRegex(include)
  240. includePattern, err = globRegexCache.get(regexPattern)
  241. if err != nil {
  242. return nil, fmt.Errorf("invalid include pattern: %w", err)
  243. }
  244. }
  245. // Create walker with gitignore and crushignore support
  246. walker := fsext.NewFastGlobWalker(rootPath)
  247. err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
  248. if err != nil {
  249. return nil // Skip errors
  250. }
  251. if info.IsDir() {
  252. // Check if directory should be skipped
  253. if walker.ShouldSkip(path) {
  254. return filepath.SkipDir
  255. }
  256. return nil // Continue into directory
  257. }
  258. // Use walker's shouldSkip method for files
  259. if walker.ShouldSkip(path) {
  260. return nil
  261. }
  262. // Skip hidden files (starting with a dot) to match ripgrep's default behavior
  263. base := filepath.Base(path)
  264. if base != "." && strings.HasPrefix(base, ".") {
  265. return nil
  266. }
  267. if includePattern != nil && !includePattern.MatchString(path) {
  268. return nil
  269. }
  270. match, lineNum, charNum, lineText, err := fileContainsPattern(path, regex)
  271. if err != nil {
  272. return nil // Skip files we can't read
  273. }
  274. if match {
  275. matches = append(matches, grepMatch{
  276. path: path,
  277. modTime: info.ModTime(),
  278. lineNum: lineNum,
  279. charNum: charNum,
  280. lineText: lineText,
  281. })
  282. if len(matches) >= 200 {
  283. return filepath.SkipAll
  284. }
  285. }
  286. return nil
  287. })
  288. if err != nil {
  289. return nil, err
  290. }
  291. return matches, nil
  292. }
  293. func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, int, string, error) {
  294. // Only search text files.
  295. if !isTextFile(filePath) {
  296. return false, 0, 0, "", nil
  297. }
  298. file, err := os.Open(filePath)
  299. if err != nil {
  300. return false, 0, 0, "", err
  301. }
  302. defer file.Close()
  303. scanner := bufio.NewScanner(file)
  304. lineNum := 0
  305. for scanner.Scan() {
  306. lineNum++
  307. line := scanner.Text()
  308. if loc := pattern.FindStringIndex(line); loc != nil {
  309. charNum := loc[0] + 1
  310. return true, lineNum, charNum, line, nil
  311. }
  312. }
  313. return false, 0, 0, "", scanner.Err()
  314. }
  315. // isTextFile checks if a file is a text file by examining its MIME type.
  316. func isTextFile(filePath string) bool {
  317. file, err := os.Open(filePath)
  318. if err != nil {
  319. return false
  320. }
  321. defer file.Close()
  322. // Read first 512 bytes for MIME type detection.
  323. buffer := make([]byte, 512)
  324. n, err := file.Read(buffer)
  325. if err != nil && err != io.EOF {
  326. return false
  327. }
  328. // Detect content type.
  329. contentType := http.DetectContentType(buffer[:n])
  330. // Check if it's a text MIME type.
  331. return strings.HasPrefix(contentType, "text/") ||
  332. contentType == "application/json" ||
  333. contentType == "application/xml" ||
  334. contentType == "application/javascript" ||
  335. contentType == "application/x-sh"
  336. }
  337. func globToRegex(glob string) string {
  338. regexPattern := strings.ReplaceAll(glob, ".", "\\.")
  339. regexPattern = strings.ReplaceAll(regexPattern, "*", ".*")
  340. regexPattern = strings.ReplaceAll(regexPattern, "?", ".")
  341. // Use pre-compiled regex instead of compiling each time
  342. regexPattern = globBraceRegex.ReplaceAllStringFunc(regexPattern, func(match string) string {
  343. inner := match[1 : len(match)-1]
  344. return "(" + strings.ReplaceAll(inner, ",", "|") + ")"
  345. })
  346. return regexPattern
  347. }