grep.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. package tools
  2. import (
  3. "bufio"
  4. "bytes"
  5. "context"
  6. _ "embed"
  7. "encoding/json"
  8. "fmt"
  9. "io"
  10. "net/http"
  11. "os"
  12. "os/exec"
  13. "path/filepath"
  14. "regexp"
  15. "sort"
  16. "strings"
  17. "sync"
  18. "time"
  19. "charm.land/fantasy"
  20. "github.com/charmbracelet/crush/internal/fsext"
  21. )
  22. // regexCache provides thread-safe caching of compiled regex patterns
  23. type regexCache struct {
  24. cache map[string]*regexp.Regexp
  25. mu sync.RWMutex
  26. }
  27. // newRegexCache creates a new regex cache
  28. func newRegexCache() *regexCache {
  29. return &regexCache{
  30. cache: make(map[string]*regexp.Regexp),
  31. }
  32. }
  33. // get retrieves a compiled regex from cache or compiles and caches it
  34. func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
  35. // Try to get from cache first (read lock)
  36. rc.mu.RLock()
  37. if regex, exists := rc.cache[pattern]; exists {
  38. rc.mu.RUnlock()
  39. return regex, nil
  40. }
  41. rc.mu.RUnlock()
  42. // Compile the regex (write lock)
  43. rc.mu.Lock()
  44. defer rc.mu.Unlock()
  45. // Double-check in case another goroutine compiled it while we waited
  46. if regex, exists := rc.cache[pattern]; exists {
  47. return regex, nil
  48. }
  49. // Compile and cache the regex
  50. regex, err := regexp.Compile(pattern)
  51. if err != nil {
  52. return nil, err
  53. }
  54. rc.cache[pattern] = regex
  55. return regex, nil
  56. }
  57. // Global regex cache instances
  58. var (
  59. searchRegexCache = newRegexCache()
  60. globRegexCache = newRegexCache()
  61. // Pre-compiled regex for glob conversion (used frequently)
  62. globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
  63. )
// GrepParams is the parameter set accepted by the grep tool. The json and
// description tags describe each field to the caller of the tool.
type GrepParams struct {
	// Pattern is the expression to search for; the tool rejects an empty value.
	Pattern string `json:"pattern" description:"The regex pattern to search for in file contents"`
	// Path is the directory to search; when empty the tool's working directory is used.
	Path string `json:"path,omitempty" description:"The directory to search in. Defaults to the current working directory."`
	// Include optionally restricts the search to files matching a glob.
	Include string `json:"include,omitempty" description:"File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")"`
	// LiteralText makes the tool escape Pattern so it is matched verbatim.
	LiteralText bool `json:"literal_text,omitempty" description:"If true, the pattern will be treated as literal text with special regex characters escaped. Default is false."`
}
  70. type grepMatch struct {
  71. path string
  72. modTime time.Time
  73. lineNum int
  74. charNum int
  75. lineText string
  76. }
// GrepResponseMetadata is the structured metadata attached to a grep tool
// response alongside the human-readable text.
type GrepResponseMetadata struct {
	// NumberOfMatches is the number of matches included in the response.
	NumberOfMatches int `json:"number_of_matches"`
	// Truncated reports whether more matches were found than were returned.
	Truncated bool `json:"truncated"`
}
  81. const (
  82. GrepToolName = "grep"
  83. maxGrepContentWidth = 500
  84. )
// grepDescription holds the contents of grep.md, embedded into the binary at
// build time. It is used as the description text of the grep tool.
//
//go:embed grep.md
var grepDescription []byte
  87. // escapeRegexPattern escapes special regex characters so they're treated as literal characters
  88. func escapeRegexPattern(pattern string) string {
  89. specialChars := []string{"\\", ".", "+", "*", "?", "(", ")", "[", "]", "{", "}", "^", "$", "|"}
  90. escaped := pattern
  91. for _, char := range specialChars {
  92. escaped = strings.ReplaceAll(escaped, char, "\\"+char)
  93. }
  94. return escaped
  95. }
  96. func NewGrepTool(workingDir string) fantasy.AgentTool {
  97. return fantasy.NewAgentTool(
  98. GrepToolName,
  99. string(grepDescription),
  100. func(ctx context.Context, params GrepParams, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
  101. if params.Pattern == "" {
  102. return fantasy.NewTextErrorResponse("pattern is required"), nil
  103. }
  104. // If literal_text is true, escape the pattern
  105. searchPattern := params.Pattern
  106. if params.LiteralText {
  107. searchPattern = escapeRegexPattern(params.Pattern)
  108. }
  109. searchPath := params.Path
  110. if searchPath == "" {
  111. searchPath = workingDir
  112. }
  113. matches, truncated, err := searchFiles(ctx, searchPattern, searchPath, params.Include, 100)
  114. if err != nil {
  115. return fantasy.NewTextErrorResponse(fmt.Sprintf("error searching files: %v", err)), nil
  116. }
  117. var output strings.Builder
  118. if len(matches) == 0 {
  119. output.WriteString("No files found")
  120. } else {
  121. fmt.Fprintf(&output, "Found %d matches\n", len(matches))
  122. currentFile := ""
  123. for _, match := range matches {
  124. if currentFile != match.path {
  125. if currentFile != "" {
  126. output.WriteString("\n")
  127. }
  128. currentFile = match.path
  129. fmt.Fprintf(&output, "%s:\n", filepath.ToSlash(match.path))
  130. }
  131. if match.lineNum > 0 {
  132. lineText := match.lineText
  133. if len(lineText) > maxGrepContentWidth {
  134. lineText = lineText[:maxGrepContentWidth] + "..."
  135. }
  136. if match.charNum > 0 {
  137. fmt.Fprintf(&output, " Line %d, Char %d: %s\n", match.lineNum, match.charNum, lineText)
  138. } else {
  139. fmt.Fprintf(&output, " Line %d: %s\n", match.lineNum, lineText)
  140. }
  141. } else {
  142. fmt.Fprintf(&output, " %s\n", match.path)
  143. }
  144. }
  145. if truncated {
  146. output.WriteString("\n(Results are truncated. Consider using a more specific path or pattern.)")
  147. }
  148. }
  149. return fantasy.WithResponseMetadata(
  150. fantasy.NewTextResponse(output.String()),
  151. GrepResponseMetadata{
  152. NumberOfMatches: len(matches),
  153. Truncated: truncated,
  154. },
  155. ), nil
  156. })
  157. }
  158. func searchFiles(ctx context.Context, pattern, rootPath, include string, limit int) ([]grepMatch, bool, error) {
  159. matches, err := searchWithRipgrep(ctx, pattern, rootPath, include)
  160. if err != nil {
  161. matches, err = searchFilesWithRegex(pattern, rootPath, include)
  162. if err != nil {
  163. return nil, false, err
  164. }
  165. }
  166. sort.Slice(matches, func(i, j int) bool {
  167. return matches[i].modTime.After(matches[j].modTime)
  168. })
  169. truncated := len(matches) > limit
  170. if truncated {
  171. matches = matches[:limit]
  172. }
  173. return matches, truncated, nil
  174. }
  175. func searchWithRipgrep(ctx context.Context, pattern, path, include string) ([]grepMatch, error) {
  176. cmd := getRgSearchCmd(ctx, pattern, path, include)
  177. if cmd == nil {
  178. return nil, fmt.Errorf("ripgrep not found in $PATH")
  179. }
  180. // Only add ignore files if they exist
  181. for _, ignoreFile := range []string{".gitignore", ".crushignore"} {
  182. ignorePath := filepath.Join(path, ignoreFile)
  183. if _, err := os.Stat(ignorePath); err == nil {
  184. cmd.Args = append(cmd.Args, "--ignore-file", ignorePath)
  185. }
  186. }
  187. output, err := cmd.Output()
  188. if err != nil {
  189. if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
  190. return []grepMatch{}, nil
  191. }
  192. return nil, err
  193. }
  194. var matches []grepMatch
  195. for line := range bytes.SplitSeq(bytes.TrimSpace(output), []byte{'\n'}) {
  196. if len(line) == 0 {
  197. continue
  198. }
  199. var match ripgrepMatch
  200. if err := json.Unmarshal(line, &match); err != nil {
  201. continue
  202. }
  203. if match.Type != "match" {
  204. continue
  205. }
  206. for _, m := range match.Data.Submatches {
  207. fi, err := os.Stat(match.Data.Path.Text)
  208. if err != nil {
  209. continue // Skip files we can't access
  210. }
  211. matches = append(matches, grepMatch{
  212. path: match.Data.Path.Text,
  213. modTime: fi.ModTime(),
  214. lineNum: match.Data.LineNumber,
  215. charNum: m.Start + 1, // ensure 1-based
  216. lineText: strings.TrimSpace(match.Data.Lines.Text),
  217. })
  218. // only get the first match of each line
  219. break
  220. }
  221. }
  222. return matches, nil
  223. }
// ripgrepMatch is the subset of one record of ripgrep's JSON output that the
// search code reads. Only records whose Type is "match" are used.
type ripgrepMatch struct {
	// Type is the record kind.
	Type string `json:"type"`
	Data struct {
		// Path identifies the file the record refers to.
		Path struct {
			Text string `json:"text"`
		} `json:"path"`
		// Lines holds the text of the matching line.
		Lines struct {
			Text string `json:"text"`
		} `json:"lines"`
		// LineNumber is the line number of the match within the file.
		LineNumber int `json:"line_number"`
		// Submatches lists the individual matches on the line. Start is
		// treated as a 0-based offset (callers add 1); presumably a byte
		// offset, to be confirmed against ripgrep's documentation.
		Submatches []struct {
			Start int `json:"start"`
		} `json:"submatches"`
	} `json:"data"`
}
  239. func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
  240. matches := []grepMatch{}
  241. // Use cached regex compilation
  242. regex, err := searchRegexCache.get(pattern)
  243. if err != nil {
  244. return nil, fmt.Errorf("invalid regex pattern: %w", err)
  245. }
  246. var includePattern *regexp.Regexp
  247. if include != "" {
  248. regexPattern := globToRegex(include)
  249. includePattern, err = globRegexCache.get(regexPattern)
  250. if err != nil {
  251. return nil, fmt.Errorf("invalid include pattern: %w", err)
  252. }
  253. }
  254. // Create walker with gitignore and crushignore support
  255. walker := fsext.NewFastGlobWalker(rootPath)
  256. err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
  257. if err != nil {
  258. return nil // Skip errors
  259. }
  260. if info.IsDir() {
  261. // Check if directory should be skipped
  262. if walker.ShouldSkip(path) {
  263. return filepath.SkipDir
  264. }
  265. return nil // Continue into directory
  266. }
  267. // Use walker's shouldSkip method for files
  268. if walker.ShouldSkip(path) {
  269. return nil
  270. }
  271. // Skip hidden files (starting with a dot) to match ripgrep's default behavior
  272. base := filepath.Base(path)
  273. if base != "." && strings.HasPrefix(base, ".") {
  274. return nil
  275. }
  276. if includePattern != nil && !includePattern.MatchString(path) {
  277. return nil
  278. }
  279. match, lineNum, charNum, lineText, err := fileContainsPattern(path, regex)
  280. if err != nil {
  281. return nil // Skip files we can't read
  282. }
  283. if match {
  284. matches = append(matches, grepMatch{
  285. path: path,
  286. modTime: info.ModTime(),
  287. lineNum: lineNum,
  288. charNum: charNum,
  289. lineText: lineText,
  290. })
  291. if len(matches) >= 200 {
  292. return filepath.SkipAll
  293. }
  294. }
  295. return nil
  296. })
  297. if err != nil {
  298. return nil, err
  299. }
  300. return matches, nil
  301. }
  302. func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, int, string, error) {
  303. // Only search text files.
  304. if !isTextFile(filePath) {
  305. return false, 0, 0, "", nil
  306. }
  307. file, err := os.Open(filePath)
  308. if err != nil {
  309. return false, 0, 0, "", err
  310. }
  311. defer file.Close()
  312. scanner := bufio.NewScanner(file)
  313. lineNum := 0
  314. for scanner.Scan() {
  315. lineNum++
  316. line := scanner.Text()
  317. if loc := pattern.FindStringIndex(line); loc != nil {
  318. charNum := loc[0] + 1
  319. return true, lineNum, charNum, line, nil
  320. }
  321. }
  322. return false, 0, 0, "", scanner.Err()
  323. }
  324. // isTextFile checks if a file is a text file by examining its MIME type.
  325. func isTextFile(filePath string) bool {
  326. file, err := os.Open(filePath)
  327. if err != nil {
  328. return false
  329. }
  330. defer file.Close()
  331. // Read first 512 bytes for MIME type detection.
  332. buffer := make([]byte, 512)
  333. n, err := file.Read(buffer)
  334. if err != nil && err != io.EOF {
  335. return false
  336. }
  337. // Detect content type.
  338. contentType := http.DetectContentType(buffer[:n])
  339. // Check if it's a text MIME type.
  340. return strings.HasPrefix(contentType, "text/") ||
  341. contentType == "application/json" ||
  342. contentType == "application/xml" ||
  343. contentType == "application/javascript" ||
  344. contentType == "application/x-sh"
  345. }
  346. func globToRegex(glob string) string {
  347. regexPattern := strings.ReplaceAll(glob, ".", "\\.")
  348. regexPattern = strings.ReplaceAll(regexPattern, "*", ".*")
  349. regexPattern = strings.ReplaceAll(regexPattern, "?", ".")
  350. // Use pre-compiled regex instead of compiling each time
  351. regexPattern = globBraceRegex.ReplaceAllStringFunc(regexPattern, func(match string) string {
  352. inner := match[1 : len(match)-1]
  353. return "(" + strings.ReplaceAll(inner, ",", "|") + ")"
  354. })
  355. return regexPattern
  356. }