ls.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. package fsext
  2. import (
  3. "errors"
  4. "log/slog"
  5. "os"
  6. "path/filepath"
  7. "slices"
  8. "strings"
  9. "sync"
  10. "github.com/charlievieth/fastwalk"
  11. "github.com/charmbracelet/crush/internal/csync"
  12. "github.com/charmbracelet/crush/internal/home"
  13. "github.com/go-git/go-git/v5/plumbing/format/gitignore"
  14. )
  15. // fastIgnoreDirs is a set of directory names that are always ignored.
  16. // This provides O(1) lookup for common cases to avoid expensive pattern matching.
  17. var fastIgnoreDirs = map[string]bool{
  18. ".git": true,
  19. ".svn": true,
  20. ".hg": true,
  21. ".bzr": true,
  22. ".vscode": true,
  23. ".idea": true,
  24. "node_modules": true,
  25. "__pycache__": true,
  26. ".pytest_cache": true,
  27. ".cache": true,
  28. ".tmp": true,
  29. ".Trash": true,
  30. ".Spotlight-V100": true,
  31. ".fseventsd": true,
  32. ".crush": true,
  33. "OrbStack": true,
  34. ".local": true,
  35. ".share": true,
  36. }
  37. // commonIgnorePatterns contains commonly ignored files and directories.
  38. // Note: Exact directory names that are in fastIgnoreDirs are handled there for O(1) lookup.
  39. // This list contains wildcard patterns and file-specific patterns.
  40. var commonIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
  41. patterns := []string{
  42. // IDE and editor files (wildcards)
  43. "*.swp",
  44. "*.swo",
  45. "*~",
  46. ".DS_Store",
  47. "Thumbs.db",
  48. // Build artifacts (non-fastIgnoreDirs)
  49. "target",
  50. "build",
  51. "dist",
  52. "out",
  53. "bin",
  54. "obj",
  55. "*.o",
  56. "*.so",
  57. "*.dylib",
  58. "*.dll",
  59. "*.exe",
  60. // Logs and temporary files (wildcards)
  61. "*.log",
  62. "*.tmp",
  63. "*.temp",
  64. // Language-specific (wildcards and non-fastIgnoreDirs)
  65. "*.pyc",
  66. "*.pyo",
  67. "vendor",
  68. "Cargo.lock",
  69. "package-lock.json",
  70. "yarn.lock",
  71. "pnpm-lock.yaml",
  72. }
  73. return parsePatterns(patterns, nil)
  74. })
  75. var homeIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
  76. homeDir := home.Dir()
  77. var lines []string
  78. for _, name := range []string{
  79. filepath.Join(homeDir, ".gitignore"),
  80. filepath.Join(homeDir, ".config", "git", "ignore"),
  81. filepath.Join(homeDir, ".config", "crush", "ignore"),
  82. } {
  83. if bts, err := os.ReadFile(name); err == nil {
  84. lines = append(lines, strings.Split(string(bts), "\n")...)
  85. }
  86. }
  87. return parsePatterns(lines, nil)
  88. })
  89. // parsePatterns parses gitignore pattern strings into Pattern objects.
  90. // domain is the path components where the patterns are defined (nil for global).
  91. func parsePatterns(lines []string, domain []string) []gitignore.Pattern {
  92. var patterns []gitignore.Pattern
  93. for _, line := range lines {
  94. line = strings.TrimSpace(line)
  95. if line == "" || strings.HasPrefix(line, "#") {
  96. continue
  97. }
  98. patterns = append(patterns, gitignore.ParsePattern(line, domain))
  99. }
  100. return patterns
  101. }
  102. type directoryLister struct {
  103. // dirPatterns caches parsed patterns from .gitignore/.crushignore for each directory.
  104. // This avoids re-reading files when building combined matchers.
  105. dirPatterns *csync.Map[string, []gitignore.Pattern]
  106. // combinedMatchers caches a combined matcher for each directory that includes
  107. // all ancestor patterns. This allows O(1) matching per file.
  108. combinedMatchers *csync.Map[string, gitignore.Matcher]
  109. rootPath string
  110. }
  111. func NewDirectoryLister(rootPath string) *directoryLister {
  112. return &directoryLister{
  113. rootPath: rootPath,
  114. dirPatterns: csync.NewMap[string, []gitignore.Pattern](),
  115. combinedMatchers: csync.NewMap[string, gitignore.Matcher](),
  116. }
  117. }
  118. // pathToComponents splits a path into its components for gitignore matching.
  119. func pathToComponents(path string) []string {
  120. path = filepath.ToSlash(path)
  121. if path == "" || path == "." {
  122. return nil
  123. }
  124. return strings.Split(path, "/")
  125. }
  126. // getDirPatterns returns the parsed patterns for a specific directory's
  127. // .gitignore and .crushignore files. Results are cached.
  128. func (dl *directoryLister) getDirPatterns(dir string) []gitignore.Pattern {
  129. return dl.dirPatterns.GetOrSet(dir, func() []gitignore.Pattern {
  130. var allPatterns []gitignore.Pattern
  131. relPath, _ := filepath.Rel(dl.rootPath, dir)
  132. var domain []string
  133. if relPath != "" && relPath != "." {
  134. domain = pathToComponents(relPath)
  135. }
  136. for _, ignoreFile := range []string{".gitignore", ".crushignore"} {
  137. ignPath := filepath.Join(dir, ignoreFile)
  138. if content, err := os.ReadFile(ignPath); err == nil {
  139. lines := strings.Split(string(content), "\n")
  140. allPatterns = append(allPatterns, parsePatterns(lines, domain)...)
  141. }
  142. }
  143. return allPatterns
  144. })
  145. }
  146. // getCombinedMatcher returns a matcher that combines all gitignore patterns
  147. // from the root to the given directory, plus common patterns and home patterns.
  148. // Results are cached per directory, and we reuse parent directory matchers.
  149. func (dl *directoryLister) getCombinedMatcher(dir string) gitignore.Matcher {
  150. return dl.combinedMatchers.GetOrSet(dir, func() gitignore.Matcher {
  151. var allPatterns []gitignore.Pattern
  152. // Add common patterns first (lowest priority).
  153. allPatterns = append(allPatterns, commonIgnorePatterns()...)
  154. // Add home ignore patterns.
  155. allPatterns = append(allPatterns, homeIgnorePatterns()...)
  156. // Collect patterns from root to this directory.
  157. relDir, _ := filepath.Rel(dl.rootPath, dir)
  158. var pathParts []string
  159. if relDir != "" && relDir != "." {
  160. pathParts = pathToComponents(relDir)
  161. }
  162. // Add patterns from each directory from root to current.
  163. currentPath := dl.rootPath
  164. allPatterns = append(allPatterns, dl.getDirPatterns(currentPath)...)
  165. for _, part := range pathParts {
  166. currentPath = filepath.Join(currentPath, part)
  167. allPatterns = append(allPatterns, dl.getDirPatterns(currentPath)...)
  168. }
  169. return gitignore.NewMatcher(allPatterns)
  170. })
  171. }
  172. // shouldIgnore checks if a path should be ignored based on gitignore rules.
  173. // This uses a combined matcher that includes all ancestor patterns for O(1) matching.
  174. func (dl *directoryLister) shouldIgnore(path string, ignorePatterns []string, isDir bool) bool {
  175. base := filepath.Base(path)
  176. // Fast path: O(1) lookup for commonly ignored directories.
  177. if isDir && fastIgnoreDirs[base] {
  178. return true
  179. }
  180. // Check explicit ignore patterns.
  181. if len(ignorePatterns) > 0 {
  182. for _, pattern := range ignorePatterns {
  183. if matched, err := filepath.Match(pattern, base); err == nil && matched {
  184. return true
  185. }
  186. }
  187. }
  188. // Don't apply gitignore rules to the root directory itself.
  189. if path == dl.rootPath {
  190. return false
  191. }
  192. relPath, err := filepath.Rel(dl.rootPath, path)
  193. if err != nil {
  194. relPath = path
  195. }
  196. pathComponents := pathToComponents(relPath)
  197. if len(pathComponents) == 0 {
  198. return false
  199. }
  200. // Get the combined matcher for the parent directory.
  201. parentDir := filepath.Dir(path)
  202. matcher := dl.getCombinedMatcher(parentDir)
  203. if matcher.Match(pathComponents, isDir) {
  204. slog.Debug("Ignoring path", "path", relPath)
  205. return true
  206. }
  207. return false
  208. }
  209. // ListDirectory lists files and directories in the specified path.
  210. func ListDirectory(initialPath string, ignorePatterns []string, depth, limit int) ([]string, bool, error) {
  211. found := csync.NewSlice[string]()
  212. dl := NewDirectoryLister(initialPath)
  213. slog.Debug("Listing directory", "path", initialPath, "depth", depth, "limit", limit, "ignorePatterns", ignorePatterns)
  214. conf := fastwalk.Config{
  215. Follow: true,
  216. ToSlash: fastwalk.DefaultToSlash(),
  217. Sort: fastwalk.SortDirsFirst,
  218. MaxDepth: depth,
  219. }
  220. err := fastwalk.Walk(&conf, initialPath, func(path string, d os.DirEntry, err error) error {
  221. if err != nil {
  222. return nil // Skip files we don't have permission to access
  223. }
  224. isDir := d.IsDir()
  225. if dl.shouldIgnore(path, ignorePatterns, isDir) {
  226. if isDir {
  227. return filepath.SkipDir
  228. }
  229. return nil
  230. }
  231. if path != initialPath {
  232. if isDir {
  233. path = path + string(filepath.Separator)
  234. }
  235. found.Append(path)
  236. }
  237. if limit > 0 && found.Len() >= limit {
  238. return filepath.SkipAll
  239. }
  240. return nil
  241. })
  242. if err != nil && !errors.Is(err, filepath.SkipAll) {
  243. return nil, false, err
  244. }
  245. matches, truncated := truncate(slices.Collect(found.Seq()), limit)
  246. return matches, truncated || errors.Is(err, filepath.SkipAll), nil
  247. }