grep_test.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. package tools
  2. import (
  3. "os"
  4. "path/filepath"
  5. "regexp"
  6. "testing"
  7. "github.com/stretchr/testify/require"
  8. )
  9. func TestRegexCache(t *testing.T) {
  10. cache := newRegexCache()
  11. // Test basic caching
  12. pattern := "test.*pattern"
  13. regex1, err := cache.get(pattern)
  14. if err != nil {
  15. t.Fatalf("Failed to compile regex: %v", err)
  16. }
  17. regex2, err := cache.get(pattern)
  18. if err != nil {
  19. t.Fatalf("Failed to get cached regex: %v", err)
  20. }
  21. // Should be the same instance (cached)
  22. if regex1 != regex2 {
  23. t.Error("Expected cached regex to be the same instance")
  24. }
  25. // Test that it actually works
  26. if !regex1.MatchString("test123pattern") {
  27. t.Error("Regex should match test string")
  28. }
  29. }
  30. func TestGlobToRegexCaching(t *testing.T) {
  31. // Test that globToRegex uses pre-compiled regex
  32. pattern1 := globToRegex("*.{js,ts}")
  33. // Should not panic and should work correctly
  34. regex1, err := regexp.Compile(pattern1)
  35. if err != nil {
  36. t.Fatalf("Failed to compile glob regex: %v", err)
  37. }
  38. if !regex1.MatchString("test.js") {
  39. t.Error("Glob regex should match .js files")
  40. }
  41. if !regex1.MatchString("test.ts") {
  42. t.Error("Glob regex should match .ts files")
  43. }
  44. if regex1.MatchString("test.go") {
  45. t.Error("Glob regex should not match .go files")
  46. }
  47. }
  48. func TestGrepWithIgnoreFiles(t *testing.T) {
  49. t.Parallel()
  50. tempDir := t.TempDir()
  51. // Create test files
  52. testFiles := map[string]string{
  53. "file1.txt": "hello world",
  54. "file2.txt": "hello world",
  55. "ignored/file3.txt": "hello world",
  56. "node_modules/lib.js": "hello world",
  57. "secret.key": "hello world",
  58. }
  59. for path, content := range testFiles {
  60. fullPath := filepath.Join(tempDir, path)
  61. require.NoError(t, os.MkdirAll(filepath.Dir(fullPath), 0o755))
  62. require.NoError(t, os.WriteFile(fullPath, []byte(content), 0o644))
  63. }
  64. // Create .gitignore file
  65. gitignoreContent := "ignored/\n*.key\n"
  66. require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".gitignore"), []byte(gitignoreContent), 0o644))
  67. // Create .crushignore file
  68. crushignoreContent := "node_modules/\n"
  69. require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".crushignore"), []byte(crushignoreContent), 0o644))
  70. // Test both implementations
  71. for name, fn := range map[string]func(pattern, path, include string) ([]grepMatch, error){
  72. "regex": searchFilesWithRegex,
  73. "rg": func(pattern, path, include string) ([]grepMatch, error) {
  74. return searchWithRipgrep(t.Context(), pattern, path, include)
  75. },
  76. } {
  77. t.Run(name, func(t *testing.T) {
  78. t.Parallel()
  79. if name == "rg" && getRg() == "" {
  80. t.Skip("rg is not in $PATH")
  81. }
  82. matches, err := fn("hello world", tempDir, "")
  83. require.NoError(t, err)
  84. // Convert matches to a set of file paths for easier testing
  85. foundFiles := make(map[string]bool)
  86. for _, match := range matches {
  87. foundFiles[filepath.Base(match.path)] = true
  88. }
  89. // Should find file1.txt and file2.txt
  90. require.True(t, foundFiles["file1.txt"], "Should find file1.txt")
  91. require.True(t, foundFiles["file2.txt"], "Should find file2.txt")
  92. // Should NOT find ignored files
  93. require.False(t, foundFiles["file3.txt"], "Should not find file3.txt (ignored by .gitignore)")
  94. require.False(t, foundFiles["lib.js"], "Should not find lib.js (ignored by .crushignore)")
  95. require.False(t, foundFiles["secret.key"], "Should not find secret.key (ignored by .gitignore)")
  96. // Should find exactly 2 matches
  97. require.Equal(t, 2, len(matches), "Should find exactly 2 matches")
  98. })
  99. }
  100. }
  101. func TestSearchImplementations(t *testing.T) {
  102. t.Parallel()
  103. tempDir := t.TempDir()
  104. for path, content := range map[string]string{
  105. "file1.go": "package main\nfunc main() {\n\tfmt.Println(\"hello world\")\n}",
  106. "file2.js": "console.log('hello world');",
  107. "file3.txt": "hello world from text file",
  108. "binary.exe": "\x00\x01\x02\x03",
  109. "empty.txt": "",
  110. "subdir/nested.go": "package nested\n// hello world comment",
  111. ".hidden.txt": "hello world in hidden file",
  112. "file4.txt": "hello world from a banana",
  113. "file5.txt": "hello world from a grape",
  114. } {
  115. fullPath := filepath.Join(tempDir, path)
  116. require.NoError(t, os.MkdirAll(filepath.Dir(fullPath), 0o755))
  117. require.NoError(t, os.WriteFile(fullPath, []byte(content), 0o644))
  118. }
  119. require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".gitignore"), []byte("file4.txt\n"), 0o644))
  120. require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".crushignore"), []byte("file5.txt\n"), 0o644))
  121. for name, fn := range map[string]func(pattern, path, include string) ([]grepMatch, error){
  122. "regex": searchFilesWithRegex,
  123. "rg": func(pattern, path, include string) ([]grepMatch, error) {
  124. return searchWithRipgrep(t.Context(), pattern, path, include)
  125. },
  126. } {
  127. t.Run(name, func(t *testing.T) {
  128. t.Parallel()
  129. if name == "rg" && getRg() == "" {
  130. t.Skip("rg is not in $PATH")
  131. }
  132. matches, err := fn("hello world", tempDir, "")
  133. require.NoError(t, err)
  134. require.Equal(t, len(matches), 4)
  135. for _, match := range matches {
  136. require.NotEmpty(t, match.path)
  137. require.NotZero(t, match.lineNum)
  138. require.NotEmpty(t, match.lineText)
  139. require.NotZero(t, match.modTime)
  140. require.NotContains(t, match.path, ".hidden.txt")
  141. require.NotContains(t, match.path, "file4.txt")
  142. require.NotContains(t, match.path, "file5.txt")
  143. require.NotContains(t, match.path, "binary.exe")
  144. }
  145. })
  146. }
  147. }
  148. // Benchmark to show performance improvement
  149. func BenchmarkRegexCacheVsCompile(b *testing.B) {
  150. cache := newRegexCache()
  151. pattern := "test.*pattern.*[0-9]+"
  152. b.Run("WithCache", func(b *testing.B) {
  153. for b.Loop() {
  154. _, err := cache.get(pattern)
  155. if err != nil {
  156. b.Fatal(err)
  157. }
  158. }
  159. })
  160. b.Run("WithoutCache", func(b *testing.B) {
  161. for b.Loop() {
  162. _, err := regexp.Compile(pattern)
  163. if err != nil {
  164. b.Fatal(err)
  165. }
  166. }
  167. })
  168. }
  169. func TestIsTextFile(t *testing.T) {
  170. t.Parallel()
  171. tempDir := t.TempDir()
  172. tests := []struct {
  173. name string
  174. filename string
  175. content []byte
  176. wantText bool
  177. }{
  178. {
  179. name: "go file",
  180. filename: "test.go",
  181. content: []byte("package main\n\nfunc main() {}\n"),
  182. wantText: true,
  183. },
  184. {
  185. name: "yaml file",
  186. filename: "config.yaml",
  187. content: []byte("key: value\nlist:\n - item1\n - item2\n"),
  188. wantText: true,
  189. },
  190. {
  191. name: "yml file",
  192. filename: "config.yml",
  193. content: []byte("key: value\n"),
  194. wantText: true,
  195. },
  196. {
  197. name: "json file",
  198. filename: "data.json",
  199. content: []byte(`{"key": "value"}`),
  200. wantText: true,
  201. },
  202. {
  203. name: "javascript file",
  204. filename: "script.js",
  205. content: []byte("console.log('hello');\n"),
  206. wantText: true,
  207. },
  208. {
  209. name: "typescript file",
  210. filename: "script.ts",
  211. content: []byte("const x: string = 'hello';\n"),
  212. wantText: true,
  213. },
  214. {
  215. name: "markdown file",
  216. filename: "README.md",
  217. content: []byte("# Title\n\nSome content\n"),
  218. wantText: true,
  219. },
  220. {
  221. name: "shell script",
  222. filename: "script.sh",
  223. content: []byte("#!/bin/bash\necho 'hello'\n"),
  224. wantText: true,
  225. },
  226. {
  227. name: "python file",
  228. filename: "script.py",
  229. content: []byte("print('hello')\n"),
  230. wantText: true,
  231. },
  232. {
  233. name: "xml file",
  234. filename: "data.xml",
  235. content: []byte("<?xml version=\"1.0\"?>\n<root></root>\n"),
  236. wantText: true,
  237. },
  238. {
  239. name: "plain text",
  240. filename: "file.txt",
  241. content: []byte("plain text content\n"),
  242. wantText: true,
  243. },
  244. {
  245. name: "css file",
  246. filename: "style.css",
  247. content: []byte("body { color: red; }\n"),
  248. wantText: true,
  249. },
  250. {
  251. name: "scss file",
  252. filename: "style.scss",
  253. content: []byte("$primary: blue;\nbody { color: $primary; }\n"),
  254. wantText: true,
  255. },
  256. {
  257. name: "sass file",
  258. filename: "style.sass",
  259. content: []byte("$primary: blue\nbody\n color: $primary\n"),
  260. wantText: true,
  261. },
  262. {
  263. name: "rust file",
  264. filename: "main.rs",
  265. content: []byte("fn main() {\n println!(\"Hello, world!\");\n}\n"),
  266. wantText: true,
  267. },
  268. {
  269. name: "zig file",
  270. filename: "main.zig",
  271. content: []byte("const std = @import(\"std\");\npub fn main() void {}\n"),
  272. wantText: true,
  273. },
  274. {
  275. name: "java file",
  276. filename: "Main.java",
  277. content: []byte("public class Main {\n public static void main(String[] args) {}\n}\n"),
  278. wantText: true,
  279. },
  280. {
  281. name: "c file",
  282. filename: "main.c",
  283. content: []byte("#include <stdio.h>\nint main() { return 0; }\n"),
  284. wantText: true,
  285. },
  286. {
  287. name: "cpp file",
  288. filename: "main.cpp",
  289. content: []byte("#include <iostream>\nint main() { return 0; }\n"),
  290. wantText: true,
  291. },
  292. {
  293. name: "fish shell",
  294. filename: "script.fish",
  295. content: []byte("#!/usr/bin/env fish\necho 'hello'\n"),
  296. wantText: true,
  297. },
  298. {
  299. name: "powershell file",
  300. filename: "script.ps1",
  301. content: []byte("Write-Host 'Hello, World!'\n"),
  302. wantText: true,
  303. },
  304. {
  305. name: "cmd batch file",
  306. filename: "script.bat",
  307. content: []byte("@echo off\necho Hello, World!\n"),
  308. wantText: true,
  309. },
  310. {
  311. name: "cmd file",
  312. filename: "script.cmd",
  313. content: []byte("@echo off\necho Hello, World!\n"),
  314. wantText: true,
  315. },
  316. {
  317. name: "binary exe",
  318. filename: "binary.exe",
  319. content: []byte{0x4D, 0x5A, 0x90, 0x00, 0x03, 0x00, 0x00, 0x00},
  320. wantText: false,
  321. },
  322. {
  323. name: "png image",
  324. filename: "image.png",
  325. content: []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A},
  326. wantText: false,
  327. },
  328. {
  329. name: "jpeg image",
  330. filename: "image.jpg",
  331. content: []byte{0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46},
  332. wantText: false,
  333. },
  334. {
  335. name: "zip archive",
  336. filename: "archive.zip",
  337. content: []byte{0x50, 0x4B, 0x03, 0x04, 0x14, 0x00, 0x00, 0x00},
  338. wantText: false,
  339. },
  340. {
  341. name: "pdf file",
  342. filename: "document.pdf",
  343. content: []byte("%PDF-1.4\n%âãÏÓ\n"),
  344. wantText: false,
  345. },
  346. }
  347. for _, tt := range tests {
  348. t.Run(tt.name, func(t *testing.T) {
  349. t.Parallel()
  350. filePath := filepath.Join(tempDir, tt.filename)
  351. require.NoError(t, os.WriteFile(filePath, tt.content, 0o644))
  352. got := isTextFile(filePath)
  353. require.Equal(t, tt.wantText, got, "isTextFile(%s) = %v, want %v", tt.filename, got, tt.wantText)
  354. })
  355. }
  356. }
  357. func TestColumnMatch(t *testing.T) {
  358. t.Parallel()
  359. // Test both implementations
  360. for name, fn := range map[string]func(pattern, path, include string) ([]grepMatch, error){
  361. "regex": searchFilesWithRegex,
  362. "rg": func(pattern, path, include string) ([]grepMatch, error) {
  363. return searchWithRipgrep(t.Context(), pattern, path, include)
  364. },
  365. } {
  366. t.Run(name, func(t *testing.T) {
  367. t.Parallel()
  368. if name == "rg" && getRg() == "" {
  369. t.Skip("rg is not in $PATH")
  370. }
  371. matches, err := fn("THIS", "./testdata/", "")
  372. require.NoError(t, err)
  373. require.Len(t, matches, 1)
  374. match := matches[0]
  375. require.Equal(t, 2, match.lineNum)
  376. require.Equal(t, 14, match.charNum)
  377. require.Equal(t, "I wanna grep THIS particular word", match.lineText)
  378. require.Equal(t, "testdata/grep.txt", filepath.ToSlash(filepath.Clean(match.path)))
  379. })
  380. }
  381. }