
Refactor ignore handling (...)

This uses persistent Matcher objects that can reload their content and
provide a hash string that can be used to check if it's changed. The
cache is local to each Matcher object instead of kept globally.
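For orientation, a minimal usage sketch of the refactored API as it appears in the diff below. The New, Load, Match, Hash, and Stop calls are taken from the new ignore.go; the surrounding main function and the `.stignore` path are illustrative only, and internal packages like this are only importable from within the syncthing repository itself.

```go
package main

import (
	"fmt"
	"log"

	"github.com/syncthing/syncthing/internal/ignore"
)

func main() {
	// New(true) enables the per-Matcher match cache and starts the
	// background goroutine that periodically cleans it.
	m := ignore.New(true)
	defer m.Stop() // stops the cleaner goroutine

	// Load can be called again later to reload the file in place; a missing
	// file clears the patterns and recalculates the hash.
	if err := m.Load(".stignore"); err != nil {
		log.Println("no ignore patterns loaded:", err)
	}

	fmt.Println("patterns hash:", m.Hash())
	fmt.Println("ignored:", m.Match("some/file"))
}
```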
Jakob Borg, 11 years ago
Commit 2c89f04be7
5 changed files with 241 additions and 122 deletions
  1. internal/ignore/cache.go (+1, -35)
  2. internal/ignore/ignore.go (+114, -63)
  3. internal/ignore/ignore_test.go (+118, -19)
  4. internal/model/model.go (+4, -3)
  5. internal/scanner/walk_test.go (+4, -2)

internal/ignore/cache.go (+1, -35)

@@ -15,26 +15,11 @@
 
 package ignore
 
-import (
-	"sync"
-	"time"
-)
-
-var (
-	caches   = make(map[string]*cache)
-	cacheMut sync.Mutex
-)
-
-func init() {
-	// Periodically go through the cache and remove cache entries that have
-	// not been touched in the last two hours.
-	go cleanIgnoreCaches(2 * time.Hour)
-}
+import "time"
 
 type cache struct {
 	patterns []Pattern
 	entries  map[string]cacheEntry
-	mut      sync.Mutex
 }
 
 type cacheEntry struct {
@@ -50,46 +35,27 @@ func newCache(patterns []Pattern) *cache {
 }
 
 func (c *cache) clean(d time.Duration) {
-	c.mut.Lock()
 	for k, v := range c.entries {
 		if time.Since(v.access) > d {
 			delete(c.entries, k)
 		}
 	}
-	c.mut.Unlock()
 }
 
 func (c *cache) get(key string) (result, ok bool) {
-	c.mut.Lock()
 	res, ok := c.entries[key]
 	if ok {
 		res.access = time.Now()
 		c.entries[key] = res
 	}
-	c.mut.Unlock()
 	return res.value, ok
 }
 
 func (c *cache) set(key string, val bool) {
-	c.mut.Lock()
 	c.entries[key] = cacheEntry{val, time.Now()}
-	c.mut.Unlock()
 }
 
 func (c *cache) len() int {
-	c.mut.Lock()
 	l := len(c.entries)
-	c.mut.Unlock()
 	return l
 }
-
-func cleanIgnoreCaches(dur time.Duration) {
-	for {
-		time.Sleep(dur)
-		cacheMut.Lock()
-		for _, v := range caches {
-			v.clean(dur)
-		}
-		cacheMut.Unlock()
-	}
-}
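After this change the cache is a plain, unsynchronized struct owned by a single Matcher, which serializes all access through its own mutex and drives the periodic cleaning itself. Reassembled from the context lines above (the cacheEntry field names are inferred from their use in get and set), the type now looks roughly like this:

```go
// cache maps previously checked paths to their match outcome. It holds no
// mutex of its own; the owning Matcher guards all access.
type cache struct {
	patterns []Pattern
	entries  map[string]cacheEntry
}

type cacheEntry struct {
	value  bool      // the cached Match result
	access time.Time // last time this entry was read or written
}
```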

internal/ignore/ignore.go (+114, -63)

@@ -17,6 +17,8 @@ package ignore
 
 import (
 	"bufio"
+	"bytes"
+	"crypto/md5"
 	"fmt"
 	"io"
 	"os"
@@ -24,6 +26,7 @@ import (
 	"regexp"
 	"strings"
 	"sync"
+	"time"
 
 	"github.com/syncthing/syncthing/internal/fnmatch"
 )
@@ -33,51 +36,76 @@ type Pattern struct {
 	include bool
 }
 
+func (p Pattern) String() string {
+	if p.include {
+		return p.match.String()
+	} else {
+		return "(?exclude)" + p.match.String()
+	}
+}
+
 type Matcher struct {
-	patterns []Pattern
-	matches  *cache
-	mut      sync.Mutex
+	patterns  []Pattern
+	withCache bool
+	matches   *cache
+	curHash   string
+	stop      chan struct{}
+	mut       sync.Mutex
+}
+
+func New(withCache bool) *Matcher {
+	m := &Matcher{
+		withCache: withCache,
+		stop:      make(chan struct{}),
+	}
+	if withCache {
+		go m.clean(2 * time.Hour)
+	}
+	return m
 }
 
-func Load(file string, cache bool) (*Matcher, error) {
-	seen := make(map[string]bool)
-	matcher, err := loadIgnoreFile(file, seen)
-	if !cache || err != nil {
-		return matcher, err
-	}
-
-	cacheMut.Lock()
-	defer cacheMut.Unlock()
-
-	// Get the current cache object for the given file
-	cached, ok := caches[file]
-	if !ok || !patternsEqual(cached.patterns, matcher.patterns) {
-		// Nothing in cache or a cache mismatch, create a new cache which will
-		// store matches for the given set of patterns.
-		// Initialize oldMatches to indicate that we are interested in
-		// caching.
-		cached = newCache(matcher.patterns)
-		matcher.matches = cached
-		caches[file] = cached
-		return matcher, nil
-	}
-
-	// Patterns haven't changed, so we can reuse the old matches, create a new
-	// matches map and update the pointer. (This prevents matches map from
-	// growing indefinately, as we only cache whatever we've matched in the last
-	// iteration, rather than through runtime history)
-	matcher.matches = cached
-	return matcher, nil
+func (m *Matcher) Load(file string) error {
+	// No locking, Parse() does the locking
+
+	fd, err := os.Open(file)
+	if err != nil {
+		// We do a parse with empty patterns to clear out the hash, cache etc.
+		m.Parse(&bytes.Buffer{}, file)
+		return err
+	}
+	defer fd.Close()
+
+	return m.Parse(fd, file)
 }
 
-func Parse(r io.Reader, file string) (*Matcher, error) {
-	seen := map[string]bool{
-		file: true,
+func (m *Matcher) Parse(r io.Reader, file string) error {
+	m.mut.Lock()
+	defer m.mut.Unlock()
+
+	seen := map[string]bool{file: true}
+	patterns, err := parseIgnoreFile(r, file, seen)
+	// Error is saved and returned at the end. We process the patterns
+	// (possibly blank) anyway.
+
+	newHash := hashPatterns(patterns)
+	if newHash == m.curHash {
+		// We've already loaded exactly these patterns.
+		return err
 	}
-	return parseIgnoreFile(r, file, seen)
+
+	m.curHash = newHash
+	m.patterns = patterns
+	if m.withCache {
+		m.matches = newCache(patterns)
+	}
+
+	return err
 }
 
 func (m *Matcher) Match(file string) (result bool) {
+	m.mut.Lock()
+	defer m.mut.Unlock()
+
 	if len(m.patterns) == 0 {
 		return false
 	}
@@ -108,18 +136,53 @@ func (m *Matcher) Match(file string) (result bool) {
 
 // Patterns return a list of the loaded regexp patterns, as strings
 func (m *Matcher) Patterns() []string {
+	m.mut.Lock()
+	defer m.mut.Unlock()
+
 	patterns := make([]string, len(m.patterns))
 	for i, pat := range m.patterns {
-		if pat.include {
-			patterns[i] = pat.match.String()
-		} else {
-			patterns[i] = "(?exclude)" + pat.match.String()
-		}
+		patterns[i] = pat.String()
 	}
 	return patterns
 }
 
-func loadIgnoreFile(file string, seen map[string]bool) (*Matcher, error) {
+func (m *Matcher) Hash() string {
+	m.mut.Lock()
+	defer m.mut.Unlock()
+	return m.curHash
+}
+
+func (m *Matcher) Stop() {
+	close(m.stop)
+}
+
+func (m *Matcher) clean(d time.Duration) {
+	t := time.NewTimer(d / 2)
+	for {
+		select {
+		case <-m.stop:
+			return
+		case <-t.C:
+			m.mut.Lock()
+			if m.matches != nil {
+				m.matches.clean(d)
+			}
+			t.Reset(d / 2)
+			m.mut.Unlock()
+		}
+	}
+}
+
+func hashPatterns(patterns []Pattern) string {
+	h := md5.New()
+	for _, pat := range patterns {
+		h.Write([]byte(pat.String()))
+		h.Write([]byte("\n"))
+	}
+	return fmt.Sprintf("%x", h.Sum(nil))
+}
+
+func loadIgnoreFile(file string, seen map[string]bool) ([]Pattern, error) {
 	if seen[file] {
 		return nil, fmt.Errorf("Multiple include of ignore file %q", file)
 	}
@@ -134,8 +197,8 @@ func loadIgnoreFile(file string, seen map[string]bool) (*Matcher, error) {
 	return parseIgnoreFile(fd, file, seen)
 }
 
-func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*Matcher, error) {
-	var exps Matcher
+func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) ([]Pattern, error) {
+	var patterns []Pattern
 
 	addPattern := func(line string) error {
 		include := true
@@ -150,27 +213,27 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*M
 			if err != nil {
 				return fmt.Errorf("Invalid pattern %q in ignore file", line)
 			}
-			exps.patterns = append(exps.patterns, Pattern{exp, include})
+			patterns = append(patterns, Pattern{exp, include})
 		} else if strings.HasPrefix(line, "**/") {
 			// Add the pattern as is, and without **/ so it matches in current dir
 			exp, err := fnmatch.Convert(line, fnmatch.FNM_PATHNAME)
 			if err != nil {
 				return fmt.Errorf("Invalid pattern %q in ignore file", line)
 			}
-			exps.patterns = append(exps.patterns, Pattern{exp, include})
+			patterns = append(patterns, Pattern{exp, include})
 
 			exp, err = fnmatch.Convert(line[3:], fnmatch.FNM_PATHNAME)
 			if err != nil {
 				return fmt.Errorf("Invalid pattern %q in ignore file", line)
 			}
-			exps.patterns = append(exps.patterns, Pattern{exp, include})
+			patterns = append(patterns, Pattern{exp, include})
 		} else if strings.HasPrefix(line, "#include ") {
 			includeFile := filepath.Join(filepath.Dir(currentFile), line[len("#include "):])
 			includes, err := loadIgnoreFile(includeFile, seen)
 			if err != nil {
 				return err
 			}
-			exps.patterns = append(exps.patterns, includes.patterns...)
+			patterns = append(patterns, includes...)
 		} else {
 			// Path name or pattern, add it so it matches files both in
 			// current directory and subdirs.
@@ -178,13 +241,13 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*M
 			if err != nil {
 				return fmt.Errorf("Invalid pattern %q in ignore file", line)
 			}
-			exps.patterns = append(exps.patterns, Pattern{exp, include})
+			patterns = append(patterns, Pattern{exp, include})
 
 			exp, err = fnmatch.Convert("**/"+line, fnmatch.FNM_PATHNAME)
 			if err != nil {
 				return fmt.Errorf("Invalid pattern %q in ignore file", line)
 			}
-			exps.patterns = append(exps.patterns, Pattern{exp, include})
+			patterns = append(patterns, Pattern{exp, include})
 		}
 		return nil
 	}
@@ -218,17 +281,5 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*M
 		}
 	}
 
-	return &exps, nil
-}
-
-func patternsEqual(a, b []Pattern) bool {
-	if len(a) != len(b) {
-		return false
-	}
-	for i := range a {
-		if a[i].include != b[i].include || a[i].match.String() != b[i].match.String() {
-			return false
-		}
-	}
-	return true
+	return patterns, nil
 }
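The key behaviour introduced here: Parse hashes the stringified patterns (MD5, one pattern per line) and returns early when the hash is unchanged, so reloading identical content keeps the existing match cache. A hypothetical test in the style of ignore_test.go — not part of this commit, and assuming that file's existing imports — that exercises that path:

```go
func TestReloadKeepsHash(t *testing.T) {
	m := New(true)
	defer m.Stop()

	if err := m.Parse(bytes.NewBufferString("*.tmp\n"), ".stignore"); err != nil {
		t.Fatal(err)
	}
	h1 := m.Hash()

	// Same content again: the pattern hash is identical, Parse returns
	// early and the match cache is left untouched.
	if err := m.Parse(bytes.NewBufferString("*.tmp\n"), ".stignore"); err != nil {
		t.Fatal(err)
	}
	if m.Hash() != h1 {
		t.Error("hash changed although the patterns did not")
	}

	// Different content: a new hash is stored and a fresh cache is created.
	if err := m.Parse(bytes.NewBufferString("*.log\n"), ".stignore"); err != nil {
		t.Fatal(err)
	}
	if m.Hash() == h1 {
		t.Error("hash unchanged although the patterns changed")
	}
}
```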

internal/ignore/ignore_test.go (+118, -19)

@@ -25,7 +25,8 @@ import (
 )
 
 func TestIgnore(t *testing.T) {
-	pats, err := Load("testdata/.stignore", true)
+	pats := New(true)
+	err := pats.Load("testdata/.stignore")
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -74,7 +75,8 @@ func TestExcludes(t *testing.T) {
 	i*2
 	!ign2
 	`
-	pats, err := Parse(bytes.NewBufferString(stignore), ".stignore")
+	pats := New(true)
+	err := pats.Parse(bytes.NewBufferString(stignore), ".stignore")
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -114,15 +116,19 @@ func TestBadPatterns(t *testing.T) {
 	}
 
 	for _, pat := range badPatterns {
-		parsed, err := Parse(bytes.NewBufferString(pat), ".stignore")
+		err := New(true).Parse(bytes.NewBufferString(pat), ".stignore")
 		if err == nil {
-			t.Errorf("No error for pattern %q: %v", pat, parsed)
+			t.Errorf("No error for pattern %q", pat)
 		}
 	}
 }
 
 func TestCaseSensitivity(t *testing.T) {
-	ign, _ := Parse(bytes.NewBufferString("test"), ".stignore")
+	ign := New(true)
+	err := ign.Parse(bytes.NewBufferString("test"), ".stignore")
+	if err != nil {
+		t.Error(err)
+	}
 
 	match := []string{"test"}
 	dontMatch := []string{"foo"}
@@ -170,7 +176,8 @@ func TestCaching(t *testing.T) {
 
 	fd2.WriteString("/y/\n")
 
-	pats, err := Load(fd1.Name(), true)
+	pats := New(true)
+	err = pats.Load(fd1.Name())
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -193,9 +200,9 @@ func TestCaching(t *testing.T) {
 		t.Fatal("Expected 4 cached results")
 	}
 
-	// Reload file, expect old outcomes to be provided
+	// Reload file, expect old outcomes to be preserved
 
-	pats, err = Load(fd1.Name(), true)
+	err = pats.Load(fd1.Name())
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -207,7 +214,7 @@ func TestCaching(t *testing.T) {
 
 	fd2.WriteString("/z/\n")
 
-	pats, err = Load(fd1.Name(), true)
+	err = pats.Load(fd1.Name())
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -222,9 +229,9 @@ func TestCaching(t *testing.T) {
 		pats.Match(letter)
 	}
 
-	// Verify that outcomes provided on next laod
+	// Verify that outcomes preserved on next laod
 
-	pats, err = Load(fd1.Name(), true)
+	err = pats.Load(fd1.Name())
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -236,7 +243,7 @@ func TestCaching(t *testing.T) {
 
 	fd1.WriteString("/a/\n")
 
-	pats, err = Load(fd1.Name(), true)
+	err = pats.Load(fd1.Name())
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -252,7 +259,7 @@ func TestCaching(t *testing.T) {
 
 	// Verify that outcomes provided on next laod
 
-	pats, err = Load(fd1.Name(), true)
+	err = pats.Load(fd1.Name())
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -273,7 +280,11 @@ func TestCommentsAndBlankLines(t *testing.T) {
 
 
 	`
-	pats, _ := Parse(bytes.NewBufferString(stignore), ".stignore")
+	pats := New(true)
+	err := pats.Parse(bytes.NewBufferString(stignore), ".stignore")
+	if err != nil {
+		t.Error(err)
+	}
 	if len(pats.patterns) > 0 {
 		t.Errorf("Expected no patterns")
 	}
@@ -297,7 +308,11 @@ flamingo
 *.crow
 *.crow
 	`
-	pats, _ := Parse(bytes.NewBufferString(stignore), ".stignore")
+	pats := New(false)
+	err := pats.Parse(bytes.NewBufferString(stignore), ".stignore")
+	if err != nil {
+		b.Error(err)
+	}
 
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
@@ -335,7 +350,8 @@ flamingo
 	}
 
 	// Load the patterns
-	pats, err := Load(fd.Name(), true)
+	pats := New(true)
+	err = pats.Load(fd.Name())
 	if err != nil {
 		b.Fatal(err)
 	}
@@ -344,7 +360,7 @@ flamingo
 
 	// This load should now load the cached outcomes as the set of patterns
 	// has not changed.
-	pats, err = Load(fd.Name(), true)
+	err = pats.Load(fd.Name())
 	if err != nil {
 		b.Fatal(err)
 	}
@@ -370,7 +386,8 @@ func TestCacheReload(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	pats, err := Load(fd.Name(), true)
+	pats := New(true)
+	err = pats.Load(fd.Name())
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -402,7 +419,7 @@ func TestCacheReload(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	pats, err = Load(fd.Name(), true)
+	err = pats.Load(fd.Name())
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -419,3 +436,85 @@ func TestCacheReload(t *testing.T) {
 		t.Error("Unexpected non-match for f3")
 	}
 }
+
+func TestHash(t *testing.T) {
+	p1 := New(true)
+	err := p1.Load("testdata/.stignore")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Same list of patterns as testdata/.stignore, after expansion
+	stignore := `
+	dir2/dfile
+	dir3
+	bfile
+	dir1/cfile
+	**/efile
+	/ffile
+	lost+found
+	`
+	p2 := New(true)
+	err = p2.Parse(bytes.NewBufferString(stignore), ".stignore")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Not same list of patterns
+	stignore = `
+	dir2/dfile
+	dir3
+	bfile
+	dir1/cfile
+	/ffile
+	lost+found
+	`
+	p3 := New(true)
+	err = p3.Parse(bytes.NewBufferString(stignore), ".stignore")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if p1.Hash() == "" {
+		t.Error("p1 hash blank")
+	}
+	if p2.Hash() == "" {
+		t.Error("p2 hash blank")
+	}
+	if p3.Hash() == "" {
+		t.Error("p3 hash blank")
+	}
+	if p1.Hash() != p2.Hash() {
+		t.Error("p1-p2 hashes differ")
+	}
+	if p1.Hash() == p3.Hash() {
+		t.Error("p1-p3 hashes same")
+	}
+}
+
+func TestHashOfEmpty(t *testing.T) {
+	p1 := New(true)
+	err := p1.Load("testdata/.stignore")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	firstHash := p1.Hash()
+
+	// Reloading with a non-existent file should empty the patterns and
+	// recalculate the hash. d41d8cd98f00b204e9800998ecf8427e is the md5 of
+	// nothing.
+
+	p1.Load("file/does/not/exist")
+	secondHash := p1.Hash()
+
+	if firstHash == secondHash {
+		t.Error("hash did not change")
+	}
+	if secondHash != "d41d8cd98f00b204e9800998ecf8427e" {
+		t.Error("second hash is not hash of empty string")
+	}
+	if len(p1.patterns) != 0 {
+		t.Error("there are more than zero patterns")
+	}
+}

internal/model/model.go (+4, -3)

@@ -1040,7 +1040,8 @@ func (m *Model) AddFolder(cfg config.FolderConfiguration) {
 		m.deviceFolders[device.DeviceID] = append(m.deviceFolders[device.DeviceID], cfg.ID)
 	}
 
-	ignores, _ := ignore.Load(filepath.Join(cfg.Path, ".stignore"), m.cfg.Options().CacheIgnoredFiles)
+	ignores := ignore.New(m.cfg.Options().CacheIgnoredFiles)
+	_ = ignores.Load(filepath.Join(cfg.Path, ".stignore")) // Ignore error, there might not be an .stignore
 	m.folderIgnores[cfg.ID] = ignores
 
 	m.addedFolder = true
@@ -1083,8 +1084,8 @@ func (m *Model) ScanFolderSub(folder, sub string) error {
 	fs, ok := m.folderFiles[folder]
 	dir := m.folderCfgs[folder].Path
 
-	ignores, _ := ignore.Load(filepath.Join(dir, ".stignore"), m.cfg.Options().CacheIgnoredFiles)
-	m.folderIgnores[folder] = ignores
+	ignores := m.folderIgnores[folder]
+	_ = ignores.Load(filepath.Join(dir, ".stignore")) // Ignore error, there might not be an .stignore
 
 	w := &scanner.Walker{
 		Dir:          dir,
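The model now keeps one persistent Matcher per folder (created in AddFolder, reloaded in ScanFolderSub) rather than building a new one on every scan, which is what makes the new Hash() accessor useful: a caller can cheaply detect whether the effective patterns changed between scans. A hypothetical helper sketching that use — not part of this commit; rescanIfIgnoresChanged and lastHash are illustrative names:

```go
// rescanIfIgnoresChanged reloads the folder's .stignore into its persistent
// Matcher and reports whether the effective patterns differ from the hash
// observed after the previous scan.
func rescanIfIgnoresChanged(ignores *ignore.Matcher, dir string, lastHash *string) bool {
	// As in ScanFolderSub, a missing .stignore is not an error here; it
	// simply yields an empty pattern set and the hash of no patterns.
	_ = ignores.Load(filepath.Join(dir, ".stignore"))

	if h := ignores.Hash(); h != *lastHash {
		*lastHash = h
		return true // patterns changed since the last scan
	}
	return false
}
```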

internal/scanner/walk_test.go (+4, -2)

@@ -58,7 +58,8 @@ func init() {
 }
 
 func TestWalkSub(t *testing.T) {
-	ignores, err := ignore.Load("testdata/.stignore", false)
+	ignores := ignore.New(false)
+	err := ignores.Load("testdata/.stignore")
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -93,7 +94,8 @@ func TestWalkSub(t *testing.T) {
 }
 
 func TestWalk(t *testing.T) {
-	ignores, err := ignore.Load("testdata/.stignore", false)
+	ignores := ignore.New(false)
+	err := ignores.Load("testdata/.stignore")
 	if err != nil {
 		t.Fatal(err)
 	}