Browse Source

lib/ignore: Optimise ignoring directories for filesystem watcher (fixes #9339) (#9340)

This improves the ignore handling so that directories can be fully
ignored (skipped in the watcher) in more cases. Specifically, where the
previous rule was that any complex `!`-pattern would disable skipping
directories, the new rule is that only matches on patterns *after* such
a `!`-pattern disable skipping. That is, the following now does the
intuitive thing:

```
/foo
/bar
!whatever
*
```

- `/foo/**` and `/bar/**` are completely skipped, since there is no
chance anything underneath them could ever be not-ignored
- `!whatever` toggles the "can't skip directories any more" flag
- Anything that matches `*` can't skip directories, because it's
possible we can have `whatever` match something deeper.

To enable this, some refactoring was necessary:

- The "can skip dirs" flag is now a property of the match result, not of
the pattern set as a whole.
- That meant returning a boolean is not good enough, we need to actually
return the entire `Result` (or, like, two booleans but that seemed
uglier and more annoying to use)
- `ShouldIgnore(string) boolean` went away with
`Match(string).IsIgnored()` being the obvious replacement (API
simplification!)
- The watcher then needed to import the `ignore` package (for the
`Result` type), but `fs` imports the watcher and `ignore` imports `fs`.
That's a cycle, so I broke out `Result` into a package of its own so
that it can be safely imported everywhere in things like `type Matcher
interface { Match(string) result.Result }`. There's a fair amount of
stuttering in `result.Result` and maybe we should go with something like
`ignoreresult.R` or so, leaving this open for discussion.

Tests refactored to suit, I think this change is in fact quite well
covered by the existing ones...

Also some noise because a few of the changed files were quite old and
got the `gofumpt` treatment by my editor. Sorry not sorry.

---------

Co-authored-by: Simon Frei <[email protected]>
Jakob Borg 1 year ago
parent
commit
3297624037

+ 6 - 10
lib/fs/basicfs_watch.go

@@ -37,18 +37,14 @@ func (f *BasicFilesystem) Watch(name string, ignore Matcher, ctx context.Context
 		eventMask |= permEventMask
 	}
 
-	if ignore.SkipIgnoredDirs() {
-		absShouldIgnore := func(absPath string) bool {
-			rel, err := f.unrootedChecked(absPath, roots)
-			if err != nil {
-				return true
-			}
-			return ignore.Match(rel).IsIgnored()
+	absShouldIgnore := func(absPath string) bool {
+		rel, err := f.unrootedChecked(absPath, roots)
+		if err != nil {
+			return true
 		}
-		err = notify.WatchWithFilter(watchPath, backendChan, absShouldIgnore, eventMask)
-	} else {
-		err = notify.Watch(watchPath, backendChan, eventMask)
+		return ignore.Match(rel).CanSkipDir()
 	}
+	err = notify.WatchWithFilter(watchPath, backendChan, absShouldIgnore, eventMask)
 	if err != nil {
 		notify.Stop(backendChan)
 		if reachedMaxUserWatches(err) {

+ 0 - 1
lib/fs/filesystem.go

@@ -129,7 +129,6 @@ type Usage struct {
 
 type Matcher interface {
 	Match(name string) ignoreresult.R
-	SkipIgnoredDirs() bool
 }
 
 type Event struct {

+ 37 - 44
lib/ignore/ignore.go

@@ -79,11 +79,17 @@ func (p Pattern) allowsSkippingIgnoredDirs() bool {
 	if p.pattern[0] != '/' {
 		return false
 	}
-	if strings.Contains(p.pattern[1:], "/") {
+	// A "/**" at the end is allowed and doesn't have any bearing on the
+	// below checks; remove it before checking.
+	pattern := strings.TrimSuffix(p.pattern, "/**")
+	if len(pattern) == 0 {
+		return true
+	}
+	if strings.Contains(pattern[1:], "/") {
 		return false
 	}
 	// Double asterisk everywhere in the path except at the end is bad
-	return !strings.Contains(strings.TrimSuffix(p.pattern, "**"), "**")
+	return !strings.Contains(strings.TrimSuffix(pattern, "**"), "**")
 }
 
 // The ChangeDetector is responsible for determining if files have changed
@@ -99,16 +105,15 @@ type ChangeDetector interface {
 }
 
 type Matcher struct {
-	fs              fs.Filesystem
-	lines           []string  // exact lines read from .stignore
-	patterns        []Pattern // patterns including those from included files
-	withCache       bool
-	matches         *cache
-	curHash         string
-	stop            chan struct{}
-	changeDetector  ChangeDetector
-	skipIgnoredDirs bool
-	mut             sync.Mutex
+	fs             fs.Filesystem
+	lines          []string  // exact lines read from .stignore
+	patterns       []Pattern // patterns including those from included files
+	withCache      bool
+	matches        *cache
+	curHash        string
+	stop           chan struct{}
+	changeDetector ChangeDetector
+	mut            sync.Mutex
 }
 
 // An Option can be passed to New()
@@ -131,10 +136,9 @@ func WithChangeDetector(cd ChangeDetector) Option {
 
 func New(fs fs.Filesystem, opts ...Option) *Matcher {
 	m := &Matcher{
-		fs:              fs,
-		stop:            make(chan struct{}),
-		mut:             sync.NewMutex(),
-		skipIgnoredDirs: true,
+		fs:   fs,
+		stop: make(chan struct{}),
+		mut:  sync.NewMutex(),
 	}
 	for _, opt := range opts {
 		opt(m)
@@ -198,23 +202,6 @@ func (m *Matcher) parseLocked(r io.Reader, file string) error {
 		return err
 	}
 
-	m.skipIgnoredDirs = true
-	var previous string
-	for _, p := range patterns {
-		// We automatically add patterns with a /** suffix, which normally
-		// means that we cannot skip directories. However if the same
-		// pattern without the /** already exists (which is true for
-		// automatically added patterns) we can skip.
-		if l := len(p.pattern); l > 3 && p.pattern[:len(p.pattern)-3] == previous {
-			continue
-		}
-		if !p.allowsSkippingIgnoredDirs() {
-			m.skipIgnoredDirs = false
-			break
-		}
-		previous = p.pattern
-	}
-
 	m.curHash = newHash
 	m.patterns = patterns
 	if m.withCache {
@@ -228,10 +215,10 @@ func (m *Matcher) parseLocked(r io.Reader, file string) error {
 func (m *Matcher) Match(file string) (result ignoreresult.R) {
 	switch {
 	case fs.IsTemporary(file):
-		return ignoreresult.Ignored
+		return ignoreresult.IgnoreAndSkip
 
 	case fs.IsInternal(file):
-		return ignoreresult.Ignored
+		return ignoreresult.IgnoreAndSkip
 
 	case file == ".":
 		return ignoreresult.NotIgnored
@@ -257,19 +244,31 @@ func (m *Matcher) Match(file string) (result ignoreresult.R) {
 		}()
 	}
 
-	// Check all the patterns for a match.
+	// Check all the patterns for a match. Track whether the patterns so far
+	// allow skipping matched directories or not. As soon as we hit an
+	// exclude pattern (with some exceptions), we can't skip directories
+	// anymore.
 	file = filepath.ToSlash(file)
 	var lowercaseFile string
+	canSkipDir := true
 	for _, pattern := range m.patterns {
+		if canSkipDir && !pattern.allowsSkippingIgnoredDirs() {
+			canSkipDir = false
+		}
+
+		res := pattern.result
+		if canSkipDir {
+			res = res.WithSkipDir()
+		}
 		if pattern.result.IsCaseFolded() {
 			if lowercaseFile == "" {
 				lowercaseFile = strings.ToLower(file)
 			}
 			if pattern.match.Match(lowercaseFile) {
-				return pattern.result
+				return res
 			}
 		} else if pattern.match.Match(file) {
-			return pattern.result
+			return res
 		}
 	}
 
@@ -327,12 +326,6 @@ func (m *Matcher) clean(d time.Duration) {
 	}
 }
 
-func (m *Matcher) SkipIgnoredDirs() bool {
-	m.mut.Lock()
-	defer m.mut.Unlock()
-	return m.skipIgnoredDirs
-}
-
 func hashPatterns(patterns []Pattern) string {
 	h := sha256.New()
 	for _, pat := range patterns {

+ 8 - 8
lib/ignore/ignore_test.go

@@ -1122,8 +1122,8 @@ func TestIssue5009(t *testing.T) {
 	if err := pats.Parse(bytes.NewBufferString(stignore), ".stignore"); err != nil {
 		t.Fatal(err)
 	}
-	if !pats.skipIgnoredDirs {
-		t.Error("skipIgnoredDirs should be true without includes")
+	if m := pats.Match("ign2"); !m.CanSkipDir() {
+		t.Error("CanSkipDir should be true without excludes")
 	}
 
 	stignore = `
@@ -1138,8 +1138,8 @@ func TestIssue5009(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	if pats.skipIgnoredDirs {
-		t.Error("skipIgnoredDirs should not be true with includes")
+	if m := pats.Match("ign2"); m.CanSkipDir() {
+		t.Error("CanSkipDir should not be true with excludes")
 	}
 }
 
@@ -1272,8 +1272,8 @@ func TestSkipIgnoredDirs(t *testing.T) {
 	if err := pats.Parse(bytes.NewBufferString(stignore), ".stignore"); err != nil {
 		t.Fatal(err)
 	}
-	if !pats.SkipIgnoredDirs() {
-		t.Error("SkipIgnoredDirs should be true")
+	if m := pats.Match("whatever"); !m.CanSkipDir() {
+		t.Error("CanSkipDir should be true")
 	}
 
 	stignore = `
@@ -1283,8 +1283,8 @@ func TestSkipIgnoredDirs(t *testing.T) {
 	if err := pats.Parse(bytes.NewBufferString(stignore), ".stignore"); err != nil {
 		t.Fatal(err)
 	}
-	if pats.SkipIgnoredDirs() {
-		t.Error("SkipIgnoredDirs should be false")
+	if m := pats.Match("whatever"); m.CanSkipDir() {
+		t.Error("CanSkipDir should be false")
 	}
 }
 

+ 21 - 0
lib/ignore/ignoreresult/ignoreresult.go

@@ -12,6 +12,7 @@ const (
 	NotIgnored R = 0
 	// `Ignored` is defined in platform specific files
 	IgnoredDeletable = Ignored | deletableBit
+	IgnoreAndSkip    = Ignored | canSkipDirBit
 )
 
 const (
@@ -19,6 +20,7 @@ const (
 	ignoreBit R = 1 << iota
 	deletableBit
 	foldCaseBit
+	canSkipDirBit
 )
 
 type R uint8
@@ -38,6 +40,15 @@ func (r R) IsCaseFolded() bool {
 	return r&foldCaseBit != 0
 }
 
+// CanSkipDir returns true if the result is ignored and the directory can be
+// skipped (no need to recurse deeper). Note that ignore matches are textual
+// and based on the name only -- this being true does not mean that the
+// matched item is a directory, merely that *if* it is a directory, it can
+// be skipped.
+func (r R) CanSkipDir() bool {
+	return r.IsIgnored() && r&canSkipDirBit != 0
+}
+
 // ToggleIgnored returns a copy of the result with the ignored bit toggled.
 func (r R) ToggleIgnored() R {
 	return r ^ ignoreBit
@@ -53,6 +64,11 @@ func (r R) WithFoldCase() R {
 	return r | foldCaseBit
 }
 
+// WithSkipDir returns a copy of the result with the skip dir bit set.
+func (r R) WithSkipDir() R {
+	return r | canSkipDirBit
+}
+
 // String returns a human readable representation of the result flags.
 func (r R) String() string {
 	var s string
@@ -71,5 +87,10 @@ func (r R) String() string {
 	} else {
 		s += "-"
 	}
+	if r&canSkipDirBit != 0 {
+		s += "s"
+	} else {
+		s += "-"
+	}
 	return s
 }

+ 38 - 0
lib/ignore/ignoreresult/ignoreresult_test.go

@@ -0,0 +1,38 @@
+// Copyright (C) 2024 The Syncthing Authors.
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this file,
+// You can obtain one at https://mozilla.org/MPL/2.0/.
+
+package ignoreresult_test
+
+import (
+	"testing"
+
+	"github.com/syncthing/syncthing/lib/ignore/ignoreresult"
+)
+
+func TestFlagCanSkipDir(t *testing.T) {
+	// Verify that CanSkipDir() means that something is both ignored and can
+	// be skipped as a directory, so that it's legitimate to say
+	// Match(...).CanSkipDir() instead of having to create a temporary
+	// variable and check both Match(...).IsIgnored() and
+	// Match(...).CanSkipDir().
+
+	cases := []struct {
+		res        ignoreresult.R
+		canSkipDir bool
+	}{
+		{0, false},
+		{ignoreresult.NotIgnored, false},
+		{ignoreresult.NotIgnored.WithSkipDir(), false},
+		{ignoreresult.Ignored, false},
+		{ignoreresult.IgnoreAndSkip, true},
+	}
+
+	for _, tc := range cases {
+		if tc.res.CanSkipDir() != tc.canSkipDir {
+			t.Errorf("%v.CanSkipDir() != %v", tc.res, tc.canSkipDir)
+		}
+	}
+}

+ 2 - 2
lib/scanner/walk.go

@@ -295,10 +295,10 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco
 			return skip
 		}
 
-		if w.Matcher.Match(path).IsIgnored() {
+		if m := w.Matcher.Match(path); m.IsIgnored() {
 			l.Debugln(w, "ignored (patterns):", path)
 			// Only descend if matcher says so and the current file is not a symlink.
-			if err != nil || w.Matcher.SkipIgnoredDirs() || info.IsSymlink() {
+			if err != nil || m.CanSkipDir() || info.IsSymlink() {
 				return skip
 			}
 			// If the parent wasn't ignored already, set this path as the "highest" ignored parent

+ 2 - 2
lib/scanner/walk_test.go

@@ -873,8 +873,8 @@ func TestSkipIgnoredDirs(t *testing.T) {
 	if err := pats.Parse(bytes.NewBufferString(stignore), ".stignore"); err != nil {
 		t.Fatal(err)
 	}
-	if !pats.SkipIgnoredDirs() {
-		t.Error("SkipIgnoredDirs should be true")
+	if m := pats.Match("whatever"); !m.CanSkipDir() {
+		t.Error("CanSkipDir should be true", m)
 	}
 
 	w.Matcher = pats