Ver código fonte

Merge pull request #842 from AudriusButkevicius/ignorecache

Cache ignore file matches
Jakob Borg 11 anos atrás
pai
commit
964d17d05a

+ 1 - 0
internal/config/config.go

@@ -173,6 +173,7 @@ type OptionsConfiguration struct {
 	RestartOnWakeup      bool     `xml:"restartOnWakeup" default:"true"`
 	AutoUpgradeIntervalH int      `xml:"autoUpgradeIntervalH" default:"12"` // 0 for off
 	KeepTemporariesH     int      `xml:"keepTemporariesH" default:"24"`     // 0 for off
+	CacheIgnoredFiles    bool     `xml:"cacheIgnoredFiles" default:"true"`
 
 	Deprecated_RescanIntervalS int    `xml:"rescanIntervalS,omitempty" json:"-"`
 	Deprecated_UREnabled       bool   `xml:"urEnabled,omitempty" json:"-"`

+ 2 - 0
internal/config/config_test.go

@@ -51,6 +51,7 @@ func TestDefaultValues(t *testing.T) {
 		RestartOnWakeup:      true,
 		AutoUpgradeIntervalH: 12,
 		KeepTemporariesH:     24,
+		CacheIgnoredFiles:    true,
 	}
 
 	cfg := New(device1)
@@ -148,6 +149,7 @@ func TestOverriddenValues(t *testing.T) {
 		RestartOnWakeup:      false,
 		AutoUpgradeIntervalH: 24,
 		KeepTemporariesH:     48,
+		CacheIgnoredFiles:    false,
 	}
 
 	cfg, err := Load("testdata/overridenvalues.xml", device1)

+ 1 - 0
internal/config/testdata/overridenvalues.xml

@@ -18,5 +18,6 @@
         <restartOnWakeup>false</restartOnWakeup>
         <autoUpgradeIntervalH>24</autoUpgradeIntervalH>
         <keepTemporariesH>48</keepTemporariesH>
+        <cacheIgnoredFiles>false</cacheIgnoredFiles>
     </options>
 </configuration>

+ 91 - 16
internal/ignore/ignore.go

@@ -23,31 +23,94 @@ import (
 	"path/filepath"
 	"regexp"
 	"strings"
+	"sync"
 
 	"github.com/syncthing/syncthing/internal/fnmatch"
 )
 
+var caches = make(map[string]MatcherCache)
+
 type Pattern struct {
 	match   *regexp.Regexp
 	include bool
 }
 
-type Patterns []Pattern
+type Matcher struct {
+	patterns   []Pattern
+	oldMatches map[string]bool
+
+	newMatches map[string]bool
+	mut        sync.Mutex
+}
+
+type MatcherCache struct {
+	patterns []Pattern
+	matches  *map[string]bool
+}
 
-func Load(file string) (Patterns, error) {
+func Load(file string, cache bool) (*Matcher, error) {
 	seen := make(map[string]bool)
-	return loadIgnoreFile(file, seen)
+	matcher, err := loadIgnoreFile(file, seen)
+	if !cache || err != nil {
+		return matcher, err
+	}
+
+	// Get the current cache object for the given file
+	cached, ok := caches[file]
+	if !ok || !patternsEqual(cached.patterns, matcher.patterns) {
+		// Nothing in cache or a cache mismatch, create a new cache which will
+		// store matches for the given set of patterns.
+		// Initialize oldMatches to indicate that we are interested in
+		// caching.
+		matcher.oldMatches = make(map[string]bool)
+		matcher.newMatches = make(map[string]bool)
+		caches[file] = MatcherCache{
+			patterns: matcher.patterns,
+			matches:  &matcher.newMatches,
+		}
+		return matcher, nil
+	}
+
+	// Patterns haven't changed, so we can reuse the old matches, create a new
+	// matches map and update the pointer. (This prevents matches map from
+	// growing indefinately, as we only cache whatever we've matched in the last
+	// iteration, rather than through runtime history)
+	matcher.oldMatches = *cached.matches
+	matcher.newMatches = make(map[string]bool)
+	cached.matches = &matcher.newMatches
+	caches[file] = cached
+	return matcher, nil
 }
 
-func Parse(r io.Reader, file string) (Patterns, error) {
+func Parse(r io.Reader, file string) (*Matcher, error) {
 	seen := map[string]bool{
 		file: true,
 	}
 	return parseIgnoreFile(r, file, seen)
 }
 
-func (l Patterns) Match(file string) bool {
-	for _, pattern := range l {
+func (m *Matcher) Match(file string) (result bool) {
+	if len(m.patterns) == 0 {
+		return false
+	}
+
+	// We have old matches map set, means we should do caching
+	if m.oldMatches != nil {
+		// Capture the result to the new matches regardless of who returns it
+		defer func() {
+			m.mut.Lock()
+			m.newMatches[file] = result
+			m.mut.Unlock()
+		}()
+		// Check perhaps we've seen this file before, and we already know
+		// what the outcome is going to be.
+		result, ok := m.oldMatches[file]
+		if ok {
+			return result
+		}
+	}
+
+	for _, pattern := range m.patterns {
 		if pattern.match.MatchString(file) {
 			return pattern.include
 		}
@@ -55,7 +118,7 @@ func (l Patterns) Match(file string) bool {
 	return false
 }
 
-func loadIgnoreFile(file string, seen map[string]bool) (Patterns, error) {
+func loadIgnoreFile(file string, seen map[string]bool) (*Matcher, error) {
 	if seen[file] {
 		return nil, fmt.Errorf("Multiple include of ignore file %q", file)
 	}
@@ -70,8 +133,8 @@ func loadIgnoreFile(file string, seen map[string]bool) (Patterns, error) {
 	return parseIgnoreFile(fd, file, seen)
 }
 
-func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (Patterns, error) {
-	var exps Patterns
+func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*Matcher, error) {
+	var exps Matcher
 
 	addPattern := func(line string) error {
 		include := true
@@ -86,27 +149,27 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (Pa
 			if err != nil {
 				return fmt.Errorf("Invalid pattern %q in ignore file", line)
 			}
-			exps = append(exps, Pattern{exp, include})
+			exps.patterns = append(exps.patterns, Pattern{exp, include})
 		} else if strings.HasPrefix(line, "**/") {
 			// Add the pattern as is, and without **/ so it matches in current dir
 			exp, err := fnmatch.Convert(line, fnmatch.FNM_PATHNAME)
 			if err != nil {
 				return fmt.Errorf("Invalid pattern %q in ignore file", line)
 			}
-			exps = append(exps, Pattern{exp, include})
+			exps.patterns = append(exps.patterns, Pattern{exp, include})
 
 			exp, err = fnmatch.Convert(line[3:], fnmatch.FNM_PATHNAME)
 			if err != nil {
 				return fmt.Errorf("Invalid pattern %q in ignore file", line)
 			}
-			exps = append(exps, Pattern{exp, include})
+			exps.patterns = append(exps.patterns, Pattern{exp, include})
 		} else if strings.HasPrefix(line, "#include ") {
 			includeFile := filepath.Join(filepath.Dir(currentFile), line[len("#include "):])
 			includes, err := loadIgnoreFile(includeFile, seen)
 			if err != nil {
 				return err
 			} else {
-				exps = append(exps, includes...)
+				exps.patterns = append(exps.patterns, includes.patterns...)
 			}
 		} else {
 			// Path name or pattern, add it so it matches files both in
@@ -115,13 +178,13 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (Pa
 			if err != nil {
 				return fmt.Errorf("Invalid pattern %q in ignore file", line)
 			}
-			exps = append(exps, Pattern{exp, include})
+			exps.patterns = append(exps.patterns, Pattern{exp, include})
 
 			exp, err = fnmatch.Convert("**/"+line, fnmatch.FNM_PATHNAME)
 			if err != nil {
 				return fmt.Errorf("Invalid pattern %q in ignore file", line)
 			}
-			exps = append(exps, Pattern{exp, include})
+			exps.patterns = append(exps.patterns, Pattern{exp, include})
 		}
 		return nil
 	}
@@ -155,5 +218,17 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (Pa
 		}
 	}
 
-	return exps, nil
+	return &exps, nil
+}
+
+func patternsEqual(a, b []Pattern) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for i := range a {
+		if a[i].include != b[i].include || a[i].match.String() != b[i].match.String() {
+			return false
+		}
+	}
+	return true
 }

+ 197 - 10
internal/ignore/ignore_test.go

@@ -13,19 +13,19 @@
 // You should have received a copy of the GNU General Public License along
 // with this program. If not, see <http://www.gnu.org/licenses/>.
 
-package ignore_test
+package ignore
 
 import (
 	"bytes"
+	"io/ioutil"
+	"os"
 	"path/filepath"
 	"runtime"
 	"testing"
-
-	"github.com/syncthing/syncthing/internal/ignore"
 )
 
 func TestIgnore(t *testing.T) {
-	pats, err := ignore.Load("testdata/.stignore")
+	pats, err := Load("testdata/.stignore", true)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -72,7 +72,7 @@ func TestExcludes(t *testing.T) {
 	i*2
 	!ign2
 	`
-	pats, err := ignore.Parse(bytes.NewBufferString(stignore), ".stignore")
+	pats, err := Parse(bytes.NewBufferString(stignore), ".stignore")
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -112,7 +112,7 @@ func TestBadPatterns(t *testing.T) {
 	}
 
 	for _, pat := range badPatterns {
-		parsed, err := ignore.Parse(bytes.NewBufferString(pat), ".stignore")
+		parsed, err := Parse(bytes.NewBufferString(pat), ".stignore")
 		if err == nil {
 			t.Errorf("No error for pattern %q: %v", pat, parsed)
 		}
@@ -120,7 +120,7 @@ func TestBadPatterns(t *testing.T) {
 }
 
 func TestCaseSensitivity(t *testing.T) {
-	ign, _ := ignore.Parse(bytes.NewBufferString("test"), ".stignore")
+	ign, _ := Parse(bytes.NewBufferString("test"), ".stignore")
 
 	match := []string{"test"}
 	dontMatch := []string{"foo"}
@@ -145,6 +145,144 @@ func TestCaseSensitivity(t *testing.T) {
 	}
 }
 
+func TestCaching(t *testing.T) {
+	fd1, err := ioutil.TempFile("", "")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	fd2, err := ioutil.TempFile("", "")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	defer fd1.Close()
+	defer fd2.Close()
+	defer os.Remove(fd1.Name())
+	defer os.Remove(fd2.Name())
+
+	_, err = fd1.WriteString("/x/\n#include " + filepath.Base(fd2.Name()) + "\n")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	fd2.WriteString("/y/\n")
+
+	pats, err := Load(fd1.Name(), true)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if pats.oldMatches == nil || len(pats.oldMatches) != 0 {
+		t.Fatal("Expected empty map")
+	}
+
+	if pats.newMatches == nil || len(pats.newMatches) != 0 {
+		t.Fatal("Expected empty map")
+	}
+
+	if len(pats.patterns) != 4 {
+		t.Fatal("Incorrect number of patterns loaded", len(pats.patterns), "!=", 4)
+	}
+
+	// Cache some outcomes
+
+	for _, letter := range []string{"a", "b", "x", "y"} {
+		pats.Match(letter)
+	}
+
+	if len(pats.newMatches) != 4 {
+		t.Fatal("Expected 4 cached results")
+	}
+
+	// Reload file, expect old outcomes to be provided
+
+	pats, err = Load(fd1.Name(), true)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(pats.oldMatches) != 4 {
+		t.Fatal("Expected 4 cached results")
+	}
+
+	// Match less this time
+
+	for _, letter := range []string{"b", "x", "y"} {
+		pats.Match(letter)
+	}
+
+	if len(pats.newMatches) != 3 {
+		t.Fatal("Expected 3 cached results")
+	}
+
+	// Reload file, expect the new outcomes to be provided
+
+	pats, err = Load(fd1.Name(), true)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(pats.oldMatches) != 3 {
+		t.Fatal("Expected 3 cached results", len(pats.oldMatches))
+	}
+
+	// Modify the include file, expect empty cache
+
+	fd2.WriteString("/z/\n")
+
+	pats, err = Load(fd1.Name(), true)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(pats.oldMatches) != 0 {
+		t.Fatal("Expected 0 cached results")
+	}
+
+	// Cache some outcomes again
+
+	for _, letter := range []string{"b", "x", "y"} {
+		pats.Match(letter)
+	}
+
+	// Verify that outcomes provided on next laod
+
+	pats, err = Load(fd1.Name(), true)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(pats.oldMatches) != 3 {
+		t.Fatal("Expected 3 cached results")
+	}
+
+	// Modify the root file, expect cache to be invalidated
+
+	fd1.WriteString("/a/\n")
+
+	pats, err = Load(fd1.Name(), true)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(pats.oldMatches) != 0 {
+		t.Fatal("Expected cache invalidation")
+	}
+
+	// Cache some outcomes again
+
+	for _, letter := range []string{"b", "x", "y"} {
+		pats.Match(letter)
+	}
+
+	// Verify that outcomes provided on next laod
+
+	pats, err = Load(fd1.Name(), true)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(pats.oldMatches) != 3 {
+		t.Fatal("Expected 3 cached results")
+	}
+}
+
 func TestCommentsAndBlankLines(t *testing.T) {
 	stignore := `
 	// foo
@@ -157,8 +295,8 @@ func TestCommentsAndBlankLines(t *testing.T) {
 
 
 	`
-	pats, _ := ignore.Parse(bytes.NewBufferString(stignore), ".stignore")
-	if len(pats) > 0 {
+	pats, _ := Parse(bytes.NewBufferString(stignore), ".stignore")
+	if len(pats.patterns) > 0 {
 		t.Errorf("Expected no patterns")
 	}
 }
@@ -181,8 +319,57 @@ flamingo
 *.crow
 *.crow
 	`
-	pats, _ := ignore.Parse(bytes.NewBufferString(stignore), ".stignore")
+	pats, _ := Parse(bytes.NewBufferString(stignore), ".stignore")
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		result = pats.Match("filename")
+	}
+}
+
+func BenchmarkMatchCached(b *testing.B) {
+	stignore := `
+.frog
+.frog*
+.frogfox
+.whale
+.whale/*
+.dolphin
+.dolphin/*
+~ferret~.*
+.ferret.*
+flamingo.*
+flamingo
+*.crow
+*.crow
+	`
+	// Caches per file, hence write the patterns to a file.
+	fd, err := ioutil.TempFile("", "")
+	if err != nil {
+		b.Fatal(err)
+	}
+
+	_, err = fd.WriteString(stignore)
+	defer fd.Close()
+	defer os.Remove(fd.Name())
+	if err != nil {
+		b.Fatal(err)
+	}
 
+	// Load the patterns
+	pats, err := Load(fd.Name(), true)
+	if err != nil {
+		b.Fatal(err)
+	}
+	// Cache the outcome for "filename"
+	pats.Match("filename")
+
+	// This load should now load the cached outcomes as the set of patterns
+	// has not changed.
+	pats, err = Load(fd.Name(), true)
+	if err != nil {
+		b.Fatal(err)
+	}
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		result = pats.Match("filename")

+ 5 - 5
internal/model/model.go

@@ -93,7 +93,7 @@ type Model struct {
 	folderDevices  map[string][]protocol.DeviceID                         // folder -> deviceIDs
 	deviceFolders  map[protocol.DeviceID][]string                         // deviceID -> folders
 	deviceStatRefs map[protocol.DeviceID]*stats.DeviceStatisticsReference // deviceID -> statsRef
-	folderIgnores  map[string]ignore.Patterns                             // folder -> list of ignore patterns
+	folderIgnores  map[string]*ignore.Matcher                             // folder -> matcher object
 	folderRunners  map[string]service                                     // folder -> puller or scanner
 	fmut           sync.RWMutex                                           // protects the above
 
@@ -130,7 +130,7 @@ func NewModel(cfg *config.ConfigWrapper, deviceName, clientName, clientVersion s
 		folderDevices:      make(map[string][]protocol.DeviceID),
 		deviceFolders:      make(map[protocol.DeviceID][]string),
 		deviceStatRefs:     make(map[protocol.DeviceID]*stats.DeviceStatisticsReference),
-		folderIgnores:      make(map[string]ignore.Patterns),
+		folderIgnores:      make(map[string]*ignore.Matcher),
 		folderRunners:      make(map[string]service),
 		folderState:        make(map[string]folderState),
 		folderStateChanged: make(map[string]time.Time),
@@ -834,7 +834,7 @@ func (m *Model) deviceWasSeen(deviceID protocol.DeviceID) {
 	m.deviceStatRef(deviceID).WasSeen()
 }
 
-func sendIndexes(conn protocol.Connection, folder string, fs *files.Set, ignores ignore.Patterns) {
+func sendIndexes(conn protocol.Connection, folder string, fs *files.Set, ignores *ignore.Matcher) {
 	deviceID := conn.ID()
 	name := conn.Name()
 	var err error
@@ -859,7 +859,7 @@ func sendIndexes(conn protocol.Connection, folder string, fs *files.Set, ignores
 	}
 }
 
-func sendIndexTo(initial bool, minLocalVer uint64, conn protocol.Connection, folder string, fs *files.Set, ignores ignore.Patterns) (uint64, error) {
+func sendIndexTo(initial bool, minLocalVer uint64, conn protocol.Connection, folder string, fs *files.Set, ignores *ignore.Matcher) (uint64, error) {
 	deviceID := conn.ID()
 	name := conn.Name()
 	batch := make([]protocol.FileInfo, 0, indexBatchSize)
@@ -1011,7 +1011,7 @@ func (m *Model) ScanFolderSub(folder, sub string) error {
 	fs, ok := m.folderFiles[folder]
 	dir := m.folderCfgs[folder].Path
 
-	ignores, _ := ignore.Load(filepath.Join(dir, ".stignore"))
+	ignores, _ := ignore.Load(filepath.Join(dir, ".stignore"), m.cfg.Options().CacheIgnoredFiles)
 	m.folderIgnores[folder] = ignores
 
 	w := &scanner.Walker{

+ 1 - 1
internal/model/model_test.go

@@ -390,7 +390,7 @@ func TestIgnores(t *testing.T) {
 	}
 
 	db, _ := leveldb.Open(storage.NewMemStorage(), nil)
-	m := NewModel(nil, "device", "syncthing", "dev", db)
+	m := NewModel(config.Wrap("", config.Configuration{}), "device", "syncthing", "dev", db)
 	m.AddFolder(config.FolderConfiguration{ID: "default", Path: "testdata"})
 
 	expected := []string{

+ 1 - 1
internal/scanner/walk.go

@@ -36,7 +36,7 @@ type Walker struct {
 	// BlockSize controls the size of the block used when hashing.
 	BlockSize int
 	// List of patterns to ignore
-	Ignores ignore.Patterns
+	Ignores *ignore.Matcher
 	// If TempNamer is not nil, it is used to ignore tempory files when walking.
 	TempNamer TempNamer
 	// If CurrentFiler is not nil, it is queried for the current file before rescanning.

+ 2 - 2
internal/scanner/walk_test.go

@@ -58,7 +58,7 @@ func init() {
 }
 
 func TestWalkSub(t *testing.T) {
-	ignores, err := ignore.Load("testdata/.stignore")
+	ignores, err := ignore.Load("testdata/.stignore", false)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -93,7 +93,7 @@ func TestWalkSub(t *testing.T) {
 }
 
 func TestWalk(t *testing.T) {
-	ignores, err := ignore.Load("testdata/.stignore")
+	ignores, err := ignore.Load("testdata/.stignore", false)
 	if err != nil {
 		t.Fatal(err)
 	}