Explorar o código

lib/fs: Cache all real-case results (fixes #7270) (#7286)

Simon Frei %!s(int64=5) %!d(string=hai) anos
pai
achega
e19d6e993d
Modificáronse 5 ficheiros con 141 adicións e 67 borrados
  1. 1 0
      go.mod
  2. 1 0
      go.sum
  3. 1 1
      lib/config/config_test.go
  4. 85 60
      lib/fs/casefs.go
  5. 53 6
      lib/fs/casefs_test.go

+ 1 - 0
go.mod

@@ -18,6 +18,7 @@ require (
 	github.com/gogo/protobuf v1.3.1
 	github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e
 	github.com/greatroar/blobloom v0.5.0
+	github.com/hashicorp/golang-lru v0.5.1
 	github.com/jackpal/gateway v1.0.6
 	github.com/jackpal/go-nat-pmp v1.0.2
 	github.com/julienschmidt/httprouter v1.3.0

+ 1 - 0
go.sum

@@ -212,6 +212,7 @@ github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/b
 github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
 github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90=
 github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
+github.com/hashicorp/golang-lru v0.5.1 h1:0hERBMJE1eitiLkihrMvRVBYAkpHzc/J3QdDN+dAcgU=
 github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
 github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
 github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=

+ 1 - 1
lib/config/config_test.go

@@ -509,7 +509,7 @@ func TestFolderCheckPath(t *testing.T) {
 		}
 
 		if err := cfg.CheckPath(); testcase.err != err {
-			t.Errorf("unexpected error in case %s: %s != %s", testcase.path, err, testcase.err)
+			t.Errorf("unexpected error in case %s: %s != %v", testcase.path, err, testcase.err)
 		}
 	}
 }

+ 85 - 60
lib/fs/casefs.go

@@ -14,11 +14,14 @@ import (
 	"strings"
 	"sync"
 	"time"
+
+	lru "github.com/hashicorp/golang-lru"
 )
 
 const (
 	// How long to consider cached dirnames valid
-	caseCacheTimeout = time.Second
+	caseCacheTimeout   = time.Second
+	caseCacheItemLimit = 4 << 10
 )
 
 type ErrCaseConflict struct {
@@ -351,96 +354,118 @@ func (f *caseFilesystem) checkCaseExisting(name string) error {
 }
 
 type defaultRealCaser struct {
-	fs   Filesystem
-	root *caseNode
-	mut  sync.RWMutex
+	fs    Filesystem
+	cache caseCache
 }
 
 func newDefaultRealCaser(fs Filesystem) *defaultRealCaser {
+	cache, err := lru.New2Q(caseCacheItemLimit)
+	// New2Q only errors if given invalid parameters, which we don't.
+	if err != nil {
+		panic(err)
+	}
 	caser := &defaultRealCaser{
-		fs:   fs,
-		root: &caseNode{name: "."},
+		fs: fs,
+		cache: caseCache{
+			TwoQueueCache: cache,
+		},
 	}
 	return caser
 }
 
 func (r *defaultRealCaser) realCase(name string) (string, error) {
-	out := "."
-	if name == out {
-		return out, nil
+	realName := "."
+	if name == realName {
+		return realName, nil
 	}
 
-	r.mut.Lock()
-	defer r.mut.Unlock()
-
-	node := r.root
 	for _, comp := range strings.Split(name, string(PathSeparator)) {
-		if node.dirNames == nil || node.expires.Before(time.Now()) {
-			// Haven't called DirNames yet, or the node has expired
+		node := r.cache.getExpireAdd(realName)
 
-			var err error
-			node.dirNames, err = r.fs.DirNames(out)
+		node.once.Do(func() {
+			dirNames, err := r.fs.DirNames(realName)
 			if err != nil {
-				return "", err
+				r.cache.Remove(realName)
+				node.err = err
+				return
 			}
 
-			node.dirNamesLower = make([]string, len(node.dirNames))
-			for i, n := range node.dirNames {
-				node.dirNamesLower[i] = UnicodeLowercase(n)
+			num := len(dirNames)
+			node.children = make(map[string]struct{}, num)
+			node.lowerToReal = make(map[string]string, num)
+			lastLower := ""
+			for _, n := range dirNames {
+				node.children[n] = struct{}{}
+				lower := UnicodeLowercase(n)
+				if lower != lastLower {
+					node.lowerToReal[lower] = n
+					lastLower = n
+				}
 			}
-
-			node.expires = time.Now().Add(caseCacheTimeout)
-			node.child = nil
+		})
+		if node.err != nil {
+			return "", node.err
 		}
 
-		// If we don't already have a correct cached child, try to find it.
-		if node.child == nil || node.child.name != comp {
-			// Actually loop dirNames to search for a match.
-			n, err := findCaseInsensitiveMatch(comp, node.dirNames, node.dirNamesLower)
-			if err != nil {
-				return "", err
+		// Try to find a direct or case match
+		if _, ok := node.children[comp]; !ok {
+			comp, ok = node.lowerToReal[UnicodeLowercase(comp)]
+			if !ok {
+				return "", ErrNotExist
 			}
-			node.child = &caseNode{name: n}
 		}
 
-		node = node.child
-		out = filepath.Join(out, node.name)
+		realName = filepath.Join(realName, comp)
 	}
 
-	return out, nil
+	return realName, nil
 }
 
 func (r *defaultRealCaser) dropCache() {
-	r.mut.Lock()
-	r.root = &caseNode{name: "."}
-	r.mut.Unlock()
+	r.cache.Purge()
+}
+
+func newCaseNode() *caseNode {
+	return &caseNode{
+		expires: time.Now().Add(caseCacheTimeout),
+	}
 }
 
-// Both name and the key to children are "Real", case resolved names of the path
+// The keys to children are "real", case resolved names of the path
 // component this node represents (i.e. containing no path separator).
-// The key to results is also a path component, but as given to RealCase, not
-// case resolved.
+// lowerToReal is a map of lowercase path components (as in UnicodeLowercase)
+// to their corresponding "real", case resolved names.
+// A node is created empty and populated using once. If an error occurs the node
+// is removed from cache and the error stored in err, such that anyone that
+// already got the node doesn't try to access the nil maps.
 type caseNode struct {
-	name          string
-	expires       time.Time
-	dirNames      []string
-	dirNamesLower []string
-	child         *caseNode
-}
-
-func findCaseInsensitiveMatch(name string, names, namesLower []string) (string, error) {
-	lower := UnicodeLowercase(name)
-	candidate := ""
-	for i, n := range names {
-		if n == name {
-			return n, nil
-		}
-		if candidate == "" && namesLower[i] == lower {
-			candidate = n
-		}
+	expires     time.Time
+	lowerToReal map[string]string
+	children    map[string]struct{}
+	once        sync.Once
+	err         error
+}
+
+type caseCache struct {
+	*lru.TwoQueueCache
+	mut sync.Mutex
+}
+
+// getExpireAdd gets an entry for the given key. If no entry exists, or it is
+// expired a new one is created and added to the cache.
+func (c *caseCache) getExpireAdd(key string) *caseNode {
+	c.mut.Lock()
+	defer c.mut.Unlock()
+	v, ok := c.Get(key)
+	if !ok {
+		node := newCaseNode()
+		c.Add(key, node)
+		return node
 	}
-	if candidate == "" {
-		return "", ErrNotExist
+	node := v.(*caseNode)
+	if node.expires.Before(time.Now()) {
+		node = newCaseNode()
+		c.Add(key, node)
 	}
-	return candidate, nil
+	return node
 }

+ 53 - 6
lib/fs/casefs_test.go

@@ -163,7 +163,7 @@ func BenchmarkWalkCaseFakeFS100k(b *testing.B) {
 	b.Run("rawfs", func(b *testing.B) {
 		fakefs := unwrapFilesystem(fsys).(*fakefs)
 		fakefs.resetCounters()
-		benchmarkWalkFakeFS(b, fsys, paths)
+		benchmarkWalkFakeFS(b, fsys, paths, 0, "")
 		fakefs.reportMetricsPerOp(b)
 		fakefs.reportMetricsPer(b, entries, "entry")
 		b.ReportAllocs()
@@ -176,14 +176,37 @@ func BenchmarkWalkCaseFakeFS100k(b *testing.B) {
 		}
 		fakefs := unwrapFilesystem(fsys).(*fakefs)
 		fakefs.resetCounters()
-		benchmarkWalkFakeFS(b, casefs, paths)
+		benchmarkWalkFakeFS(b, casefs, paths, 0, "")
+		fakefs.reportMetricsPerOp(b)
+		fakefs.reportMetricsPer(b, entries, "entry")
+		b.ReportAllocs()
+	})
+	var otherOpPath string
+	sep := string(PathSeparator)
+	longest := 0
+	for _, p := range paths {
+		if length := len(strings.Split(p, sep)); length > longest {
+			otherOpPath = p
+			longest = length
+		}
+	}
+	otherOpEvery := 1000
+	b.Run(fmt.Sprintf("casefs-otherOpEvery%v", otherOpEvery), func(b *testing.B) {
+		// Construct the casefs manually or it will get cached and the benchmark is invalid.
+		casefs := &caseFilesystem{
+			Filesystem: fsys,
+			realCaser:  newDefaultRealCaser(fsys),
+		}
+		fakefs := unwrapFilesystem(fsys).(*fakefs)
+		fakefs.resetCounters()
+		benchmarkWalkFakeFS(b, casefs, paths, otherOpEvery, otherOpPath)
 		fakefs.reportMetricsPerOp(b)
 		fakefs.reportMetricsPer(b, entries, "entry")
 		b.ReportAllocs()
 	})
 }
 
-func benchmarkWalkFakeFS(b *testing.B, fsys Filesystem, paths []string) {
+func benchmarkWalkFakeFS(b *testing.B, fsys Filesystem, paths []string, otherOpEvery int, otherOpPath string) {
 	// Simulate a scanner pass over the filesystem. First walk it to
 	// discover all names, then stat each name individually to check if it's
 	// been deleted or not (pretending that they all existed in the
@@ -194,7 +217,7 @@ func benchmarkWalkFakeFS(b *testing.B, fsys Filesystem, paths []string) {
 	t0 := time.Now()
 
 	for i := 0; i < b.N; i++ {
-		if err := doubleWalkFS(fsys, paths); err != nil {
+		if err := doubleWalkFSWithOtherOps(fsys, paths, otherOpEvery, otherOpPath); err != nil {
 			b.Fatal(err)
 		}
 	}
@@ -250,15 +273,39 @@ func TestStressCaseFS(t *testing.T) {
 }
 
 func doubleWalkFS(fsys Filesystem, paths []string) error {
+	return doubleWalkFSWithOtherOps(fsys, paths, 0, "")
+}
+
+func doubleWalkFSWithOtherOps(fsys Filesystem, paths []string, otherOpEvery int, otherOpPath string) error {
+	i := 0
 	if err := fsys.Walk("/", func(path string, info FileInfo, err error) error {
+		i++
+		if otherOpEvery != 0 && i%otherOpEvery == 0 {
+			// l.Infoln("AAA", otherOpPath)
+			if _, err := fsys.Lstat(otherOpPath); err != nil {
+				return err
+			}
+		}
+		// l.Infoln("CCC", path)
 		return err
 	}); err != nil {
 		return err
 	}
 
 	for _, p := range paths {
-		if _, err := fsys.Lstat(p); err != nil {
-			return err
+		for p != "." {
+			i++
+			if otherOpEvery != 0 && i%otherOpEvery == 0 {
+				if _, err := fsys.Lstat(otherOpPath); err != nil {
+					// l.Infoln("AAA", otherOpPath)
+					return err
+				}
+			}
+			// l.Infoln("CCC", p)
+			if _, err := fsys.Lstat(p); err != nil {
+				return err
+			}
+			p = filepath.Dir(p)
 		}
 	}
 	return nil