Selaa lähdekoodia

lib/fs: Ignore normalization differences in case insensitive lookup (fixes #7677) (#7678)

Jakob Borg 4 vuotta sitten
vanhempi
sitoutus
97437cad64
7 muutettua tiedostoa jossa 93 lisäystä ja 27 poistoa
  1. 5 5
      lib/fs/basicfs_windows.go
  2. 7 3
      lib/fs/casefs.go
  3. 10 10
      lib/fs/fakefs.go
  4. 6 2
      lib/fs/folding.go
  5. 7 5
      lib/fs/folding_test.go
  6. 2 2
      lib/fs/mtimefs.go
  7. 56 0
      lib/scanner/walk_test.go

+ 5 - 5
lib/fs/basicfs_windows.go

@@ -157,9 +157,9 @@ func (f *BasicFilesystem) Roots() ([]string, error) {
 // pathseparator.
 func (f *BasicFilesystem) unrootedChecked(absPath string, roots []string) (string, error) {
 	absPath = f.resolveWin83(absPath)
-	lowerAbsPath := UnicodeLowercase(absPath)
+	lowerAbsPath := UnicodeLowercaseNormalized(absPath)
 	for _, root := range roots {
-		lowerRoot := UnicodeLowercase(root)
+		lowerRoot := UnicodeLowercaseNormalized(root)
 		if lowerAbsPath+string(PathSeparator) == lowerRoot {
 			return ".", nil
 		}
@@ -171,7 +171,7 @@ func (f *BasicFilesystem) unrootedChecked(absPath string, roots []string) (strin
 }
 
 func rel(path, prefix string) string {
-	lowerRel := strings.TrimPrefix(strings.TrimPrefix(UnicodeLowercase(path), UnicodeLowercase(prefix)), string(PathSeparator))
+	lowerRel := strings.TrimPrefix(strings.TrimPrefix(UnicodeLowercaseNormalized(path), UnicodeLowercaseNormalized(prefix)), string(PathSeparator))
 	return path[len(path)-len(lowerRel):]
 }
 
@@ -193,8 +193,8 @@ func (f *BasicFilesystem) resolveWin83(absPath string) string {
 	}
 	// Failed getting the long path. Return the part of the path which is
 	// already a long path.
-	lowerRoot := UnicodeLowercase(f.root)
-	for absPath = filepath.Dir(absPath); strings.HasPrefix(UnicodeLowercase(absPath), lowerRoot); absPath = filepath.Dir(absPath) {
+	lowerRoot := UnicodeLowercaseNormalized(f.root)
+	for absPath = filepath.Dir(absPath); strings.HasPrefix(UnicodeLowercaseNormalized(absPath), lowerRoot); absPath = filepath.Dir(absPath) {
 		if !isMaybeWin83(absPath) {
 			return absPath
 		}

+ 7 - 3
lib/fs/casefs.go

@@ -15,6 +15,7 @@ import (
 	"time"
 
 	lru "github.com/hashicorp/golang-lru"
+	"golang.org/x/text/unicode/norm"
 )
 
 const (
@@ -375,7 +376,10 @@ func (f *caseFilesystem) checkCaseExisting(name string) error {
 	if err != nil {
 		return err
 	}
-	if realName != name {
+	// We normalize the normalization (hah!) of the strings before
+	// comparing, as we don't want to treat a normalization difference as a
+	// case conflict.
+	if norm.NFC.String(realName) != norm.NFC.String(name) {
 		return &ErrCaseConflict{name, realName}
 	}
 	return nil
@@ -424,7 +428,7 @@ func (r *defaultRealCaser) realCase(name string) (string, error) {
 			lastLower := ""
 			for _, n := range dirNames {
 				node.children[n] = struct{}{}
-				lower := UnicodeLowercase(n)
+				lower := UnicodeLowercaseNormalized(n)
 				if lower != lastLower {
 					node.lowerToReal[lower] = n
 					lastLower = n
@@ -437,7 +441,7 @@ func (r *defaultRealCaser) realCase(name string) (string, error) {
 
 		// Try to find a direct or case match
 		if _, ok := node.children[comp]; !ok {
-			comp, ok = node.lowerToReal[UnicodeLowercase(comp)]
+			comp, ok = node.lowerToReal[UnicodeLowercaseNormalized(comp)]
 			if !ok {
 				return "", ErrNotExist
 			}

+ 10 - 10
lib/fs/fakefs.go

@@ -186,7 +186,7 @@ type fakeEntry struct {
 func (fs *fakeFS) entryForName(name string) *fakeEntry {
 	// bug: lookup doesn't work through symlinks.
 	if fs.insens {
-		name = UnicodeLowercase(name)
+		name = UnicodeLowercaseNormalized(name)
 	}
 
 	name = filepath.ToSlash(name)
@@ -285,7 +285,7 @@ func (fs *fakeFS) create(name string) (*fakeEntry, error) {
 	}
 
 	if fs.insens {
-		base = UnicodeLowercase(base)
+		base = UnicodeLowercaseNormalized(base)
 	}
 
 	if fs.withContent {
@@ -373,7 +373,7 @@ func (fs *fakeFS) Mkdir(name string, perm FileMode) error {
 		return os.ErrExist
 	}
 	if fs.insens {
-		key = UnicodeLowercase(key)
+		key = UnicodeLowercaseNormalized(key)
 	}
 	if _, ok := entry.children[key]; ok {
 		return os.ErrExist
@@ -402,7 +402,7 @@ func (fs *fakeFS) MkdirAll(name string, perm FileMode) error {
 	for _, comp := range comps {
 		key := comp
 		if fs.insens {
-			key = UnicodeLowercase(key)
+			key = UnicodeLowercaseNormalized(key)
 		}
 
 		next, ok := entry.children[key]
@@ -465,7 +465,7 @@ func (fs *fakeFS) OpenFile(name string, flags int, mode FileMode) (File, error)
 	}
 
 	if fs.insens {
-		key = UnicodeLowercase(key)
+		key = UnicodeLowercaseNormalized(key)
 	}
 	if flags&os.O_EXCL != 0 {
 		if _, ok := entry.children[key]; ok {
@@ -508,7 +508,7 @@ func (fs *fakeFS) Remove(name string) error {
 	time.Sleep(fs.latency)
 
 	if fs.insens {
-		name = UnicodeLowercase(name)
+		name = UnicodeLowercaseNormalized(name)
 	}
 
 	entry := fs.entryForName(name)
@@ -531,7 +531,7 @@ func (fs *fakeFS) RemoveAll(name string) error {
 	time.Sleep(fs.latency)
 
 	if fs.insens {
-		name = UnicodeLowercase(name)
+		name = UnicodeLowercaseNormalized(name)
 	}
 
 	entry := fs.entryForName(filepath.Dir(name))
@@ -555,8 +555,8 @@ func (fs *fakeFS) Rename(oldname, newname string) error {
 	newKey := filepath.Base(newname)
 
 	if fs.insens {
-		oldKey = UnicodeLowercase(oldKey)
-		newKey = UnicodeLowercase(newKey)
+		oldKey = UnicodeLowercaseNormalized(oldKey)
+		newKey = UnicodeLowercaseNormalized(newKey)
 	}
 
 	p0 := fs.entryForName(filepath.Dir(oldname))
@@ -651,7 +651,7 @@ func (fs *fakeFS) SameFile(fi1, fi2 FileInfo) bool {
 	// where ModTime is not that precise
 	var ok bool
 	if fs.insens {
-		ok = UnicodeLowercase(fi1.Name()) == UnicodeLowercase(fi2.Name())
+		ok = UnicodeLowercaseNormalized(fi1.Name()) == UnicodeLowercaseNormalized(fi2.Name())
 	} else {
 		ok = fi1.Name() == fi2.Name()
 	}

+ 6 - 2
lib/fs/folding.go

@@ -10,9 +10,13 @@ import (
 	"strings"
 	"unicode"
 	"unicode/utf8"
+
+	"golang.org/x/text/unicode/norm"
 )
 
-func UnicodeLowercase(s string) string {
+// UnicodeLowercaseNormalized returns the Unicode lower case variant of s,
+// normalized to form C; if s needs no case change it is returned unmodified.
+func UnicodeLowercaseNormalized(s string) string {
 	i := firstCaseChange(s)
 	if i == -1 {
 		return s
@@ -28,7 +32,7 @@ func UnicodeLowercase(s string) string {
 	for _, r := range s[i:] {
 		rs.WriteRune(unicode.ToLower(unicode.ToUpper(r)))
 	}
-	return rs.String()
+	return norm.NFC.String(rs.String())
 }
 
 // Byte index of the first rune r s.t. lower(upper(r)) != r.

+ 7 - 5
lib/fs/folding_test.go

@@ -44,13 +44,15 @@ var caseCases = [][2]string{
 	{"チャーハン", "チャーハン"},
 	// Some special Unicode characters, however, are folded by OSes.
 	{"\u212A", "k"},
+	// Folding renormalizes to NFC
+	{"A\xCC\x88", "\xC3\xA4"}, // ä
 }
 
-func TestUnicodeLowercase(t *testing.T) {
+func TestUnicodeLowercaseNormalized(t *testing.T) {
 	for _, tc := range caseCases {
-		res := UnicodeLowercase(tc[0])
+		res := UnicodeLowercaseNormalized(tc[0])
 		if res != tc[1] {
-			t.Errorf("UnicodeLowercase(%q) => %q, expected %q", tc[0], res, tc[1])
+			t.Errorf("UnicodeLowercaseNormalized(%q) => %q, expected %q", tc[0], res, tc[1])
 		}
 	}
 }
@@ -60,7 +62,7 @@ func BenchmarkUnicodeLowercaseMaybeChange(b *testing.B) {
 
 	for i := 0; i < b.N; i++ {
 		for _, s := range caseCases {
-			UnicodeLowercase(s[0])
+			UnicodeLowercaseNormalized(s[0])
 		}
 	}
 }
@@ -70,7 +72,7 @@ func BenchmarkUnicodeLowercaseNoChange(b *testing.B) {
 
 	for i := 0; i < b.N; i++ {
 		for _, s := range caseCases {
-			UnicodeLowercase(s[1])
+			UnicodeLowercaseNormalized(s[1])
 		}
 	}
 }

+ 2 - 2
lib/fs/mtimefs.go

@@ -157,7 +157,7 @@ func (f *mtimeFS) wrapperType() filesystemWrapperType {
 
 func (f *mtimeFS) save(name string, real, virtual time.Time) {
 	if f.caseInsensitive {
-		name = UnicodeLowercase(name)
+		name = UnicodeLowercaseNormalized(name)
 	}
 
 	if real.Equal(virtual) {
@@ -177,7 +177,7 @@ func (f *mtimeFS) save(name string, real, virtual time.Time) {
 
 func (f *mtimeFS) load(name string) (MtimeMapping, error) {
 	if f.caseInsensitive {
-		name = UnicodeLowercase(name)
+		name = UnicodeLowercaseNormalized(name)
 	}
 
 	data, exists, err := f.db.Bytes(name)

+ 56 - 0
lib/scanner/walk_test.go

@@ -251,6 +251,62 @@ func TestNormalization(t *testing.T) {
 	}
 }
 
+func TestNormalizationDarwinCaseFS(t *testing.T) {
+	// Creates NFC-named entries through a case filesystem on Darwin and expects the walk to report them in NFD.
+
+	if runtime.GOOS != "darwin" {
+		t.Skip("Normalization test not possible on non-Darwin")
+		return // redundant: t.Skip already stops the test
+	}
+
+	testFs := fs.NewCaseFilesystem(testFs) // shadows the outer testFs with the wrapped filesystem
+
+	testFs.RemoveAll("normalization") // start from a clean slate; errors are ignored
+	defer testFs.RemoveAll("normalization")
+	testFs.MkdirAll("normalization", 0755)
+
+	const (
+		inNFC = "\xC3\x84" // U+00C4, precomposed
+		inNFD = "\x41\xCC\x88" // "A" followed by U+0308 (combining diaeresis)
+	)
+
+	// Create dir with an NFC name
+	if err := testFs.Mkdir(filepath.Join("normalization", "dir-"+inNFC), 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create file with an NFC name inside that dir
+	fd, err := testFs.Create(filepath.Join("normalization", "dir-"+inNFC, "file-"+inNFC))
+	if err != nil {
+		t.Fatal(err)
+	}
+	fd.Close()
+
+	// Walk twice; only the result of the second walk is inspected.
+	walkDir(testFs, "normalization", nil, nil, 0) // NOTE(review): presumably this pass renames entries to NFD — confirm
+	tmp := walkDir(testFs, "normalization", nil, nil, 0)
+	if len(tmp) != 3 { // NOTE(review): message below mentions two entries; presumably the root dir is the third — confirm
+		t.Error("Expected one file and one dir scanned")
+	}
+
+	// Verify that both the dir and the file show up under their NFD names
+	foundFile := false
+	foundDir := false
+	for _, f := range tmp {
+		if f.Name == filepath.Join("normalization", "dir-"+inNFD) {
+			foundDir = true
+			continue
+		}
+		if f.Name == filepath.Join("normalization", "dir-"+inNFD, "file-"+inNFD) {
+			foundFile = true
+			continue
+		}
+	}
+	if !foundFile || !foundDir {
+		t.Error("Didn't find expected normalization form")
+	}
+}
+
 func TestIssue1507(t *testing.T) {
 	w := &walker{}
 	w.Matcher = ignore.New(w.Filesystem)