10 years ago · 8311162be3
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -50,6 +50,7 @@ type FolderConfiguration struct {
 
				 	ReadOnly        bool                        `xml:"ro,attr" json:"readOnly"`
			
 
				 	RescanIntervalS int                         `xml:"rescanIntervalS,attr" json:"rescanIntervalS" default:"60"`
			
 
				 	IgnorePerms     bool                        `xml:"ignorePerms,attr" json:"ignorePerms"`
			
 
				+	AutoNormalize   bool                        `xml:"autoNormalize,attr" json:"autoNormalize" default:"true"`
			
 
				 	Versioning      VersioningConfiguration     `xml:"versioning" json:"versioning"`
			
 
				 	LenientMtimes   bool                        `xml:"lenientMtimes" json:"lenientMTimes"`
			
 
				 	Copiers         int                         `xml:"copiers" json:"copiers" default:"1"`  // This defines how many files are handled concurrently.
			
--- a/internal/model/model.go
+++ b/internal/model/model.go
@@ -1139,15 +1139,16 @@ func (m *Model) ScanFolderSub(folder, sub string) error {
 
				 	}
			
 
				 
			
 
				 	w := &scanner.Walker{
			
 
				-		Dir:          folderCfg.Path,
			
 
				-		Sub:          sub,
			
 
				-		Matcher:      ignores,
			
 
				-		BlockSize:    protocol.BlockSize,
			
 
				-		TempNamer:    defTempNamer,
			
 
				-		TempLifetime: time.Duration(m.cfg.Options().KeepTemporariesH) * time.Hour,
			
 
				-		CurrentFiler: cFiler{m, folder},
			
 
				-		IgnorePerms:  folderCfg.IgnorePerms,
			
 
				-		Hashers:      folderCfg.Hashers,
			
 
				+		Dir:           folderCfg.Path,
			
 
				+		Sub:           sub,
			
 
				+		Matcher:       ignores,
			
 
				+		BlockSize:     protocol.BlockSize,
			
 
				+		TempNamer:     defTempNamer,
			
 
				+		TempLifetime:  time.Duration(m.cfg.Options().KeepTemporariesH) * time.Hour,
			
 
				+		CurrentFiler:  cFiler{m, folder},
			
 
				+		IgnorePerms:   folderCfg.IgnorePerms,
			
 
				+		AutoNormalize: folderCfg.AutoNormalize,
			
 
				+		Hashers:       folderCfg.Hashers,
			
 
				 	}
			
 
				 
			
 
				 	runner.setState(FolderScanning)
			
--- a/internal/scanner/walk.go
+++ b/internal/scanner/walk.go
@@ -13,6 +13,7 @@ import (
 
				 	"runtime"
			
 
				 	"strings"
			
 
				 	"time"
			
 
				+	"unicode/utf8"
			
 
				 
			
 
				 	"github.com/syncthing/protocol"
			
 
				 	"github.com/syncthing/syncthing/internal/ignore"
			
@@ -55,6 +56,9 @@ type Walker struct {
 
				 	// detected. Scanned files will get zero permission bits and the
			
 
				 	// NoPermissionBits flag set.
			
 
				 	IgnorePerms bool
			
 
				+	// When AutoNormalize is set, file names that are in UTF8 but incorrect
			
 
				+	// normalization form will be corrected.
			
 
				+	AutoNormalize bool
			
 
				 	// Number of routines to use for hashing
			
 
				 	Hashers int
			
 
				 }
			
@@ -149,11 +153,63 @@ func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo) filepath.WalkFun
 
				 			return nil
			
 
				 		}
			
 
				 
			
 
				-		if (runtime.GOOS == "linux" || runtime.GOOS == "windows") && !norm.NFC.IsNormalString(rn) {
			
 
				-			l.Warnf("File %q contains non-NFC UTF-8 sequences and cannot be synced. Consider renaming.", rn)
			
 
				+		if !utf8.ValidString(rn) {
			
 
				+			l.Warnf("File name %q is not in UTF8 encoding; skipping.", rn)
			
 
				+			if info.IsDir() {
			
 
				+				return filepath.SkipDir
			
 
				+			}
			
 
				 			return nil
			
 
				 		}
			
 
				 
			
 
				+		var normalizedRn string
			
 
				+		if runtime.GOOS == "darwin" {
			
 
				+			// Mac OS X file names should always be NFD normalized.
			
 
				+			normalizedRn = norm.NFD.String(rn)
			
 
				+		} else {
			
 
				+			// Every other OS in the known universe uses NFC or just plain
			
 
				+			// doesn't bother to define an encoding. In our case *we* do care,
			
 
				+			// so we enforce NFC regardless.
			
 
				+			normalizedRn = norm.NFC.String(rn)
			
 
				+		}
			
 
				+
			
 
				+		if rn != normalizedRn {
			
 
				+			// The file name was not normalized.
			
 
				+
			
 
				+			if !w.AutoNormalize {
			
 
				+				// We're not authorized to do anything about it, so complain and skip.
			
 
				+
			
 
				+				l.Warnf("File name %q is not in the correct UTF8 normalization form; skipping.", rn)
			
 
				+				if info.IsDir() {
			
 
				+					return filepath.SkipDir
			
 
				+				}
			
 
				+				return nil
			
 
				+			}
			
 
				+
			
 
				+			// We will attempt to normalize it.
			
 
				+			normalizedPath := filepath.Join(w.Dir, normalizedRn)
			
 
				+			if _, err := os.Lstat(normalizedPath); os.IsNotExist(err) {
			
 
				+				// Nothing exists with the normalized filename. Good.
			
 
				+				if err = os.Rename(p, normalizedPath); err != nil {
			
 
				+					l.Infof(`Error normalizing UTF8 encoding of file "%s": %v`, rn, err)
			
 
				+					if info.IsDir() {
			
 
				+						return filepath.SkipDir
			
 
				+					}
			
 
				+					return nil
			
 
				+				}
			
 
				+				l.Infof(`Normalized UTF8 encoding of file name "%s".`, rn)
			
 
				+			} else {
			
 
				+				// There is something already in the way at the normalized
			
 
				+				// file name.
			
 
				+				l.Infof(`File "%s" has UTF8 encoding conflict with another file; ignoring.`, rn)
			
 
				+				if info.IsDir() {
			
 
				+					return filepath.SkipDir
			
 
				+				}
			
 
				+				return nil
			
 
				+			}
			
 
				+
			
 
				+			rn = normalizedRn
			
 
				+		}
			
 
				+
			
 
				 		// Index wise symlinks are always files, regardless of what the target
			
 
				 		// is, because symlinks carry their target path as their content.
			
 
				 		if info.Mode()&os.ModeSymlink == os.ModeSymlink {
			
--- a/internal/scanner/walk_test.go
+++ b/internal/scanner/walk_test.go
@@ -9,14 +9,17 @@ package scanner
 
				 import (
			
 
				 	"bytes"
			
 
				 	"fmt"
			
 
				+	"os"
			
 
				 	"path/filepath"
			
 
				 	"reflect"
			
 
				+	"runtime"
			
 
				 	rdebug "runtime/debug"
			
 
				 	"sort"
			
 
				 	"testing"
			
 
				 
			
 
				 	"github.com/syncthing/protocol"
			
 
				 	"github.com/syncthing/syncthing/internal/ignore"
			
 
				+	"golang.org/x/text/unicode/norm"
			
 
				 )
			
 
				 
			
 
				 type testfile struct {
			
@@ -181,6 +184,102 @@ func TestVerify(t *testing.T) {
 
				 	}
			
 
				 }
			
 
				 
			
 
				+func TestNormalization(t *testing.T) {
			
 
				+	if runtime.GOOS == "darwin" {
			
 
				+		t.Skip("Normalization test not possible on darwin")
			
 
				+		return
			
 
				+	}
			
 
				+
			
 
				+	os.RemoveAll("testdata/normalization")
			
 
				+	defer os.RemoveAll("testdata/normalization")
			
 
				+
			
 
				+	tests := []string{
			
 
				+		"0-A",            // ASCII A -- accepted
			
 
				+		"1-\xC3\x84",     // NFC 'Ä' -- conflicts with the entry below, accepted
			
 
				+		"1-\x41\xCC\x88", // NFD 'Ä' -- conflicts with the entry above, ignored
			
 
				+		"2-\xC3\x85",     // NFC 'Å' -- accepted
			
 
				+		"3-\x41\xCC\x83", // NFD 'Ã' -- converted to NFC
			
 
				+		"4-\xE2\x98\x95", // U+2615 HOT BEVERAGE (☕) -- accepted
			
 
				+		"5-\xCD\xE2",     // EUC-CN "wài" (外) -- ignored (not UTF8)
			
 
				+	}
			
 
				+	numInvalid := 2
			
 
				+	numValid := len(tests) - numInvalid
			
 
				+
			
 
				+	for _, s1 := range tests {
			
 
				+		// Create a directory for each of the interesting strings above
			
 
				+		if err := os.MkdirAll(filepath.Join("testdata/normalization", s1), 0755); err != nil {
			
 
				+			t.Fatal(err)
			
 
				+		}
			
 
				+
			
 
				+		for _, s2 := range tests {
			
 
				+			// Within each dir, create a file with each of the interesting
			
 
				+			// file names. Ensure that the file doesn't exist when it's
			
 
				+			// created. This detects and fails if there's file name
			
 
				+			// normalization stuff at the filesystem level.
			
 
				+			if fd, err := os.OpenFile(filepath.Join("testdata/normalization", s1, s2), os.O_CREATE|os.O_EXCL, 0644); err != nil {
			
 
				+				t.Fatal(err)
			
 
				+			} else {
			
 
				+				fd.WriteString("test")
			
 
				+				fd.Close()
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	// We can normalize a directory name, but we can't descend into it in the
			
 
				+	// same pass due to how filepath.Walk works. So we run the scan twice to
			
 
				+	// make sure it all gets done. In production, things will be correct
			
 
				+	// eventually...
			
 
				+
			
 
				+	_, err := walkDir("testdata/normalization")
			
 
				+	if err != nil {
			
 
				+		t.Fatal(err)
			
 
				+	}
			
 
				+	tmp, err := walkDir("testdata/normalization")
			
 
				+	if err != nil {
			
 
				+		t.Fatal(err)
			
 
				+	}
			
 
				+
			
 
				+	files := fileList(tmp).testfiles()
			
 
				+
			
 
				+	// We should have one file per combination, plus the directories
			
 
				+	// themselves
			
 
				+
			
 
				+	expectedNum := numValid*numValid + numValid
			
 
				+	if len(files) != expectedNum {
			
 
				+		t.Errorf("Expected %d files, got %d", expectedNum, len(files))
			
 
				+	}
			
 
				+
			
 
				+	// The file names should all be in NFC form.
			
 
				+
			
 
				+	for _, f := range files {
			
 
				+		t.Logf("%q (% x) %v", f.name, f.name, norm.NFC.IsNormalString(f.name))
			
 
				+		if !norm.NFC.IsNormalString(f.name) {
			
 
				+			t.Errorf("File name %q is not NFC normalized", f.name)
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+func walkDir(dir string) ([]protocol.FileInfo, error) {
			
 
				+	w := Walker{
			
 
				+		Dir:           dir,
			
 
				+		BlockSize:     128 * 1024,
			
 
				+		AutoNormalize: true,
			
 
				+	}
			
 
				+
			
 
				+	fchan, err := w.Walk()
			
 
				+	if err != nil {
			
 
				+		return nil, err
			
 
				+	}
			
 
				+
			
 
				+	var tmp []protocol.FileInfo
			
 
				+	for f := range fchan {
			
 
				+		tmp = append(tmp, f)
			
 
				+	}
			
 
				+	sort.Sort(fileList(tmp))
			
 
				+
			
 
				+	return tmp, nil
			
 
				+}
			
 
				+
			
 
				 type fileList []protocol.FileInfo
			
 
				 
			
 
				 func (l fileList) Len() int {