Browse Source

Enforce correct filename normalization (fixes #96)

Jakob Borg 11 years ago
parent
commit
68d9454bc4
5 changed files with 34 additions and 5 deletions
  1. 3 3
      cmd/syncthing/model.go
  2. 11 0
      cmd/syncthing/normalize.go
  3. 11 0
      cmd/syncthing/normalize_darwin.go
  4. 3 2
      protocol/PROTOCOL.md
  5. 6 0
      scanner/walk.go

+ 3 - 3
cmd/syncthing/model.go

@@ -839,8 +839,8 @@ func (m *Model) recomputeNeedForFile(gf scanner.File, toAdd []addOrder, toDelete
 		} else {
 			local, remote := scanner.BlockDiff(lf.Blocks, gf.Blocks)
 			fm := fileMonitor{
-				name:        gf.Name,
-				path:        path.Clean(path.Join(m.dir, gf.Name)),
+				name:        FSNormalize(gf.Name),
+				path:        FSNormalize(path.Clean(path.Join(m.dir, gf.Name))),
 				global:      gf,
 				model:       m,
 				localBlocks: local,
@@ -875,7 +875,7 @@ func (m *Model) deleteLoop() {
 		if debugPull {
 			dlog.Println("delete", file.Name)
 		}
-		path := path.Clean(path.Join(m.dir, file.Name))
+		path := FSNormalize(path.Clean(path.Join(m.dir, file.Name)))
 		err := os.Remove(path)
 		if err != nil {
 			warnf("%s: %v", file.Name, err)

+ 11 - 0
cmd/syncthing/normalize.go

@@ -0,0 +1,11 @@
+//+build !darwin
+
+package main
+
+import "code.google.com/p/go.text/unicode/norm"
+
+// FSNormalize returns s in Unicode normalization form C (NFC), the form
+// required for file system names on hosts other than darwin.
+func FSNormalize(s string) string {
+	return norm.NFC.String(s)
+}

+ 11 - 0
cmd/syncthing/normalize_darwin.go

@@ -0,0 +1,11 @@
+//+build darwin
+
+package main
+
+import "code.google.com/p/go.text/unicode/norm"
+
+// FSNormalize returns s in Unicode normalization form D (NFD), the form
+// required for file system names on darwin hosts.
+func FSNormalize(s string) string {
+	return norm.NFD.String(s)
+}

+ 3 - 2
protocol/PROTOCOL.md

@@ -163,8 +163,9 @@ response to the Index message.
 The Repository field identifies the repository that the index message
 pertains to. For single repository implementations an empty repository
 ID is acceptable, or the word "default". The Name is the file name path
-relative to the repository root. The combination of Repository and Name
-uniquely identifies each file in a cluster.
+relative to the repository root. The Name is always in UTF-8 NFC regardless
+of operating system or file system specific conventions. The combination of
+Repository and Name uniquely identifies each file in a cluster.
 
 The Version field is a counter that is initially zero for each file. It
 is incremented each time a change is detected. The combination of

+ 6 - 0
scanner/walk.go

@@ -9,6 +9,8 @@ import (
 	"path/filepath"
 	"strings"
 	"time"
+
+	"code.google.com/p/go.text/unicode/norm"
 )
 
 type Walker struct {
@@ -136,6 +138,7 @@ func (w *Walker) loadIgnoreFiles(dir string, ign map[string][]string) filepath.W
 
 func (w *Walker) walkAndHashFiles(res *[]File, ign map[string][]string) filepath.WalkFunc {
 	return func(p string, info os.FileInfo, err error) error {
+
 		if err != nil {
 			if debug {
 				dlog.Println("error:", p, info, err)
@@ -151,6 +154,9 @@ func (w *Walker) walkAndHashFiles(res *[]File, ign map[string][]string) filepath
 			return nil
 		}
 
+		// Internally, we always use Unicode normalization form C (NFC).
+		rn = norm.NFC.String(rn)
+
 		if w.TempNamer != nil && w.TempNamer.IsTemporary(rn) {
 			if debug {
 				dlog.Println("temporary:", rn)