all: Reorder sequences for better rename detection (#6574)

Audrius Butkevicius, 5 years ago
commit decb967969
9 changed files with 409 additions and 42 deletions
  1. lib/db/keyer.go (+28 -0)
  2. lib/db/lowlevel.go (+38 -4)
  3. lib/db/schemaupdater.go (+48 -6)
  4. lib/db/set.go (+7 -0)
  5. lib/db/transactions.go (+42 -0)
  6. lib/model/folder.go (+49 -0)
  7. lib/model/folder_sendrecv.go (+30 -29)
  8. lib/model/model.go (+15 -3)
  9. lib/model/model_test.go (+152 -0)

+ 28 - 0
lib/db/keyer.go

@@ -62,6 +62,9 @@ const (
 
 	// KeyTypeBlockList <block list hash> = BlockList
 	KeyTypeBlockList = 13
+
+	// KeyTypeBlockListMap <int32 folder ID> <block list hash> <file name> = <nothing>
+	KeyTypeBlockListMap = 14
 )
 
 type keyer interface {
@@ -79,6 +82,8 @@ type keyer interface {
 	// block map key stuff (former BlockMap)
 	GenerateBlockMapKey(key, folder, hash, name []byte) (blockMapKey, error)
 	NameFromBlockMapKey(key []byte) []byte
+	GenerateBlockListMapKey(key, folder, hash, name []byte) (blockListMapKey, error)
+	NameFromBlockListMapKey(key []byte) []byte
 
 	// file need index
 	GenerateNeedFileKey(key, folder, name []byte) (needFileKey, error)
@@ -203,6 +208,29 @@ func (k blockMapKey) WithoutHashAndName() []byte {
 	return k[:keyPrefixLen+keyFolderLen]
 }
 
+type blockListMapKey []byte
+
+func (k defaultKeyer) GenerateBlockListMapKey(key, folder, hash, name []byte) (blockListMapKey, error) {
+	folderID, err := k.folderIdx.ID(folder)
+	if err != nil {
+		return nil, err
+	}
+	key = resize(key, keyPrefixLen+keyFolderLen+keyHashLen+len(name))
+	key[0] = KeyTypeBlockListMap
+	binary.BigEndian.PutUint32(key[keyPrefixLen:], folderID)
+	copy(key[keyPrefixLen+keyFolderLen:], hash)
+	copy(key[keyPrefixLen+keyFolderLen+keyHashLen:], name)
+	return key, nil
+}
+
+func (k defaultKeyer) NameFromBlockListMapKey(key []byte) []byte {
+	return key[keyPrefixLen+keyFolderLen+keyHashLen:]
+}
+
+func (k blockListMapKey) WithoutHashAndName() []byte {
+	return k[:keyPrefixLen+keyFolderLen]
+}
+
 type needFileKey []byte
 
 func (k needFileKey) WithoutName() []byte {

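The value under the new key type is empty; all information lives in the key bytes themselves. Below is a minimal standalone sketch of that layout, assuming the keyer's usual constants (keyPrefixLen = 1, keyFolderLen = 4, keyHashLen = 32); the helper names and main function are illustrative only, not the real implementation.

package main

import (
	"encoding/binary"
	"fmt"
)

const (
	keyTypeBlockListMap = 14
	keyPrefixLen        = 1
	keyFolderLen        = 4
	keyHashLen          = 32
)

// composeKey builds <type><int32 folder ID><block list hash><file name>.
func composeKey(folderID uint32, hash []byte, name string) []byte {
	key := make([]byte, keyPrefixLen+keyFolderLen+keyHashLen+len(name))
	key[0] = keyTypeBlockListMap
	binary.BigEndian.PutUint32(key[keyPrefixLen:], folderID)
	copy(key[keyPrefixLen+keyFolderLen:], hash)
	copy(key[keyPrefixLen+keyFolderLen+keyHashLen:], name)
	return key
}

// nameFromKey recovers the file name suffix, as NameFromBlockListMapKey does.
func nameFromKey(key []byte) string {
	return string(key[keyPrefixLen+keyFolderLen+keyHashLen:])
}

func main() {
	hash := make([]byte, keyHashLen) // placeholder all-zero hash
	key := composeKey(7, hash, "some/file")
	fmt.Println(nameFromKey(key)) // -> some/file
}
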
+ 38 - 4
lib/db/lowlevel.go

@@ -195,6 +195,7 @@ func (db *Lowlevel) updateLocalFiles(folder []byte, fs []protocol.FileInfo, meta
 		if ok && unchanged(f, ef) {
 			continue
 		}
+		blocksHashSame := ok && bytes.Equal(ef.BlocksHash, f.BlocksHash)
 
 		if ok {
 			if !ef.IsDirectory() && !ef.IsDeleted() && !ef.IsInvalid() {
@@ -207,6 +208,15 @@ func (db *Lowlevel) updateLocalFiles(folder []byte, fs []protocol.FileInfo, meta
 						return err
 					}
 				}
+				if !blocksHashSame {
+					keyBuf, err := db.keyer.GenerateBlockListMapKey(keyBuf, folder, ef.BlocksHash, name)
+					if err != nil {
+						return err
+					}
+					if err = t.Delete(keyBuf); err != nil {
+						return err
+					}
+				}
 			}
 
 			keyBuf, err = db.keyer.GenerateSequenceKey(keyBuf, folder, ef.SequenceNo())
@@ -260,6 +270,15 @@ func (db *Lowlevel) updateLocalFiles(folder []byte, fs []protocol.FileInfo, meta
 					return err
 				}
 			}
+			if !blocksHashSame {
+				keyBuf, err := db.keyer.GenerateBlockListMapKey(keyBuf, folder, f.BlocksHash, name)
+				if err != nil {
+					return err
+				}
+				if err = t.Put(keyBuf, nil); err != nil {
+					return err
+				}
+			}
 		}
 
 		if err := t.Checkpoint(func() error {
@@ -296,7 +315,7 @@ func (db *Lowlevel) dropFolder(folder []byte) error {
 	}
 
 	// Remove all sequences related to the folder
-	k1, err := db.keyer.GenerateSequenceKey(nil, folder, 0)
+	k1, err := db.keyer.GenerateSequenceKey(k0, folder, 0)
 	if err != nil {
 		return err
 	}
@@ -305,7 +324,7 @@ func (db *Lowlevel) dropFolder(folder []byte) error {
 	}
 
 	// Remove all items related to the given folder from the global bucket
-	k2, err := db.keyer.GenerateGlobalVersionKey(nil, folder, nil)
+	k2, err := db.keyer.GenerateGlobalVersionKey(k1, folder, nil)
 	if err != nil {
 		return err
 	}
@@ -314,7 +333,7 @@ func (db *Lowlevel) dropFolder(folder []byte) error {
 	}
 
 	// Remove all needs related to the folder
-	k3, err := db.keyer.GenerateNeedFileKey(nil, folder, nil)
+	k3, err := db.keyer.GenerateNeedFileKey(k2, folder, nil)
 	if err != nil {
 		return err
 	}
@@ -323,7 +342,7 @@ func (db *Lowlevel) dropFolder(folder []byte) error {
 	}
 
 	// Remove the blockmap of the folder
-	k4, err := db.keyer.GenerateBlockMapKey(nil, folder, nil, nil)
+	k4, err := db.keyer.GenerateBlockMapKey(k3, folder, nil, nil)
 	if err != nil {
 		return err
 	}
@@ -331,6 +350,14 @@ func (db *Lowlevel) dropFolder(folder []byte) error {
 		return err
 	}
 
+	k5, err := db.keyer.GenerateBlockListMapKey(k4, folder, nil, nil)
+	if err != nil {
+		return err
+	}
+	if err := t.deleteKeyPrefix(k5.WithoutHashAndName()); err != nil {
+		return err
+	}
+
 	return t.Commit()
 }
 
@@ -383,6 +410,13 @@ func (db *Lowlevel) dropDeviceFolder(device, folder []byte, meta *metadataTracke
 		if err := t.deleteKeyPrefix(key.WithoutHashAndName()); err != nil {
 			return err
 		}
+		key2, err := db.keyer.GenerateBlockListMapKey(key, folder, nil, nil)
+		if err != nil {
+			return err
+		}
+		if err := t.deleteKeyPrefix(key2.WithoutHashAndName()); err != nil {
+			return err
+		}
 	}
 	return t.Commit()
 }

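The net effect of these changes is that updateLocalFiles keeps the block list map as an exact inverted index over live local files: when a file's BlocksHash changes, the entry under the old hash is deleted and one under the new hash is inserted. A toy model of that invariant, using a plain in-memory key set instead of the real transaction API (all names here are illustrative):

package main

import "fmt"

// blockListMap models the on-disk index as an in-memory key set.
type blockListMap map[string]struct{}

func key(hash, name string) string { return hash + "\x00" + name }

// update mirrors what updateLocalFiles does per file: drop the entry
// under the old hash when the hash changed, then record the new one.
func (m blockListMap) update(name, oldHash, newHash string) {
	if oldHash != "" && oldHash != newHash {
		delete(m, key(oldHash, name))
	}
	m[key(newHash, name)] = struct{}{}
}

func main() {
	m := blockListMap{}
	m.update("a.txt", "", "h1")   // newly scanned file
	m.update("a.txt", "h1", "h2") // content changed
	fmt.Println(len(m))           // 1: only the h2 entry remains
}
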
+ 48 - 6
lib/db/schemaupdater.go

@@ -22,9 +22,9 @@ import (
 //   6: v0.14.50
 //   7: v0.14.53
 //   8-9: v1.4.0
-//   10: v1.6.0
+//   10-11: v1.6.0
 const (
-	dbVersion             = 10
+	dbVersion             = 11
 	dbMinSyncthingVersion = "v1.6.0"
 )
 
@@ -85,8 +85,9 @@ func (db *schemaUpdater) updateSchema() error {
 		{5, db.updateSchemaTo5},
 		{6, db.updateSchema5to6},
 		{7, db.updateSchema6to7},
-		{9, db.updateSchemato9},
-		{10, db.updateSchemato10},
+		{9, db.updateSchemaTo9},
+		{10, db.updateSchemaTo10},
+		{11, db.updateSchemaTo11},
 	}
 
 	for _, m := range migrations {
@@ -450,7 +451,7 @@ func (db *schemaUpdater) updateSchema6to7(_ int) error {
 	return t.Commit()
 }
 
-func (db *schemaUpdater) updateSchemato9(prev int) error {
+func (db *schemaUpdater) updateSchemaTo9(prev int) error {
 	// Loads and rewrites all files with blocks, to deduplicate block lists.
 	// Checks for missing or incorrect sequence entries and rewrites those.
 
@@ -499,7 +500,7 @@ func (db *schemaUpdater) updateSchemato9(prev int) error {
 	return t.Commit()
 }
 
-func (db *schemaUpdater) updateSchemato10(_ int) error {
+func (db *schemaUpdater) updateSchemaTo10(_ int) error {
 	t, err := db.newReadWriteTransaction()
 	if err != nil {
 		return err
@@ -568,3 +569,44 @@ func (db *schemaUpdater) updateSchemato10(_ int) error {
 
 	return t.Commit()
 }
+
+func (db *schemaUpdater) updateSchemaTo11(_ int) error {
+	// Populates block list map for every folder.
+
+	t, err := db.newReadWriteTransaction()
+	if err != nil {
+		return err
+	}
+	defer t.close()
+
+	var dk []byte
+	for _, folderStr := range db.ListFolders() {
+		folder := []byte(folderStr)
+		var putErr error
+		err := t.withHave(folder, protocol.LocalDeviceID[:], nil, true, func(fi FileIntf) bool {
+			f := fi.(FileInfoTruncated)
+			if f.IsDirectory() || f.IsDeleted() || f.IsInvalid() || f.BlocksHash == nil {
+				return true
+			}
+
+			name := []byte(f.FileName())
+			dk, putErr = db.keyer.GenerateBlockListMapKey(dk, folder, f.BlocksHash, name)
+			if putErr != nil {
+				return false
+			}
+
+			if putErr = t.Put(dk, nil); putErr != nil {
+				return false
+			}
+			putErr = t.Checkpoint()
+			return putErr == nil
+		})
+		if putErr != nil {
+			return putErr
+		}
+		if err != nil {
+			return err
+		}
+	}
+	return t.Commit()
+}

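For existing databases, migration 11 back-fills the map from the local have-list, checkpointing as it goes to keep transactions bounded. The migration runner itself gates on version numbers; a sketch of that gate with hypothetical names (the real updateSchema also persists the schema version and minimum Syncthing version afterwards):

package main

import "fmt"

type migration struct {
	targetVersion int
	apply         func(prev int) error
}

// run applies each migration whose target is above the current
// version, so updateSchemaTo11 runs exactly once per database.
func run(current int, migrations []migration) (int, error) {
	for _, m := range migrations {
		if current < m.targetVersion {
			if err := m.apply(current); err != nil {
				return current, err
			}
			current = m.targetVersion
		}
	}
	return current, nil
}

func main() {
	v, _ := run(10, []migration{
		{11, func(prev int) error {
			fmt.Println("filling block list map, upgrading from", prev)
			return nil
		}},
	})
	fmt.Println("now at schema version", v) // 11
}
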
+ 7 - 0
lib/db/set.go

@@ -365,6 +365,13 @@ func (s *Snapshot) RemoteNeedFolderFiles(device protocol.DeviceID, page, perpage
 	return files
 }
 
+func (s *Snapshot) WithBlocksHash(hash []byte, fn Iterator) {
+	l.Debugf(`%s WithBlocksHash("%x")`, s.folder, hash)
+	if err := s.t.withBlocksHash([]byte(s.folder), hash, nativeFileIterator(fn)); err != nil && !backend.IsClosed(err) {
+		panic(err)
+	}
+}
+
 func (s *FileSet) Sequence(device protocol.DeviceID) int64 {
 	return s.meta.Sequence(device)
 }

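A hedged usage sketch of the new snapshot method, assuming access to a *db.FileSet; this mirrors how the tests below call it:

package example

import "github.com/syncthing/syncthing/lib/db"

// filesSharingBlocks returns the names of local files whose whole
// block list hashes to hash, via the new snapshot method.
func filesSharingBlocks(fset *db.FileSet, hash []byte) []string {
	var names []string
	snap := fset.Snapshot()
	defer snap.Release()
	snap.WithBlocksHash(hash, func(fi db.FileIntf) bool {
		names = append(names, fi.FileName())
		return true // returning false stops the iteration early
	})
	return names
}
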
+ 42 - 0
lib/db/transactions.go

@@ -9,6 +9,7 @@ package db
 import (
 	"bytes"
 	"errors"
 
 	"github.com/syncthing/syncthing/lib/db/backend"
+	"github.com/syncthing/syncthing/lib/osutil"
 	"github.com/syncthing/syncthing/lib/protocol"
@@ -304,6 +305,47 @@ func (t *readOnlyTransaction) withGlobal(folder, prefix []byte, truncate bool, f
 	return dbi.Error()
 }
 
+func (t *readOnlyTransaction) withBlocksHash(folder, hash []byte, iterator Iterator) error {
+	key, err := t.keyer.GenerateBlockListMapKey(nil, folder, hash, nil)
+	if err != nil {
+		return err
+	}
+
+	iter, err := t.NewPrefixIterator(key)
+	if err != nil {
+		return err
+	}
+	defer iter.Release()
+
+	for iter.Next() {
+		file := string(t.keyer.NameFromBlockListMapKey(iter.Key()))
+		f, ok, err := t.getFile(folder, protocol.LocalDeviceID[:], []byte(osutil.NormalizedFilename(file)))
+		if err != nil {
+			return err
+		}
+		if !ok {
+			continue
+		}
+		f.Name = osutil.NativeFilename(f.Name)
+
+		if !bytes.Equal(f.BlocksHash, hash) {
+			l.Warnf("Mismatching block list map hashes: got %x expected %x", f.BlocksHash, hash)
+			continue
+		}
+
+		if f.IsDeleted() || f.IsInvalid() || f.IsDirectory() || f.IsSymlink() {
+			l.Warnf("Found something of unexpected type in block list map: %s", f)
+			continue
+		}
+
+		if !iterator(f) {
+			break
+		}
+	}
+
+	return iter.Error()
+}
+
 func (t *readOnlyTransaction) availability(folder, file []byte) ([]protocol.DeviceID, error) {
 	vl, err := t.getGlobalVersions(nil, folder, file)
 	if backend.IsNotFound(err) {

+ 49 - 0
lib/model/folder.go

@@ -464,6 +464,13 @@ func (f *folder) scanSubdirs(subDirs []string) error {
 
 		batch.append(res.File)
 		changes++
+
+		if f.localFlags&protocol.FlagLocalReceiveOnly == 0 {
+			if nf, ok := f.findRename(snap, mtimefs, res.File); ok {
+				batch.append(nf)
+				changes++
+			}
+		}
 	}
 
 	if err := batch.flush(); err != nil {
@@ -615,6 +622,48 @@ func (f *folder) scanSubdirs(subDirs []string) error {
 	return nil
 }
 
+func (f *folder) findRename(snap *db.Snapshot, mtimefs fs.Filesystem, file protocol.FileInfo) (protocol.FileInfo, bool) {
+	found := false
+	nf := protocol.FileInfo{}
+
+	snap.WithBlocksHash(file.BlocksHash, func(ifi db.FileIntf) bool {
+		fi := ifi.(protocol.FileInfo)
+
+		select {
+		case <-f.ctx.Done():
+			return false
+		default:
+		}
+
+		if fi.ShouldConflict() {
+			return true
+		}
+
+		if f.ignores.Match(fi.Name).IsIgnored() {
+			return true
+		}
+
+		// Only check the size.
+		// No point checking block equality: BlocksEqual just compares the
+		// BlocksHash fields when set, and WithBlocksHash already matched those.
+		if file.Size != fi.Size {
+			return true
+		}
+
+		if !osutil.IsDeleted(mtimefs, fi.Name) {
+			return true
+		}
+
+		nf = fi
+		nf.SetDeleted(f.shortID)
+		nf.LocalFlags = f.localFlags
+		found = true
+		return false
+	})
+
+	return nf, found
+}
+
 func (f *folder) scanTimerFired() {
 	err := f.scanSubdirs(nil)
 

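findRename is what makes the commit title work: after the scanner appends the record for the new name, a synthetic delete of the vacated old name is appended to the same batch, so the two records get adjacent sequence numbers. A self-contained sketch of that ordering property, with hypothetical types; this is what TestRenameSequenceOrder below asserts:

package main

import "fmt"

type rec struct {
	name    string
	deleted bool
	seq     int64
}

// appendRename sketches the pairing done in scanSubdirs via findRename:
// the new name is immediately followed by the delete of the old name,
// giving them adjacent sequence numbers.
func appendRename(batch []rec, next int64, oldName, newName string) []rec {
	batch = append(batch, rec{newName, false, next})
	batch = append(batch, rec{oldName, true, next + 1})
	return batch
}

func main() {
	fmt.Println(appendRename(nil, 21, "3", "17"))
	// [{17 false 21} {3 true 22}]
}
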
+ 30 - 29
lib/model/folder_sendrecv.go

@@ -355,7 +355,7 @@ func (f *sendReceiveFolder) processNeeded(snap *db.Snapshot, dbUpdateChan chan<-
 				if ok && !df.IsDeleted() && !df.IsSymlink() && !df.IsDirectory() && !df.IsInvalid() {
 					fileDeletions[file.Name] = file
 					// Put files into buckets per first hash
-					key := string(df.Blocks[0].Hash)
+					key := string(df.BlocksHash)
 					buckets[key] = append(buckets[key], df)
 				} else {
 					f.deleteFileWithCurrent(file, df, ok, dbUpdateChan, scanChan)
@@ -458,30 +458,28 @@ nextFile:
 
 		// Check our list of files to be removed for a match, in which case
 		// we can just do a rename instead.
-		key := string(fi.Blocks[0].Hash)
+		key := string(fi.BlocksHash)
 		for i, candidate := range buckets[key] {
-			if candidate.BlocksEqual(fi) {
-				// Remove the candidate from the bucket
-				lidx := len(buckets[key]) - 1
-				buckets[key][i] = buckets[key][lidx]
-				buckets[key] = buckets[key][:lidx]
-
-				// candidate is our current state of the file, where as the
-				// desired state with the delete bit set is in the deletion
-				// map.
-				desired := fileDeletions[candidate.Name]
-				if err := f.renameFile(candidate, desired, fi, snap, dbUpdateChan, scanChan); err != nil {
-					// Failed to rename, try to handle files as separate
-					// deletions and updates.
-					break
-				}
+			// Remove the candidate from the bucket
+			lidx := len(buckets[key]) - 1
+			buckets[key][i] = buckets[key][lidx]
+			buckets[key] = buckets[key][:lidx]
+
+			// candidate is our current state of the file, whereas the
+			// desired state with the delete bit set is in the deletion
+			// map.
+			desired := fileDeletions[candidate.Name]
+			if err := f.renameFile(candidate, desired, fi, snap, dbUpdateChan, scanChan); err != nil {
+				l.Debugf("rename shortcut for %s failed: %s", fi.Name, err)
+				// Failed to rename, try next one.
+				continue
+			}
 
-				// Remove the pending deletion (as we performed it by renaming)
-				delete(fileDeletions, candidate.Name)
+			// Remove the pending deletion (as we performed it by renaming)
+			delete(fileDeletions, candidate.Name)
 
-				f.queue.Done(fileName)
-				continue nextFile
-			}
+			f.queue.Done(fileName)
+			continue nextFile
 		}
 
 		devices := snap.Availability(fileName)
@@ -1181,6 +1179,16 @@ func (f *sendReceiveFolder) copierRoutine(in <-chan copyBlocksState, pullChan ch
 		protocol.BufferPool.Put(buf)
 	}()
 
+	folderFilesystems := make(map[string]fs.Filesystem)
+	// Hope that the source is usually in the same folder, so check this folder first.
+	folders := []string{f.folderID}
+	for folder, cfg := range f.model.cfg.Folders() {
+		folderFilesystems[folder] = cfg.Filesystem()
+		if folder != f.folderID {
+			folders = append(folders, folder)
+		}
+	}
+
 	for state := range in {
 		if err := f.CheckAvailableSpace(state.file.Size); err != nil {
 			state.fail(err)
@@ -1198,13 +1206,6 @@ func (f *sendReceiveFolder) copierRoutine(in <-chan copyBlocksState, pullChan ch
 
 		f.model.progressEmitter.Register(state.sharedPullerState)
 
-		folderFilesystems := make(map[string]fs.Filesystem)
-		var folders []string
-		for folder, cfg := range f.model.cfg.Folders() {
-			folderFilesystems[folder] = cfg.Filesystem()
-			folders = append(folders, folder)
-		}
-
 		var file fs.File
 		var weakHashFinder *weakhash.Finder
 

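Two changes here: buckets of deletion candidates are now keyed by the whole block list hash rather than the first block's hash, so a bucket hit already implies identical content and the per-candidate BlocksEqual check goes away; and the folder filesystem map is built once per copier routine instead of once per file. A toy illustration of the bucket keying, with string hashes standing in for the real []byte values:

package main

import "fmt"

type file struct {
	name       string
	blocksHash string // stand-in for the real []byte hash
}

func main() {
	// Deleted local files, grouped by whole-block-list hash. Any bucket
	// member is content-identical to an incoming file with that hash.
	buckets := map[string][]file{}
	for _, df := range []file{{"old1", "h1"}, {"old2", "h2"}} {
		buckets[df.blocksHash] = append(buckets[df.blocksHash], df)
	}

	incoming := file{"new1", "h1"}
	fmt.Println(buckets[incoming.blocksHash]) // rename candidates: [{old1 h1}]
}
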
+ 15 - 3
lib/model/model.go

@@ -1926,9 +1926,15 @@ func (s *indexSender) sendIndexTo(ctx context.Context) error {
 	var f protocol.FileInfo
 	snap := s.fset.Snapshot()
 	defer snap.Release()
+	previousWasDelete := false
 	snap.WithHaveSequence(s.prevSequence+1, func(fi db.FileIntf) bool {
-		if err = batch.flushIfFull(); err != nil {
-			return false
+		// This is to make sure that renames (which are an add followed by a delete) land in
+		// the same batch. Even if the batch is full, we still let one last delete slip in:
+		// we only flush a full batch when the incoming item is not a delete, or when the
+		// previous item already was one.
+		if batch.full() && (!fi.IsDeleted() || previousWasDelete) {
+			if err = batch.flush(); err != nil {
+				return false
+			}
 		}
 
 		if shouldDebug() {
@@ -1964,6 +1970,8 @@ func (s *indexSender) sendIndexTo(ctx context.Context) error {
 		}
 		f.LocalFlags = 0 // never sent externally
 
+		previousWasDelete = f.IsDeleted()
+
 		batch.append(f)
 		return true
 	})
@@ -2607,8 +2615,12 @@ func (b *fileInfoBatch) append(f protocol.FileInfo) {
 	b.size += f.ProtoSize()
 }
 
+func (b *fileInfoBatch) full() bool {
+	return len(b.infos) >= maxBatchSizeFiles || b.size >= maxBatchSizeBytes
+}
+
 func (b *fileInfoBatch) flushIfFull() error {
-	if len(b.infos) >= maxBatchSizeFiles || b.size >= maxBatchSizeBytes {
+	if b.full() {
 		return b.flush()
 	}
 	return nil

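The batching rule in sendIndexTo, restated: flush a full batch before appending, except that a delete following a non-delete may still slip in, so a rename's add+delete pair is never split across batches. A runnable sketch of that rule, with a stand-in size limit instead of the real maxBatchSizeFiles/maxBatchSizeBytes:

package main

import "fmt"

const maxItems = 4 // stand-in for the real batch size limits

type item struct {
	name    string
	deleted bool
}

// send mimics sendIndexTo's flush rule: flush a full batch before
// appending, unless the incoming item is a delete and the previous
// one was not.
func send(items []item) {
	var batch []item
	previousWasDelete := false
	flush := func() {
		if len(batch) > 0 {
			fmt.Println(batch)
			batch = batch[:0]
		}
	}
	for _, it := range items {
		if len(batch) >= maxItems && (!it.deleted || previousWasDelete) {
			flush()
		}
		previousWasDelete = it.deleted
		batch = append(batch, it)
	}
	flush()
}

func main() {
	send([]item{
		{"a", false}, {"b", false}, {"c", false},
		{"17", false}, // batch is now full...
		{"3", true},   // ...but the rename's delete still slips in
	})
}
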
+ 152 - 0
lib/model/model_test.go

@@ -18,6 +18,7 @@ import (
 	"path/filepath"
 	"runtime"
 	"runtime/pprof"
+	"sort"
 	"strconv"
 	"strings"
 	"sync"
@@ -3537,3 +3538,154 @@ func TestFolderAPIErrors(t *testing.T) {
 		}
 	}
 }
+
+func TestRenameSequenceOrder(t *testing.T) {
+	wcfg, fcfg := tmpDefaultWrapper()
+	m := setupModel(wcfg)
+	defer cleanupModel(m)
+
+	numFiles := 20
+
+	ffs := fcfg.Filesystem()
+	for i := 0; i < numFiles; i++ {
+		v := fmt.Sprintf("%d", i)
+		must(t, writeFile(ffs, v, []byte(v), 0644))
+	}
+
+	m.ScanFolders()
+
+	count := 0
+	snap := dbSnapshot(t, m, "default")
+	snap.WithHave(protocol.LocalDeviceID, func(i db.FileIntf) bool {
+		count++
+		return true
+	})
+	snap.Release()
+
+	if count != numFiles {
+		t.Errorf("Unexpected count: %d != %d", count, numFiles)
+	}
+
+	// Modify all the files, other than the ones we expect to rename
+	for i := 0; i < numFiles; i++ {
+		if i == 3 || i == 17 || i == 16 || i == 4 {
+			continue
+		}
+		v := fmt.Sprintf("%d", i)
+		must(t, writeFile(ffs, v, []byte(v+"-new"), 0644))
+	}
+	// Rename
+	must(t, ffs.Rename("3", "17"))
+	must(t, ffs.Rename("16", "4"))
+
+	// Scan
+	m.ScanFolders()
+
+	// Verify that each rename target ("17", "4") appearing is directly
+	// followed in sequence by its old name ("3", "16") disappearing.
+	snap = dbSnapshot(t, m, "default")
+	defer snap.Release()
+
+	var firstExpectedSequence int64
+	var secondExpectedSequence int64
+	failed := false
+	snap.WithHaveSequence(0, func(i db.FileIntf) bool {
+		t.Log(i)
+		if i.FileName() == "17" {
+			firstExpectedSequence = i.SequenceNo() + 1
+		}
+		if i.FileName() == "4" {
+			secondExpectedSequence = i.SequenceNo() + 1
+		}
+		if i.FileName() == "3" {
+			failed = i.SequenceNo() != firstExpectedSequence || failed
+		}
+		if i.FileName() == "16" {
+			failed = i.SequenceNo() != secondExpectedSequence || failed
+		}
+		return true
+	})
+	if failed {
+		t.Fail()
+	}
+}
+
+func TestBlockListMap(t *testing.T) {
+	wcfg, fcfg := tmpDefaultWrapper()
+	m := setupModel(wcfg)
+	defer cleanupModel(m)
+
+	ffs := fcfg.Filesystem()
+	must(t, writeFile(ffs, "one", []byte("content"), 0644))
+	must(t, writeFile(ffs, "two", []byte("content"), 0644))
+	must(t, writeFile(ffs, "three", []byte("content"), 0644))
+	must(t, writeFile(ffs, "four", []byte("content"), 0644))
+	must(t, writeFile(ffs, "five", []byte("content"), 0644))
+
+	m.ScanFolders()
+
+	snap := dbSnapshot(t, m, "default")
+	fi, ok := snap.Get(protocol.LocalDeviceID, "one")
+	if !ok {
+		t.Error("failed to find existing file")
+	}
+	var paths []string
+
+	snap.WithBlocksHash(fi.BlocksHash, func(fi db.FileIntf) bool {
+		paths = append(paths, fi.FileName())
+		return true
+	})
+	snap.Release()
+
+	expected := []string{"one", "two", "three", "four", "five"}
+	if !equalStringsInAnyOrder(paths, expected) {
+		t.Errorf("expected %q got %q", expected, paths)
+	}
+
+	// Fudge the files around
+	// Remove
+	must(t, ffs.Remove("one"))
+
+	// Modify
+	must(t, ffs.Remove("two"))
+	must(t, writeFile(ffs, "two", []byte("mew-content"), 0644))
+
+	// Rename
+	must(t, ffs.Rename("three", "new-three"))
+
+	// Change type
+	must(t, ffs.Remove("four"))
+	must(t, ffs.Mkdir("four", 0644))
+
+	m.ScanFolders()
+
+	// Check we're left with 2 of the 5
+	snap = dbSnapshot(t, m, "default")
+	defer snap.Release()
+
+	paths = paths[:0]
+	snap.WithBlocksHash(fi.BlocksHash, func(fi db.FileIntf) bool {
+		paths = append(paths, fi.FileName())
+		return true
+	})
+
+	expected = []string{"new-three", "five"}
+	if !equalStringsInAnyOrder(paths, expected) {
+		t.Errorf("expected %q got %q", expected, paths)
+	}
+}
+
+func equalStringsInAnyOrder(a, b []string) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	sort.Strings(a)
+	sort.Strings(b)
+	for i := range a {
+		if a[i] != b[i] {
+			return false
+		}
+	}
+	return true
+}