Browse Source

scanner: Allow disabling weak hash in scanning (fixes #3891)

GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/3905
Audrius Butkevicius 8 years ago
parent
commit
dd78177ae0

+ 1 - 1
cmd/stfileinfo/main.go

@@ -70,7 +70,7 @@ func main() {
 		if *standardBlocks || blockSize < protocol.BlockSize {
 			blockSize = protocol.BlockSize
 		}
-		bs, err := scanner.Blocks(fd, blockSize, fi.Size(), nil)
+		bs, err := scanner.Blocks(fd, blockSize, fi.Size(), nil, true)
 		if err != nil {
 			log.Fatal(err)
 		}

+ 1 - 0
lib/model/model.go

@@ -1813,6 +1813,7 @@ func (m *Model) internalScanFolderSubdirs(folder string, subDirs []string) error
 		ShortID:               m.shortID,
 		ProgressTickIntervalS: folderCfg.ScanProgressIntervalS,
 		Cancel:                cancel,
+		UseWeakHashes:         folderCfg.WeakHashThresholdPct < 100,
 	})
 
 	if err != nil {

+ 1 - 1
lib/model/model_test.go

@@ -317,7 +317,7 @@ func (f *fakeConnection) addFile(name string, flags uint32, ftype protocol.FileI
 	f.mut.Lock()
 	defer f.mut.Unlock()
 
-	blocks, _ := scanner.Blocks(bytes.NewReader(data), protocol.BlockSize, int64(len(data)), nil)
+	blocks, _ := scanner.Blocks(bytes.NewReader(data), protocol.BlockSize, int64(len(data)), nil, true)
 	var version protocol.Vector
 	version = version.Update(f.id.Short())
 

+ 1 - 1
lib/model/rwfolder.go

@@ -1082,7 +1082,7 @@ func (f *sendReceiveFolder) handleFile(file protocol.FileInfo, copyChan chan<- c
 
 	// Check for an old temporary file which might have some blocks we could
 	// reuse.
-	tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize, nil)
+	tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize, nil, false)
 	if err == nil {
 		// Check for any reusable blocks in the temp file
 		tempCopyBlocks, _ := scanner.BlockDiff(tempBlocks, file.Blocks)

+ 3 - 3
lib/model/rwfolder_test.go

@@ -238,7 +238,7 @@ func TestCopierFinder(t *testing.T) {
 	}
 
 	// Verify that the fetched blocks have actually been written to the temp file
-	blks, err := scanner.HashFile(tempFile, protocol.BlockSize, nil)
+	blks, err := scanner.HashFile(tempFile, protocol.BlockSize, nil, false)
 	if err != nil {
 		t.Log(err)
 	}
@@ -291,7 +291,7 @@ func TestWeakHash(t *testing.T) {
 	// File 1: abcdefgh
 	// File 2: xyabcdef
 	f.Seek(0, os.SEEK_SET)
-	existing, err := scanner.Blocks(f, protocol.BlockSize, size, nil)
+	existing, err := scanner.Blocks(f, protocol.BlockSize, size, nil, true)
 	if err != nil {
 		t.Error(err)
 	}
@@ -300,7 +300,7 @@ func TestWeakHash(t *testing.T) {
 	remainder := io.LimitReader(f, size-shift)
 	prefix := io.LimitReader(rand.Reader, shift)
 	nf := io.MultiReader(prefix, remainder)
-	desired, err := scanner.Blocks(nf, protocol.BlockSize, size, nil)
+	desired, err := scanner.Blocks(nf, protocol.BlockSize, size, nil, true)
 	if err != nil {
 		t.Error(err)
 	}

+ 7 - 6
lib/scanner/blockqueue.go

@@ -20,13 +20,13 @@ import (
 // workers are used in parallel. The outbox will become closed when the inbox
 // is closed and all items handled.
 
-func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan protocol.FileInfo, counter Counter, done, cancel chan struct{}) {
+func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan protocol.FileInfo, counter Counter, done, cancel chan struct{}, useWeakHashes bool) {
 	wg := sync.NewWaitGroup()
 	wg.Add(workers)
 
 	for i := 0; i < workers; i++ {
 		go func() {
-			hashFiles(dir, blockSize, outbox, inbox, counter, cancel)
+			hashFiles(dir, blockSize, outbox, inbox, counter, cancel, useWeakHashes)
 			wg.Done()
 		}()
 	}
@@ -40,7 +40,8 @@ func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan pr
 	}()
 }
 
-func HashFile(path string, blockSize int, counter Counter) ([]protocol.BlockInfo, error) {
+// HashFile hashes the files and returns a list of blocks representing the file.
+func HashFile(path string, blockSize int, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) {
 	fd, err := os.Open(path)
 	if err != nil {
 		l.Debugln("open:", err)
@@ -60,7 +61,7 @@ func HashFile(path string, blockSize int, counter Counter) ([]protocol.BlockInfo
 
 	// Hash the file. This may take a while for large files.
 
-	blocks, err := Blocks(fd, blockSize, size, counter)
+	blocks, err := Blocks(fd, blockSize, size, counter, useWeakHashes)
 	if err != nil {
 		l.Debugln("blocks:", err)
 		return nil, err
@@ -81,7 +82,7 @@ func HashFile(path string, blockSize int, counter Counter) ([]protocol.BlockInfo
 	return blocks, nil
 }
 
-func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo, counter Counter, cancel chan struct{}) {
+func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo, counter Counter, cancel chan struct{}, useWeakHashes bool) {
 	for {
 		select {
 		case f, ok := <-inbox:
@@ -93,7 +94,7 @@ func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo,
 				panic("Bug. Asked to hash a directory or a deleted file.")
 			}
 
-			blocks, err := HashFile(filepath.Join(dir, f.Name), blockSize, counter)
+			blocks, err := HashFile(filepath.Join(dir, f.Name), blockSize, counter, useWeakHashes)
 			if err != nil {
 				l.Debugln("hash error:", f.Name, err)
 				continue

+ 22 - 3
lib/scanner/blocks.go

@@ -9,6 +9,7 @@ package scanner
 import (
 	"bytes"
 	"fmt"
+	"hash"
 	"io"
 
 	"github.com/chmduquesne/rollinghash/adler32"
@@ -23,11 +24,20 @@ type Counter interface {
 }
 
 // Blocks returns the blockwise hash of the reader.
-func Blocks(r io.Reader, blocksize int, sizehint int64, counter Counter) ([]protocol.BlockInfo, error) {
+func Blocks(r io.Reader, blocksize int, sizehint int64, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) {
 	hf := sha256.New()
 	hashLength := hf.Size()
-	whf := adler32.New()
-	mhf := io.MultiWriter(hf, whf)
+
+	var mhf io.Writer
+	var whf hash.Hash32
+
+	if useWeakHashes {
+		whf = adler32.New()
+		mhf = io.MultiWriter(hf, whf)
+	} else {
+		whf = noopHash{}
+		mhf = hf
+	}
 
 	var blocks []protocol.BlockInfo
 	var hashes, thisHash []byte
@@ -189,3 +199,12 @@ func BlocksEqual(src, tgt []protocol.BlockInfo) bool {
 	}
 	return true
 }
+
+type noopHash struct{}
+
+func (noopHash) Sum32() uint32             { return 0 }
+func (noopHash) BlockSize() int            { return 0 }
+func (noopHash) Size() int                 { return 0 }
+func (noopHash) Reset()                    {}
+func (noopHash) Sum([]byte) []byte         { return nil }
+func (noopHash) Write([]byte) (int, error) { return 0, nil }

+ 3 - 3
lib/scanner/blocks_test.go

@@ -68,7 +68,7 @@ var blocksTestData = []struct {
 func TestBlocks(t *testing.T) {
 	for testNo, test := range blocksTestData {
 		buf := bytes.NewBuffer(test.data)
-		blocks, err := Blocks(buf, test.blocksize, -1, nil)
+		blocks, err := Blocks(buf, test.blocksize, -1, nil, true)
 
 		if err != nil {
 			t.Fatal(err)
@@ -125,8 +125,8 @@ var diffTestData = []struct {
 
 func TestDiff(t *testing.T) {
 	for i, test := range diffTestData {
-		a, _ := Blocks(bytes.NewBufferString(test.a), test.s, -1, nil)
-		b, _ := Blocks(bytes.NewBufferString(test.b), test.s, -1, nil)
+		a, _ := Blocks(bytes.NewBufferString(test.a), test.s, -1, nil, false)
+		b, _ := Blocks(bytes.NewBufferString(test.b), test.s, -1, nil, false)
 		_, d := BlockDiff(a, b)
 		if len(d) != len(test.d) {
 			t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d))

+ 4 - 2
lib/scanner/walk.go

@@ -72,6 +72,8 @@ type Config struct {
 	ProgressTickIntervalS int
 	// Signals cancel from the outside - when closed, we should stop walking.
 	Cancel chan struct{}
+	// Wether or not we should also compute weak hashes
+	UseWeakHashes bool
 }
 
 type CurrentFiler interface {
@@ -129,7 +131,7 @@ func (w *walker) walk() (chan protocol.FileInfo, error) {
 	// We're not required to emit scan progress events, just kick off hashers,
 	// and feed inputs directly from the walker.
 	if w.ProgressTickIntervalS < 0 {
-		newParallelHasher(w.Dir, w.BlockSize, w.Hashers, finishedChan, toHashChan, nil, nil, w.Cancel)
+		newParallelHasher(w.Dir, w.BlockSize, w.Hashers, finishedChan, toHashChan, nil, nil, w.Cancel, w.UseWeakHashes)
 		return finishedChan, nil
 	}
 
@@ -160,7 +162,7 @@ func (w *walker) walk() (chan protocol.FileInfo, error) {
 		done := make(chan struct{})
 		progress := newByteCounter()
 
-		newParallelHasher(w.Dir, w.BlockSize, w.Hashers, finishedChan, realToHashChan, progress, done, w.Cancel)
+		newParallelHasher(w.Dir, w.BlockSize, w.Hashers, finishedChan, realToHashChan, progress, done, w.Cancel, w.UseWeakHashes)
 
 		// A routine which actually emits the FolderScanProgress events
 		// every w.ProgressTicker ticks, until the hasher routines terminate.

+ 2 - 2
lib/scanner/walk_test.go

@@ -148,7 +148,7 @@ func TestVerify(t *testing.T) {
 	progress := newByteCounter()
 	defer progress.Close()
 
-	blocks, err := Blocks(buf, blocksize, -1, progress)
+	blocks, err := Blocks(buf, blocksize, -1, progress, false)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -423,7 +423,7 @@ func BenchmarkHashFile(b *testing.B) {
 	b.ResetTimer()
 
 	for i := 0; i < b.N; i++ {
-		if _, err := HashFile(testdataName, protocol.BlockSize, nil); err != nil {
+		if _, err := HashFile(testdataName, protocol.BlockSize, nil, true); err != nil {
 			b.Fatal(err)
 		}
 	}