Преглед изворни кода

lib/scanner: Recheck file size and modification time after hashing (ref #3440)

To catch the case where the file changed. Also make sure we never let a
size-vs-blocklist mismatch slip through.

GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/3443
Jakob Borg пре 9 година
родитељ
комит
7aaa1dd8a3

+ 1 - 1
lib/model/rwfolder.go

@@ -941,7 +941,7 @@ func (f *rwFolder) handleFile(file protocol.FileInfo, copyChan chan<- copyBlocks
 
 	// Check for an old temporary file which might have some blocks we could
 	// reuse.
-	tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize, 0, nil)
+	tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize, nil)
 	if err == nil {
 		// Check for any reusable blocks in the temp file
 		tempCopyBlocks, _ := scanner.BlockDiff(tempBlocks, file.Blocks)

+ 1 - 1
lib/model/rwfolder_test.go

@@ -223,7 +223,7 @@ func TestCopierFinder(t *testing.T) {
 	}
 
 	// Verify that the fetched blocks have actually been written to the temp file
-	blks, err := scanner.HashFile(tempFile, protocol.BlockSize, 0, nil)
+	blks, err := scanner.HashFile(tempFile, protocol.BlockSize, nil)
 	if err != nil {
 		t.Log(err)
 	}

+ 43 - 11
lib/scanner/blockqueue.go

@@ -7,6 +7,7 @@
 package scanner
 
 import (
+	"errors"
 	"os"
 	"path/filepath"
 
@@ -39,24 +40,45 @@ func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan pr
 	}()
 }
 
-func HashFile(path string, blockSize int, sizeHint int64, counter Counter) ([]protocol.BlockInfo, error) {
+func HashFile(path string, blockSize int, counter Counter) ([]protocol.BlockInfo, error) {
 	fd, err := os.Open(path)
 	if err != nil {
 		l.Debugln("open:", err)
-		return []protocol.BlockInfo{}, err
+		return nil, err
 	}
 	defer fd.Close()
 
-	if sizeHint == 0 {
-		fi, err := fd.Stat()
-		if err != nil {
-			l.Debugln("stat:", err)
-			return []protocol.BlockInfo{}, err
-		}
-		sizeHint = fi.Size()
+	// Get the size and modtime of the file before we start hashing it.
+
+	fi, err := fd.Stat()
+	if err != nil {
+		l.Debugln("stat before:", err)
+		return nil, err
+	}
+	size := fi.Size()
+	modTime := fi.ModTime()
+
+	// Hash the file. This may take a while for large files.
+
+	blocks, err := Blocks(fd, blockSize, size, counter)
+	if err != nil {
+		l.Debugln("blocks:", err)
+		return nil, err
+	}
+
+	// Recheck the size and modtime again. If they differ, the file changed
+	// while we were reading it and our hash results are invalid.
+
+	fi, err = fd.Stat()
+	if err != nil {
+		l.Debugln("stat after:", err)
+		return nil, err
+	}
+	if size != fi.Size() || !modTime.Equal(fi.ModTime()) {
+		return nil, errors.New("file changed during hashing")
 	}
 
-	return Blocks(fd, blockSize, sizeHint, counter)
+	return blocks, nil
 }
 
 func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo, counter Counter, cancel chan struct{}) {
@@ -71,13 +93,23 @@ func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo,
 				panic("Bug. Asked to hash a directory or a deleted file.")
 			}
 
-			blocks, err := HashFile(filepath.Join(dir, f.Name), blockSize, f.Size, counter)
+			blocks, err := HashFile(filepath.Join(dir, f.Name), blockSize, counter)
 			if err != nil {
 				l.Debugln("hash error:", f.Name, err)
 				continue
 			}
 
 			f.Blocks = blocks
+
+			// The size we saw when initially deciding to hash the file
+			// might not have been the size it actually had when we hashed
+			// it. Update the size from the block list.
+
+			f.Size = 0
+			for _, b := range blocks {
+				f.Size += int64(b.Size)
+			}
+
 			select {
 			case outbox <- f:
 			case <-cancel:

+ 1 - 1
lib/scanner/blocks.go

@@ -29,7 +29,7 @@ func Blocks(r io.Reader, blocksize int, sizehint int64, counter Counter) ([]prot
 	var blocks []protocol.BlockInfo
 	var hashes, thisHash []byte
 
-	if sizehint > 0 {
+	if sizehint >= 0 {
 		// Allocate contiguous blocks for the BlockInfo structures and their
 		// hashes once and for all, and stick to the specified size.
 		r = io.LimitReader(r, sizehint)

+ 3 - 3
lib/scanner/blocks_test.go

@@ -51,7 +51,7 @@ var blocksTestData = []struct {
 func TestBlocks(t *testing.T) {
 	for _, test := range blocksTestData {
 		buf := bytes.NewBuffer(test.data)
-		blocks, err := Blocks(buf, test.blocksize, 0, nil)
+		blocks, err := Blocks(buf, test.blocksize, -1, nil)
 
 		if err != nil {
 			t.Fatal(err)
@@ -105,8 +105,8 @@ var diffTestData = []struct {
 
 func TestDiff(t *testing.T) {
 	for i, test := range diffTestData {
-		a, _ := Blocks(bytes.NewBufferString(test.a), test.s, 0, nil)
-		b, _ := Blocks(bytes.NewBufferString(test.b), test.s, 0, nil)
+		a, _ := Blocks(bytes.NewBufferString(test.a), test.s, -1, nil)
+		b, _ := Blocks(bytes.NewBufferString(test.b), test.s, -1, nil)
 		_, d := BlockDiff(a, b)
 		if len(d) != len(test.d) {
 			t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d))

+ 2 - 2
lib/scanner/walk_test.go

@@ -148,7 +148,7 @@ func TestVerify(t *testing.T) {
 	progress := newByteCounter()
 	defer progress.Close()
 
-	blocks, err := Blocks(buf, blocksize, 0, progress)
+	blocks, err := Blocks(buf, blocksize, -1, progress)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -380,7 +380,7 @@ func BenchmarkHashFile(b *testing.B) {
 	b.ResetTimer()
 
 	for i := 0; i < b.N; i++ {
-		if _, err := HashFile(testdataName, protocol.BlockSize, testdataSize, nil); err != nil {
+		if _, err := HashFile(testdataName, protocol.BlockSize, nil); err != nil {
 			b.Fatal(err)
 		}
 	}