
lib/scanner: Refactoring

GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4642
LGTM: imsodin, AudriusButkevicius
Lars K.W. Gohlke, 7 years ago
commit 89a021609b
5 changed files with 192 additions and 187 deletions
  1. lib/model/rwfolder.go       (+75, -7)
  2. lib/model/rwfolder_test.go  (+62, -0)
  3. lib/scanner/blocks.go       (+18, -114)
  4. lib/scanner/blocks_test.go  (+0, -62)
  5. lib/scanner/walk_test.go    (+37, -4)

lib/model/rwfolder.go (+75, -7)

@@ -7,6 +7,7 @@
 package model
 
 import (
+	"bytes"
 	"errors"
 	"fmt"
 	"math/rand"
@@ -24,6 +25,7 @@ import (
 	"github.com/syncthing/syncthing/lib/osutil"
 	"github.com/syncthing/syncthing/lib/protocol"
 	"github.com/syncthing/syncthing/lib/scanner"
+	"github.com/syncthing/syncthing/lib/sha256"
 	"github.com/syncthing/syncthing/lib/sync"
 	"github.com/syncthing/syncthing/lib/versioner"
 	"github.com/syncthing/syncthing/lib/weakhash"
@@ -560,7 +562,7 @@ nextFile:
 		// we can just do a rename instead.
 		key := string(fi.Blocks[0].Hash)
 		for i, candidate := range buckets[key] {
-			if scanner.BlocksEqual(candidate.Blocks, fi.Blocks) {
+			if blocksEqual(candidate.Blocks, fi.Blocks) {
 				// Remove the candidate from the bucket
 				lidx := len(buckets[key]) - 1
 				buckets[key][i] = buckets[key][lidx]
@@ -615,6 +617,21 @@ nextFile:
 	return changed
 }
 
+// blocksEqual returns whether the two slices of blocks contain the same
+// hashes in the same order.
+func blocksEqual(src, tgt []protocol.BlockInfo) bool {
+	if len(tgt) != len(src) {
+		return false
+	}
+
+	for i, sblk := range src {
+		if !bytes.Equal(sblk.Hash, tgt[i].Hash) {
+			return false
+		}
+	}
+	return true
+}
+
 // handleDir creates or updates the given directory
 func (f *sendReceiveFolder) handleDir(file protocol.FileInfo, dbUpdateChan chan<- dbUpdateJob) {
 	// Used in the defer closure below, updated by the function body. Take
@@ -976,7 +993,7 @@ func (f *sendReceiveFolder) renameFile(source, target protocol.FileInfo, dbUpdat
 func (f *sendReceiveFolder) handleFile(file protocol.FileInfo, copyChan chan<- copyBlocksState, finisherChan chan<- *sharedPullerState, dbUpdateChan chan<- dbUpdateJob) {
 	curFile, hasCurFile := f.model.CurrentFolderFile(f.folderID, file.Name)
 
-	have, need := scanner.BlockDiff(curFile.Blocks, file.Blocks)
+	have, need := blockDiff(curFile.Blocks, file.Blocks)
 
 	if hasCurFile && len(need) == 0 {
 		// We are supposed to copy the entire file, and then fetch nothing. We
@@ -1013,7 +1030,7 @@ func (f *sendReceiveFolder) handleFile(file protocol.FileInfo, copyChan chan<- c
 
 	tempName := fs.TempName(file.Name)
 
-	scanner.PopulateOffsets(file.Blocks)
+	populateOffsets(file.Blocks)
 
 	var blocks []protocol.BlockInfo
 	var blocksSize int64
@@ -1024,7 +1041,7 @@ func (f *sendReceiveFolder) handleFile(file protocol.FileInfo, copyChan chan<- c
 	tempBlocks, err := scanner.HashFile(f.ctx, f.fs, tempName, protocol.BlockSize, nil, false)
 	if err == nil {
 		// Check for any reusable blocks in the temp file
-		tempCopyBlocks, _ := scanner.BlockDiff(tempBlocks, file.Blocks)
+		tempCopyBlocks, _ := blockDiff(tempBlocks, file.Blocks)
 
 		// block.String() returns a string unique to the block
 		existingBlocks := make(map[string]struct{}, len(tempCopyBlocks))
@@ -1108,6 +1125,39 @@ func (f *sendReceiveFolder) handleFile(file protocol.FileInfo, copyChan chan<- c
 	copyChan <- cs
 }
 
+// blockDiff returns lists of common and missing (to transform src into tgt)
+// blocks. Both block lists must have been created with the same block size.
+func blockDiff(src, tgt []protocol.BlockInfo) (have, need []protocol.BlockInfo) {
+	if len(tgt) == 0 && len(src) != 0 {
+		return nil, nil
+	}
+
+	if len(tgt) != 0 && len(src) == 0 {
+		// Copy the entire file
+		return nil, tgt
+	}
+
+	for i := range tgt {
+		if i >= len(src) || !bytes.Equal(tgt[i].Hash, src[i].Hash) {
+			// Copy differing block
+			need = append(need, tgt[i])
+		} else {
+			have = append(have, tgt[i])
+		}
+	}
+
+	return have, need
+}
+
+// populateOffsets sets the Offset field on each block
+func populateOffsets(blocks []protocol.BlockInfo) {
+	var offset int64
+	for i := range blocks {
+		blocks[i].Offset = offset
+		offset += int64(blocks[i].Size)
+	}
+}
+
 // shortcutFile sets file mode and modification time, when that's the only
 // thing that has changed.
 func (f *sendReceiveFolder) shortcutFile(file protocol.FileInfo) error {
@@ -1207,7 +1257,7 @@ func (f *sendReceiveFolder) copierRoutine(in <-chan copyBlocksState, pullChan ch
 			buf = buf[:int(block.Size)]
 
 			found, err := weakHashFinder.Iterate(block.WeakHash, buf, func(offset int64) bool {
-				if _, err := scanner.VerifyBuffer(buf, block); err != nil {
+				if _, err := verifyBuffer(buf, block); err != nil {
 					return true
 				}
 
@@ -1242,7 +1292,7 @@ func (f *sendReceiveFolder) copierRoutine(in <-chan copyBlocksState, pullChan ch
 						return false
 					}
 
-					hash, err := scanner.VerifyBuffer(buf, block)
+					hash, err := verifyBuffer(buf, block)
 					if err != nil {
 						if hash != nil {
 							l.Debugf("Finder block mismatch in %s:%s:%d expected %q got %q", folder, path, index, block.Hash, hash)
@@ -1292,6 +1342,24 @@ func (f *sendReceiveFolder) copierRoutine(in <-chan copyBlocksState, pullChan ch
 	}
 }
 
+func verifyBuffer(buf []byte, block protocol.BlockInfo) ([]byte, error) {
+	if len(buf) != int(block.Size) {
+		return nil, fmt.Errorf("length mismatch %d != %d", len(buf), block.Size)
+	}
+	hf := sha256.New()
+	_, err := hf.Write(buf)
+	if err != nil {
+		return nil, err
+	}
+	hash := hf.Sum(nil)
+
+	if !bytes.Equal(hash, block.Hash) {
+		return hash, fmt.Errorf("hash mismatch %x != %x", hash, block.Hash)
+	}
+
+	return hash, nil
+}
+
 func (f *sendReceiveFolder) pullerRoutine(in <-chan pullBlockState, out chan<- *sharedPullerState) {
 	for state := range in {
 		if state.failed() != nil {
@@ -1346,7 +1414,7 @@ func (f *sendReceiveFolder) pullerRoutine(in <-chan pullBlockState, out chan<- *
 
 			// Verify that the received block matches the desired hash, if not
 			// try pulling it from another device.
-			_, lastError = scanner.VerifyBuffer(buf, state.block)
+			_, lastError = verifyBuffer(buf, state.block)
 			if lastError != nil {
 				l.Debugln("request:", f.folderID, state.file.Name, state.block.Offset, state.block.Size, "hash mismatch")
 				continue

lib/model/rwfolder_test.go (+62, -0)

@@ -58,6 +58,26 @@ var blocks = []protocol.BlockInfo{
 
 var folders = []string{"default"}
 
+var diffTestData = []struct {
+	a string
+	b string
+	s int
+	d []protocol.BlockInfo
+}{
+	{"contents", "contents", 1024, []protocol.BlockInfo{}},
+	{"", "", 1024, []protocol.BlockInfo{}},
+	{"contents", "contents", 3, []protocol.BlockInfo{}},
+	{"contents", "cantents", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}}},
+	{"contents", "contants", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}}},
+	{"contents", "cantants", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}, {Offset: 3, Size: 3}}},
+	{"contents", "", 3, []protocol.BlockInfo{{Offset: 0, Size: 0}}},
+	{"", "contents", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}, {Offset: 3, Size: 3}, {Offset: 6, Size: 2}}},
+	{"con", "contents", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}, {Offset: 6, Size: 2}}},
+	{"contents", "con", 3, nil},
+	{"contents", "cont", 3, []protocol.BlockInfo{{Offset: 3, Size: 1}}},
+	{"cont", "contents", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}, {Offset: 6, Size: 2}}},
+}
+
 func setUpFile(filename string, blockNumbers []int) protocol.FileInfo {
 	// Create existing file
 	existingBlocks := make([]protocol.BlockInfo, len(blockNumbers))
@@ -651,3 +671,45 @@ func TestIssue3164(t *testing.T) {
 		t.Fatal(err)
 	}
 }
+
+func TestDiff(t *testing.T) {
+	for i, test := range diffTestData {
+		a, _ := scanner.Blocks(context.TODO(), bytes.NewBufferString(test.a), test.s, -1, nil, false)
+		b, _ := scanner.Blocks(context.TODO(), bytes.NewBufferString(test.b), test.s, -1, nil, false)
+		_, d := blockDiff(a, b)
+		if len(d) != len(test.d) {
+			t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d))
+		} else {
+			for j := range test.d {
+				if d[j].Offset != test.d[j].Offset {
+					t.Errorf("Incorrect offset for diff %d block %d; %d != %d", i, j, d[j].Offset, test.d[j].Offset)
+				}
+				if d[j].Size != test.d[j].Size {
+					t.Errorf("Incorrect length for diff %d block %d; %d != %d", i, j, d[j].Size, test.d[j].Size)
+				}
+			}
+		}
+	}
+}
+
+func TestDiffEmpty(t *testing.T) {
+	emptyCases := []struct {
+		a    []protocol.BlockInfo
+		b    []protocol.BlockInfo
+		need int
+		have int
+	}{
+		{nil, nil, 0, 0},
+		{[]protocol.BlockInfo{{Offset: 3, Size: 1}}, nil, 0, 0},
+		{nil, []protocol.BlockInfo{{Offset: 3, Size: 1}}, 1, 0},
+	}
+	for _, emptyCase := range emptyCases {
+		h, n := blockDiff(emptyCase.a, emptyCase.b)
+		if len(h) != emptyCase.have {
+			t.Errorf("incorrect have: %d != %d", len(h), emptyCase.have)
+		}
+		if len(n) != emptyCase.need {
+			t.Errorf("incorrect have: %d != %d", len(h), emptyCase.have)
+		}
+	}
+}
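
The diff table above is easier to read with one row worked through. A runnable sketch (package and function names here are arbitrary) of the {"contents", "cantents", 3, ...} case, calling the exported scanner.Blocks directly, just as TestDiff does:

package example

import (
	"bytes"
	"context"
	"fmt"

	"github.com/syncthing/syncthing/lib/scanner"
)

func DiffRow() {
	// Block size 3 splits "contents" into "con", "ten", "ts" and
	// "cantents" into "can", "ten", "ts".
	a, _ := scanner.Blocks(context.TODO(), bytes.NewBufferString("contents"), 3, -1, nil, false)
	b, _ := scanner.Blocks(context.TODO(), bytes.NewBufferString("cantents"), 3, -1, nil, false)

	for i := range a {
		fmt.Println(i, a[i].Offset, a[i].Size, bytes.Equal(a[i].Hash, b[i].Hash))
	}
	// Prints: 0 0 3 false / 1 3 3 true / 2 6 2 true -- only the first block
	// differs, so blockDiff reports one needed block at Offset 0, Size 3.
}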

lib/scanner/blocks.go (+18, -114)

@@ -7,9 +7,7 @@
 package scanner
 
 import (
-	"bytes"
 	"context"
-	"fmt"
 	"hash"
 	"io"
 
@@ -26,18 +24,20 @@ type Counter interface {
 
 // Blocks returns the blockwise hash of the reader.
 func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) {
+	if counter == nil {
+		counter = &noopCounter{}
+	}
+
 	hf := sha256.New()
 	hashLength := hf.Size()
 
-	var mhf io.Writer
-	var whf hash.Hash32
-
+	var weakHf hash.Hash32 = noopHash{}
+	var multiHf io.Writer = hf
 	if useWeakHashes {
-		whf = adler32.New()
-		mhf = io.MultiWriter(hf, whf)
-	} else {
-		whf = noopHash{}
-		mhf = hf
+		// Use an actual weak hash function, make the multiHf
+		// write to both hash functions.
+		weakHf = adler32.New()
+		multiHf = io.MultiWriter(hf, weakHf)
 	}
 
 	var blocks []protocol.BlockInfo
@@ -65,7 +65,7 @@ func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, cou
 		}
 
 		lr.N = int64(blocksize)
-		n, err := io.CopyBuffer(mhf, lr, buf)
+		n, err := io.CopyBuffer(multiHf, lr, buf)
 		if err != nil {
 			return nil, err
 		}
@@ -74,9 +74,7 @@ func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, cou
 			break
 		}
 
-		if counter != nil {
-			counter.Update(n)
-		}
+		counter.Update(n)
 
 		// Carve out a hash-sized chunk of "hashes" to store the hash for this
 		// block.
@@ -87,14 +85,14 @@ func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, cou
 			Size:     int32(n),
 			Offset:   offset,
 			Hash:     thisHash,
-			WeakHash: whf.Sum32(),
+			WeakHash: weakHf.Sum32(),
 		}
 
 		blocks = append(blocks, b)
 		offset += n
 
 		hf.Reset()
-		whf.Reset()
+		weakHf.Reset()
 	}
 
 	if len(blocks) == 0 {
@@ -109,104 +107,6 @@ func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, cou
 	return blocks, nil
 }
 
-// PopulateOffsets sets the Offset field on each block
-func PopulateOffsets(blocks []protocol.BlockInfo) {
-	var offset int64
-	for i := range blocks {
-		blocks[i].Offset = offset
-		offset += int64(blocks[i].Size)
-	}
-}
-
-// BlockDiff returns lists of common and missing (to transform src into tgt)
-// blocks. Both block lists must have been created with the same block size.
-func BlockDiff(src, tgt []protocol.BlockInfo) (have, need []protocol.BlockInfo) {
-	if len(tgt) == 0 && len(src) != 0 {
-		return nil, nil
-	}
-
-	if len(tgt) != 0 && len(src) == 0 {
-		// Copy the entire file
-		return nil, tgt
-	}
-
-	for i := range tgt {
-		if i >= len(src) || !bytes.Equal(tgt[i].Hash, src[i].Hash) {
-			// Copy differing block
-			need = append(need, tgt[i])
-		} else {
-			have = append(have, tgt[i])
-		}
-	}
-
-	return have, need
-}
-
-// Verify returns nil or an error describing the mismatch between the block
-// list and actual reader contents
-func Verify(r io.Reader, blocksize int, blocks []protocol.BlockInfo) error {
-	hf := sha256.New()
-	// A 32k buffer is used for copying into the hash function.
-	buf := make([]byte, 32<<10)
-
-	for i, block := range blocks {
-		lr := &io.LimitedReader{R: r, N: int64(blocksize)}
-		_, err := io.CopyBuffer(hf, lr, buf)
-		if err != nil {
-			return err
-		}
-
-		hash := hf.Sum(nil)
-		hf.Reset()
-
-		if !bytes.Equal(hash, block.Hash) {
-			return fmt.Errorf("hash mismatch %x != %x for block %d", hash, block.Hash, i)
-		}
-	}
-
-	// We should have reached the end  now
-	bs := make([]byte, 1)
-	n, err := r.Read(bs)
-	if n != 0 || err != io.EOF {
-		return fmt.Errorf("file continues past end of blocks")
-	}
-
-	return nil
-}
-
-func VerifyBuffer(buf []byte, block protocol.BlockInfo) ([]byte, error) {
-	if len(buf) != int(block.Size) {
-		return nil, fmt.Errorf("length mismatch %d != %d", len(buf), block.Size)
-	}
-	hf := sha256.New()
-	_, err := hf.Write(buf)
-	if err != nil {
-		return nil, err
-	}
-	hash := hf.Sum(nil)
-
-	if !bytes.Equal(hash, block.Hash) {
-		return hash, fmt.Errorf("hash mismatch %x != %x", hash, block.Hash)
-	}
-
-	return hash, nil
-}
-
-// BlocksEqual returns whether two slices of blocks are exactly the same hash
-// and index pair wise.
-func BlocksEqual(src, tgt []protocol.BlockInfo) bool {
-	if len(tgt) != len(src) {
-		return false
-	}
-
-	for i, sblk := range src {
-		if !bytes.Equal(sblk.Hash, tgt[i].Hash) {
-			return false
-		}
-	}
-	return true
-}
-
 type noopHash struct{}
 
 func (noopHash) Sum32() uint32             { return 0 }
@@ -215,3 +115,7 @@ func (noopHash) Size() int                 { return 0 }
 func (noopHash) Reset()                    {}
 func (noopHash) Sum([]byte) []byte         { return nil }
 func (noopHash) Write([]byte) (int, error) { return 0, nil }
+
+type noopCounter struct{}
+
+func (c *noopCounter) Update(bytes int64) {}
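
The reworked Blocks now substitutes an internal noopCounter when the caller passes a nil counter, and keeps the existing noopHash fallback when useWeakHashes is false. A hedged sketch of a caller-supplied Counter; byteCounter and the input string are made up, and the Counter interface only requires Update, as the noopCounter above shows:

package main

import (
	"context"
	"fmt"
	"strings"

	"github.com/syncthing/syncthing/lib/scanner"
)

// byteCounter tallies how many bytes have been hashed; it satisfies
// scanner.Counter, which only requires Update.
type byteCounter struct{ total int64 }

func (c *byteCounter) Update(bytes int64) { c.total += bytes }

func main() {
	c := &byteCounter{}
	// A nil counter would also work: Blocks swaps in noopCounter instead of
	// nil-checking on every block.
	blocks, err := scanner.Blocks(context.Background(), strings.NewReader("some made-up file contents"), 128<<10, -1, c, true)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(blocks), "block(s),", c.total, "bytes hashed, weak hash:", blocks[0].WeakHash)
}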

lib/scanner/blocks_test.go (+0, -62)

@@ -104,68 +104,6 @@ func TestBlocks(t *testing.T) {
 	}
 }
 
-var diffTestData = []struct {
-	a string
-	b string
-	s int
-	d []protocol.BlockInfo
-}{
-	{"contents", "contents", 1024, []protocol.BlockInfo{}},
-	{"", "", 1024, []protocol.BlockInfo{}},
-	{"contents", "contents", 3, []protocol.BlockInfo{}},
-	{"contents", "cantents", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}}},
-	{"contents", "contants", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}}},
-	{"contents", "cantants", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}, {Offset: 3, Size: 3}}},
-	{"contents", "", 3, []protocol.BlockInfo{{Offset: 0, Size: 0}}},
-	{"", "contents", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}, {Offset: 3, Size: 3}, {Offset: 6, Size: 2}}},
-	{"con", "contents", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}, {Offset: 6, Size: 2}}},
-	{"contents", "con", 3, nil},
-	{"contents", "cont", 3, []protocol.BlockInfo{{Offset: 3, Size: 1}}},
-	{"cont", "contents", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}, {Offset: 6, Size: 2}}},
-}
-
-func TestDiff(t *testing.T) {
-	for i, test := range diffTestData {
-		a, _ := Blocks(context.TODO(), bytes.NewBufferString(test.a), test.s, -1, nil, false)
-		b, _ := Blocks(context.TODO(), bytes.NewBufferString(test.b), test.s, -1, nil, false)
-		_, d := BlockDiff(a, b)
-		if len(d) != len(test.d) {
-			t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d))
-		} else {
-			for j := range test.d {
-				if d[j].Offset != test.d[j].Offset {
-					t.Errorf("Incorrect offset for diff %d block %d; %d != %d", i, j, d[j].Offset, test.d[j].Offset)
-				}
-				if d[j].Size != test.d[j].Size {
-					t.Errorf("Incorrect length for diff %d block %d; %d != %d", i, j, d[j].Size, test.d[j].Size)
-				}
-			}
-		}
-	}
-}
-
-func TestDiffEmpty(t *testing.T) {
-	emptyCases := []struct {
-		a    []protocol.BlockInfo
-		b    []protocol.BlockInfo
-		need int
-		have int
-	}{
-		{nil, nil, 0, 0},
-		{[]protocol.BlockInfo{{Offset: 3, Size: 1}}, nil, 0, 0},
-		{nil, []protocol.BlockInfo{{Offset: 3, Size: 1}}, 1, 0},
-	}
-	for _, emptyCase := range emptyCases {
-		h, n := BlockDiff(emptyCase.a, emptyCase.b)
-		if len(h) != emptyCase.have {
-			t.Errorf("incorrect have: %d != %d", len(h), emptyCase.have)
-		}
-		if len(n) != emptyCase.need {
-			t.Errorf("incorrect have: %d != %d", len(h), emptyCase.have)
-		}
-	}
-}
-
 func TestAdler32Variants(t *testing.T) {
 	// Verify that the two adler32 functions give matching results for a few
 	// different blocks of data.

lib/scanner/walk_test.go (+37, -4)

@@ -25,6 +25,7 @@ import (
 	"github.com/syncthing/syncthing/lib/ignore"
 	"github.com/syncthing/syncthing/lib/osutil"
 	"github.com/syncthing/syncthing/lib/protocol"
+	"github.com/syncthing/syncthing/lib/sha256"
 	"golang.org/x/text/unicode/norm"
 )
 
@@ -162,21 +163,21 @@ func TestVerify(t *testing.T) {
 	}
 
 	buf = bytes.NewBuffer(data)
-	err = Verify(buf, blocksize, blocks)
+	err = verify(buf, blocksize, blocks)
 	t.Log(err)
 	if err != nil {
 		t.Fatal("Unexpected verify failure", err)
 	}
 
 	buf = bytes.NewBuffer(append(data, '\n'))
-	err = Verify(buf, blocksize, blocks)
+	err = verify(buf, blocksize, blocks)
 	t.Log(err)
 	if err == nil {
 		t.Fatal("Unexpected verify success")
 	}
 
 	buf = bytes.NewBuffer(data[:len(data)-1])
-	err = Verify(buf, blocksize, blocks)
+	err = verify(buf, blocksize, blocks)
 	t.Log(err)
 	if err == nil {
 		t.Fatal("Unexpected verify success")
@@ -184,7 +185,7 @@ func TestVerify(t *testing.T) {
 
 	data[42] = 42
 	buf = bytes.NewBuffer(data)
-	err = Verify(buf, blocksize, blocks)
+	err = verify(buf, blocksize, blocks)
 	t.Log(err)
 	if err == nil {
 		t.Fatal("Unexpected verify success")
@@ -529,3 +530,35 @@ func TestStopWalk(t *testing.T) {
 		t.Error("unexpected extra entries received after cancel")
 	}
 }
+
+// verify returns nil or an error describing the mismatch between the block
+// list and actual reader contents
+func verify(r io.Reader, blocksize int, blocks []protocol.BlockInfo) error {
+	hf := sha256.New()
+	// A 32k buffer is used for copying into the hash function.
+	buf := make([]byte, 32<<10)
+
+	for i, block := range blocks {
+		lr := &io.LimitedReader{R: r, N: int64(blocksize)}
+		_, err := io.CopyBuffer(hf, lr, buf)
+		if err != nil {
+			return err
+		}
+
+		hash := hf.Sum(nil)
+		hf.Reset()
+
+		if !bytes.Equal(hash, block.Hash) {
+			return fmt.Errorf("hash mismatch %x != %x for block %d", hash, block.Hash, i)
+		}
+	}
+
+	// We should have reached the end now
+	bs := make([]byte, 1)
+	n, err := r.Read(bs)
+	if n != 0 || err != io.EOF {
+		return fmt.Errorf("file continues past end of blocks")
+	}
+
+	return nil
+}
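
verify and verifyBuffer are now private to their respective packages, but the underlying check is plain SHA-256 over each block's byte range. A self-contained sketch of that round trip, assuming the standard crypto/sha256 (which yields the same digests as lib/sha256) and a made-up payload:

package main

import (
	"bytes"
	"context"
	"crypto/sha256"
	"fmt"

	"github.com/syncthing/syncthing/lib/scanner"
)

func main() {
	data := []byte("an example payload for block verification") // made up
	blocks, err := scanner.Blocks(context.Background(), bytes.NewReader(data), 16, int64(len(data)), nil, false)
	if err != nil {
		panic(err)
	}

	// The same check verifyBuffer performs in lib/model: the SHA-256 of each
	// block's bytes must equal the recorded hash.
	for i, b := range blocks {
		sum := sha256.Sum256(data[b.Offset : b.Offset+int64(b.Size)])
		if !bytes.Equal(sum[:], b.Hash) {
			fmt.Println("block", i, "hash mismatch")
			return
		}
	}
	fmt.Println("all", len(blocks), "blocks verified")
}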