Browse Source

Reduce allocations while hash scanning

Jakob Borg 11 years ago
parent
commit
f51b775698
4 changed files with 20 additions and 7 deletions
  1. + 1 - 1
      model/puller.go
  2. + 9 - 1
      scanner/blockqueue.go
  3. + 7 - 2
      scanner/blocks.go
  4. + 3 - 3
      scanner/blocks_test.go

+ 1 - 1
model/puller.go

@@ -728,7 +728,7 @@ func (p *puller) closeFile(f protocol.FileInfo) {
 		l.Infof("open: error: %q / %q: %v", p.repoCfg.ID, f.Name, err)
 		return
 	}
-	hb, _ := scanner.Blocks(fd, scanner.StandardBlockSize)
+	hb, _ := scanner.Blocks(fd, scanner.StandardBlockSize, f.Size())
 	fd.Close()
 
 	if l0, l1 := len(hb), len(f.Blocks); l0 != l1 {

+ 9 - 1
scanner/blockqueue.go

@@ -49,7 +49,15 @@ func hashFile(dir string, blockSize int, outbox, inbox chan protocol.FileInfo) {
 			continue
 		}
 
-		blocks, err := Blocks(fd, blockSize)
+		fi, err := fd.Stat()
+		if err != nil {
+			fd.Close()
+			if debug {
+				l.Debugln("stat:", err)
+			}
+			continue
+		}
+		blocks, err := Blocks(fd, blockSize, fi.Size())
 		fd.Close()
 
 		if err != nil {

+ 7 - 2
scanner/blocks.go

@@ -17,12 +17,15 @@ const StandardBlockSize = 128 * 1024
 var sha256OfNothing = []uint8{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55}
 
 // Blocks returns the blockwise hash of the reader.
-func Blocks(r io.Reader, blocksize int) ([]protocol.BlockInfo, error) {
+func Blocks(r io.Reader, blocksize int, sizehint int64) ([]protocol.BlockInfo, error) {
 	var blocks []protocol.BlockInfo
+	if sizehint > 0 {
+		blocks = make([]protocol.BlockInfo, 0, int(sizehint/int64(blocksize)))
+	}
 	var offset int64
+	hf := sha256.New()
 	for {
 		lr := &io.LimitedReader{R: r, N: int64(blocksize)}
-		hf := sha256.New()
 		n, err := io.Copy(hf, lr)
 		if err != nil {
 			return nil, err
@@ -39,6 +42,8 @@ func Blocks(r io.Reader, blocksize int) ([]protocol.BlockInfo, error) {
 		}
 		blocks = append(blocks, b)
 		offset += int64(n)
+
+		hf.Reset()
 	}
 
 	if len(blocks) == 0 {

+ 3 - 3
scanner/blocks_test.go

@@ -49,7 +49,7 @@ var blocksTestData = []struct {
 func TestBlocks(t *testing.T) {
 	for _, test := range blocksTestData {
 		buf := bytes.NewBuffer(test.data)
-		blocks, err := Blocks(buf, test.blocksize)
+		blocks, err := Blocks(buf, test.blocksize, 0)
 
 		if err != nil {
 			t.Fatal(err)
@@ -103,8 +103,8 @@ var diffTestData = []struct {
 
 func TestDiff(t *testing.T) {
 	for i, test := range diffTestData {
-		a, _ := Blocks(bytes.NewBufferString(test.a), test.s)
-		b, _ := Blocks(bytes.NewBufferString(test.b), test.s)
+		a, _ := Blocks(bytes.NewBufferString(test.a), test.s, 0)
+		b, _ := Blocks(bytes.NewBufferString(test.b), test.s, 0)
 		_, d := BlockDiff(a, b)
 		if len(d) != len(test.d) {
 			t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d))