Browse Source

lib/scanner: Use standard adler32 when we don't need rolling (#5556)

* lib/scanner: Use standard adler32 when we don't need rolling

It seems the rolling adler32 implementation is very slow when executed on large blocks, even though I can't explain why.

BenchmarkFind1MFile-16    				     100	  18991667 ns/op	  55.21 MB/s	  398844 B/op	      20 allocs/op
BenchmarkBlock/adler32-131072/#00-16     		     200	   9726519 ns/op	1078.06 MB/s	 2654936 B/op	     163 allocs/op
BenchmarkBlock/bozo32-131072/#00-16      		      20	  73435540 ns/op	 142.79 MB/s	 2654928 B/op	     163 allocs/op
BenchmarkBlock/buzhash32-131072/#00-16   		      20	  61482005 ns/op	 170.55 MB/s	 2654928 B/op	     163 allocs/op
BenchmarkBlock/buzhash64-131072/#00-16   		      20	  61673660 ns/op	 170.02 MB/s	 2654928 B/op	     163 allocs/op
BenchmarkBlock/vanilla-adler32-131072/#00-16         	     300	   4377307 ns/op	2395.48 MB/s	 2654935 B/op	     163 allocs/op
BenchmarkBlock/adler32-16777216/#00-16               	       2	 544010100 ns/op	  19.27 MB/s	   65624 B/op	       5 allocs/op
BenchmarkBlock/bozo32-16777216/#00-16                	       1	4678108500 ns/op	   2.24 MB/s	51970144 B/op	      24 allocs/op
BenchmarkBlock/buzhash32-16777216/#00-16             	       1	3880370700 ns/op	   2.70 MB/s	51970144 B/op	      24 allocs/op
BenchmarkBlock/buzhash64-16777216/#00-16             	       1	3875911700 ns/op	   2.71 MB/s	51970144 B/op	      24 allocs/op
BenchmarkBlock/vanilla-adler32-16777216/#00-16       	     300	   4010279 ns/op	2614.72 MB/s	   65624 B/op	       5 allocs/op
BenchmarkRoll/adler32-131072/#00-16                  	    2000	    974279 ns/op	 134.53 MB/s	     270 B/op	       0 allocs/op
BenchmarkRoll/bozo32-131072/#00-16                   	    2000	    791770 ns/op	 165.54 MB/s	     270 B/op	       0 allocs/op
BenchmarkRoll/buzhash32-131072/#00-16                	    2000	    917409 ns/op	 142.87 MB/s	     270 B/op	       0 allocs/op
BenchmarkRoll/buzhash64-131072/#00-16                	    2000	    881125 ns/op	 148.76 MB/s	     270 B/op	       0 allocs/op
BenchmarkRoll/adler32-16777216/#00-16                	      10	 124000400 ns/op	 135.30 MB/s	 7548937 B/op	       0 allocs/op
BenchmarkRoll/bozo32-16777216/#00-16                 	      10	 118008080 ns/op	 142.17 MB/s	 7548928 B/op	       0 allocs/op
BenchmarkRoll/buzhash32-16777216/#00-16              	      10	 126794440 ns/op	 132.32 MB/s	 7548928 B/op	       0 allocs/op
BenchmarkRoll/buzhash64-16777216/#00-16              	      10	 126631960 ns/op	 132.49 MB/s	 7548928 B/op	       0 allocs/op

* Update benchmark_test.go

* gofmt

* fixup benchmark
Audrius Butkevicius 6 years ago
parent
commit
fafd30f804
2 changed files with 96 additions and 123 deletions
  1. 1 1
      lib/scanner/blocks.go
  2. 95 122
      lib/weakhash/benchmark_test.go

+ 1 - 1
lib/scanner/blocks.go

@@ -10,9 +10,9 @@ import (
 	"bytes"
 	"context"
 	"hash"
+	"hash/adler32"
 	"io"
 
-	"github.com/chmduquesne/rollinghash/adler32"
 	"github.com/syncthing/syncthing/lib/protocol"
 	"github.com/syncthing/syncthing/lib/sha256"
 )

+ 95 - 122
lib/weakhash/benchmark_test.go

@@ -7,7 +7,13 @@
 package weakhash
 
 import (
+	"bytes"
 	"context"
+	"fmt"
+	"hash"
+	vadler32 "hash/adler32"
+	"io"
+	"math/rand"
 	"os"
 	"testing"
 
@@ -15,10 +21,9 @@ import (
 	"github.com/chmduquesne/rollinghash/bozo32"
 	"github.com/chmduquesne/rollinghash/buzhash32"
 	"github.com/chmduquesne/rollinghash/buzhash64"
-	"github.com/chmduquesne/rollinghash/rabinkarp64"
 )
 
-const testFile = "../model/testdata/~syncthing~file.tmp"
+const testFile = "../model/testdata/tmpfile"
 const size = 128 << 10
 
 func BenchmarkFind1MFile(b *testing.B) {
@@ -37,142 +42,110 @@ func BenchmarkFind1MFile(b *testing.B) {
 	}
 }
 
-func BenchmarkWeakHashAdler32(b *testing.B) {
-	data := make([]byte, size)
-	hf := adler32.New()
-
-	for i := 0; i < b.N; i++ {
-		hf.Write(data)
-	}
-
-	hf.Sum32()
-	b.SetBytes(size)
-}
-
-func BenchmarkWeakHashAdler32Roll(b *testing.B) {
-	data := make([]byte, size)
-	hf := adler32.New()
-	hf.Write(data)
-
-	b.ResetTimer()
-
-	for i := 0; i < b.N; i++ {
-		for i := 0; i <= size; i++ {
-			hf.Roll('a')
-		}
-	}
-
-	b.SetBytes(size)
-}
-
-func BenchmarkWeakHashRabinKarp64(b *testing.B) {
-	data := make([]byte, size)
-	hf := rabinkarp64.New()
-
-	for i := 0; i < b.N; i++ {
-		hf.Write(data)
-	}
-
-	hf.Sum64()
-	b.SetBytes(size)
-}
-
-func BenchmarkWeakHashRabinKarp64Roll(b *testing.B) {
-	data := make([]byte, size)
-	hf := rabinkarp64.New()
-	hf.Write(data)
-
-	b.ResetTimer()
-
-	for i := 0; i < b.N; i++ {
-		for i := 0; i <= size; i++ {
-			hf.Roll('a')
-		}
-	}
-
-	b.SetBytes(size)
+type RollingHash interface {
+	hash.Hash
+	Roll(byte)
 }
 
-func BenchmarkWeakHashBozo32(b *testing.B) {
-	data := make([]byte, size)
-	hf := bozo32.New()
-
-	for i := 0; i < b.N; i++ {
-		hf.Write(data)
+func BenchmarkBlock(b *testing.B) {
+	tests := []struct {
+		name string
+		hash hash.Hash
+	}{
+		{
+			"adler32", adler32.New(),
+		},
+		{
+			"bozo32", bozo32.New(),
+		},
+		{
+			"buzhash32", buzhash32.New(),
+		},
+		{
+			"buzhash64", buzhash64.New(),
+		},
+		{
+			"vanilla-adler32", vadler32.New(),
+		},
 	}
 
-	hf.Sum32()
-	b.SetBytes(size)
-}
-
-func BenchmarkWeakHashBozo32Roll(b *testing.B) {
-	data := make([]byte, size)
-	hf := bozo32.New()
-	hf.Write(data)
-
-	b.ResetTimer()
-
-	for i := 0; i < b.N; i++ {
-		for i := 0; i <= size; i++ {
-			hf.Roll('a')
+	sizes := []int64{128 << 10, 16 << 20}
+
+	buf := make([]byte, 16<<20)
+	rand.Read(buf)
+
+	for _, testSize := range sizes {
+		for _, test := range tests {
+			b.Run(test.name+"-"+fmt.Sprint(testSize), func(bb *testing.B) {
+				bb.Run("", func(bbb *testing.B) {
+					bbb.ResetTimer()
+					for i := 0; i < bbb.N; i++ {
+						lr := io.LimitReader(bytes.NewReader(buf), testSize)
+						n, err := io.Copy(test.hash, lr)
+						if err != nil {
+							bbb.Error(err)
+						}
+						if n != testSize {
+							bbb.Errorf("%d != %d", n, testSize)
+						}
+
+						test.hash.Sum(nil)
+						test.hash.Reset()
+					}
+
+					bbb.SetBytes(int64(len(buf)))
+					bbb.ReportAllocs()
+				})
+
+			})
 		}
 	}
-
-	b.SetBytes(size)
 }
 
-func BenchmarkWeakHashBuzhash32(b *testing.B) {
-	data := make([]byte, size)
-	hf := buzhash32.New()
-
-	for i := 0; i < b.N; i++ {
-		hf.Write(data)
+func BenchmarkRoll(b *testing.B) {
+	tests := []struct {
+		name string
+		hash RollingHash
+	}{
+		{
+			"adler32", adler32.New(),
+		},
+		{
+			"bozo32", bozo32.New(),
+		},
+		{
+			"buzhash32", buzhash32.New(),
+		},
+		{
+			"buzhash64", buzhash64.New(),
+		},
 	}
 
-	hf.Sum32()
-	b.SetBytes(size)
-}
+	sizes := []int64{128 << 10, 16 << 20}
 
-func BenchmarkWeakHashBuzhash32Roll(b *testing.B) {
-	data := make([]byte, size)
-	hf := buzhash32.New()
-	hf.Write(data)
+	for _, testSize := range sizes {
+		for _, test := range tests {
+			b.Run(test.name+"-"+fmt.Sprint(testSize), func(bb *testing.B) {
+				bb.Run("", func(bbb *testing.B) {
+					data := make([]byte, testSize)
 
-	b.ResetTimer()
+					if _, err := test.hash.Write(data); err != nil {
+						bbb.Error(err)
+					}
 
-	for i := 0; i < b.N; i++ {
-		for i := 0; i <= size; i++ {
-			hf.Roll('a')
-		}
-	}
-
-	b.SetBytes(size)
-}
-
-func BenchmarkWeakHashBuzhash64(b *testing.B) {
-	data := make([]byte, size)
-	hf := buzhash64.New()
-
-	for i := 0; i < b.N; i++ {
-		hf.Write(data)
-	}
-
-	hf.Sum64()
-	b.SetBytes(size)
-}
+					bbb.ResetTimer()
 
-func BenchmarkWeakHashBuzhash64Roll(b *testing.B) {
-	data := make([]byte, size)
-	hf := buzhash64.New()
-	hf.Write(data)
+					for i := 0; i < bbb.N; i++ {
+						for j := int64(0); j <= testSize; j++ {
+							test.hash.Roll('a')
+						}
+					}
 
-	b.ResetTimer()
+					bbb.SetBytes(testSize)
+					bbb.ReportAllocs()
+				})
 
-	for i := 0; i < b.N; i++ {
-		for i := 0; i <= size; i++ {
-			hf.Roll('a')
+			})
 		}
 	}
-
-	b.SetBytes(size)
 }