Browse Source

vendor: Update github.com/minio/sha256-simd (fixes #4585)

GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4592
Jakob Borg 8 years ago
parent
commit
24c721cb5d

+ 38 - 0
vendor/github.com/minio/sha256-simd/cpuid.go

@@ -16,6 +16,7 @@
 package sha256
 
 // True when SIMD instructions are available.
+var avx512 = haveAVX512()
 var avx2 = haveAVX2()
 var avx = haveAVX()
 var ssse3 = haveSSSE3()
@@ -46,6 +47,43 @@ func haveAVX2() bool {
 	return false
 }
 
+// haveAVX512 returns true when there is AVX512 support
+func haveAVX512() bool {
+	mfi, _, _, _ := cpuid(0)
+
+	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
+	if mfi >= 7 {
+		_, _, c, _ := cpuid(1)
+
+		// Only detect AVX-512 features if XGETBV is supported
+		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
+			// Check for OS support
+			eax, _ := xgetbv(0)
+			_, ebx, _, _ := cpuidex(7, 0)
+
+			// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
+			// ZMM16-ZMM31 state are enabled by OS)
+			/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
+			if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
+				if ebx&(1<<16) == 0 {
+					return false // no AVX512F
+				}
+				if ebx&(1<<17) == 0 {
+					return false // no AVX512DQ
+				}
+				if ebx&(1<<30) == 0 {
+					return false // no AVX512BW
+				}
+				if ebx&(1<<31) == 0 {
+					return false // no AVX512VL
+				}
+				return true
+			}
+		}
+	}
+	return false
+}
+
 // haveSSSE3 returns true when there is SSSE3 support
 func haveSSSE3() bool {
 

+ 1 - 1
vendor/github.com/minio/sha256-simd/cpuid_other.go

@@ -13,7 +13,7 @@
 // limitations under the License.
 //
 
-// +build ppc64 ppc64le mips mipsle mips64 mips64le
+// +build ppc64 ppc64le mips mipsle mips64 mips64le s390x
 
 package sha256
 

+ 497 - 0
vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.go

@@ -0,0 +1,497 @@
+//+build !noasm
+
+/*
+ * Minio Cloud Storage, (C) 2017 Minio, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package sha256
+
+import (
+	"encoding/binary"
+	"errors"
+	"hash"
+	"sort"
+	"sync/atomic"
+	"time"
+)
+
+//go:noescape
+func sha256_x16_avx512(digests *[512]byte, scratch *[512]byte, table *[512]uint64, mask []uint64, inputs [16][]byte) // no Go body here; NOTE(review): presumably implemented in sha256blockAvx512_amd64.s
+
+// Do not start at 0 but at the next multiple of 16, so as to be able to
+// differentiate from the default initialization value of 0
+const Avx512ServerUid = 16 // NOTE(review): not referenced by the code in this file — confirm where it is used
+
+var uidCounter uint64 // incremented atomically by NewAvx512 to hand out digest uids
+
+// NewAvx512 returns a hash.Hash backed by a512srv; each digest gets the next value of uidCounter as its uid.
+func NewAvx512(a512srv *Avx512Server) hash.Hash {
+	return &Avx512Digest{uid: atomic.AddUint64(&uidCounter, 1), a512srv: a512srv}
+}
+
+// Avx512Digest computes a SHA256 checksum using AVX512 by sending its blocks to an Avx512Server.
+type Avx512Digest struct {
+	uid     uint64        // id sent with every blockInput of this digest
+	a512srv *Avx512Server // server whose blocksCh receives the blocks
+	x       [chunk]byte   // buffered input that does not yet fill a chunk
+	nx      int           // number of buffered bytes in x
+	len     uint64        // total number of bytes written so far
+	final   bool          // set by Sum; Write is refused until Reset
+	result  [Size]byte    // checksum cached by Sum for repeated calls
+}
+
+// Size returns the size of the checksum in bytes.
+func (d *Avx512Digest) Size() int { return Size }
+
+// BlockSize returns the block size of the checksum. NOTE(review): value receiver, unlike the other methods of this type.
+func (d Avx512Digest) BlockSize() int { return BlockSize }
+
+// Reset makes the server drop its state for this digest and clears the local buffer, length and finalized flag.
+func (d *Avx512Digest) Reset() {
+	// The server is notified first (the send blocks until it is received), then the local fields are cleared.
+	d.a512srv.blocksCh <- blockInput{uid: d.uid, reset: true}
+	d.nx, d.len, d.final = 0, 0, false
+}
+
+// Write buffers p and hands every complete chunk to the server as a private copy; it fails once the digest is finalized.
+func (d *Avx512Digest) Write(p []byte) (nn int, err error) {
+	// The server keeps msg in a lane until a later batch runs, so msg must not alias d.x (reused below) or the caller's p.
+	if d.final {
+		return 0, errors.New("Avx512Digest already finalized. Reset first before writing again.")
+	}
+
+	nn = len(p)
+	d.len += uint64(nn)
+	if d.nx > 0 { // top up the partially filled buffer first
+		n := copy(d.x[d.nx:], p)
+		d.nx += n
+		if d.nx == chunk {
+			d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: append([]byte(nil), d.x[:]...)}
+			d.nx = 0
+		}
+		p = p[n:]
+	}
+	if len(p) >= chunk { // send all whole chunks of p in one message
+		n := len(p) &^ (chunk - 1)
+		d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: append([]byte(nil), p[:n]...)}
+		p = p[n:]
+	}
+	if len(p) > 0 { // keep the remainder for the next Write or Sum
+		d.nx = copy(d.x[:], p)
+	}
+	return
+}
+
+// Sum finalizes the digest (buffered tail, padding and bit length) via the server and returns the checksum appended to in.
+func (d *Avx512Digest) Sum(in []byte) (result []byte) {
+	// Once finalized, the cached result is returned; the digest stays finalized until Reset.
+	if d.final {
+		return append(in, d.result[:]...)
+	}
+
+	trail := make([]byte, 0, 128)        // at most 63 buffered + 57 padding + 8 length bytes
+	trail = append(trail, d.x[:d.nx]...) // copy the tail instead of appending into d.x's backing array
+	length := d.len
+	// Padding.  Add a 1 bit and 0 bits until 56 bytes mod 64.
+	var tmp [64]byte
+	tmp[0] = 0x80
+	if length%64 < 56 {
+		trail = append(trail, tmp[0:56-length%64]...)
+	} else {
+		trail = append(trail, tmp[0:64+56-length%64]...)
+	}
+	d.nx = 0
+
+	// Length in bits.
+	length <<= 3
+	for i := uint(0); i < 8; i++ {
+		tmp[i] = byte(length >> (56 - 8*i))
+	}
+	trail = append(trail, tmp[0:8]...)
+
+	sumCh := make(chan [Size]byte)
+	d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: trail, final: true, sumCh: sumCh}
+	d.result = <-sumCh
+	d.final = true
+	return append(in, d.result[:]...)
+}
+
+var table = [512]uint64{ // 64 distinct 32-bit constants (they match the SHA-256 round constants); each fills both halves of a uint64 and is stored 8 times in a row
+	0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98,
+	0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98,
+	0x7137449171374491, 0x7137449171374491, 0x7137449171374491, 0x7137449171374491,
+	0x7137449171374491, 0x7137449171374491, 0x7137449171374491, 0x7137449171374491,
+	0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf,
+	0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf,
+	0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5,
+	0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5,
+	0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b,
+	0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b,
+	0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1,
+	0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1,
+	0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4,
+	0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4,
+	0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5,
+	0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5,
+	0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98,
+	0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98,
+	0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01,
+	0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01,
+	0x243185be243185be, 0x243185be243185be, 0x243185be243185be, 0x243185be243185be,
+	0x243185be243185be, 0x243185be243185be, 0x243185be243185be, 0x243185be243185be,
+	0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3,
+	0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3,
+	0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74,
+	0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74,
+	0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe,
+	0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe,
+	0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7,
+	0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7,
+	0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174,
+	0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174,
+	0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1,
+	0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1,
+	0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786,
+	0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786,
+	0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6,
+	0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6,
+	0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc,
+	0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc,
+	0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f,
+	0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f,
+	0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa,
+	0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa,
+	0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc,
+	0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc,
+	0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da,
+	0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da,
+	0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152,
+	0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152,
+	0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d,
+	0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d,
+	0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8,
+	0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8,
+	0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7,
+	0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7,
+	0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3,
+	0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3,
+	0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147,
+	0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147,
+	0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351,
+	0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351,
+	0x1429296714292967, 0x1429296714292967, 0x1429296714292967, 0x1429296714292967,
+	0x1429296714292967, 0x1429296714292967, 0x1429296714292967, 0x1429296714292967,
+	0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85,
+	0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85,
+	0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138,
+	0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138,
+	0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc,
+	0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc,
+	0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13,
+	0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13,
+	0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354,
+	0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354,
+	0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb,
+	0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb,
+	0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e,
+	0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e,
+	0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85,
+	0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85,
+	0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1,
+	0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1,
+	0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b,
+	0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b,
+	0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70,
+	0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70,
+	0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3,
+	0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3,
+	0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819,
+	0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819,
+	0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624,
+	0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624,
+	0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585,
+	0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585,
+	0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070,
+	0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070,
+	0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116,
+	0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116,
+	0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08,
+	0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08,
+	0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c,
+	0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c,
+	0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5,
+	0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5,
+	0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3,
+	0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3,
+	0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a,
+	0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a,
+	0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f,
+	0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f,
+	0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3,
+	0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3,
+	0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee,
+	0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee,
+	0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f,
+	0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f,
+	0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814,
+	0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814,
+	0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208,
+	0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208,
+	0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa,
+	0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa,
+	0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb,
+	0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb,
+	0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7,
+	0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7,
+	0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2,
+	0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2}
+
+// Interface function to assembly ode
+func blockAvx512(digests *[512]byte, input [16][]byte, mask []uint64) [16][Size]byte {
+
+	scratch := [512]byte{}
+	sha256_x16_avx512(digests, &scratch, &table, mask, input)
+
+	output := [16][Size]byte{}
+	for i := 0; i < 16; i++ {
+		output[i] = getDigest(i, digests[:])
+	}
+
+	return output
+}
+
+// getDigest collects the eight 32-bit words of lane index from state (starting at byte index*4,
+// one word every 2*Size bytes) and stores each of them byte-swapped in sum.
+func getDigest(index int, state []byte) (sum [Size]byte) {
+	for w := 0; w < 8; w++ {
+		off := index*4 + 2*w*Size
+		binary.BigEndian.PutUint32(sum[4*w:], binary.LittleEndian.Uint32(state[off:off+4]))
+	}
+	return
+}
+
+// blockInput is the message sent across the input channel (blocksCh) to the server.
+type blockInput struct {
+	uid   uint64          // id of the digest this message belongs to; its low 4 bits select the lane
+	msg   []byte          // data to be hashed
+	reset bool            // when set, the server only drops its state for uid
+	final bool            // marks the last message; the server then stores sumCh with the lane
+	sumCh chan [Size]byte // receives the checksum once a final message has been processed
+}
+
+// Avx512Server implements 16x parallel handling of SHA256 invocations; Process and its helpers read and write these fields.
+type Avx512Server struct {
+	blocksCh chan blockInput       // Input channel
+	totalIn  int                   // Total number of inputs waiting to be processed
+	lanes    [16]Avx512LaneInfo    // Array with info per lane (out of 16)
+	digests  map[uint64][Size]byte // Map of uids to (interim) digest results
+}
+
+// Avx512LaneInfo holds what is queued in one lane; a nil block means nothing is queued.
+type Avx512LaneInfo struct {
+	uid      uint64          // unique identification for this SHA processing
+	block    []byte          // input block to be processed
+	outputCh chan [Size]byte // channel for output result
+}
+
+// NewAvx512Server creates a server for parallel processing and starts its Process goroutine, which loops forever.
+func NewAvx512Server() *Avx512Server {
+	a512srv := &Avx512Server{
+		blocksCh: make(chan blockInput),
+		digests:  make(map[uint64][Size]byte),
+	}
+	// Start a single thread for reading from the input channel
+	go a512srv.Process()
+	return a512srv
+}
+
+// Process is the sole handler for reading from the input channel; it never returns and batches incoming blocks into the 16 lanes.
+func (a512srv *Avx512Server) Process() {
+	for {
+		select {
+		case block := <-a512srv.blocksCh:
+			if block.reset {
+				a512srv.reset(block.uid)
+				continue
+			}
+			index := block.uid & 0xf // the lane is selected by the low 4 bits of the uid
+			// fmt.Println("Adding message:", block.uid, index)
+
+			if a512srv.lanes[index].block != nil { // If slot is already filled, process all inputs
+				//fmt.Println("Invoking Blocks()")
+				a512srv.blocks()
+			}
+			a512srv.totalIn++
+			a512srv.lanes[index] = Avx512LaneInfo{uid: block.uid, block: block.msg}
+			if block.final {
+				a512srv.lanes[index].outputCh = block.sumCh // blocks() sends the checksum here
+			}
+			if a512srv.totalIn == len(a512srv.lanes) { // every lane is occupied
+				// fmt.Println("Invoking Blocks() while FULL: ")
+				a512srv.blocks()
+			}
+
+			// TODO: test with larger timeout. NOTE(review): time.After creates a new timer on every iteration and the 1µs timeout keeps this loop spinning while idle.
+		case <-time.After(1 * time.Microsecond):
+			for _, lane := range a512srv.lanes {
+				if lane.block != nil { // check if there is any input to process
+					// fmt.Println("Invoking Blocks() on TIMEOUT: ")
+					a512srv.blocks()
+					break // we are done
+				}
+			}
+		}
+	}
+}
+
+// reset drops whatever the server still holds for uid: a block waiting in a lane and the interim digest.
+func (a512srv *Avx512Server) reset(uid uint64) {
+	// Check if there is a message still waiting to be processed (and remove if so)
+	for i := range a512srv.lanes {
+		pending := &a512srv.lanes[i]
+		if pending.uid != uid || pending.block == nil {
+			continue // other digest, or nothing queued in this lane
+		}
+		// Free the lane and adjust the number of waiting inputs.
+		*pending = Avx512LaneInfo{} // clear message
+		a512srv.totalIn--
+	}
+
+	// Delete entry from hash map
+	delete(a512srv.digests, uid)
+}
+
+// blocks invokes the assembly on whatever is queued in the 16 lanes, stores the interim digests, frees the lanes and sends back final results; err is always nil.
+func (a512srv *Avx512Server) blocks() (err error) {
+	// Gather the queued block of every lane (nil for lanes with nothing queued).
+	inputs := [16][]byte{}
+	for i := range inputs {
+		inputs[i] = a512srv.lanes[i].block
+	}
+
+	mask := expandMask(genMask(inputs))
+	outputs := blockAvx512(a512srv.getDigests(), inputs, mask)
+
+	a512srv.totalIn = 0
+	for i := 0; i < len(outputs); i++ {
+		uid, outputCh := a512srv.lanes[i].uid, a512srv.lanes[i].outputCh
+		a512srv.digests[uid] = outputs[i] // NOTE(review): lanes with nothing queued have uid 0, so this also writes digests[0]
+		a512srv.lanes[i] = Avx512LaneInfo{}
+
+		if outputCh != nil {
+			// Send back result
+			outputCh <- outputs[i]
+			delete(a512srv.digests, uid) // Delete entry from hashmap
+		}
+	}
+	return
+}
+// Write queues p as the next block for uid and reports len(p). NOTE(review): p is stored in the lane as-is, so it is still referenced after Write returns.
+func (a512srv *Avx512Server) Write(uid uint64, p []byte) (nn int, err error) {
+	a512srv.blocksCh <- blockInput{uid: uid, msg: p}
+	return len(p), nil
+}
+// Sum queues p as the final block for uid and waits for the checksum. NOTE(review): presumably p must already include padding and length, as nothing here adds them.
+func (a512srv *Avx512Server) Sum(uid uint64, p []byte) [32]byte {
+	sumCh := make(chan [32]byte)
+	a512srv.blocksCh <- blockInput{uid: uid, msg: p, final: true, sumCh: sumCh}
+	return <-sumCh
+}
+
+func (a512srv *Avx512Server) getDigests() *[512]byte {
+	digests := [512]byte{}
+	for i, lane := range a512srv.lanes {
+		a, ok := a512srv.digests[lane.uid]
+		if ok {
+			binary.BigEndian.PutUint32(digests[(i+0*16)*4:], binary.LittleEndian.Uint32(a[0:4]))
+			binary.BigEndian.PutUint32(digests[(i+1*16)*4:], binary.LittleEndian.Uint32(a[4:8]))
+			binary.BigEndian.PutUint32(digests[(i+2*16)*4:], binary.LittleEndian.Uint32(a[8:12]))
+			binary.BigEndian.PutUint32(digests[(i+3*16)*4:], binary.LittleEndian.Uint32(a[12:16]))
+			binary.BigEndian.PutUint32(digests[(i+4*16)*4:], binary.LittleEndian.Uint32(a[16:20]))
+			binary.BigEndian.PutUint32(digests[(i+5*16)*4:], binary.LittleEndian.Uint32(a[20:24]))
+			binary.BigEndian.PutUint32(digests[(i+6*16)*4:], binary.LittleEndian.Uint32(a[24:28]))
+			binary.BigEndian.PutUint32(digests[(i+7*16)*4:], binary.LittleEndian.Uint32(a[28:32]))
+		} else {
+			binary.LittleEndian.PutUint32(digests[(i+0*16)*4:], init0)
+			binary.LittleEndian.PutUint32(digests[(i+1*16)*4:], init1)
+			binary.LittleEndian.PutUint32(digests[(i+2*16)*4:], init2)
+			binary.LittleEndian.PutUint32(digests[(i+3*16)*4:], init3)
+			binary.LittleEndian.PutUint32(digests[(i+4*16)*4:], init4)
+			binary.LittleEndian.PutUint32(digests[(i+5*16)*4:], init5)
+			binary.LittleEndian.PutUint32(digests[(i+6*16)*4:], init6)
+			binary.LittleEndian.PutUint32(digests[(i+7*16)*4:], init7)
+		}
+	}
+	return &digests
+}
+
+// lane is a helper struct for sorting blocks based on length
+type lane struct {
+	len uint // length of the input block in bytes
+	pos uint // index of the input this length belongs to
+}
+// lanes implements sort.Interface, ordering by len from small to large.
+type lanes []lane
+
+func (lns lanes) Len() int           { return len(lns) }
+func (lns lanes) Swap(i, j int)      { lns[i], lns[j] = lns[j], lns[i] }
+func (lns lanes) Less(i, j int) bool { return lns[i].len < lns[j].len }
+
+// maskRounds pairs a lane bitmask with the number of consecutive rounds it applies to (see genMask and expandMask).
+type maskRounds struct {
+	mask   uint64 // one bit per lane (bit i = input i)
+	rounds uint64 // number of rounds for which mask is used
+}
+// genMask turns the lengths of the 16 inputs into (mask, rounds) pairs: each mask has the bits of the inputs not yet exhausted and applies for rounds 64-byte rounds.
+func genMask(input [16][]byte) [16]maskRounds {
+	// Unused trailing entries of the result keep rounds == 0.
+	// Sort on blocks length small to large
+	var sorted [16]lane
+	for c, inpt := range input {
+		sorted[c] = lane{uint(len(inpt)), uint(c)}
+	}
+	sort.Sort(lanes(sorted[:]))
+
+	// Create mask array including 'rounds' between masks
+	m, round, index := uint64(0xffff), uint64(0), 0
+	var mr [16]maskRounds
+	for _, s := range sorted {
+		if s.len > 0 {
+			if uint64(s.len)>>6 > round { // this input has more 64-byte rounds than covered so far
+				mr[index] = maskRounds{m, (uint64(s.len) >> 6) - round}
+				index++
+			}
+			round = uint64(s.len) >> 6
+		}
+		m = m & ^(1 << uint(s.pos)) // this input is done: drop its bit from all later masks
+	}
+
+	return mr
+}
+
+// TODO: remove function
+func expandMask(mr [16]maskRounds) []uint64 {
+	size := uint64(0)
+	for _, r := range mr {
+		size += r.rounds
+	}
+	result, index := make([]uint64, size), 0
+	for _, r := range mr {
+		for j := uint64(0); j < r.rounds; j++ {
+			result[index] = r.mask
+			index++
+		}
+	}
+	return result
+}

File diff suppressed because it is too large
+ 98 - 0
vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.s


+ 1 - 1
vendor/github.com/minio/sha256-simd/sha256block_arm64.s

@@ -19,7 +19,7 @@
 //
 
 //
-// Based on implementaion as found in  https://github.com/jocover/sha256-armv8
+// Based on implementation as found in https://github.com/jocover/sha256-armv8
 //
 // Use github.com/minio/asm2plan9s on this file to assemble ARM instructions to
 // their Plan9 equivalents

+ 1 - 1
vendor/github.com/minio/sha256-simd/sha256block_other.go

@@ -13,7 +13,7 @@
 // limitations under the License.
 //
 
-// +build ppc64 ppc64le mips mipsle mips64 mips64le
+// +build ppc64 ppc64le mips mipsle mips64 mips64le s390x
 
 package sha256
 

+ 1 - 1
vendor/manifest

@@ -276,7 +276,7 @@
 			"importpath": "github.com/minio/sha256-simd",
 			"repository": "https://github.com/minio/sha256-simd",
 			"vcs": "git",
-			"revision": "6124d070eb4e7001c244b6ccc282620a5dce44a0",
+			"revision": "e8f1b272f8349748e71a1aff2ce81093c33943f6",
 			"branch": "master",
 			"notests": true
 		},

Some files were not shown because too many files changed in this diff