blocks.go 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. // Copyright (C) 2014 The Syncthing Authors.
  2. //
  3. // This Source Code Form is subject to the terms of the Mozilla Public
  4. // License, v. 2.0. If a copy of the MPL was not distributed with this file,
  5. // You can obtain one at https://mozilla.org/MPL/2.0/.
  6. package scanner
  7. import (
  8. "bytes"
  9. "context"
  10. "fmt"
  11. "hash"
  12. "io"
  13. "github.com/chmduquesne/rollinghash/adler32"
  14. "github.com/syncthing/syncthing/lib/protocol"
  15. "github.com/syncthing/syncthing/lib/sha256"
  16. )
  // SHA256OfNothing is the SHA-256 digest of zero bytes of input. Blocks uses
  // it as the hash of the single block it reports for an empty file.
  var SHA256OfNothing = []byte{
  	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
  	0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
  	0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
  	0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
  }
  // Counter receives progress notifications while data is being hashed.
  // Blocks calls Update once per hashed block when given a non-nil Counter.
  type Counter interface {
  	// Update reports that the given number of additional bytes have been
  	// processed.
  	Update(bytes int64)
  }
  21. // Blocks returns the blockwise hash of the reader.
  22. func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) {
  23. hf := sha256.New()
  24. hashLength := hf.Size()
  25. var mhf io.Writer
  26. var whf hash.Hash32
  27. if useWeakHashes {
  28. whf = adler32.New()
  29. mhf = io.MultiWriter(hf, whf)
  30. } else {
  31. whf = noopHash{}
  32. mhf = hf
  33. }
  34. var blocks []protocol.BlockInfo
  35. var hashes, thisHash []byte
  36. if sizehint >= 0 {
  37. // Allocate contiguous blocks for the BlockInfo structures and their
  38. // hashes once and for all, and stick to the specified size.
  39. r = io.LimitReader(r, sizehint)
  40. numBlocks := int(sizehint / int64(blocksize))
  41. blocks = make([]protocol.BlockInfo, 0, numBlocks)
  42. hashes = make([]byte, 0, hashLength*numBlocks)
  43. }
  44. // A 32k buffer is used for copying into the hash function.
  45. buf := make([]byte, 32<<10)
  46. var offset int64
  47. lr := io.LimitReader(r, int64(blocksize)).(*io.LimitedReader)
  48. for {
  49. select {
  50. case <-ctx.Done():
  51. return nil, ctx.Err()
  52. default:
  53. }
  54. lr.N = int64(blocksize)
  55. n, err := io.CopyBuffer(mhf, lr, buf)
  56. if err != nil {
  57. return nil, err
  58. }
  59. if n == 0 {
  60. break
  61. }
  62. if counter != nil {
  63. counter.Update(n)
  64. }
  65. // Carve out a hash-sized chunk of "hashes" to store the hash for this
  66. // block.
  67. hashes = hf.Sum(hashes)
  68. thisHash, hashes = hashes[:hashLength], hashes[hashLength:]
  69. b := protocol.BlockInfo{
  70. Size: int32(n),
  71. Offset: offset,
  72. Hash: thisHash,
  73. WeakHash: whf.Sum32(),
  74. }
  75. blocks = append(blocks, b)
  76. offset += n
  77. hf.Reset()
  78. whf.Reset()
  79. }
  80. if len(blocks) == 0 {
  81. // Empty file
  82. blocks = append(blocks, protocol.BlockInfo{
  83. Offset: 0,
  84. Size: 0,
  85. Hash: SHA256OfNothing,
  86. })
  87. }
  88. return blocks, nil
  89. }
  90. // PopulateOffsets sets the Offset field on each block
  91. func PopulateOffsets(blocks []protocol.BlockInfo) {
  92. var offset int64
  93. for i := range blocks {
  94. blocks[i].Offset = offset
  95. offset += int64(blocks[i].Size)
  96. }
  97. }
  98. // BlockDiff returns lists of common and missing (to transform src into tgt)
  99. // blocks. Both block lists must have been created with the same block size.
  100. func BlockDiff(src, tgt []protocol.BlockInfo) (have, need []protocol.BlockInfo) {
  101. if len(tgt) == 0 && len(src) != 0 {
  102. return nil, nil
  103. }
  104. if len(tgt) != 0 && len(src) == 0 {
  105. // Copy the entire file
  106. return nil, tgt
  107. }
  108. for i := range tgt {
  109. if i >= len(src) || !bytes.Equal(tgt[i].Hash, src[i].Hash) {
  110. // Copy differing block
  111. need = append(need, tgt[i])
  112. } else {
  113. have = append(have, tgt[i])
  114. }
  115. }
  116. return have, need
  117. }
  118. // Verify returns nil or an error describing the mismatch between the block
  119. // list and actual reader contents
  120. func Verify(r io.Reader, blocksize int, blocks []protocol.BlockInfo) error {
  121. hf := sha256.New()
  122. // A 32k buffer is used for copying into the hash function.
  123. buf := make([]byte, 32<<10)
  124. for i, block := range blocks {
  125. lr := &io.LimitedReader{R: r, N: int64(blocksize)}
  126. _, err := io.CopyBuffer(hf, lr, buf)
  127. if err != nil {
  128. return err
  129. }
  130. hash := hf.Sum(nil)
  131. hf.Reset()
  132. if !bytes.Equal(hash, block.Hash) {
  133. return fmt.Errorf("hash mismatch %x != %x for block %d", hash, block.Hash, i)
  134. }
  135. }
  136. // We should have reached the end now
  137. bs := make([]byte, 1)
  138. n, err := r.Read(bs)
  139. if n != 0 || err != io.EOF {
  140. return fmt.Errorf("file continues past end of blocks")
  141. }
  142. return nil
  143. }
  144. func VerifyBuffer(buf []byte, block protocol.BlockInfo) ([]byte, error) {
  145. if len(buf) != int(block.Size) {
  146. return nil, fmt.Errorf("length mismatch %d != %d", len(buf), block.Size)
  147. }
  148. hf := sha256.New()
  149. _, err := hf.Write(buf)
  150. if err != nil {
  151. return nil, err
  152. }
  153. hash := hf.Sum(nil)
  154. if !bytes.Equal(hash, block.Hash) {
  155. return hash, fmt.Errorf("hash mismatch %x != %x", hash, block.Hash)
  156. }
  157. return hash, nil
  158. }
  159. // BlocksEqual returns whether two slices of blocks are exactly the same hash
  160. // and index pair wise.
  161. func BlocksEqual(src, tgt []protocol.BlockInfo) bool {
  162. if len(tgt) != len(src) {
  163. return false
  164. }
  165. for i, sblk := range src {
  166. if !bytes.Equal(sblk.Hash, tgt[i].Hash) {
  167. return false
  168. }
  169. }
  170. return true
  171. }
  // noopHash is a hash.Hash32 that discards everything written to it and
  // always reports a checksum of zero. It stands in for the weak hash when
  // weak hashing is disabled.
  type noopHash struct{}

  // Compile-time check that noopHash satisfies hash.Hash32.
  var _ hash.Hash32 = noopHash{}

  // Sum32 always returns zero.
  func (noopHash) Sum32() uint32 { return 0 }

  // BlockSize returns zero; there is no underlying block structure.
  func (noopHash) BlockSize() int { return 0 }

  // Size returns zero; Sum appends no bytes.
  func (noopHash) Size() int { return 0 }

  // Reset does nothing, as there is no state to clear.
  func (noopHash) Reset() {}

  // Sum returns b unchanged: the hash is zero bytes long, so appending it
  // leaves the caller's slice as it was.
  func (noopHash) Sum(b []byte) []byte { return b }

  // Write discards p and reports it as fully written, as the io.Writer
  // contract requires when no error is returned.
  func (noopHash) Write(p []byte) (int, error) { return len(p), nil }