weakhash.go 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. // Copyright (C) 2016 The Syncthing Authors.
  2. //
  3. // This Source Code Form is subject to the terms of the Mozilla Public
  4. // License, v. 2.0. If a copy of the MPL was not distributed with this file,
  5. // You can obtain one at https://mozilla.org/MPL/2.0/.
  6. package weakhash
  7. import (
  8. "bufio"
  9. "io"
  10. "github.com/chmduquesne/rollinghash/adler32"
  11. )
  12. const (
  13. Size = 4
  14. // don't track more hits than this for any given weakhash
  15. maxWeakhashFinderHits = 10
  16. )
  17. var (
  18. Enabled = true
  19. )
  20. // Find finds all the blocks of the given size within io.Reader that matches
  21. // the hashes provided, and returns a hash -> slice of offsets within reader
  22. // map, that produces the same weak hash.
  23. func Find(ir io.Reader, hashesToFind []uint32, size int) (map[uint32][]int64, error) {
  24. if ir == nil || len(hashesToFind) == 0 {
  25. return nil, nil
  26. }
  27. r := bufio.NewReader(ir)
  28. hf := adler32.New()
  29. n, err := io.CopyN(hf, r, int64(size))
  30. if err == io.EOF {
  31. return nil, nil
  32. }
  33. if err != nil {
  34. return nil, err
  35. }
  36. if n != int64(size) {
  37. return nil, io.ErrShortBuffer
  38. }
  39. offsets := make(map[uint32][]int64)
  40. for _, hashToFind := range hashesToFind {
  41. offsets[hashToFind] = make([]int64, 0, maxWeakhashFinderHits)
  42. }
  43. var i int64
  44. var hash uint32
  45. for {
  46. hash = hf.Sum32()
  47. if existing, ok := offsets[hash]; ok && len(existing) < maxWeakhashFinderHits {
  48. offsets[hash] = append(existing, i)
  49. }
  50. i++
  51. bt, err := r.ReadByte()
  52. if err == io.EOF {
  53. break
  54. } else if err != nil {
  55. return offsets, err
  56. }
  57. hf.Roll(bt)
  58. }
  59. return offsets, nil
  60. }
  61. func NewFinder(ir io.ReadSeeker, size int, hashesToFind []uint32) (*Finder, error) {
  62. offsets, err := Find(ir, hashesToFind, size)
  63. if err != nil {
  64. return nil, err
  65. }
  66. return &Finder{
  67. reader: ir,
  68. size: size,
  69. offsets: offsets,
  70. }, nil
  71. }
  72. type Finder struct {
  73. reader io.ReadSeeker
  74. size int
  75. offsets map[uint32][]int64
  76. }
  77. // Iterate iterates all available blocks that matches the provided hash, reads
  78. // them into buf, and calls the iterator function. The iterator function should
  79. // return whether it wishes to continue iterating.
  80. func (h *Finder) Iterate(hash uint32, buf []byte, iterFunc func(int64) bool) (bool, error) {
  81. if h == nil || hash == 0 || len(buf) != h.size {
  82. return false, nil
  83. }
  84. for _, offset := range h.offsets[hash] {
  85. _, err := h.reader.Seek(offset, io.SeekStart)
  86. if err != nil {
  87. return false, err
  88. }
  89. _, err = h.reader.Read(buf)
  90. if err != nil {
  91. return false, err
  92. }
  93. if !iterFunc(offset) {
  94. return true, nil
  95. }
  96. }
  97. return false, nil
  98. }