  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. // Package deephash hashes a Go value recursively, in a predictable order,
  4. // without looping. The hash is only valid within the lifetime of a program.
  5. // Users should not store the hash on disk or send it over the network.
  6. // The hash is sufficiently strong and unique such that
  7. // Hash(&x) == Hash(&y) is an appropriate replacement for x == y.
  8. //
  9. // The definition of equality is identical to reflect.DeepEqual except:
  10. // - Floating-point values are compared based on the raw bits,
  11. // which means that NaNs (with the same bit pattern) are treated as equal.
  12. // - time.Time are compared based on whether they are the same instant in time
  13. // and also in the same zone offset. Monotonic measurements and zone names
  14. // are ignored as part of the hash.
  15. // - netip.Addr are compared based on a shallow comparison of the struct.
  16. //
  17. // WARNING: This package, like most of the tailscale.com Go module,
  18. // should be considered Tailscale-internal; we make no API promises.
  19. package deephash
  20. import (
  21. "crypto/sha256"
  22. "encoding/binary"
  23. "encoding/hex"
  24. "reflect"
  25. "sync"
  26. "time"
  27. "tailscale.com/util/hashx"
  28. )
  29. // There is much overlap between the theory of serialization and hashing.
  30. // A hash (useful for determining equality) can be produced by printing a value
  31. // and hashing the output. The format must:
  32. // * be deterministic such that the same value hashes to the same output, and
  33. // * be parsable such that the same value can be reproduced by the output.
  34. //
  35. // The logic below hashes a value by printing it to a hash.Hash.
  36. // To be parsable, it assumes that we know the Go type of each value:
  37. // * scalar types (e.g., bool or int32) are directly printed as their
  38. // underlying memory representation.
  39. // * list types (e.g., strings and slices) are prefixed by a
  40. // fixed-width length field, followed by the contents of the list.
  41. // * slices, arrays, and structs print each element/field consecutively.
  42. // * interfaces print with a 1-byte prefix indicating whether it is nil.
  43. // If non-nil, it is followed by a fixed-width field of the type index,
  44. // followed by the format of the underlying value.
  45. // * pointers print with a 1-byte prefix indicating whether the pointer is
  46. // 1) nil, 2) previously seen, or 3) newly seen. Previously seen pointers are
  47. // followed by a fixed-width field with the index of the previous pointer.
  48. // Newly seen pointers are followed by the format of the underlying value.
  49. // * maps print with a 1-byte prefix indicating whether the map pointer is
  50. // 1) nil, 2) previously seen, or 3) newly seen. Previously seen pointers
  51. // are followed by a fixed-width field of the index of the previous pointer.
  52. // Newly seen maps are printed with a fixed-width length field, followed by
  53. // a fixed-width field with the XOR of the hash of every map entry.
  54. // With a sufficiently strong hash, this value is theoretically "parsable"
  55. // by looking up the hash in a magical map that returns the set of entries
  56. // for that given hash.
// hasher is reusable state for hashing a value.
// Get one via hasherPool.
type hasher struct {
	hashx.Block512            // streaming hash state fed by the fixed-width Hash* helpers
	visitStack visitStack     // pointers currently being visited, for cycle detection
}
// hasherPool amortizes hasher allocations across Hash calls.
var hasherPool = &sync.Pool{
	New: func() any { return new(hasher) },
}
// reset prepares h for reuse, lazily allocating the underlying
// SHA-256 state the first time a pooled hasher is used.
func (h *hasher) reset() {
	if h.Block512.Hash == nil {
		h.Block512.Hash = sha256.New()
	}
	h.Block512.Reset()
}
// hashType hashes a reflect.Type.
// The hash is only consistent within the lifetime of a program,
// because it is derived from a memory address.
func (h *hasher) hashType(t reflect.Type) {
	// This approach relies on reflect.Type always being backed by a unique
	// *reflect.rtype pointer. A safer approach is to use a global sync.Map
	// that maps reflect.Type to some arbitrary and unique index.
	// While safer, it requires global state with memory that can never be GC'd.
	rtypeAddr := reflect.ValueOf(t).Pointer() // address of *reflect.rtype
	h.HashUint64(uint64(rtypeAddr))
}
// sum returns the checksum of everything hashed into h so far.
func (h *hasher) sum() (s Sum) {
	h.Sum(s.sum[:0])
	return s
}
  86. // Sum is an opaque checksum type that is comparable.
  87. type Sum struct {
  88. sum [sha256.Size]byte
  89. }
  90. func (s1 *Sum) xor(s2 Sum) {
  91. for i := 0; i < sha256.Size; i++ {
  92. s1.sum[i] ^= s2.sum[i]
  93. }
  94. }
  95. func (s Sum) String() string {
  96. // Note: if we change this, keep in sync with AppendTo
  97. return hex.EncodeToString(s.sum[:])
  98. }
  99. // AppendTo appends the string encoding of this sum (as returned by the String
  100. // method) to the provided byte slice and returns the extended buffer.
  101. func (s Sum) AppendTo(b []byte) []byte {
  102. // TODO: switch to upstream implementation if accepted:
  103. // https://github.com/golang/go/issues/53693
  104. var lb [len(s.sum) * 2]byte
  105. hex.Encode(lb[:], s.sum[:])
  106. return append(b, lb[:]...)
  107. }
var (
	// seedOnce guards initSeed so seed is assigned exactly once per process.
	seedOnce sync.Once
	// seed is mixed into every hash; it differs per process start, which is
	// one reason the hash is only valid within the lifetime of a program.
	seed uint64
)

// initSeed populates seed from the current wall clock.
// Call only via seedOnce.Do.
func initSeed() {
	seed = uint64(time.Now().UnixNano())
}
  115. // Hash returns the hash of v.
  116. func Hash[T any](v *T) Sum {
  117. h := hasherPool.Get().(*hasher)
  118. defer hasherPool.Put(h)
  119. h.reset()
  120. seedOnce.Do(initSeed)
  121. h.HashUint64(seed)
  122. // Always treat the Hash input as if it were an interface by including
  123. // a hash of the type. This ensures that hashing of two different types
  124. // but with the same value structure produces different hashes.
  125. t := reflect.TypeOf(v).Elem()
  126. h.hashType(t)
  127. if v == nil {
  128. h.HashUint8(0) // indicates nil
  129. } else {
  130. h.HashUint8(1) // indicates visiting pointer element
  131. p := pointerOf(reflect.ValueOf(v))
  132. hash := lookupTypeHasher(t)
  133. hash(h, p)
  134. }
  135. return h.sum()
  136. }
  137. // HasherForType returns a hash that is specialized for the provided type.
  138. func HasherForType[T any]() func(*T) Sum {
  139. var v *T
  140. seedOnce.Do(initSeed)
  141. t := reflect.TypeOf(v).Elem()
  142. hash := lookupTypeHasher(t)
  143. return func(v *T) (s Sum) {
  144. // This logic is identical to Hash, but pull out a few statements.
  145. h := hasherPool.Get().(*hasher)
  146. defer hasherPool.Put(h)
  147. h.reset()
  148. h.HashUint64(seed)
  149. h.hashType(t)
  150. if v == nil {
  151. h.HashUint8(0) // indicates nil
  152. } else {
  153. h.HashUint8(1) // indicates visiting pointer element
  154. p := pointerOf(reflect.ValueOf(v))
  155. hash(h, p)
  156. }
  157. return h.sum()
  158. }
  159. }
  160. // Update sets last to the hash of v and reports whether its value changed.
  161. func Update[T any](last *Sum, v *T) (changed bool) {
  162. sum := Hash(v)
  163. changed = sum != *last
  164. if changed {
  165. *last = sum
  166. }
  167. return changed
  168. }
// typeHasherFunc hashes the value pointed at by p for a given type.
// For example, if t is a bool, then p is a *bool.
// The provided pointer must always be non-nil.
type typeHasherFunc func(h *hasher, p pointer)

// typeHasherCache memoizes the hasher built for each reflect.Type.
var typeHasherCache sync.Map // map[reflect.Type]typeHasherFunc
  174. func lookupTypeHasher(t reflect.Type) typeHasherFunc {
  175. if v, ok := typeHasherCache.Load(t); ok {
  176. return v.(typeHasherFunc)
  177. }
  178. hash := makeTypeHasher(t)
  179. v, _ := typeHasherCache.LoadOrStore(t, hash)
  180. return v.(typeHasherFunc)
  181. }
// makeTypeHasher builds the typeHasherFunc for t.
// Callers should go through lookupTypeHasher, which caches the result.
func makeTypeHasher(t reflect.Type) typeHasherFunc {
	// Types with specific hashing.
	switch t {
	case timeTimeType:
		return hashTime
	case netipAddrType:
		return hashAddr
	}
	// Types that can have their memory representation directly hashed.
	if typeIsMemHashable(t) {
		return makeMemHasher(t.Size())
	}
	switch t.Kind() {
	case reflect.String:
		return hashString
	case reflect.Array:
		return makeArrayHasher(t)
	case reflect.Slice:
		return makeSliceHasher(t)
	case reflect.Struct:
		return makeStructHasher(t)
	case reflect.Map:
		return makeMapHasher(t)
	case reflect.Pointer:
		return makePointerHasher(t)
	case reflect.Interface:
		return makeInterfaceHasher(t)
	default: // Func, Chan, UnsafePointer
		// Hash nothing for these kinds, so any two values of such a
		// type contribute identically to the hash.
		return func(*hasher, pointer) {}
	}
}
  213. func hashTime(h *hasher, p pointer) {
  214. // Include the zone offset (but not the name) to keep
  215. // Hash(t1) == Hash(t2) being semantically equivalent to
  216. // t1.Format(time.RFC3339Nano) == t2.Format(time.RFC3339Nano).
  217. t := *p.asTime()
  218. _, offset := t.Zone()
  219. h.HashUint64(uint64(t.Unix()))
  220. h.HashUint32(uint32(t.Nanosecond()))
  221. h.HashUint32(uint32(offset))
  222. }
  223. func hashAddr(h *hasher, p pointer) {
  224. // The formatting of netip.Addr covers the
  225. // IP version, the address, and the optional zone name (for v6).
  226. // This is equivalent to a1.MarshalBinary() == a2.MarshalBinary().
  227. ip := *p.asAddr()
  228. switch {
  229. case !ip.IsValid():
  230. h.HashUint64(0)
  231. case ip.Is4():
  232. b := ip.As4()
  233. h.HashUint64(4)
  234. h.HashUint32(binary.LittleEndian.Uint32(b[:]))
  235. case ip.Is6():
  236. b := ip.As16()
  237. z := ip.Zone()
  238. h.HashUint64(16 + uint64(len(z)))
  239. h.HashUint64(binary.LittleEndian.Uint64(b[:8]))
  240. h.HashUint64(binary.LittleEndian.Uint64(b[8:]))
  241. h.HashString(z)
  242. }
  243. }
  244. func hashString(h *hasher, p pointer) {
  245. s := *p.asString()
  246. h.HashUint64(uint64(len(s)))
  247. h.HashString(s)
  248. }
// makeMemHasher returns a hasher that hashes n bytes of raw memory
// starting at the given pointer. Only valid for types whose in-memory
// representation can be hashed directly (see the typeIsMemHashable check
// in makeTypeHasher).
func makeMemHasher(n uintptr) typeHasherFunc {
	return func(h *hasher, p pointer) {
		h.HashBytes(p.asMemory(n))
	}
}
  254. func makeArrayHasher(t reflect.Type) typeHasherFunc {
  255. var once sync.Once
  256. var hashElem typeHasherFunc
  257. init := func() {
  258. hashElem = lookupTypeHasher(t.Elem())
  259. }
  260. n := t.Len() // number of array elements
  261. nb := t.Elem().Size() // byte size of each array element
  262. return func(h *hasher, p pointer) {
  263. once.Do(init)
  264. for i := 0; i < n; i++ {
  265. hashElem(h, p.arrayIndex(i, nb))
  266. }
  267. }
  268. }
// makeSliceHasher returns a hasher for slice type t.
// A nil slice hashes differently from an empty one, and the length is
// always mixed in before the elements.
func makeSliceHasher(t reflect.Type) typeHasherFunc {
	nb := t.Elem().Size() // byte size of each slice element
	if typeIsMemHashable(t.Elem()) {
		// Fast path: the whole backing array can be hashed as raw memory.
		return func(h *hasher, p pointer) {
			pa := p.sliceArray()
			if pa.isNil() {
				h.HashUint8(0) // indicates nil
				return
			}
			h.HashUint8(1) // indicates visiting slice
			n := p.sliceLen()
			b := pa.asMemory(uintptr(n) * nb)
			h.HashUint64(uint64(n))
			h.HashBytes(b)
		}
	}
	var once sync.Once
	var hashElem typeHasherFunc
	init := func() {
		// Resolved lazily via once so construction of hashers for
		// recursive types terminates.
		hashElem = lookupTypeHasher(t.Elem())
		if typeIsRecursive(t) {
			// Wrap the element hasher with cycle detection keyed on the
			// element's address.
			hashElemDefault := hashElem
			hashElem = func(h *hasher, p pointer) {
				if idx, ok := h.visitStack.seen(p.p); ok {
					h.HashUint8(2) // indicates cycle
					h.HashUint64(uint64(idx))
					return
				}
				h.HashUint8(1) // indicates visiting slice element
				h.visitStack.push(p.p)
				defer h.visitStack.pop(p.p)
				hashElemDefault(h, p)
			}
		}
	}
	return func(h *hasher, p pointer) {
		pa := p.sliceArray()
		if pa.isNil() {
			h.HashUint8(0) // indicates nil
			return
		}
		once.Do(init)
		h.HashUint8(1) // indicates visiting slice
		n := p.sliceLen()
		h.HashUint64(uint64(n))
		for i := 0; i < n; i++ {
			pe := pa.arrayIndex(i, nb)
			hashElem(h, pe)
		}
	}
}
// makeStructHasher returns a hasher for struct type t that hashes each
// field in declaration order. Runs of contiguous, directly mem-hashable
// fields are coalesced into a single raw-memory hash.
func makeStructHasher(t reflect.Type) typeHasherFunc {
	type fieldHasher struct {
		idx    int            // index of field for reflect.Type.Field(n); negative if memory is directly hashable
		hash   typeHasherFunc // only valid if idx is not negative
		offset uintptr
		size   uintptr
	}
	var once sync.Once
	var fields []fieldHasher
	init := func() {
		for i, numField := 0, t.NumField(); i < numField; i++ {
			sf := t.Field(i)
			f := fieldHasher{i, nil, sf.Offset, sf.Type.Size()}
			if typeIsMemHashable(sf.Type) {
				f.idx = -1
			}
			// Combine with previous field if both contiguous and mem-hashable.
			// The offset equality check ensures there is no padding gap
			// between the two fields.
			if f.idx < 0 && len(fields) > 0 {
				if last := &fields[len(fields)-1]; last.idx < 0 && last.offset+last.size == f.offset {
					last.size += f.size
					continue
				}
			}
			fields = append(fields, f)
		}
		// Resolve per-field hashers only after coalescing is complete.
		for i, f := range fields {
			if f.idx >= 0 {
				fields[i].hash = lookupTypeHasher(t.Field(f.idx).Type)
			}
		}
	}
	return func(h *hasher, p pointer) {
		once.Do(init)
		for _, field := range fields {
			pf := p.structField(field.idx, field.offset, field.size)
			if field.idx < 0 {
				// Coalesced run of mem-hashable fields: hash raw memory.
				h.HashBytes(pf.asMemory(field.size))
			} else {
				field.hash(h, pf)
			}
		}
	}
}
// makeMapHasher returns a hasher for map type t. Each KV entry is hashed
// independently and the per-entry hashes are combined with XOR, making
// the result independent of iteration order.
func makeMapHasher(t reflect.Type) typeHasherFunc {
	var once sync.Once
	var hashKey, hashValue typeHasherFunc
	var isRecursive bool
	init := func() {
		// Resolved lazily via once so construction of hashers for
		// recursive types terminates.
		hashKey = lookupTypeHasher(t.Key())
		hashValue = lookupTypeHasher(t.Elem())
		isRecursive = typeIsRecursive(t)
	}
	return func(h *hasher, p pointer) {
		v := p.asValue(t).Elem() // reflect.Map kind
		if v.IsNil() {
			h.HashUint8(0) // indicates nil
			return
		}
		once.Do(init)
		if isRecursive {
			pm := v.UnsafePointer() // underlying pointer of map
			if idx, ok := h.visitStack.seen(pm); ok {
				h.HashUint8(2) // indicates cycle
				h.HashUint64(uint64(idx))
				return
			}
			h.visitStack.push(pm)
			defer h.visitStack.pop(pm)
		}
		h.HashUint8(1) // indicates visiting map entries
		h.HashUint64(uint64(v.Len()))
		mh := mapHasherPool.Get().(*mapHasher)
		defer mapHasherPool.Put(mh)
		// Hash a map in a sort-free manner.
		// It relies on a map being an unordered set of KV entries.
		// So long as we hash each KV entry together, we can XOR all the
		// individual hashes to produce a unique hash for the entire map.
		k := mh.valKey.get(v.Type().Key())
		e := mh.valElem.get(v.Type().Elem())
		mh.sum = Sum{}
		mh.h.visitStack = h.visitStack // always use the parent's visit stack to avoid cycles
		for iter := v.MapRange(); iter.Next(); {
			k.SetIterKey(iter)
			e.SetIterValue(iter)
			mh.h.reset()
			hashKey(&mh.h, pointerOf(k.Addr()))
			hashValue(&mh.h, pointerOf(e.Addr()))
			mh.sum.xor(mh.h.sum())
		}
		h.HashBytes(mh.sum.sum[:])
	}
}
// makePointerHasher returns a hasher for pointer type t.
// Recursive types get cycle detection via the hasher's visit stack;
// non-recursive types skip that bookkeeping entirely.
func makePointerHasher(t reflect.Type) typeHasherFunc {
	var once sync.Once
	var hashElem typeHasherFunc
	var isRecursive bool
	init := func() {
		// Resolved lazily via once so construction of hashers for
		// recursive types terminates.
		hashElem = lookupTypeHasher(t.Elem())
		isRecursive = typeIsRecursive(t)
	}
	return func(h *hasher, p pointer) {
		pe := p.pointerElem()
		// Check for nil before once.Do so nil pointers never trigger
		// hasher construction.
		if pe.isNil() {
			h.HashUint8(0) // indicates nil
			return
		}
		once.Do(init)
		if isRecursive {
			if idx, ok := h.visitStack.seen(pe.p); ok {
				h.HashUint8(2) // indicates cycle
				h.HashUint64(uint64(idx)) // index of the previously seen pointer
				return
			}
			h.visitStack.push(pe.p)
			defer h.visitStack.pop(pe.p)
		}
		h.HashUint8(1) // indicates visiting a pointer element
		hashElem(h, pe)
	}
}
  440. func makeInterfaceHasher(t reflect.Type) typeHasherFunc {
  441. return func(h *hasher, p pointer) {
  442. v := p.asValue(t).Elem() // reflect.Interface kind
  443. if v.IsNil() {
  444. h.HashUint8(0) // indicates nil
  445. return
  446. }
  447. h.HashUint8(1) // indicates visiting an interface value
  448. v = v.Elem()
  449. t := v.Type()
  450. h.hashType(t)
  451. va := reflect.New(t).Elem()
  452. va.Set(v)
  453. hashElem := lookupTypeHasher(t)
  454. hashElem(h, pointerOf(va.Addr()))
  455. }
  456. }
// mapHasher is scratch state for hashing the entries of a single map.
type mapHasher struct {
	h       hasher     // hashes one KV entry at a time
	valKey  valueCache // reusable addressable values for map keys
	valElem valueCache // reusable addressable values for map elements
	sum     Sum        // XOR accumulation of the per-entry hashes
}

// mapHasherPool amortizes mapHasher allocations across map hashes.
var mapHasherPool = &sync.Pool{
	New: func() any { return new(mapHasher) },
}
  466. type valueCache map[reflect.Type]reflect.Value
  467. // get returns an addressable reflect.Value for the given type.
  468. func (c *valueCache) get(t reflect.Type) reflect.Value {
  469. v, ok := (*c)[t]
  470. if !ok {
  471. v = reflect.New(t).Elem()
  472. if *c == nil {
  473. *c = make(valueCache)
  474. }
  475. (*c)[t] = v
  476. }
  477. return v
  478. }