Преглед изворни кода

tka: implement synchronization mechanics

This PR implements the synchronization mechanics for TKA: generating a SyncOffer, processing a SyncOffer to find an intersection,
and computing the set of AUMs that should be transmitted to effect convergence.

This is the final PR implementing core mechanics for TKA.

Signed-off-by: Tom DNetto <[email protected]>
Tom DNetto пре 3 година
родитељ
комит
393a229de9
2 измењених фајлова са 624 додато и 0 уклоњено
  1. 251 0
      tka/sync.go
  2. 373 0
      tka/sync_test.go

+ 251 - 0
tka/sync.go

@@ -0,0 +1,251 @@
+// Copyright (c) 2022 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package tka
+
+import (
+	"errors"
+	"fmt"
+	"os"
+)
+
+const (
+	// Max iterations searching for any intersection.
+	maxSyncIter = 2000
+	// Max iterations searching for a head intersection.
+	maxSyncHeadIntersectionIter = 400
+)
+
+// ErrNoIntersection is returned when a shared AUM could
+// not be determined when evaluating a remote sync offer.
+var ErrNoIntersection = errors.New("no intersection")
+
+// SyncOffer conveys information about the current head & ancestor AUMs,
+// for the purpose of synchronization with some remote end.
+//
+// Ancestors should contain a subset of the ancestors of the chain.
+// The last entry in that slice is the oldest-known AUM in the chain.
+type SyncOffer struct {
+	Head      AUMHash
+	Ancestors []AUMHash
+}
+
+const (
+	// The starting number of AUMs to skip when listing
+	// ancestors in a SyncOffer.
+	ancestorsSkipStart = 4
+
+	// How many bits to advance the skip count when listing
+	// ancestors in a SyncOffer.
+	//
+	// 2 bits, so (4<<2), so after skipping 4 it skips 16.
+	ancestorsSkipShift = 2
+)
+
+func (a *Authority) syncOffer() (SyncOffer, error) {
+	oldest := a.oldestAncestor.Hash()
+
+	out := SyncOffer{
+		Head:      a.Head(),
+		Ancestors: make([]AUMHash, 0, 6), // 6 chosen arbitrarily.
+	}
+
+	// We send some subset of our ancestors to help the remote
+	// find a more-recent 'head intersection'.
+	// The number of AUMs between each ancestor entry gets
+	// exponentially larger.
+	var (
+		skipAmount uint64  = ancestorsSkipStart
+		curs       AUMHash = a.Head()
+	)
+	for i := uint64(0); i < maxSyncHeadIntersectionIter; i++ {
+		if i > 0 && (i%skipAmount) == 0 {
+			out.Ancestors = append(out.Ancestors, curs)
+			skipAmount = skipAmount << ancestorsSkipShift
+		}
+
+		parent, err := a.storage.AUM(curs)
+		if err != nil {
+			if err != os.ErrNotExist {
+				return SyncOffer{}, err
+			}
+			break
+		}
+
+		// We add the oldest later on, so don't duplicate.
+		if parent.Hash() == oldest {
+			break
+		}
+		copy(curs[:], parent.PrevAUMHash)
+	}
+
+	out.Ancestors = append(out.Ancestors, oldest)
+	return out, nil
+}
+
+// SyncOffer returns an abbreviated description of the current AUM
+// chain, which can be used to synchronize with another (untrusted)
+// Authority instance.
+//
+// The returned SyncOffer structure should be transmitted to the remote
+// Authority, which should call MissingAUMs() using it to determine
+// AUMs which need to be transmitted. This list of AUMs from the remote
+// can then be applied locally with Inform().
+//
+// This SyncOffer + AUM exchange should be performed by both ends,
+// because its possible that either end has AUMs that the other needs
+// to find out about.
+func (a *Authority) SyncOffer() (SyncOffer, error) {
+	return a.syncOffer()
+}
+
+// intersection describes how to synchronize AUMs with a remote
+// authority.
+type intersection struct {
+	// if true, no exchange of AUMs is needed.
+	upToDate bool
+
+	// headIntersection is the latest common AUM on the remote. In other
+	// words, we need to send all AUMs since this one.
+	headIntersection *AUMHash
+
+	// tailIntersection is the oldest common AUM on the remote. In other
+	// words, we diverge with the remote after this AUM, so we both need
+	// to transmit our AUM chain starting here.
+	tailIntersection *AUMHash
+}
+
+// computeSyncIntersection determines the common AUMs between a local and
+// remote SyncOffer. This intersection can be used to synchronize both
+// sides.
+func computeSyncIntersection(authority *Authority, localOffer, remoteOffer SyncOffer) (*intersection, error) {
+	// Simple case: up to date.
+	if remoteOffer.Head == localOffer.Head {
+		return &intersection{upToDate: true, headIntersection: &localOffer.Head}, nil
+	}
+
+	// Case: 'head intersection'
+	// If we have the remote's head, its more likely than not that
+	// we have updates that build on that head. To confirm this,
+	// we iterate backwards through our chain to see if the given
+	// head is an ancestor of our current chain.
+	//
+	// In other words:
+	// <Us>   A -> B -> C
+	// <Them> A -> B
+	//   ∴ their head intersects with our chain, we need to send C
+	var hasRemoteHead bool
+	_, err := authority.storage.AUM(remoteOffer.Head)
+	if err != nil {
+		if err != os.ErrNotExist {
+			return nil, err
+		}
+	} else {
+		hasRemoteHead = true
+	}
+
+	if hasRemoteHead {
+		curs := localOffer.Head
+		for i := 0; i < maxSyncHeadIntersectionIter; i++ {
+			parent, err := authority.storage.AUM(curs)
+			if err != nil {
+				if err != os.ErrNotExist {
+					return nil, err
+				}
+				break
+			}
+
+			if parent.Hash() == remoteOffer.Head {
+				h := parent.Hash()
+				return &intersection{headIntersection: &h}, nil
+			}
+
+			copy(curs[:], parent.PrevAUMHash)
+		}
+	}
+
+	// Case: 'tail intersection'
+	// So we don't have a clue what the remote's head is, but
+	// if one of the ancestors they gave us is part of our chain,
+	// then theres an intersection, which is a starting point for
+	// the remote to send us AUMs from.
+	//
+	// We iterate the list of ancestors in order because the remote
+	// ordered them such that the newer ones are earlier, so with
+	// a bit of luck we can use an earlier one and hence do less work /
+	// transmit fewer AUMs.
+	for _, a := range remoteOffer.Ancestors {
+		state, err := computeStateAt(authority.storage, maxSyncIter, a)
+		if err != nil {
+			if err != os.ErrNotExist {
+				return nil, fmt.Errorf("computeStateAt: %v", err)
+			}
+			continue
+		}
+
+		end, _, err := fastForward(authority.storage, maxSyncIter, state, func(curs AUM, _ State) bool {
+			return curs.Hash() == localOffer.Head
+		})
+		if err != nil {
+			return nil, err
+		}
+		// fastForward can terminate before the done condition if there are
+		// no more children left, so we check again before considering this
+		// an intersection.
+		if end.Hash() == localOffer.Head {
+			return &intersection{tailIntersection: &a}, nil
+		}
+	}
+
+	return nil, ErrNoIntersection
+}
+
+// MissingAUMs returns AUMs a remote may be missing based on the
+// remotes' SyncOffer.
+func (a *Authority) MissingAUMs(remoteOffer SyncOffer) ([]AUM, error) {
+	localOffer, err := a.syncOffer()
+	if err != nil {
+		return nil, fmt.Errorf("local syncOffer: %v", err)
+	}
+	intersection, err := computeSyncIntersection(a, localOffer, remoteOffer)
+	if err != nil {
+		return nil, fmt.Errorf("intersection: %v", err)
+	}
+	if intersection.upToDate {
+		return nil, nil
+	}
+	out := make([]AUM, 0, 12) // 12 chosen arbitrarily.
+
+	if intersection.headIntersection != nil {
+		state, err := computeStateAt(a.storage, maxSyncIter, *intersection.headIntersection)
+		if err != nil {
+			return nil, err
+		}
+
+		_, _, err = fastForward(a.storage, maxSyncIter, state, func(curs AUM, _ State) bool {
+			if curs.Hash() != *intersection.headIntersection {
+				out = append(out, curs)
+			}
+			return false
+		})
+		return out, err
+	}
+
+	if intersection.tailIntersection != nil {
+		state, err := computeStateAt(a.storage, maxSyncIter, *intersection.tailIntersection)
+		if err != nil {
+			return nil, err
+		}
+
+		_, _, err = fastForward(a.storage, maxSyncIter, state, func(curs AUM, _ State) bool {
+			if curs.Hash() != *intersection.tailIntersection {
+				out = append(out, curs)
+			}
+			return false
+		})
+		return out, err
+	}
+
+	panic("unreachable")
+}

+ 373 - 0
tka/sync_test.go

@@ -0,0 +1,373 @@
+// Copyright (c) 2022 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package tka
+
+import (
+	"bytes"
+	"strconv"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+)
+
+func TestSyncOffer(t *testing.T) {
+	c := newTestchain(t, `
+        A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> A9 -> A10
+        A10 -> A11 -> A12 -> A13 -> A14 -> A15 -> A16 -> A17 -> A18
+        A18 -> A19 -> A20 -> A21 -> A22 -> A23 -> A24 -> A25
+    `)
+	a, err := Open(c.Chonk())
+	if err != nil {
+		t.Fatal(err)
+	}
+	got, err := a.SyncOffer()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// A SyncOffer includes a selection of AUMs going backwards in the tree,
+	// progressively skipping more and more each iteration.
+	want := SyncOffer{
+		Head: c.AUMHashes["A25"],
+		Ancestors: []AUMHash{
+			c.AUMHashes["A"+strconv.Itoa(25-ancestorsSkipStart)],
+			c.AUMHashes["A"+strconv.Itoa(25-ancestorsSkipStart<<ancestorsSkipShift)],
+			c.AUMHashes["A1"],
+		},
+	}
+
+	if diff := cmp.Diff(want, got); diff != "" {
+		t.Errorf("SyncOffer diff (-want, +got):\n%s", diff)
+	}
+}
+
+func TestComputeSyncIntersection_FastForward(t *testing.T) {
+	// Node 1 has: A1 -> A2
+	// Node 2 has: A1 -> A2 -> A3 -> A4
+	c := newTestchain(t, `
+        A1 -> A2 -> A3 -> A4
+    `)
+	a1H, a2H := c.AUMHashes["A1"], c.AUMHashes["A2"]
+
+	chonk1 := c.ChonkWith("A1", "A2")
+	n1, err := Open(chonk1)
+	if err != nil {
+		t.Fatal(err)
+	}
+	offer1, err := n1.SyncOffer()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	chonk2 := c.Chonk() // All AUMs
+	n2, err := Open(chonk2)
+	if err != nil {
+		t.Fatal(err)
+	}
+	offer2, err := n2.SyncOffer()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Node 1 only knows about the first two nodes, so the head of n2 is
+	// alien to it.
+	t.Run("n1", func(t *testing.T) {
+		got, err := computeSyncIntersection(n1, offer1, offer2)
+		if err != nil {
+			t.Fatalf("computeSyncIntersection() failed: %v", err)
+		}
+		want := &intersection{
+			tailIntersection: &a1H,
+		}
+		if diff := cmp.Diff(want, got, cmp.AllowUnexported(intersection{})); diff != "" {
+			t.Errorf("intersection diff (-want, +got):\n%s", diff)
+		}
+	})
+
+	// Node 2 knows about the full chain, so it can see that the head of n1
+	// intersects with a subset of its chain (a Head Intersection).
+	t.Run("n2", func(t *testing.T) {
+		got, err := computeSyncIntersection(n2, offer2, offer1)
+		if err != nil {
+			t.Fatalf("computeSyncIntersection() failed: %v", err)
+		}
+		want := &intersection{
+			headIntersection: &a2H,
+		}
+		if diff := cmp.Diff(want, got, cmp.AllowUnexported(intersection{})); diff != "" {
+			t.Errorf("intersection diff (-want, +got):\n%s", diff)
+		}
+	})
+}
+
+func TestComputeSyncIntersection_ForkSmallDiff(t *testing.T) {
+	// The number of nodes in the chain is longer than ancestorSkipStart,
+	// so that during sync both nodes are able to find a common ancestor
+	// which was later than A1.
+
+	c := newTestchain(t, `
+        A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> A9 -> A10
+                                                   | -> F1
+        // Make F1 different to A9.
+        // hashSeed is chosen such that the hash is higher than A9.
+        F1.hashSeed = 7
+    `)
+	// Node 1 has: A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> F1
+	// Node 2 has: A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> A9 -> A10
+	f1H, a9H := c.AUMHashes["F1"], c.AUMHashes["A9"]
+
+	if bytes.Compare(f1H[:], a9H[:]) < 0 {
+		t.Fatal("failed assert: h(a9) > h(f1H)\nTweak hashSeed till this passes")
+	}
+
+	n1, err := Open(c.ChonkWith("A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "F1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	offer1, err := n1.SyncOffer()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if diff := cmp.Diff(SyncOffer{
+		Head: c.AUMHashes["F1"],
+		Ancestors: []AUMHash{
+			c.AUMHashes["A"+strconv.Itoa(9-ancestorsSkipStart)],
+			c.AUMHashes["A1"],
+		},
+	}, offer1); diff != "" {
+		t.Errorf("offer1 diff (-want, +got):\n%s", diff)
+	}
+
+	n2, err := Open(c.ChonkWith("A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	offer2, err := n2.SyncOffer()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if diff := cmp.Diff(SyncOffer{
+		Head: c.AUMHashes["A10"],
+		Ancestors: []AUMHash{
+			c.AUMHashes["A"+strconv.Itoa(10-ancestorsSkipStart)],
+			c.AUMHashes["A1"],
+		},
+	}, offer2); diff != "" {
+		t.Errorf("offer2 diff (-want, +got):\n%s", diff)
+	}
+
+	// Node 1 only knows about the first eight nodes, so the head of n2 is
+	// alien to it.
+	t.Run("n1", func(t *testing.T) {
+		// n2 has 10 nodes, so the first common ancestor should be 10-ancestorsSkipStart
+		wantIntersection := c.AUMHashes["A"+strconv.Itoa(10-ancestorsSkipStart)]
+
+		got, err := computeSyncIntersection(n1, offer1, offer2)
+		if err != nil {
+			t.Fatalf("computeSyncIntersection() failed: %v", err)
+		}
+		want := &intersection{
+			tailIntersection: &wantIntersection,
+		}
+		if diff := cmp.Diff(want, got, cmp.AllowUnexported(intersection{})); diff != "" {
+			t.Errorf("intersection diff (-want, +got):\n%s", diff)
+		}
+	})
+
+	// Node 2 knows about the full chain but doesn't recognize the head.
+	t.Run("n2", func(t *testing.T) {
+		// n1 has 9 nodes, so the first common ancestor should be 9-ancestorsSkipStart
+		wantIntersection := c.AUMHashes["A"+strconv.Itoa(9-ancestorsSkipStart)]
+
+		got, err := computeSyncIntersection(n2, offer2, offer1)
+		if err != nil {
+			t.Fatalf("computeSyncIntersection() failed: %v", err)
+		}
+		want := &intersection{
+			tailIntersection: &wantIntersection,
+		}
+		if diff := cmp.Diff(want, got, cmp.AllowUnexported(intersection{})); diff != "" {
+			t.Errorf("intersection diff (-want, +got):\n%s", diff)
+		}
+	})
+}
+
+func TestMissingAUMs_FastForward(t *testing.T) {
+	// Node 1 has: A1 -> A2
+	// Node 2 has: A1 -> A2 -> A3 -> A4
+	c := newTestchain(t, `
+        A1 -> A2 -> A3 -> A4
+        A1.hashSeed = 1
+        A2.hashSeed = 2
+        A3.hashSeed = 3
+        A4.hashSeed = 4
+    `)
+
+	chonk1 := c.ChonkWith("A1", "A2")
+	n1, err := Open(chonk1)
+	if err != nil {
+		t.Fatal(err)
+	}
+	offer1, err := n1.SyncOffer()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	chonk2 := c.Chonk() // All AUMs
+	n2, err := Open(chonk2)
+	if err != nil {
+		t.Fatal(err)
+	}
+	offer2, err := n2.SyncOffer()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Node 1 only knows about the first two nodes, so the head of n2 is
+	// alien to it. As such, it should send history from the newest ancestor,
+	// A1 (if the chain was longer there would be one in the middle).
+	t.Run("n1", func(t *testing.T) {
+		got, err := n1.MissingAUMs(offer2)
+		if err != nil {
+			t.Fatalf("MissingAUMs() failed: %v", err)
+		}
+
+		// Both sides have A1, so the only AUM that n2 might not have is
+		// A2.
+		want := []AUM{c.AUMs["A2"]}
+		if diff := cmp.Diff(want, got); diff != "" {
+			t.Errorf("MissingAUMs diff (-want, +got):\n%s", diff)
+		}
+	})
+
+	// Node 2 knows about the full chain, so it can see that the head of n1
+	// intersects with a subset of its chain (a Head Intersection).
+	t.Run("n2", func(t *testing.T) {
+		got, err := n2.MissingAUMs(offer1)
+		if err != nil {
+			t.Fatalf("MissingAUMs() failed: %v", err)
+		}
+
+		want := []AUM{
+			c.AUMs["A3"],
+			c.AUMs["A4"],
+		}
+		if diff := cmp.Diff(want, got); diff != "" {
+			t.Errorf("MissingAUMs diff (-want, +got):\n%s", diff)
+		}
+	})
+}
+
+func TestMissingAUMs_Fork(t *testing.T) {
+	// Node 1 has: A1 -> A2 -> A3 -> F1
+	// Node 2 has: A1 -> A2 -> A3 -> A4
+	c := newTestchain(t, `
+        A1 -> A2 -> A3 -> A4
+                     | -> F1
+        A1.hashSeed = 1
+        A2.hashSeed = 2
+        A3.hashSeed = 3
+        A4.hashSeed = 4
+    `)
+
+	chonk1 := c.ChonkWith("A1", "A2", "A3", "F1")
+	n1, err := Open(chonk1)
+	if err != nil {
+		t.Fatal(err)
+	}
+	offer1, err := n1.SyncOffer()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	chonk2 := c.ChonkWith("A1", "A2", "A3", "A4")
+	n2, err := Open(chonk2)
+	if err != nil {
+		t.Fatal(err)
+	}
+	offer2, err := n2.SyncOffer()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	t.Run("n1", func(t *testing.T) {
+		got, err := n1.MissingAUMs(offer2)
+		if err != nil {
+			t.Fatalf("MissingAUMs() failed: %v", err)
+		}
+
+		// Both sides have A1, so n1 will send everything it knows from
+		// there to head.
+		want := []AUM{
+			c.AUMs["A2"],
+			c.AUMs["A3"],
+			c.AUMs["F1"],
+		}
+		if diff := cmp.Diff(want, got); diff != "" {
+			t.Errorf("MissingAUMs diff (-want, +got):\n%s", diff)
+		}
+	})
+
+	t.Run("n2", func(t *testing.T) {
+		got, err := n2.MissingAUMs(offer1)
+		if err != nil {
+			t.Fatalf("MissingAUMs() failed: %v", err)
+		}
+
+		// Both sides have A1, so n2 will send everything it knows from
+		// there to head.
+		want := []AUM{
+			c.AUMs["A2"],
+			c.AUMs["A3"],
+			c.AUMs["A4"],
+		}
+		if diff := cmp.Diff(want, got); diff != "" {
+			t.Errorf("MissingAUMs diff (-want, +got):\n%s", diff)
+		}
+	})
+}
+
+func TestSyncSimpleE2E(t *testing.T) {
+	pub, priv := testingKey25519(t, 1)
+	key := Key{Kind: Key25519, Public: pub, Votes: 2}
+
+	c := newTestchain(t, `
+        G1 -> L1 -> L2 -> L3
+        G1.template = genesis
+    `,
+		optTemplate("genesis", AUM{MessageKind: AUMCheckpoint, State: &State{
+			Keys:               []Key{key},
+			DisablementSecrets: [][]byte{disablementKDF([]byte{1, 2, 3})},
+		}}),
+		optKey("key", key, priv),
+		optSignAllUsing("key"))
+
+	node, err := Bootstrap(&Mem{}, c.AUMs["G1"])
+	if err != nil {
+		t.Fatalf("node Bootstrap() failed: %v", err)
+	}
+	control, err := Open(c.Chonk())
+	if err != nil {
+		t.Fatalf("control Open() failed: %v", err)
+	}
+
+	// Control knows the full chain, node only knows the genesis. Lets see
+	// if they can sync.
+	nodeOffer, err := node.SyncOffer()
+	if err != nil {
+		t.Fatal(err)
+	}
+	controlAUMs, err := control.MissingAUMs(nodeOffer)
+	if err != nil {
+		t.Fatalf("control.MissingAUMs(%v) failed: %v", nodeOffer, err)
+	}
+	if err := node.Inform(controlAUMs); err != nil {
+		t.Fatalf("node.Inform(%v) failed: %v", controlAUMs, err)
+	}
+
+	if cHash, nHash := control.Head(), node.Head(); cHash != nHash {
+		t.Errorf("node & control are not synced: c=%x, n=%x", cHash, nHash)
+	}
+}