Browse Source

tka: compact TKA storage on startup

Signed-off-by: Tom DNetto <[email protected]>
Tom DNetto 3 years ago
parent
commit
88c7d19d54
6 changed files with 250 additions and 5 deletions
  1. 3 0
      ipn/ipnlocal/local.go
  2. 5 0
      ipn/ipnlocal/network-lock.go
  3. 99 5
      tka/tailchonk.go
  4. 79 0
      tka/tailchonk_test.go
  5. 14 0
      tka/tka.go
  6. 50 0
      tka/tka_test.go

+ 3 - 0
ipn/ipnlocal/local.go

@@ -4740,6 +4740,9 @@ func (b *LocalBackend) initTKALocked() error {
 		if err != nil {
 			return fmt.Errorf("initializing tka: %v", err)
 		}
+		if err := authority.Compact(storage, tkaCompactionDefaults); err != nil {
+			b.logf("tka compaction failed: %v", err)
+		}
 
 		b.tka = &tkaState{
 			profile:   cp.ID,

+ 5 - 0
ipn/ipnlocal/network-lock.go

@@ -37,6 +37,11 @@ import (
 var (
 	errMissingNetmap        = errors.New("missing netmap: verify that you are logged in")
 	errNetworkLockNotActive = errors.New("network-lock is not active")
+
+	tkaCompactionDefaults = tka.CompactionOptions{
+		MinChain: 24,                  // Always keep at least the 24 most recent AUMs, counting back from head.
+		MinAge:   14 * 24 * time.Hour, // Keep at least 2 weeks (14 days) of AUMs.
+	}
 )
 
 type tkaState struct {

+ 99 - 5
tka/tailchonk.go

@@ -200,6 +200,11 @@ func ChonkDir(dir string) (*FS, error) {
 	if !stat.IsDir() {
 		return nil, fmt.Errorf("chonk directory %q is a file", dir)
 	}
+
+	// TODO(tom): *FS marks AUMs as deleted but does not actually
+	// delete them, to avoid data loss in the event of a bug.
+	// Implement deletion once we are fairly confident in the implementation.
+
 	return &FS{base: dir}, nil
 }
 
@@ -213,8 +218,17 @@ func ChonkDir(dir string) (*FS, error) {
 // much smaller than JSON for AUMs. The 'keyasint' thing isn't essential
 // but again it saves a bunch of bytes.
 type fsHashInfo struct {
-	Children []AUMHash `cbor:"1,keyasint"`
-	AUM      *AUM      `cbor:"2,keyasint"`
+	Children    []AUMHash `cbor:"1,keyasint"`
+	AUM         *AUM      `cbor:"2,keyasint"`
+	CreatedUnix int64     `cbor:"3,keyasint,omitempty"`
+
+	// PurgedUnix is set when the AUM is deleted. The value is
+	// the unix epoch at the time it was deleted.
+	//
+	// While a non-zero PurgedUnix symbolizes the AUM is deleted,
+	// the fsHashInfo entry can continue to exist to track children
+	// of this AUMHash.
+	PurgedUnix int64 `cbor:"4,keyasint,omitempty"`
 }
 
 // aumDir returns the directory an AUM is stored in, and its filename
@@ -238,12 +252,45 @@ func (c *FS) AUM(hash AUMHash) (AUM, error) {
 		}
 		return AUM{}, err
 	}
-	if info.AUM == nil {
+	if info.AUM == nil || info.PurgedUnix > 0 {
 		return AUM{}, os.ErrNotExist
 	}
 	return *info.AUM, nil
 }
 
+// CommitTime returns the time at which the AUM was committed.
+//
+// If the AUM does not exist, or has been marked as purged, then
+// os.ErrNotExist is returned. Any other failure reading the stored
+// entry or its file metadata is returned to the caller as-is.
+func (c *FS) CommitTime(h AUMHash) (time.Time, error) {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	info, err := c.get(h)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return time.Time{}, os.ErrNotExist
+		}
+		return time.Time{}, err
+	}
+	if info.PurgedUnix > 0 {
+		return time.Time{}, os.ErrNotExist
+	}
+	if info.CreatedUnix > 0 {
+		return time.Unix(info.CreatedUnix, 0), nil
+	}
+
+	// If we got this far, the AUM exists but CreatedUnix is not
+	// set, presumably because this AUM was committed using a version
+	// of tailscaled that pre-dates the introduction of CreatedUnix.
+	// As such, we use the file modification time as a suitable analog.
+	dir, base := c.aumDir(h)
+	s, err := os.Stat(filepath.Join(dir, base))
+	if err != nil {
+		// Surface the failure rather than reporting a zero time with a
+		// nil error, which a caller cannot tell apart from a real
+		// (and extremely old) commit time.
+		if os.IsNotExist(err) {
+			return time.Time{}, os.ErrNotExist
+		}
+		return time.Time{}, err
+	}
+	return s.ModTime(), nil
+}
+
 // ChildAUMs returns any known AUMs with a specific parent hash.
 func (c *FS) ChildAUMs(prevAUMHash AUMHash) ([]AUM, error) {
 	c.mu.RLock()
@@ -257,6 +304,9 @@ func (c *FS) ChildAUMs(prevAUMHash AUMHash) ([]AUM, error) {
 		}
 		return nil, err
 	}
+	// NOTE(tom): We don't check PurgedUnix here because 'purged'
+	// only applies to that specific AUM (i.e. info.AUM) and not to
+	// any information about children stored against that hash.
 
 	out := make([]AUM, len(info.Children))
 	for i, h := range info.Children {
@@ -265,7 +315,7 @@ func (c *FS) ChildAUMs(prevAUMHash AUMHash) ([]AUM, error) {
 			// We expect any AUM recorded as a child on its parent to exist.
 			return nil, fmt.Errorf("reading child %d of %x: %v", i, h, err)
 		}
-		if c.AUM == nil {
+		if c.AUM == nil || c.PurgedUnix > 0 {
 			return nil, fmt.Errorf("child %d of %x: AUM not stored", i, h)
 		}
 		out[i] = *c.AUM
@@ -309,13 +359,27 @@ func (c *FS) Heads() ([]AUM, error) {
 
 	out := make([]AUM, 0, 6) // 6 is arbitrary.
 	err := c.scanHashes(func(info *fsHashInfo) {
-		if len(info.Children) == 0 && info.AUM != nil {
+		if len(info.Children) == 0 && info.AUM != nil && info.PurgedUnix == 0 {
 			out = append(out, *info.AUM)
 		}
 	})
 	return out, err
 }
 
+// AllAUMs returns the hash of every AUM stored in the chonk.
+//
+// Entries which carry no AUM, or whose AUM has been marked as
+// purged, are left out of the result.
+func (c *FS) AllAUMs() ([]AUMHash, error) {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	hashes := make([]AUMHash, 0, 6) // 6 is arbitrary.
+	collect := func(info *fsHashInfo) {
+		if info.AUM == nil || info.PurgedUnix != 0 {
+			return
+		}
+		hashes = append(hashes, info.AUM.Hash())
+	}
+
+	err := c.scanHashes(collect)
+	return hashes, err
+}
+
 func (c *FS) scanHashes(eachHashInfo func(*fsHashInfo)) error {
 	prefixDirs, err := os.ReadDir(c.base)
 	if err != nil {
@@ -411,6 +475,7 @@ func (c *FS) CommitVerifiedAUMs(updates []AUM) error {
 		}
 
 		err := c.commit(h, func(info *fsHashInfo) {
+			info.PurgedUnix = 0 // just in-case it was set for some reason
 			info.AUM = &aum
 		})
 		if err != nil {
@@ -421,6 +486,31 @@ func (c *FS) CommitVerifiedAUMs(updates []AUM) error {
 	return nil
 }
 
+// PurgeAUMs marks the specified AUMs for deletion from storage.
+//
+// Each matching entry has its PurgedUnix field set to the current unix
+// time. Hashes whose entry holds no AUM, or which are already marked
+// as purged, are skipped. A failure to read or commit any entry aborts
+// the operation and is returned.
+func (c *FS) PurgeAUMs(hashes []AUMHash) error {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	// Use a single timestamp so every AUM purged by this call agrees.
+	purgedAt := time.Now().Unix()
+	markPurged := func(info *fsHashInfo) {
+		info.PurgedUnix = purgedAt
+	}
+
+	for idx, hash := range hashes {
+		entry, err := c.get(hash)
+		if err != nil {
+			return fmt.Errorf("reading %d (%x): %w", idx, hash, err)
+		}
+		if entry.AUM != nil && entry.PurgedUnix <= 0 {
+			if err := c.commit(hash, markPurged); err != nil {
+				return fmt.Errorf("committing purge[%d] (%x): %w", idx, hash, err)
+			}
+		}
+	}
+	return nil
+}
+
 // commit calls the provided updater function to record changes relevant
 // to the given hash. The caller is expected to update the AUM and
 // Children fields, as relevant.
@@ -430,6 +520,7 @@ func (c *FS) commit(h AUMHash, updater func(*fsHashInfo)) error {
 	existing, err := c.get(h)
 	switch {
 	case os.IsNotExist(err):
+		toCommit.CreatedUnix = time.Now().Unix()
 	case err != nil:
 		return err
 	default:
@@ -754,5 +845,8 @@ func Compact(storage CompactableChonk, head AUMHash, opts CompactionOptions) (la
 		}
 	}
 
+	if err := storage.SetLastActiveAncestor(lastActiveAncestor); err != nil {
+		return AUMHash{}, err
+	}
 	return lastActiveAncestor, storage.PurgeAUMs(toDelete)
 }

+ 79 - 0
tka/tailchonk_test.go

@@ -173,6 +173,85 @@ func TestTailchonkFS_Commit(t *testing.T) {
 	if _, err := os.Stat(filepath.Join(chonk.base, "M7", "M7LL2NDB4NKCZIUPVS6RDM2GUOIMW6EEAFVBWMVCPUANQJPHT3SQ")); err != nil {
 		t.Errorf("stat of AUM parent failed: %v", err)
 	}
+
+	info, err := chonk.get(aum.Hash())
+	if err != nil {
+		t.Fatal(err)
+	}
+	if info.PurgedUnix > 0 {
+		t.Errorf("recently-created AUM PurgedUnix = %d, want 0", info.PurgedUnix)
+	}
+}
+
+// TestTailchonkFS_CommitTime checks that an AUM which was just committed
+// reports a commit time within a minute of the current time.
+func TestTailchonkFS_CommitTime(t *testing.T) {
+	store := &FS{base: t.TempDir()}
+	prev := randHash(t, 1)
+	noop := AUM{MessageKind: AUMNoOp, PrevAUMHash: prev[:]}
+
+	if err := store.CommitVerifiedAUMs([]AUM{noop}); err != nil {
+		t.Fatal(err)
+	}
+
+	got, err := store.CommitTime(noop.Hash())
+	if err != nil {
+		t.Fatalf("CommitTime() failed: %v", err)
+	}
+
+	now := time.Now()
+	earliest, latest := now.Add(-time.Minute), now.Add(time.Minute)
+	if got.Before(earliest) || got.After(latest) {
+		t.Errorf("commit time was wrong: %v more than a minute off from now (%v)", got, time.Now())
+	}
+}
+
+// TestTailchonkFS_PurgeAUMs checks that a purged AUM can no longer be
+// read via AUM(), while its stored entry records a non-zero PurgedUnix.
+func TestTailchonkFS_PurgeAUMs(t *testing.T) {
+	store := &FS{base: t.TempDir()}
+	prev := randHash(t, 1)
+	noop := AUM{MessageKind: AUMNoOp, PrevAUMHash: prev[:]}
+	hash := noop.Hash()
+
+	if err := store.CommitVerifiedAUMs([]AUM{noop}); err != nil {
+		t.Fatal(err)
+	}
+	if err := store.PurgeAUMs([]AUMHash{hash}); err != nil {
+		t.Fatal(err)
+	}
+
+	// Reading the AUM through the public accessor must now fail.
+	_, err := store.AUM(hash)
+	if err != os.ErrNotExist {
+		t.Errorf("AUM() on purged AUM returned err = %v, want ErrNotExist", err)
+	}
+
+	// The raw entry is still readable and carries the purge marker.
+	entry, err := store.get(hash)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if entry.PurgedUnix == 0 {
+		t.Errorf("recently-created AUM PurgedUnix = %d, want non-zero", entry.PurgedUnix)
+	}
+}
+
+// TestTailchonkFS_AllAUMs commits a three-AUM chain and checks that
+// AllAUMs reports the hash of each committed AUM.
+func TestTailchonkFS_AllAUMs(t *testing.T) {
+	store := &FS{base: t.TempDir()}
+
+	// Build the chain genesis <- intermediate <- leaf.
+	genesis := AUM{MessageKind: AUMRemoveKey, KeyID: []byte{1, 2}}
+	genesisHash := genesis.Hash()
+	intermediate := AUM{PrevAUMHash: genesisHash[:]}
+	intermediateHash := intermediate.Hash()
+	leaf := AUM{PrevAUMHash: intermediateHash[:]}
+
+	if err := store.CommitVerifiedAUMs([]AUM{genesis, intermediate, leaf}); err != nil {
+		t.Fatalf("CommitVerifiedAUMs failed: %v", err)
+	}
+
+	got, err := store.AllAUMs()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Sort both sides before diffing so result order does not matter.
+	want := []AUMHash{genesisHash, intermediateHash, leaf.Hash()}
+	sorted := cmpopts.SortSlices(func(a, b AUMHash) bool {
+		return bytes.Compare(a[:], b[:]) < 0
+	})
+	if diff := cmp.Diff(want, got, sorted); diff != "" {
+		t.Fatalf("AllAUMs() output differs (-want, +got):\n%s", diff)
+	}
 }
 
 func TestMarkActiveChain(t *testing.T) {

+ 14 - 0
tka/tka.go

@@ -720,3 +720,17 @@ func (a *Authority) Keys() []Key {
 func (a *Authority) StateIDs() (uint64, uint64) {
 	return a.state.StateID1, a.state.StateID2
 }
+
+// Compact deletes historical AUMs based on the given compaction options.
+//
+// Compaction runs against storage starting from the authority's current
+// head. On success, the AUM identified by the compaction result is read
+// back from storage and recorded as the authority's oldest ancestor. If
+// either step fails, the error is returned and the authority is left
+// unmodified.
+func (a *Authority) Compact(storage CompactableChonk, o CompactionOptions) error {
+	ancestorHash, err := Compact(storage, a.head.Hash(), o)
+	if err != nil {
+		return err
+	}
+
+	oldest, err := storage.AUM(ancestorHash)
+	if err != nil {
+		return err
+	}
+
+	a.oldestAncestor = oldest
+	return nil
+}

+ 50 - 0
tka/tka_test.go

@@ -474,3 +474,53 @@ func TestInteropWithNLKey(t *testing.T) {
 		t.Error("pub3 want untrusted, got trusted")
 	}
 }
+
+// TestAuthorityCompact builds the chain G -> A -> B -> C -> D -> E (with
+// checkpoints at G and C), compacts it, and checks that:
+//   - the authority's oldest ancestor becomes C,
+//   - the stored authority can still be opened and has the same head,
+//   - the last active ancestor recorded in storage is C.
+func TestAuthorityCompact(t *testing.T) {
+	pub, priv := testingKey25519(t, 1)
+	key := Key{Kind: Key25519, Public: pub, Votes: 2}
+
+	c := newTestchain(t, `
+        G -> A -> B -> C -> D -> E
+
+        G.template = genesis
+        C.template = checkpoint2
+    `,
+		optTemplate("genesis", AUM{MessageKind: AUMCheckpoint, State: &State{
+			Keys:               []Key{key},
+			DisablementSecrets: [][]byte{DisablementKDF([]byte{1, 2, 3})},
+		}}),
+		optTemplate("checkpoint2", AUM{MessageKind: AUMCheckpoint, State: &State{
+			Keys:               []Key{key},
+			DisablementSecrets: [][]byte{DisablementKDF([]byte{1, 2, 3})},
+		}}),
+		optKey("key", key, priv),
+		optSignAllUsing("key"))
+
+	storage := &FS{base: t.TempDir()}
+	a, err := Bootstrap(storage, c.AUMs["G"])
+	if err != nil {
+		t.Fatalf("Bootstrap() failed: %v", err)
+	}
+	a.Inform(storage, []AUM{c.AUMs["A"], c.AUMs["B"], c.AUMs["C"], c.AUMs["D"], c.AUMs["E"]})
+
+	// Should compact down to C -> D -> E
+	if err := a.Compact(storage, CompactionOptions{MinChain: 2, MinAge: 1}); err != nil {
+		t.Fatal(err)
+	}
+	if a.oldestAncestor.Hash() != c.AUMHashes["C"] {
+		t.Errorf("ancestor = %v, want %v", a.oldestAncestor.Hash(), c.AUMHashes["C"])
+	}
+
+	// Make sure the stored authority is still openable and resolves to the same state.
+	stored, err := Open(storage)
+	if err != nil {
+		t.Fatalf("Failed to open stored authority: %v", err)
+	}
+	if stored.Head() != a.Head() {
+		t.Errorf("Stored authority head differs: head = %v, want %v", stored.Head(), a.Head())
+	}
+	t.Logf("original ancestor = %v", c.AUMHashes["G"])
+
+	// Check the error and guard against a nil pointer before
+	// dereferencing, so a storage failure is reported as a test
+	// failure rather than a panic.
+	anc, ancErr := storage.LastActiveAncestor()
+	if ancErr != nil {
+		t.Fatalf("LastActiveAncestor() failed: %v", ancErr)
+	}
+	if anc == nil {
+		t.Fatalf("LastActiveAncestor() = nil, want %v", c.AUMHashes["C"])
+	}
+	if *anc != c.AUMHashes["C"] {
+		t.Errorf("ancestor = %v, want %v", *anc, c.AUMHashes["C"])
+	}
+}