Browse Source

ipn/ipnlocal: use an in-memory TKA store if FS is unavailable

This requires making the internals of LocalBackend a bit more generic,
and implementing the `tka.CompactableChonk` interface for `tka.Mem`.

Signed-off-by: Alex Chan <[email protected]>

Updates https://github.com/tailscale/corp/issues/33599
Alex Chan 4 months ago
parent
commit
1723cb83ed

+ 1 - 0
cmd/tailscale/cli/up.go

@@ -818,6 +818,7 @@ func upWorthyWarning(s string) bool {
 		strings.Contains(s, healthmsg.WarnAcceptRoutesOff) ||
 		strings.Contains(s, healthmsg.LockedOut) ||
 		strings.Contains(s, healthmsg.WarnExitNodeUsage) ||
+		strings.Contains(s, healthmsg.InMemoryTailnetLockState) ||
 		strings.Contains(strings.ToLower(s), "update available: ")
 }
 

+ 6 - 5
health/healthmsg/healthmsg.go

@@ -8,9 +8,10 @@
 package healthmsg
 
 const (
-	WarnAcceptRoutesOff = "Some peers are advertising routes but --accept-routes is false"
-	TailscaleSSHOnBut   = "Tailscale SSH enabled, but " // + ... something from caller
-	LockedOut           = "this node is locked out; it will not have connectivity until it is signed. For more info, see https://tailscale.com/s/locked-out"
-	WarnExitNodeUsage   = "The following issues on your machine will likely make usage of exit nodes impossible"
-	DisableRPFilter     = "Please set rp_filter=2 instead of rp_filter=1; see https://github.com/tailscale/tailscale/issues/3310"
+	WarnAcceptRoutesOff      = "Some peers are advertising routes but --accept-routes is false"
+	TailscaleSSHOnBut        = "Tailscale SSH enabled, but " // + ... something from caller
+	LockedOut                = "this node is locked out; it will not have connectivity until it is signed. For more info, see https://tailscale.com/s/locked-out"
+	WarnExitNodeUsage        = "The following issues on your machine will likely make usage of exit nodes impossible"
+	DisableRPFilter          = "Please set rp_filter=2 instead of rp_filter=1; see https://github.com/tailscale/tailscale/issues/3310"
+	InMemoryTailnetLockState = "Tailnet Lock state is only being stored in-memory. Set --statedir to store state on disk, which is more secure. See https://tailscale.com/kb/1226/tailnet-lock#tailnet-lock-state"
 )

+ 33 - 21
ipn/ipnlocal/network-lock.go

@@ -23,6 +23,7 @@ import (
 	"slices"
 	"time"
 
+	"tailscale.com/health"
 	"tailscale.com/health/healthmsg"
 	"tailscale.com/ipn"
 	"tailscale.com/ipn/ipnstate"
@@ -54,7 +55,7 @@ var (
 type tkaState struct {
 	profile   ipn.ProfileID
 	authority *tka.Authority
-	storage   *tka.FS
+	storage   tka.CompactableChonk
 	filtered  []ipnstate.TKAPeer
 }
 
@@ -75,7 +76,7 @@ func (b *LocalBackend) initTKALocked() error {
 	root := b.TailscaleVarRoot()
 	if root == "" {
 		b.tka = nil
-		b.logf("network-lock unavailable; no state directory")
+		b.logf("cannot fetch existing TKA state; no state directory for network-lock")
 		return nil
 	}
 
@@ -90,6 +91,7 @@ func (b *LocalBackend) initTKALocked() error {
 		if err != nil {
 			return fmt.Errorf("initializing tka: %v", err)
 		}
+
 		if err := authority.Compact(storage, tkaCompactionDefaults); err != nil {
 			b.logf("tka compaction failed: %v", err)
 		}
@@ -105,6 +107,16 @@ func (b *LocalBackend) initTKALocked() error {
 	return nil
 }
 
+// noNetworkLockStateDirWarnable is a Warnable to warn the user that Tailnet Lock data
+// (in particular, the list of AUMs in the TKA state) is being stored in memory and will
+// be lost when tailscaled restarts.
+var noNetworkLockStateDirWarnable = health.Register(&health.Warnable{
+	Code:     "no-tailnet-lock-state-dir",
+	Title:    "No statedir for Tailnet Lock",
+	Severity: health.SeverityMedium,
+	Text:     health.StaticMessage(healthmsg.InMemoryTailnetLockState),
+})
+
 // tkaFilterNetmapLocked checks the signatures on each node key, dropping
 // nodes from the netmap whose signature does not verify.
 //
@@ -447,7 +459,7 @@ func (b *LocalBackend) tkaSyncLocked(ourNodeKey key.NodePublic) error {
 // b.mu must be held & TKA must be initialized.
 func (b *LocalBackend) tkaApplyDisablementLocked(secret []byte) error {
 	if b.tka.authority.ValidDisablement(secret) {
-		if err := os.RemoveAll(b.chonkPathLocked()); err != nil {
+		if err := b.tka.storage.RemoveAll(); err != nil {
 			return err
 		}
 		b.tka = nil
@@ -491,19 +503,21 @@ func (b *LocalBackend) tkaBootstrapFromGenesisLocked(g tkatype.MarshaledAUM, per
 		}
 	}
 
-	chonkDir := b.chonkPathLocked()
-	if err := os.Mkdir(filepath.Dir(chonkDir), 0755); err != nil && !os.IsExist(err) {
-		return fmt.Errorf("creating chonk root dir: %v", err)
-	}
-	if err := os.Mkdir(chonkDir, 0755); err != nil && !os.IsExist(err) {
-		return fmt.Errorf("mkdir: %v", err)
-	}
-
-	chonk, err := tka.ChonkDir(chonkDir)
-	if err != nil {
-		return fmt.Errorf("chonk: %v", err)
+	root := b.TailscaleVarRoot()
+	var storage tka.CompactableChonk
+	if root == "" {
+		b.health.SetUnhealthy(noNetworkLockStateDirWarnable, nil)
+		b.logf("network-lock using in-memory storage; no state directory")
+		storage = &tka.Mem{}
+	} else {
+		chonkDir := b.chonkPathLocked()
+		chonk, err := tka.ChonkDir(chonkDir)
+		if err != nil {
+			return fmt.Errorf("chonk: %v", err)
+		}
+		storage = chonk
 	}
-	authority, err := tka.Bootstrap(chonk, genesis)
+	authority, err := tka.Bootstrap(storage, genesis)
 	if err != nil {
 		return fmt.Errorf("tka bootstrap: %v", err)
 	}
@@ -511,7 +525,7 @@ func (b *LocalBackend) tkaBootstrapFromGenesisLocked(g tkatype.MarshaledAUM, per
 	b.tka = &tkaState{
 		profile:   b.pm.CurrentProfile().ID(),
 		authority: authority,
-		storage:   chonk,
+		storage:   storage,
 	}
 	return nil
 }
@@ -524,10 +538,6 @@ func (b *LocalBackend) CanSupportNetworkLock() error {
 		return nil
 	}
 
-	if b.TailscaleVarRoot() == "" {
-		return errors.New("network-lock is not supported in this configuration, try setting --statedir")
-	}
-
 	// There's a var root (aka --statedir), so if network lock gets
 	// initialized we have somewhere to store our AUMs. That's all
 	// we need.
@@ -647,6 +657,7 @@ func tkaStateFromPeer(p tailcfg.NodeView) ipnstate.TKAPeer {
 // needing signatures is returned as a response.
 // The Finish RPC submits signatures for all these nodes, at which point
 // Control has everything it needs to atomically enable network lock.
+// TODO(alexc): Only with persistent backend
 func (b *LocalBackend) NetworkLockInit(keys []tka.Key, disablementValues [][]byte, supportDisablement []byte) error {
 	if err := b.CanSupportNetworkLock(); err != nil {
 		return err
@@ -767,7 +778,7 @@ func (b *LocalBackend) NetworkLockForceLocalDisable() error {
 		return fmt.Errorf("saving prefs: %w", err)
 	}
 
-	if err := os.RemoveAll(b.chonkPathLocked()); err != nil {
+	if err := b.tka.storage.RemoveAll(); err != nil {
 		return fmt.Errorf("deleting TKA state: %w", err)
 	}
 	b.tka = nil
@@ -776,6 +787,7 @@ func (b *LocalBackend) NetworkLockForceLocalDisable() error {
 
 // NetworkLockSign signs the given node-key and submits it to the control plane.
 // rotationPublic, if specified, must be an ed25519 public key.
+// TODO(alexc): in-memory only
 func (b *LocalBackend) NetworkLockSign(nodeKey key.NodePublic, rotationPublic []byte) error {
 	ourNodeKey, sig, err := func(nodeKey key.NodePublic, rotationPublic []byte) (key.NodePublic, tka.NodeKeySignature, error) {
 		b.mu.Lock()

+ 91 - 1
tka/tailchonk.go

@@ -10,6 +10,7 @@ import (
 	"errors"
 	"fmt"
 	"log"
+	"maps"
 	"os"
 	"path/filepath"
 	"slices"
@@ -57,6 +58,10 @@ type Chonk interface {
 	// as a hint to pick the correct chain in the event that the Chonk stores
 	// multiple distinct chains.
 	LastActiveAncestor() (*AUMHash, error)
+
+	// RemoveAll permanently and completely clears the TKA state. This should
+	// be called when the user disables Tailnet Lock.
+	RemoveAll() error
 }
 
 // CompactableChonk implementation are extensions of Chonk, which are
@@ -78,12 +83,21 @@ type CompactableChonk interface {
 }
 
 // Mem implements in-memory storage of TKA state, suitable for
-// tests.
+// tests or cases where filesystem storage is unavailable.
 //
 // Mem implements the Chonk interface.
+//
+// Mem is thread-safe.
 type Mem struct {
 	mu          sync.RWMutex
 	aums        map[AUMHash]AUM
+	commitTimes map[AUMHash]time.Time
+
+	// parentIndex is a map of AUMs to the AUMs for which they are
+	// the parent.
+	//
+	// For example, if parent index is {1 -> {2, 3, 4}}, that means
+	// that AUMs 2, 3, 4 all have aum.PrevAUMHash = 1.
 	parentIndex map[AUMHash][]AUMHash
 
 	lastActiveAncestor *AUMHash
@@ -152,12 +166,14 @@ func (c *Mem) CommitVerifiedAUMs(updates []AUM) error {
 	if c.aums == nil {
 		c.parentIndex = make(map[AUMHash][]AUMHash, 64)
 		c.aums = make(map[AUMHash]AUM, 64)
+		c.commitTimes = make(map[AUMHash]time.Time, 64)
 	}
 
 updateLoop:
 	for _, aum := range updates {
 		aumHash := aum.Hash()
 		c.aums[aumHash] = aum
+		c.commitTimes[aumHash] = time.Now()
 
 		parent, ok := aum.Parent()
 		if ok {
@@ -173,6 +189,71 @@ updateLoop:
 	return nil
 }
 
+// RemoveAll permanently and completely clears the TKA state.
+func (c *Mem) RemoveAll() error {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	c.aums = nil
+	c.commitTimes = nil
+	c.parentIndex = nil
+	c.lastActiveAncestor = nil
+	return nil
+}
+
+// AllAUMs returns all AUMs stored in the chonk.
+func (c *Mem) AllAUMs() ([]AUMHash, error) {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	return slices.Collect(maps.Keys(c.aums)), nil
+}
+
+// CommitTime returns the time at which the AUM was committed.
+//
+// If the AUM does not exist, then os.ErrNotExist is returned.
+func (c *Mem) CommitTime(h AUMHash) (time.Time, error) {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	t, ok := c.commitTimes[h]
+	if ok {
+		return t, nil
+	} else {
+		return time.Time{}, os.ErrNotExist
+	}
+}
+
+// PurgeAUMs marks the specified AUMs for deletion from storage.
+func (c *Mem) PurgeAUMs(hashes []AUMHash) error {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	for _, h := range hashes {
+		// Remove the deleted AUM from the list of its parents' children.
+		//
+		// However, we leave the list of this AUM's children in parentIndex,
+		// so we can find them later in ChildAUMs().
+		if aum, ok := c.aums[h]; ok {
+			parent, hasParent := aum.Parent()
+			if hasParent {
+				c.parentIndex[parent] = slices.DeleteFunc(
+					c.parentIndex[parent],
+					func(other AUMHash) bool { return bytes.Equal(h[:], other[:]) },
+				)
+				if len(c.parentIndex[parent]) == 0 {
+					delete(c.parentIndex, parent)
+				}
+			}
+		}
+
+		// Delete this AUM from the list of AUMs and commit times.
+		delete(c.aums, h)
+		delete(c.commitTimes, h)
+	}
+
+	return nil
+}
+
 // FS implements filesystem storage of TKA state.
 //
 // FS implements the Chonk interface.
@@ -184,6 +265,10 @@ type FS struct {
 // ChonkDir returns an implementation of Chonk which uses the
 // given directory to store TKA state.
 func ChonkDir(dir string) (*FS, error) {
+	if err := os.MkdirAll(dir, 0755); err != nil && !os.IsExist(err) {
+		return nil, fmt.Errorf("creating chonk root dir: %v", err)
+	}
+
 	stat, err := os.Stat(dir)
 	if err != nil {
 		return nil, err
@@ -376,6 +461,11 @@ func (c *FS) Heads() ([]AUM, error) {
 	return out, nil
 }
 
+// RemoveAll permanently and completely clears the TKA state.
+func (c *FS) RemoveAll() error {
+	return os.RemoveAll(c.base)
+}
+
 // AllAUMs returns all AUMs stored in the chonk.
 func (c *FS) AllAUMs() ([]AUMHash, error) {
 	c.mu.RLock()

+ 37 - 0
tka/tailchonk_test.go

@@ -127,6 +127,43 @@ func TestTailchonkFS_IgnoreTempFile(t *testing.T) {
 	}
 }
 
+// If we use a non-existent directory with filesystem Chonk storage,
+// it's automatically created.
+func TestTailchonkFS_CreateChonkDir(t *testing.T) {
+	base := filepath.Join(t.TempDir(), "a", "b", "c")
+
+	chonk, err := ChonkDir(base)
+	if err != nil {
+		t.Fatalf("ChonkDir: %v", err)
+	}
+
+	aum := AUM{MessageKind: AUMNoOp}
+	must.Do(chonk.CommitVerifiedAUMs([]AUM{aum}))
+
+	got, err := chonk.AUM(aum.Hash())
+	if err != nil {
+		t.Errorf("Chonk.AUM: %v", err)
+	}
+	if diff := cmp.Diff(got, aum); diff != "" {
+		t.Errorf("wrong AUM; (-got+want):%v", diff)
+	}
+
+	if _, err := os.Stat(base); err != nil {
+		t.Errorf("os.Stat: %v", err)
+	}
+}
+
+// You can't use a file as the root of your filesystem Chonk storage.
+func TestTailchonkFS_CannotUseFile(t *testing.T) {
+	base := filepath.Join(t.TempDir(), "tka_storage.txt")
+	must.Do(os.WriteFile(base, []byte("this won't work"), 0644))
+
+	_, err := ChonkDir(base)
+	if err == nil {
+		t.Fatal("ChonkDir succeeded; expected an error")
+	}
+}
+
 func TestMarkActiveChain(t *testing.T) {
 	type aumTemplate struct {
 		AUM AUM

+ 6 - 0
tstest/chonktest/tailchonk_test.go

@@ -39,6 +39,12 @@ func TestImplementsCompactableChonk(t *testing.T) {
 		name     string
 		newChonk func(t *testing.T) tka.CompactableChonk
 	}{
+		{
+			name: "Mem",
+			newChonk: func(t *testing.T) tka.CompactableChonk {
+				return &tka.Mem{}
+			},
+		},
 		{
 			name: "FS",
 			newChonk: func(t *testing.T) tka.CompactableChonk {