Browse Source

ipn/ipnlocal: add basic support for netmap caching (#18530)

This commit is based on ff0978ab, and extends #18497 to connect network map
caching to the LocalBackend. As implemented, only "whole" netmap values are
stored, and we do not yet handle incremental updates. As written, the feature must
be explicitly enabled via the TS_USE_CACHED_NETMAP envknob, and must be
considered experimental.

Updates #12639

Co-Authored-by: Brad Fitzpatrick <[email protected]>
Change-Id: I48a1e92facfbf7fb3a8e67cff7f2c9ab4ed62c83
Signed-off-by: M. J. Fromberger <[email protected]>
M. J. Fromberger 2 weeks ago
parent
commit
f4aea70f7a

+ 1 - 0
cmd/k8s-operator/depaware.txt

@@ -821,6 +821,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
      💣 tailscale.com/ipn/ipnauth                                    from tailscale.com/ipn/ipnlocal+
         tailscale.com/ipn/ipnext                                     from tailscale.com/ipn/ipnlocal
         tailscale.com/ipn/ipnlocal                                   from tailscale.com/ipn/localapi+
+        tailscale.com/ipn/ipnlocal/netmapcache                       from tailscale.com/ipn/ipnlocal
         tailscale.com/ipn/ipnstate                                   from tailscale.com/client/local+
         tailscale.com/ipn/localapi                                   from tailscale.com/tsnet
         tailscale.com/ipn/store                                      from tailscale.com/ipn/ipnlocal+

+ 1 - 0
cmd/tailscaled/depaware-min.txt

@@ -71,6 +71,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
         tailscale.com/ipn/ipnauth                                    from tailscale.com/ipn/ipnext+
         tailscale.com/ipn/ipnext                                     from tailscale.com/ipn/ipnlocal
         tailscale.com/ipn/ipnlocal                                   from tailscale.com/cmd/tailscaled+
+        tailscale.com/ipn/ipnlocal/netmapcache                       from tailscale.com/ipn/ipnlocal
         tailscale.com/ipn/ipnserver                                  from tailscale.com/cmd/tailscaled
         tailscale.com/ipn/ipnstate                                   from tailscale.com/control/controlclient+
         tailscale.com/ipn/localapi                                   from tailscale.com/ipn/ipnserver

+ 1 - 0
cmd/tailscaled/depaware-minbox.txt

@@ -85,6 +85,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
         tailscale.com/ipn/ipnauth                                    from tailscale.com/ipn/ipnext+
         tailscale.com/ipn/ipnext                                     from tailscale.com/ipn/ipnlocal
         tailscale.com/ipn/ipnlocal                                   from tailscale.com/cmd/tailscaled+
+        tailscale.com/ipn/ipnlocal/netmapcache                       from tailscale.com/ipn/ipnlocal
         tailscale.com/ipn/ipnserver                                  from tailscale.com/cmd/tailscaled
         tailscale.com/ipn/ipnstate                                   from tailscale.com/control/controlclient+
         tailscale.com/ipn/localapi                                   from tailscale.com/ipn/ipnserver

+ 1 - 0
cmd/tailscaled/depaware.txt

@@ -318,6 +318,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
      💣 tailscale.com/ipn/ipnauth                                    from tailscale.com/ipn/ipnlocal+
         tailscale.com/ipn/ipnext                                     from tailscale.com/ipn/auditlog+
         tailscale.com/ipn/ipnlocal                                   from tailscale.com/cmd/tailscaled+
+        tailscale.com/ipn/ipnlocal/netmapcache                       from tailscale.com/ipn/ipnlocal
         tailscale.com/ipn/ipnserver                                  from tailscale.com/cmd/tailscaled
         tailscale.com/ipn/ipnstate                                   from tailscale.com/client/local+
         tailscale.com/ipn/localapi                                   from tailscale.com/ipn/ipnserver+

+ 1 - 0
cmd/tsidp/depaware.txt

@@ -240,6 +240,7 @@ tailscale.com/cmd/tsidp dependencies: (generated by github.com/tailscale/depawar
      💣 tailscale.com/ipn/ipnauth                                    from tailscale.com/ipn/ipnext+
         tailscale.com/ipn/ipnext                                     from tailscale.com/ipn/ipnlocal
         tailscale.com/ipn/ipnlocal                                   from tailscale.com/ipn/localapi+
+        tailscale.com/ipn/ipnlocal/netmapcache                       from tailscale.com/ipn/ipnlocal
         tailscale.com/ipn/ipnstate                                   from tailscale.com/client/local+
         tailscale.com/ipn/localapi                                   from tailscale.com/tsnet
         tailscale.com/ipn/store                                      from tailscale.com/ipn/ipnlocal+

+ 56 - 0
ipn/ipnlocal/diskcache.go

@@ -0,0 +1,56 @@
+// Copyright (c) Tailscale Inc & contributors
+// SPDX-License-Identifier: BSD-3-Clause
+
+package ipnlocal
+
+import (
+	"tailscale.com/feature/buildfeatures"
+	"tailscale.com/ipn/ipnlocal/netmapcache"
+	"tailscale.com/types/netmap"
+)
+
+// diskCache holds the state for caching netmaps to disk.
+type diskCache struct {
+	// all fields guarded by LocalBackend.mu
+
+	dir   string             // active profile cache directory
+	cache *netmapcache.Cache // cache built on netmapcache.FileStore(dir)
+}
+
+// writeNetmapToDiskLocked stores nm in the netmap disk cache kept under the
+// current profile's "netmap-cache" directory.
+//
+// It does nothing and returns nil when buildfeatures.HasCacheNetMap is false,
+// when nm is nil, or when nm.Cached is set. The cache handle in b.diskCache is
+// (re)built when none exists yet or when the directory differs from the one
+// it was built for.
+//
+// b.mu must be held.
+func (b *LocalBackend) writeNetmapToDiskLocked(nm *netmap.NetworkMap) error {
+	if !buildfeatures.HasCacheNetMap {
+		return nil
+	}
+	if nm == nil || nm.Cached {
+		return nil
+	}
+	b.logf("writing netmap to disk cache")
+
+	cacheDir, err := b.profileMkdirAllLocked(b.pm.CurrentProfile().ID(), "netmap-cache")
+	if err != nil {
+		return err
+	}
+
+	// Reuse the existing cache handle unless it is missing or points at a
+	// different directory than the current profile's.
+	dc := &b.diskCache
+	if dc.cache == nil || dc.dir != cacheDir {
+		dc.cache = netmapcache.NewCache(netmapcache.FileStore(cacheDir))
+		dc.dir = cacheDir
+	}
+	return dc.cache.Store(b.currentNode().Context(), nm)
+}
+
+// loadDiskCacheLocked reads a network map from the netmap disk cache kept
+// under the current profile's "netmap-cache" directory.
+//
+// It reports ok == false when buildfeatures.HasCacheNetMap is false, when the
+// profile directory cannot be prepared, or when the cache load fails; the
+// latter two failures are logged rather than returned. The cache handle in
+// b.diskCache is (re)built when none exists yet or when the directory differs
+// from the one it was built for.
+//
+// b.mu must be held.
+func (b *LocalBackend) loadDiskCacheLocked() (om *netmap.NetworkMap, ok bool) {
+	if !buildfeatures.HasCacheNetMap {
+		return nil, false
+	}
+
+	cacheDir, err := b.profileMkdirAllLocked(b.pm.CurrentProfile().ID(), "netmap-cache")
+	if err != nil {
+		b.logf("profile data directory: %v", err)
+		return nil, false
+	}
+
+	// Reuse the existing cache handle unless it is missing or points at a
+	// different directory than the current profile's.
+	dc := &b.diskCache
+	if dc.cache == nil || dc.dir != cacheDir {
+		dc.cache = netmapcache.NewCache(netmapcache.FileStore(cacheDir))
+		dc.dir = cacheDir
+	}
+
+	cached, err := dc.cache.Load(b.currentNode().Context())
+	if err != nil {
+		b.logf("load netmap from cache: %v", err)
+		return nil, false
+	}
+	return cached, true
+}

+ 30 - 8
ipn/ipnlocal/local.go

@@ -271,6 +271,7 @@ type LocalBackend struct {
 	// of [LocalBackend]'s own state that is not tied to the node context.
 	currentNodeAtomic atomic.Pointer[nodeBackend]
 
+	diskCache        diskCache
 	conf             *conffile.Config // latest parsed config, or nil if not in declarative mode
 	pm               *profileManager  // mu guards access
 	lastFilterInputs *filterInputs
@@ -1573,7 +1574,13 @@ func (b *LocalBackend) SetControlClientStatus(c controlclient.Client, st control
 	}
 	b.mu.Lock()
 	defer b.mu.Unlock()
+	b.setControlClientStatusLocked(c, st)
+}
 
+// setControlClientStatusLocked is the locked version of SetControlClientStatus.
+//
+// b.mu must be held.
+func (b *LocalBackend) setControlClientStatusLocked(c controlclient.Client, st controlclient.Status) {
 	if b.cc != c {
 		b.logf("Ignoring SetControlClientStatus from old client")
 		return
@@ -2414,6 +2421,14 @@ func (b *LocalBackend) initOnce() {
 	b.extHost.Init()
 }
 
+// controlDebugFlags returns the debug flags to hand to the control client:
+// the package-level controlDebugFlags, preceded by "netstack" when
+// b.sys.IsNetstackRouter reports true.
+func (b *LocalBackend) controlDebugFlags() []string {
+	if !b.sys.IsNetstackRouter() {
+		return controlDebugFlags
+	}
+	// Prepend onto a fresh slice so the package-level flags are left untouched.
+	return append([]string{"netstack"}, controlDebugFlags...)
+}
+
 // Start applies the configuration specified in opts, and starts the
 // state machine.
 //
@@ -2570,14 +2585,18 @@ func (b *LocalBackend) startLocked(opts ipn.Options) error {
 		persistv = new(persist.Persist)
 	}
 
-	discoPublic := b.MagicConn().DiscoPublicKey()
-
-	isNetstack := b.sys.IsNetstackRouter()
-	debugFlags := controlDebugFlags
-	if isNetstack {
-		debugFlags = append([]string{"netstack"}, debugFlags...)
+	if envknob.Bool("TS_USE_CACHED_NETMAP") {
+		if nm, ok := b.loadDiskCacheLocked(); ok {
+			logf("loaded netmap from disk cache; %d peers", len(nm.Peers))
+			b.setControlClientStatusLocked(nil, controlclient.Status{
+				NetMap:   nm,
+				LoggedIn: true, // sure
+			})
+		}
 	}
 
+	discoPublic := b.MagicConn().DiscoPublicKey()
+
 	var ccShutdownCbs []func()
 	ccShutdown := func() {
 		for _, cb := range ccShutdownCbs {
@@ -2603,7 +2622,7 @@ func (b *LocalBackend) startLocked(opts ipn.Options) error {
 		Hostinfo:             b.hostInfoWithServicesLocked(),
 		HTTPTestClient:       httpTestClient,
 		DiscoPublicKey:       discoPublic,
-		DebugFlags:           debugFlags,
+		DebugFlags:           b.controlDebugFlags(),
 		HealthTracker:        b.health,
 		PolicyClient:         b.sys.PolicyClientOrDefault(),
 		Pinger:               b,
@@ -2619,7 +2638,7 @@ func (b *LocalBackend) startLocked(opts ipn.Options) error {
 
 		// Don't warn about broken Linux IP forwarding when
 		// netstack is being used.
-		SkipIPForwardingCheck: isNetstack,
+		SkipIPForwardingCheck: b.sys.IsNetstackRouter(),
 	})
 	if err != nil {
 		return err
@@ -6248,6 +6267,9 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) {
 	var login string
 	if nm != nil {
 		login = cmp.Or(profileFromView(nm.UserProfiles[nm.User()]).LoginName, "<missing-profile>")
+		if err := b.writeNetmapToDiskLocked(nm); err != nil {
+			b.logf("write netmap to cache: %v", err)
+		}
 	}
 	b.currentNode().SetNetMap(nm)
 	if ms, ok := b.sys.MagicSock.GetOK(); ok {

+ 100 - 0
ipn/ipnlocal/local_test.go

@@ -41,6 +41,7 @@ import (
 	"tailscale.com/ipn"
 	"tailscale.com/ipn/conffile"
 	"tailscale.com/ipn/ipnauth"
+	"tailscale.com/ipn/ipnlocal/netmapcache"
 	"tailscale.com/ipn/store/mem"
 	"tailscale.com/net/netcheck"
 	"tailscale.com/net/netmon"
@@ -611,6 +612,105 @@ func makeExitNode(id tailcfg.NodeID, opts ...peerOptFunc) tailcfg.NodeView {
 	return makePeer(id, append([]peerOptFunc{withCap(26), withSuggest(), withExitRoutes()}, opts...)...)
 }
 
+// TestLoadCachedNetMap checks that a LocalBackend started with
+// TS_USE_CACHED_NETMAP set adopts the network map previously stored in the
+// on-disk netmap cache of its current profile.
+func TestLoadCachedNetMap(t *testing.T) {
+	t.Setenv("TS_USE_CACHED_NETMAP", "1")
+
+	// Create the cache directory for profile "id0" beneath a scratch var root.
+	root := t.TempDir()
+	dir := filepath.Join(root, "profile-data", "id0", "netmap-cache")
+	if mkErr := os.MkdirAll(dir, 0700); mkErr != nil {
+		t.Fatalf("Create cache directory: %v", mkErr)
+	}
+
+	// Assemble a small network map: a self node, its owner's profile, and two
+	// peers (one of them tagged).
+	selfNode := &tailcfg.Node{
+		Name:      "example.ts.net",
+		User:      tailcfg.UserID(1),
+		Addresses: []netip.Prefix{netip.MustParsePrefix("100.2.3.4/32")},
+	}
+	owner := &tailcfg.UserProfile{
+		ID:          1,
+		LoginName:   "[email protected]",
+		DisplayName: "Amelie du Pangoline",
+	}
+	plainPeer := &tailcfg.Node{
+		ID:           601,
+		StableID:     "n601FAKE",
+		ComputedName: "some-peer",
+		User:         tailcfg.UserID(1),
+		Key:          makeNodeKeyFromID(601),
+		Addresses:    []netip.Prefix{netip.MustParsePrefix("100.2.3.5/32")},
+	}
+	taggedPeer := &tailcfg.Node{
+		ID:           602,
+		StableID:     "n602FAKE",
+		ComputedName: "some-tagged-peer",
+		Tags:         []string{"tag:server", "tag:test"},
+		User:         tailcfg.UserID(1),
+		Key:          makeNodeKeyFromID(602),
+		Addresses:    []netip.Prefix{netip.MustParsePrefix("100.2.3.6/32")},
+	}
+	want := &netmap.NetworkMap{
+		SelfNode: selfNode.View(),
+		UserProfiles: map[tailcfg.UserID]tailcfg.UserProfileView{
+			tailcfg.UserID(1): owner.View(),
+		},
+		Peers: []tailcfg.NodeView{plainPeer.View(), taggedPeer.View()},
+	}
+
+	// Seed the on-disk cache with that map.
+	seed := netmapcache.NewCache(netmapcache.FileStore(dir))
+	if storeErr := seed.Store(t.Context(), want); storeErr != nil {
+		t.Fatalf("Store netmap in cache: %v", storeErr)
+	}
+
+	// Build a fresh backend, then aim it at the var root and profile that own
+	// the cache seeded above.
+	sys := tsd.NewSystem()
+	eng, err := wgengine.NewFakeUserspaceEngine(
+		logger.Discard,
+		sys.Set,
+		sys.HealthTracker.Get(),
+		sys.UserMetricsRegistry(),
+		sys.Bus.Get(),
+	)
+	if err != nil {
+		t.Fatalf("Make userspace engine: %v", err)
+	}
+	t.Cleanup(eng.Close)
+	sys.Set(eng)
+	sys.Set(new(mem.Store))
+
+	logf := tstest.WhileTestRunningLogger(t)
+	lb, err := NewLocalBackend(logf, logid.PublicID{}, sys, 0)
+	if err != nil {
+		t.Fatalf("Make local backend: %v", err)
+	}
+	t.Cleanup(lb.Shutdown)
+	lb.SetVarRoot(root)
+
+	profiles := must.Get(newProfileManager(new(mem.Store), logf, health.NewTracker(sys.Bus.Get())))
+	profiles.currentProfile = (&ipn.LoginProfile{ID: "id0"}).View()
+	lb.pm = profiles
+
+	// There is no control plane here, so login cannot complete; Start should
+	// nevertheless leave the backend holding the cached network map.
+	if startErr := lb.Start(ipn.Options{}); startErr != nil {
+		t.Fatalf("Start local backend: %v", startErr)
+	}
+
+	// Compare what the backend ended up with against what was stored, leaving
+	// out the fields excluded from the comparison below.
+	got := lb.currentNode().NetMap()
+	diffOpts := []cmp.Option{
+		cmpopts.IgnoreFields(netmap.NetworkMap{}, "Cached", "PacketFilter", "PacketFilterRules"),
+		cmpopts.EquateComparable(key.NodePublic{}, key.MachinePublic{}),
+	}
+	if diff := cmp.Diff(got, want, diffOpts...); diff != "" {
+		t.Error(diff)
+	}
+}
+
 func TestConfigureExitNode(t *testing.T) {
 	controlURL := "https://localhost:1/"
 	exitNode1 := makeExitNode(1, withName("node-1"), withDERP(1), withAddresses(netip.MustParsePrefix("100.64.1.1/32")))

+ 1 - 0
tsnet/depaware.txt

@@ -236,6 +236,7 @@ tailscale.com/tsnet dependencies: (generated by github.com/tailscale/depaware)
      💣 tailscale.com/ipn/ipnauth                                    from tailscale.com/ipn/ipnext+
         tailscale.com/ipn/ipnext                                     from tailscale.com/ipn/ipnlocal
         tailscale.com/ipn/ipnlocal                                   from tailscale.com/ipn/localapi+
+        tailscale.com/ipn/ipnlocal/netmapcache                       from tailscale.com/ipn/ipnlocal
         tailscale.com/ipn/ipnstate                                   from tailscale.com/client/local+
         tailscale.com/ipn/localapi                                   from tailscale.com/tsnet
         tailscale.com/ipn/store                                      from tailscale.com/ipn/ipnlocal+