Browse Source

util/usermetrics: make usermetrics non-global

this commit changes usermetrics to be non-global, this is a building
block for correct metrics if a go process runs multiple tsnets or
in tests.

Updates #13420
Updates tailscale/corp#22075

Signed-off-by: Kristoffer Dalby <[email protected]>
Kristoffer Dalby 1 year ago
parent
commit
0e0e53d3b3

+ 3 - 0
cmd/tailscaled/tailscaled.go

@@ -680,12 +680,15 @@ func tryEngine(logf logger.Logf, sys *tsd.System, name string) (onlyNetstack boo
 		ListenPort:    args.port,
 		NetMon:        sys.NetMon.Get(),
 		HealthTracker: sys.HealthTracker(),
+		Metrics:       sys.UserMetricsRegistry(),
 		Dialer:        sys.Dialer.Get(),
 		SetSubsystem:  sys.Set,
 		ControlKnobs:  sys.ControlKnobs(),
 		DriveForLocal: driveimpl.NewFileSystemForLocal(logf),
 	}
 
+	sys.HealthTracker().SetMetricsRegistry(sys.UserMetricsRegistry())
+
 	onlyNetstack = name == "userspace-networking"
 	netstackSubnetRouter := onlyNetstack // but mutated later on some platforms
 	netns.SetEnabled(!onlyNetstack)

+ 1 - 1
go.mod

@@ -320,7 +320,7 @@ require (
 	github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e // indirect
 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
 	github.com/polyfloyd/go-errorlint v1.4.1 // indirect
-	github.com/prometheus/client_model v0.5.0 // indirect
+	github.com/prometheus/client_model v0.5.0
 	github.com/prometheus/procfs v0.12.0 // indirect
 	github.com/quasilyte/go-ruleguard v0.3.19 // indirect
 	github.com/quasilyte/gogrep v0.5.0 // indirect

+ 29 - 18
health/health.go

@@ -20,6 +20,7 @@ import (
 	"time"
 
 	"tailscale.com/envknob"
+	"tailscale.com/metrics"
 	"tailscale.com/tailcfg"
 	"tailscale.com/types/opt"
 	"tailscale.com/util/cibuild"
@@ -111,6 +112,7 @@ type Tracker struct {
 	lastLoginErr            error
 	localLogConfigErr       error
 	tlsConnectionErrors     map[string]error // map[ServerName]error
+	metricHealthMessage     *metrics.MultiLabelMap[metricHealthMessageLabel]
 }
 
 // Subsystem is the name of a subsystem whose health can be monitored.
@@ -317,6 +319,33 @@ func (w *Warnable) IsVisible(ws *warningState) bool {
 	return time.Since(ws.BrokenSince) >= w.TimeToVisible
 }
 
+// SetMetricsRegistry sets up the metrics for the Tracker. It takes
+// a usermetric.Registry and registers the metrics there.
+func (t *Tracker) SetMetricsRegistry(reg *usermetric.Registry) {
+	if reg == nil || t.metricHealthMessage != nil {
+		return
+	}
+
+	t.metricHealthMessage = usermetric.NewMultiLabelMapWithRegistry[metricHealthMessageLabel](
+		reg,
+		"tailscaled_health_messages",
+		"gauge",
+		"Number of health messages broken down by type.",
+	)
+
+	t.metricHealthMessage.Set(metricHealthMessageLabel{
+		Type: "warning",
+	}, expvar.Func(func() any {
+		if t.nil() {
+			return 0
+		}
+		t.mu.Lock()
+		defer t.mu.Unlock()
+		t.updateBuiltinWarnablesLocked()
+		return int64(len(t.stringsLocked()))
+	}))
+}
+
 // SetUnhealthy sets a warningState for the given Warnable with the provided Args, and should be
 // called when a Warnable becomes unhealthy, or its unhealthy status needs to be updated.
 // SetUnhealthy takes ownership of args. The args can be nil if no additional information is
@@ -1205,18 +1234,6 @@ func (t *Tracker) ReceiveFuncStats(which ReceiveFunc) *ReceiveFuncStats {
 }
 
 func (t *Tracker) doOnceInit() {
-	metricHealthMessage.Set(metricHealthMessageLabel{
-		Type: "warning",
-	}, expvar.Func(func() any {
-		if t.nil() {
-			return 0
-		}
-		t.mu.Lock()
-		defer t.mu.Unlock()
-		t.updateBuiltinWarnablesLocked()
-		return int64(len(t.stringsLocked()))
-	}))
-
 	for i := range t.MagicSockReceiveFuncs {
 		f := &t.MagicSockReceiveFuncs[i]
 		f.name = (ReceiveFunc(i)).String()
@@ -1252,9 +1269,3 @@ type metricHealthMessageLabel struct {
 	// TODO: break down by warnable.severity as well?
 	Type string
 }
-
-var metricHealthMessage = usermetric.NewMultiLabelMap[metricHealthMessageLabel](
-	"tailscaled_health_messages",
-	"gauge",
-	"Number of health messages broken down by type.",
-)

+ 19 - 4
ipn/ipnlocal/local.go

@@ -119,9 +119,6 @@ import (
 	"tailscale.com/wgengine/wgcfg/nmcfg"
 )
 
-var metricAdvertisedRoutes = usermetric.NewGauge(
-	"tailscaled_advertised_routes", "Number of advertised network routes (e.g. by a subnet router)")
-
 var controlDebugFlags = getControlDebugFlags()
 
 func getControlDebugFlags() []string {
@@ -184,6 +181,7 @@ type LocalBackend struct {
 	statsLogf             logger.Logf        // for printing peers stats on change
 	sys                   *tsd.System
 	health                *health.Tracker // always non-nil
+	metrics               metrics
 	e                     wgengine.Engine // non-nil; TODO(bradfitz): remove; use sys
 	store                 ipn.StateStore  // non-nil; TODO(bradfitz): remove; use sys
 	dialer                *tsdial.Dialer  // non-nil; TODO(bradfitz): remove; use sys
@@ -377,6 +375,11 @@ func (b *LocalBackend) HealthTracker() *health.Tracker {
 	return b.health
 }
 
+// UserMetricsRegistry returns the usermetrics registry for the backend
+func (b *LocalBackend) UserMetricsRegistry() *usermetric.Registry {
+	return b.sys.UserMetricsRegistry()
+}
+
 // NetMon returns the network monitor for the backend.
 func (b *LocalBackend) NetMon() *netmon.Monitor {
 	return b.sys.NetMon.Get()
@@ -386,6 +389,12 @@ type updateStatus struct {
 	started bool
 }
 
+type metrics struct {
+	// advertisedRoutes is a metric that counts the number of network routes that are advertised by the local node.
+	// This informs the user of how many routes are being advertised by the local node, excluding exit routes.
+	advertisedRoutes *usermetric.Gauge
+}
+
 // clientGen is a func that creates a control plane client.
 // It's the type used by LocalBackend.SetControlClientGetterForTesting.
 type clientGen func(controlclient.Options) (controlclient.Client, error)
@@ -429,6 +438,11 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
 	captiveCtx, captiveCancel := context.WithCancel(ctx)
 	captiveCancel()
 
+	m := metrics{
+		advertisedRoutes: sys.UserMetricsRegistry().NewGauge(
+			"tailscaled_advertised_routes", "Number of advertised network routes (e.g. by a subnet router)"),
+	}
+
 	b := &LocalBackend{
 		ctx:                   ctx,
 		ctxCancel:             cancel,
@@ -437,6 +451,7 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
 		statsLogf:             logger.LogOnChange(logf, 5*time.Minute, clock.Now),
 		sys:                   sys,
 		health:                sys.HealthTracker(),
+		metrics:               m,
 		e:                     e,
 		dialer:                dialer,
 		store:                 store,
@@ -4760,7 +4775,7 @@ func (b *LocalBackend) applyPrefsToHostinfoLocked(hi *tailcfg.Hostinfo, prefs ip
 			routes++
 		}
 	}
-	metricAdvertisedRoutes.Set(float64(routes))
+	b.metrics.advertisedRoutes.Set(float64(routes))
 
 	var sshHostKeys []string
 	if prefs.RunSSH() && envknob.CanSSHD() {

+ 1 - 1
ipn/ipnlocal/local_test.go

@@ -432,7 +432,7 @@ func newTestLocalBackend(t testing.TB) *LocalBackend {
 	sys := new(tsd.System)
 	store := new(mem.Store)
 	sys.Set(store)
-	eng, err := wgengine.NewFakeUserspaceEngine(logf, sys.Set, sys.HealthTracker())
+	eng, err := wgengine.NewFakeUserspaceEngine(logf, sys.Set, sys.HealthTracker(), sys.UserMetricsRegistry())
 	if err != nil {
 		t.Fatalf("NewFakeUserspaceEngine: %v", err)
 	}

+ 1 - 1
ipn/ipnlocal/loglines_test.go

@@ -50,7 +50,7 @@ func TestLocalLogLines(t *testing.T) {
 	sys := new(tsd.System)
 	store := new(mem.Store)
 	sys.Set(store)
-	e, err := wgengine.NewFakeUserspaceEngine(logf, sys.Set, sys.HealthTracker())
+	e, err := wgengine.NewFakeUserspaceEngine(logf, sys.Set, sys.HealthTracker(), sys.UserMetricsRegistry())
 	if err != nil {
 		t.Fatal(err)
 	}

+ 9 - 4
ipn/ipnlocal/peerapi_test.go

@@ -35,6 +35,7 @@ import (
 	"tailscale.com/types/logger"
 	"tailscale.com/types/netmap"
 	"tailscale.com/util/must"
+	"tailscale.com/util/usermetric"
 	"tailscale.com/wgengine"
 	"tailscale.com/wgengine/filter"
 )
@@ -643,7 +644,8 @@ func TestPeerAPIReplyToDNSQueries(t *testing.T) {
 	h.remoteAddr = netip.MustParseAddrPort("100.150.151.152:12345")
 
 	ht := new(health.Tracker)
-	eng, _ := wgengine.NewFakeUserspaceEngine(logger.Discard, 0, ht)
+	reg := new(usermetric.Registry)
+	eng, _ := wgengine.NewFakeUserspaceEngine(logger.Discard, 0, ht, reg)
 	pm := must.Get(newProfileManager(new(mem.Store), t.Logf, ht))
 	h.ps = &peerAPIServer{
 		b: &LocalBackend{
@@ -694,7 +696,8 @@ func TestPeerAPIPrettyReplyCNAME(t *testing.T) {
 		h.remoteAddr = netip.MustParseAddrPort("100.150.151.152:12345")
 
 		ht := new(health.Tracker)
-		eng, _ := wgengine.NewFakeUserspaceEngine(logger.Discard, 0, ht)
+		reg := new(usermetric.Registry)
+		eng, _ := wgengine.NewFakeUserspaceEngine(logger.Discard, 0, ht, reg)
 		pm := must.Get(newProfileManager(new(mem.Store), t.Logf, ht))
 		var a *appc.AppConnector
 		if shouldStore {
@@ -767,7 +770,8 @@ func TestPeerAPIReplyToDNSQueriesAreObserved(t *testing.T) {
 
 		rc := &appctest.RouteCollector{}
 		ht := new(health.Tracker)
-		eng, _ := wgengine.NewFakeUserspaceEngine(logger.Discard, 0, ht)
+		reg := new(usermetric.Registry)
+		eng, _ := wgengine.NewFakeUserspaceEngine(logger.Discard, 0, ht, reg)
 		pm := must.Get(newProfileManager(new(mem.Store), t.Logf, ht))
 		var a *appc.AppConnector
 		if shouldStore {
@@ -830,8 +834,9 @@ func TestPeerAPIReplyToDNSQueriesAreObservedWithCNAMEFlattening(t *testing.T) {
 		h.remoteAddr = netip.MustParseAddrPort("100.150.151.152:12345")
 
 		ht := new(health.Tracker)
+		reg := new(usermetric.Registry)
 		rc := &appctest.RouteCollector{}
-		eng, _ := wgengine.NewFakeUserspaceEngine(logger.Discard, 0, ht)
+		eng, _ := wgengine.NewFakeUserspaceEngine(logger.Discard, 0, ht, reg)
 		pm := must.Get(newProfileManager(new(mem.Store), t.Logf, ht))
 		var a *appc.AppConnector
 		if shouldStore {

+ 1 - 0
ipn/ipnlocal/serve_test.go

@@ -684,6 +684,7 @@ func newTestBackend(t *testing.T) *LocalBackend {
 	e, err := wgengine.NewUserspaceEngine(logf, wgengine.Config{
 		SetSubsystem:  sys.Set,
 		HealthTracker: sys.HealthTracker(),
+		Metrics:       sys.UserMetricsRegistry(),
 	})
 	if err != nil {
 		t.Fatal(err)

+ 2 - 2
ipn/ipnlocal/state_test.go

@@ -298,7 +298,7 @@ func TestStateMachine(t *testing.T) {
 	sys := new(tsd.System)
 	store := new(testStateStorage)
 	sys.Set(store)
-	e, err := wgengine.NewFakeUserspaceEngine(logf, sys.Set, sys.HealthTracker())
+	e, err := wgengine.NewFakeUserspaceEngine(logf, sys.Set, sys.HealthTracker(), sys.UserMetricsRegistry())
 	if err != nil {
 		t.Fatalf("NewFakeUserspaceEngine: %v", err)
 	}
@@ -931,7 +931,7 @@ func TestEditPrefsHasNoKeys(t *testing.T) {
 	logf := tstest.WhileTestRunningLogger(t)
 	sys := new(tsd.System)
 	sys.Set(new(mem.Store))
-	e, err := wgengine.NewFakeUserspaceEngine(logf, sys.Set, sys.HealthTracker())
+	e, err := wgengine.NewFakeUserspaceEngine(logf, sys.Set, sys.HealthTracker(), sys.UserMetricsRegistry())
 	if err != nil {
 		t.Fatalf("NewFakeUserspaceEngine: %v", err)
 	}

+ 1 - 2
ipn/localapi/localapi.go

@@ -64,7 +64,6 @@ import (
 	"tailscale.com/util/progresstracking"
 	"tailscale.com/util/rands"
 	"tailscale.com/util/testenv"
-	"tailscale.com/util/usermetric"
 	"tailscale.com/version"
 	"tailscale.com/wgengine/magicsock"
 )
@@ -581,7 +580,7 @@ func (h *Handler) serveUserMetrics(w http.ResponseWriter, r *http.Request) {
 		http.Error(w, "usermetrics debug flag not enabled", http.StatusForbidden)
 		return
 	}
-	usermetric.Handler(w, r)
+	h.b.UserMetricsRegistry().Handler(w, r)
 }
 
 func (h *Handler) serveDebug(w http.ResponseWriter, r *http.Request) {

+ 1 - 1
ipn/localapi/localapi_test.go

@@ -356,7 +356,7 @@ func newTestLocalBackend(t testing.TB) *ipnlocal.LocalBackend {
 	sys := new(tsd.System)
 	store := new(mem.Store)
 	sys.Set(store)
-	eng, err := wgengine.NewFakeUserspaceEngine(logf, sys.Set, sys.HealthTracker())
+	eng, err := wgengine.NewFakeUserspaceEngine(logf, sys.Set, sys.HealthTracker(), sys.UserMetricsRegistry())
 	if err != nil {
 		t.Fatalf("NewFakeUserspaceEngine: %v", err)
 	}

+ 19 - 1
metrics/multilabelmap.go

@@ -97,7 +97,12 @@ type KeyValue[T comparable] struct {
 }
 
 func (v *MultiLabelMap[T]) String() string {
-	return `"MultiLabelMap"`
+	var sb strings.Builder
+	sb.WriteString("MultiLabelMap:\n")
+	v.Do(func(kv KeyValue[T]) {
+		fmt.Fprintf(&sb, "\t%v: %v\n", kv.Key, kv.Value)
+	})
+	return sb.String()
 }
 
 // WritePrometheus writes v to w in Prometheus exposition format.
@@ -281,3 +286,16 @@ func (v *MultiLabelMap[T]) Do(f func(KeyValue[T])) {
 		f(KeyValue[T]{e.key, e.val})
 	}
 }
+
+// ResetAllForTest resets all values for metrics to zero.
+// Should only be used in tests.
+func (v *MultiLabelMap[T]) ResetAllForTest() {
+	v.Do(func(kv KeyValue[T]) {
+		switch v := kv.Value.(type) {
+		case *expvar.Int:
+			v.Set(0)
+		case *expvar.Float:
+			v.Set(0)
+		}
+	})
+}

+ 34 - 21
net/tstun/wrap.go

@@ -24,6 +24,7 @@ import (
 	"go4.org/mem"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 	"tailscale.com/disco"
+	tsmetrics "tailscale.com/metrics"
 	"tailscale.com/net/connstats"
 	"tailscale.com/net/packet"
 	"tailscale.com/net/packet/checksum"
@@ -209,6 +210,30 @@ type Wrapper struct {
 	stats atomic.Pointer[connstats.Statistics]
 
 	captureHook syncs.AtomicValue[capture.Callback]
+
+	metrics *metrics
+}
+
+type metrics struct {
+	inboundDroppedPacketsTotal  *tsmetrics.MultiLabelMap[dropPacketLabel]
+	outboundDroppedPacketsTotal *tsmetrics.MultiLabelMap[dropPacketLabel]
+}
+
+func registerMetrics(reg *usermetric.Registry) *metrics {
+	return &metrics{
+		inboundDroppedPacketsTotal: usermetric.NewMultiLabelMapWithRegistry[dropPacketLabel](
+			reg,
+			"tailscaled_inbound_dropped_packets_total",
+			"counter",
+			"Counts the number of dropped packets received by the node from other peers",
+		),
+		outboundDroppedPacketsTotal: usermetric.NewMultiLabelMapWithRegistry[dropPacketLabel](
+			reg,
+			"tailscaled_outbound_dropped_packets_total",
+			"counter",
+			"Counts the number of packets dropped while being sent to other peers",
+		),
+	}
 }
 
 // tunInjectedRead is an injected packet pretending to be a tun.Read().
@@ -248,15 +273,15 @@ func (w *Wrapper) Start() {
 	close(w.startCh)
 }
 
-func WrapTAP(logf logger.Logf, tdev tun.Device) *Wrapper {
-	return wrap(logf, tdev, true)
+func WrapTAP(logf logger.Logf, tdev tun.Device, m *usermetric.Registry) *Wrapper {
+	return wrap(logf, tdev, true, m)
 }
 
-func Wrap(logf logger.Logf, tdev tun.Device) *Wrapper {
-	return wrap(logf, tdev, false)
+func Wrap(logf logger.Logf, tdev tun.Device, m *usermetric.Registry) *Wrapper {
+	return wrap(logf, tdev, false, m)
 }
 
-func wrap(logf logger.Logf, tdev tun.Device, isTAP bool) *Wrapper {
+func wrap(logf logger.Logf, tdev tun.Device, isTAP bool, m *usermetric.Registry) *Wrapper {
 	logf = logger.WithPrefix(logf, "tstun: ")
 	w := &Wrapper{
 		logf:        logf,
@@ -274,6 +299,7 @@ func wrap(logf logger.Logf, tdev tun.Device, isTAP bool) *Wrapper {
 		// TODO(dmytro): (highly rate-limited) hexdumps should happen on unknown packets.
 		filterFlags: filter.LogAccepts | filter.LogDrops,
 		startCh:     make(chan struct{}),
+		metrics:     registerMetrics(m),
 	}
 
 	w.vectorBuffer = make([][]byte, tdev.BatchSize())
@@ -872,7 +898,7 @@ func (t *Wrapper) filterPacketOutboundToWireGuard(p *packet.Parsed, pc *peerConf
 
 	if filt.RunOut(p, t.filterFlags) != filter.Accept {
 		metricPacketOutDropFilter.Add(1)
-		metricOutboundDroppedPacketsTotal.Add(dropPacketLabel{
+		t.metrics.outboundDroppedPacketsTotal.Add(dropPacketLabel{
 			Reason: DropReasonACL,
 		}, 1)
 		return filter.Drop, gro
@@ -1144,7 +1170,7 @@ func (t *Wrapper) filterPacketInboundFromWireGuard(p *packet.Parsed, captHook ca
 
 	if outcome != filter.Accept {
 		metricPacketInDropFilter.Add(1)
-		metricInboundDroppedPacketsTotal.Add(dropPacketLabel{
+		t.metrics.inboundDroppedPacketsTotal.Add(dropPacketLabel{
 			Reason: DropReasonACL,
 		}, 1)
 
@@ -1225,7 +1251,7 @@ func (t *Wrapper) Write(buffs [][]byte, offset int) (int, error) {
 		t.noteActivity()
 		_, err := t.tdevWrite(buffs, offset)
 		if err != nil {
-			metricInboundDroppedPacketsTotal.Add(dropPacketLabel{
+			t.metrics.inboundDroppedPacketsTotal.Add(dropPacketLabel{
 				Reason: DropReasonError,
 			}, int64(len(buffs)))
 		}
@@ -1482,19 +1508,6 @@ type dropPacketLabel struct {
 	Reason DropReason
 }
 
-var (
-	metricInboundDroppedPacketsTotal = usermetric.NewMultiLabelMap[dropPacketLabel](
-		"tailscaled_inbound_dropped_packets_total",
-		"counter",
-		"Counts the number of dropped packets received by the node from other peers",
-	)
-	metricOutboundDroppedPacketsTotal = usermetric.NewMultiLabelMap[dropPacketLabel](
-		"tailscaled_outbound_dropped_packets_total",
-		"counter",
-		"Counts the number of packets dropped while being sent to other peers",
-	)
-)
-
 func (t *Wrapper) InstallCaptureHook(cb capture.Callback) {
 	t.captureHook.Store(cb)
 }

+ 33 - 18
net/tstun/wrap_test.go

@@ -8,6 +8,7 @@ import (
 	"context"
 	"encoding/binary"
 	"encoding/hex"
+	"expvar"
 	"fmt"
 	"net/netip"
 	"reflect"
@@ -38,6 +39,7 @@ import (
 	"tailscale.com/types/ptr"
 	"tailscale.com/types/views"
 	"tailscale.com/util/must"
+	"tailscale.com/util/usermetric"
 	"tailscale.com/wgengine/capture"
 	"tailscale.com/wgengine/filter"
 	"tailscale.com/wgengine/wgcfg"
@@ -173,7 +175,8 @@ func setfilter(logf logger.Logf, tun *Wrapper) {
 
 func newChannelTUN(logf logger.Logf, secure bool) (*tuntest.ChannelTUN, *Wrapper) {
 	chtun := tuntest.NewChannelTUN()
-	tun := Wrap(logf, chtun.TUN())
+	reg := new(usermetric.Registry)
+	tun := Wrap(logf, chtun.TUN(), reg)
 	if secure {
 		setfilter(logf, tun)
 	} else {
@@ -185,7 +188,8 @@ func newChannelTUN(logf logger.Logf, secure bool) (*tuntest.ChannelTUN, *Wrapper
 
 func newFakeTUN(logf logger.Logf, secure bool) (*fakeTUN, *Wrapper) {
 	ftun := NewFake()
-	tun := Wrap(logf, ftun)
+	reg := new(usermetric.Registry)
+	tun := Wrap(logf, ftun, reg)
 	if secure {
 		setfilter(logf, tun)
 	} else {
@@ -315,15 +319,15 @@ func mustHexDecode(s string) []byte {
 }
 
 func TestFilter(t *testing.T) {
-	// Reset the metrics before test. These are global
-	// so the different tests might have affected them.
-	metricInboundDroppedPacketsTotal.SetInt(dropPacketLabel{Reason: DropReasonACL}, 0)
-	metricInboundDroppedPacketsTotal.SetInt(dropPacketLabel{Reason: DropReasonError}, 0)
-	metricOutboundDroppedPacketsTotal.SetInt(dropPacketLabel{Reason: DropReasonACL}, 0)
 
 	chtun, tun := newChannelTUN(t.Logf, true)
 	defer tun.Close()
 
+	// Reset the metrics before test. These are global
+	// so the different tests might have affected them.
+	tun.metrics.inboundDroppedPacketsTotal.ResetAllForTest()
+	tun.metrics.outboundDroppedPacketsTotal.ResetAllForTest()
+
 	type direction int
 
 	const (
@@ -436,20 +440,26 @@ func TestFilter(t *testing.T) {
 		})
 	}
 
-	inACL := metricInboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonACL})
-	inError := metricInboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonError})
-	outACL := metricOutboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonACL})
-
-	assertMetricPackets(t, "inACL", "3", inACL.String())
-	assertMetricPackets(t, "inError", "0", inError.String())
-	assertMetricPackets(t, "outACL", "1", outACL.String())
+	var metricInboundDroppedPacketsACL, metricInboundDroppedPacketsErr, metricOutboundDroppedPacketsACL int64
+	if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonACL}).(*expvar.Int); ok {
+		metricInboundDroppedPacketsACL = m.Value()
+	}
+	if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonError}).(*expvar.Int); ok {
+		metricInboundDroppedPacketsErr = m.Value()
+	}
+	if m, ok := tun.metrics.outboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonACL}).(*expvar.Int); ok {
+		metricOutboundDroppedPacketsACL = m.Value()
+	}
 
+	assertMetricPackets(t, "inACL", 3, metricInboundDroppedPacketsACL)
+	assertMetricPackets(t, "inError", 0, metricInboundDroppedPacketsErr)
+	assertMetricPackets(t, "outACL", 1, metricOutboundDroppedPacketsACL)
 }
 
-func assertMetricPackets(t *testing.T, metricName, want, got string) {
+func assertMetricPackets(t *testing.T, metricName string, want, got int64) {
 	t.Helper()
 	if want != got {
-		t.Errorf("%s got unexpected value, got %s, want %s", metricName, got, want)
+		t.Errorf("%s got unexpected value, got %d, want %d", metricName, got, want)
 	}
 }
 
@@ -512,6 +522,7 @@ func TestAtomic64Alignment(t *testing.T) {
 }
 
 func TestPeerAPIBypass(t *testing.T) {
+	reg := new(usermetric.Registry)
 	wrapperWithPeerAPI := &Wrapper{
 		PeerAPIPort: func(ip netip.Addr) (port uint16, ok bool) {
 			if ip == netip.MustParseAddr("100.64.1.2") {
@@ -519,6 +530,7 @@ func TestPeerAPIBypass(t *testing.T) {
 			}
 			return
 		},
+		metrics: registerMetrics(reg),
 	}
 
 	tests := []struct {
@@ -534,13 +546,16 @@ func TestPeerAPIBypass(t *testing.T) {
 				PeerAPIPort: func(netip.Addr) (port uint16, ok bool) {
 					return 60000, true
 				},
+				metrics: registerMetrics(reg),
 			},
 			pkt:  tcp4syn("1.2.3.4", "100.64.1.2", 1234, 60000),
 			want: filter.Drop,
 		},
 		{
-			name:   "reject_with_filter",
-			w:      &Wrapper{},
+			name: "reject_with_filter",
+			w: &Wrapper{
+				metrics: registerMetrics(reg),
+			},
 			filter: filter.NewAllowNone(logger.Discard, new(netipx.IPSet)),
 			pkt:    tcp4syn("1.2.3.4", "100.64.1.2", 1234, 60000),
 			want:   filter.Drop,

+ 1 - 1
ssh/tailssh/tailssh_test.go

@@ -826,7 +826,7 @@ func TestSSHAuthFlow(t *testing.T) {
 func TestSSH(t *testing.T) {
 	var logf logger.Logf = t.Logf
 	sys := &tsd.System{}
-	eng, err := wgengine.NewFakeUserspaceEngine(logf, sys.Set, sys.HealthTracker())
+	eng, err := wgengine.NewFakeUserspaceEngine(logf, sys.Set, sys.HealthTracker(), sys.UserMetricsRegistry())
 	if err != nil {
 		t.Fatal(err)
 	}

+ 8 - 1
tsd/tsd.go

@@ -32,6 +32,7 @@ import (
 	"tailscale.com/net/tstun"
 	"tailscale.com/proxymap"
 	"tailscale.com/types/netmap"
+	"tailscale.com/util/usermetric"
 	"tailscale.com/wgengine"
 	"tailscale.com/wgengine/magicsock"
 	"tailscale.com/wgengine/router"
@@ -65,7 +66,8 @@ type System struct {
 	controlKnobs controlknobs.Knobs
 	proxyMap     proxymap.Mapper
 
-	healthTracker health.Tracker
+	healthTracker       health.Tracker
+	userMetricsRegistry usermetric.Registry
 }
 
 // NetstackImpl is the interface that *netstack.Impl implements.
@@ -142,6 +144,11 @@ func (s *System) HealthTracker() *health.Tracker {
 	return &s.healthTracker
 }
 
+// UserMetricsRegistry returns the system usermetrics.
+func (s *System) UserMetricsRegistry() *usermetric.Registry {
+	return &s.userMetricsRegistry
+}
+
 // SubSystem represents some subsystem of the Tailscale node daemon.
 //
 // A subsystem can be set to a value, and then later retrieved. A subsystem

+ 2 - 0
tsnet/tsnet.go

@@ -536,12 +536,14 @@ func (s *Server) start() (reterr error) {
 		SetSubsystem:  sys.Set,
 		ControlKnobs:  sys.ControlKnobs(),
 		HealthTracker: sys.HealthTracker(),
+		Metrics:       sys.UserMetricsRegistry(),
 	})
 	if err != nil {
 		return err
 	}
 	closePool.add(s.dialer)
 	sys.Set(eng)
+	sys.HealthTracker().SetMetricsRegistry(sys.UserMetricsRegistry())
 
 	// TODO(oxtoacart): do we need to support Taildrive on tsnet, and if so, how?
 	ns, err := netstack.Create(tsLogf, sys.Tun.Get(), eng, sys.MagicSock.Get(), s.dialer, sys.DNSManager.Get(), sys.ProxyMapper(), nil)

+ 135 - 32
tsnet/tsnet_test.go

@@ -5,6 +5,7 @@ package tsnet
 
 import (
 	"bufio"
+	"bytes"
 	"context"
 	"crypto/ecdsa"
 	"crypto/elliptic"
@@ -31,7 +32,8 @@ import (
 	"testing"
 	"time"
 
-	"github.com/google/go-cmp/cmp"
+	dto "github.com/prometheus/client_model/go"
+	"github.com/prometheus/common/expfmt"
 	"golang.org/x/net/proxy"
 	"tailscale.com/cmd/testwrapper/flakytest"
 	"tailscale.com/health"
@@ -818,65 +820,166 @@ func TestUDPConn(t *testing.T) {
 	}
 }
 
+// testWarnable is a Warnable that is used within this package for testing purposes only.
+var testWarnable = health.Register(&health.Warnable{
+	Code:     "test-warnable-tsnet",
+	Title:    "Test warnable",
+	Severity: health.SeverityLow,
+	Text: func(args health.Args) string {
+		return args[health.ArgError]
+	},
+})
+
+func parseMetrics(m []byte) (map[string]float64, error) {
+	metrics := make(map[string]float64)
+
+	var parser expfmt.TextParser
+	mf, err := parser.TextToMetricFamilies(bytes.NewReader(m))
+	if err != nil {
+		return nil, err
+	}
+
+	for _, f := range mf {
+		for _, ff := range f.Metric {
+			val := float64(0)
+
+			switch f.GetType() {
+			case dto.MetricType_COUNTER:
+				val = ff.GetCounter().GetValue()
+			case dto.MetricType_GAUGE:
+				val = ff.GetGauge().GetValue()
+			}
+
+			metrics[f.GetName()+promMetricLabelsStr(ff.GetLabel())] = val
+		}
+	}
+
+	return metrics, nil
+}
+
+func promMetricLabelsStr(labels []*dto.LabelPair) string {
+	if len(labels) == 0 {
+		return ""
+	}
+	var b strings.Builder
+	b.WriteString("{")
+	for i, l := range labels {
+		if i > 0 {
+			b.WriteString(",")
+		}
+		b.WriteString(fmt.Sprintf("%s=%q", l.GetName(), l.GetValue()))
+	}
+	b.WriteString("}")
+	return b.String()
+}
+
 func TestUserMetrics(t *testing.T) {
+	flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/13420")
 	tstest.ResourceCheck(t)
-	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
 	defer cancel()
 
-	// testWarnable is a Warnable that is used within this package for testing purposes only.
-	var testWarnable = health.Register(&health.Warnable{
-		Code:     "test-warnable-tsnet",
-		Title:    "Test warnable",
-		Severity: health.SeverityLow,
-		Text: func(args health.Args) string {
-			return args[health.ArgError]
-		},
-	})
-
 	controlURL, c := startControl(t)
-	s1, _, s1PubKey := startServer(t, ctx, controlURL, "s1")
+	s1, s1ip, s1PubKey := startServer(t, ctx, controlURL, "s1")
+	s2, _, _ := startServer(t, ctx, controlURL, "s2")
 
 	s1.lb.EditPrefs(&ipn.MaskedPrefs{
 		Prefs: ipn.Prefs{
 			AdvertiseRoutes: []netip.Prefix{
 				netip.MustParsePrefix("192.0.2.0/24"),
 				netip.MustParsePrefix("192.0.3.0/24"),
+				netip.MustParsePrefix("192.0.5.1/32"),
+				netip.MustParsePrefix("0.0.0.0/0"),
 			},
 		},
 		AdvertiseRoutesSet: true,
 	})
-	c.SetSubnetRoutes(s1PubKey, []netip.Prefix{netip.MustParsePrefix("192.0.2.0/24")})
+	c.SetSubnetRoutes(s1PubKey, []netip.Prefix{
+		netip.MustParsePrefix("192.0.2.0/24"),
+		netip.MustParsePrefix("192.0.5.1/32"),
+		netip.MustParsePrefix("0.0.0.0/0"),
+	})
 
 	lc1, err := s1.LocalClient()
 	if err != nil {
 		t.Fatal(err)
 	}
 
+	lc2, err := s2.LocalClient()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// ping to make sure the connection is up.
+	res, err := lc2.Ping(ctx, s1ip, tailcfg.PingICMP)
+	if err != nil {
+		t.Fatalf("pinging: %s", err)
+	}
+	t.Logf("ping success: %#+v", res)
+
 	ht := s1.lb.HealthTracker()
 	ht.SetUnhealthy(testWarnable, health.Args{"Text": "Hello world 1"})
 
-	metrics1, err := lc1.UserMetrics(ctx)
+	// Force an update to the netmap to ensure that the metrics are up-to-date.
+	s1.lb.DebugForceNetmapUpdate()
+	s2.lb.DebugForceNetmapUpdate()
+
+	ctxLc, cancelLc := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancelLc()
+	metrics1, err := lc1.UserMetrics(ctxLc)
 	if err != nil {
 		t.Fatal(err)
 	}
 
-	// Note that this test will check for two warnings because the health
-	// tracker will have two warnings: one from the testWarnable, added in
-	// this test, and one because we are running the dev/unstable version
-	// of tailscale.
-	want := `# TYPE tailscaled_advertised_routes gauge
-# HELP tailscaled_advertised_routes Number of advertised network routes (e.g. by a subnet router)
-tailscaled_advertised_routes 2
-# TYPE tailscaled_health_messages gauge
-# HELP tailscaled_health_messages Number of health messages broken down by type.
-tailscaled_health_messages{type="warning"} 2
-# TYPE tailscaled_inbound_dropped_packets_total counter
-# HELP tailscaled_inbound_dropped_packets_total Counts the number of dropped packets received by the node from other peers
-# TYPE tailscaled_outbound_dropped_packets_total counter
-# HELP tailscaled_outbound_dropped_packets_total Counts the number of packets dropped while being sent to other peers
-`
+	status1, err := lc1.Status(ctxLc)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	parsedMetrics1, err := parseMetrics(metrics1)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	t.Logf("Metrics1:\n%s\n", metrics1)
+
+	// The node is advertising 4 routes:
+	// - 192.0.2.0/24
+	// - 192.0.3.0/24
+	// - 192.0.5.1/32
+	if got, want := parsedMetrics1["tailscaled_advertised_routes"], 3.0; got != want {
+		t.Errorf("metrics1, tailscaled_advertised_routes: got %v, want %v", got, want)
+	}
+
+	// Validate the health counter metric against the status of the node
+	if got, want := parsedMetrics1[`tailscaled_health_messages{type="warning"}`], float64(len(status1.Health)); got != want {
+		t.Errorf("metrics1, tailscaled_health_messages: got %v, want %v", got, want)
+	}
+
+	metrics2, err := lc2.UserMetrics(ctx)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	status2, err := lc2.Status(ctx)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	parsedMetrics2, err := parseMetrics(metrics2)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	t.Logf("Metrics2:\n%s\n", metrics2)
+
+	// The node is advertising 0 routes
+	if got, want := parsedMetrics2["tailscaled_advertised_routes"], 0.0; got != want {
+		t.Errorf("metrics2, tailscaled_advertised_routes: got %v, want %v", got, want)
+	}
 
-	if diff := cmp.Diff(want, string(metrics1)); diff != "" {
-		t.Fatalf("unexpected metrics (-want +got):\n%s", diff)
+	// Validate the health counter metric against the status of the node
+	if got, want := parsedMetrics2[`tailscaled_health_messages{type="warning"}`], float64(len(status2.Health)); got != want {
+		t.Errorf("metrics2, tailscaled_health_messages: got %v, want %v", got, want)
 	}
 }

+ 31 - 10
util/usermetric/usermetric.go

@@ -10,29 +10,33 @@ import (
 	"fmt"
 	"io"
 	"net/http"
+	"strings"
 
 	"tailscale.com/metrics"
 	"tailscale.com/tsweb/varz"
 )
 
-var vars expvar.Map
+// Registry tracks user-facing metrics of various Tailscale subsystems.
+type Registry struct {
+	vars expvar.Map
+}
 
-// NewMultiLabelMap creates and register a new
+// NewMultiLabelMapWithRegistry creates and register a new
 // MultiLabelMap[T] variable with the given name and returns it.
 // The variable is registered with the userfacing metrics package.
 //
 // Note that usermetric are not protected against duplicate
 // metrics name. It is the caller's responsibility to ensure that
 // the name is unique.
-func NewMultiLabelMap[T comparable](name string, promType, helpText string) *metrics.MultiLabelMap[T] {
-	m := &metrics.MultiLabelMap[T]{
+func NewMultiLabelMapWithRegistry[T comparable](m *Registry, name string, promType, helpText string) *metrics.MultiLabelMap[T] {
+	ml := &metrics.MultiLabelMap[T]{
 		Type: promType,
 		Help: helpText,
 	}
 	var zero T
 	_ = metrics.LabelString(zero) // panic early if T is invalid
-	vars.Set(name, m)
-	return m
+	m.vars.Set(name, ml)
+	return ml
 }
 
 // Gauge is a gauge metric with no labels.
@@ -42,20 +46,26 @@ type Gauge struct {
 }
 
 // NewGauge creates and register a new gauge metric with the given name and help text.
-func NewGauge(name, help string) *Gauge {
+func (r *Registry) NewGauge(name, help string) *Gauge {
 	g := &Gauge{&expvar.Float{}, help}
-	vars.Set(name, g)
+	r.vars.Set(name, g)
 	return g
 }
 
 // Set sets the gauge to the given value.
 func (g *Gauge) Set(v float64) {
+	if g == nil {
+		return
+	}
 	g.m.Set(v)
 }
 
 // String returns the string of the underlying expvar.Float.
 // This satisfies the expvar.Var interface.
 func (g *Gauge) String() string {
+	if g == nil {
+		return ""
+	}
 	return g.m.String()
 }
 
@@ -79,6 +89,17 @@ func (g *Gauge) WritePrometheus(w io.Writer, name string) {
 
 // Handler returns a varz.Handler that serves the userfacing expvar contained
 // in this package.
-func Handler(w http.ResponseWriter, r *http.Request) {
-	varz.ExpvarDoHandler(vars.Do)(w, r)
+func (r *Registry) Handler(w http.ResponseWriter, req *http.Request) {
+	varz.ExpvarDoHandler(r.vars.Do)(w, req)
+}
+
+// String returns the string representation of all the metrics and their
+// values in the registry. It is useful for debugging.
+func (r *Registry) String() string {
+	var sb strings.Builder
+	r.vars.Do(func(kv expvar.KeyValue) {
+		fmt.Fprintf(&sb, "%s: %v\n", kv.Key, kv.Value)
+	})
+
+	return sb.String()
 }

+ 2 - 1
util/usermetric/usermetric_test.go

@@ -9,7 +9,8 @@ import (
 )
 
 func TestGauge(t *testing.T) {
-	g := NewGauge("test_gauge", "This is a test gauge")
+	var reg Registry
+	g := reg.NewGauge("test_gauge", "This is a test gauge")
 	g.Set(15)
 
 	var buf bytes.Buffer

+ 4 - 0
wgengine/magicsock/magicsock.go

@@ -60,6 +60,7 @@ import (
 	"tailscale.com/util/set"
 	"tailscale.com/util/testenv"
 	"tailscale.com/util/uniq"
+	"tailscale.com/util/usermetric"
 	"tailscale.com/wgengine/capture"
 	"tailscale.com/wgengine/wgint"
 )
@@ -386,6 +387,9 @@ type Options struct {
 	// report errors and warnings to.
 	HealthTracker *health.Tracker
 
+	// Metrics specifies the metrics registry to record metrics to.
+	Metrics *usermetric.Registry
+
 	// ControlKnobs are the set of control knobs to use.
 	// If nil, they're ignored and not updated.
 	ControlKnobs *controlknobs.Knobs

+ 9 - 1
wgengine/magicsock/magicsock_test.go

@@ -64,6 +64,7 @@ import (
 	"tailscale.com/util/cibuild"
 	"tailscale.com/util/racebuild"
 	"tailscale.com/util/set"
+	"tailscale.com/util/usermetric"
 	"tailscale.com/wgengine/filter"
 	"tailscale.com/wgengine/wgcfg"
 	"tailscale.com/wgengine/wgcfg/nmcfg"
@@ -156,6 +157,7 @@ type magicStack struct {
 	dev        *device.Device          // the wireguard-go Device that connects the previous things
 	wgLogger   *wglog.Logger           // wireguard-go log wrapper
 	netMon     *netmon.Monitor         // always non-nil
+	metrics    *usermetric.Registry
 }
 
 // newMagicStack builds and initializes an idle magicsock and
@@ -174,9 +176,11 @@ func newMagicStackWithKey(t testing.TB, logf logger.Logf, l nettype.PacketListen
 		t.Fatalf("netmon.New: %v", err)
 	}
 
+	var reg usermetric.Registry
 	epCh := make(chan []tailcfg.Endpoint, 100) // arbitrary
 	conn, err := NewConn(Options{
 		NetMon:                 netMon,
+		Metrics:                &reg,
 		Logf:                   logf,
 		DisablePortMapper:      true,
 		TestOnlyPacketListener: l,
@@ -193,7 +197,7 @@ func newMagicStackWithKey(t testing.TB, logf logger.Logf, l nettype.PacketListen
 	}
 
 	tun := tuntest.NewChannelTUN()
-	tsTun := tstun.Wrap(logf, tun.TUN())
+	tsTun := tstun.Wrap(logf, tun.TUN(), &reg)
 	tsTun.SetFilter(filter.NewAllowAllForTest(logf))
 	tsTun.Start()
 
@@ -219,6 +223,7 @@ func newMagicStackWithKey(t testing.TB, logf logger.Logf, l nettype.PacketListen
 		dev:        dev,
 		wgLogger:   wgLogger,
 		netMon:     netMon,
+		metrics:    &reg,
 	}
 }
 
@@ -397,6 +402,7 @@ func TestNewConn(t *testing.T) {
 		EndpointsFunc:     epFunc,
 		Logf:              t.Logf,
 		NetMon:            netMon,
+		Metrics:           new(usermetric.Registry),
 	})
 	if err != nil {
 		t.Fatal(err)
@@ -523,6 +529,7 @@ func TestDeviceStartStop(t *testing.T) {
 		EndpointsFunc: func(eps []tailcfg.Endpoint) {},
 		Logf:          t.Logf,
 		NetMon:        netMon,
+		Metrics:       new(usermetric.Registry),
 	})
 	if err != nil {
 		t.Fatal(err)
@@ -1275,6 +1282,7 @@ func newTestConn(t testing.TB) *Conn {
 	conn, err := NewConn(Options{
 		NetMon:                 netMon,
 		HealthTracker:          new(health.Tracker),
+		Metrics:                new(usermetric.Registry),
 		DisablePortMapper:      true,
 		Logf:                   t.Logf,
 		Port:                   port,

+ 2 - 0
wgengine/netstack/netstack_test.go

@@ -50,6 +50,7 @@ func TestInjectInboundLeak(t *testing.T) {
 		Dialer:        dialer,
 		SetSubsystem:  sys.Set,
 		HealthTracker: sys.HealthTracker(),
+		Metrics:       sys.UserMetricsRegistry(),
 	})
 	if err != nil {
 		t.Fatal(err)
@@ -107,6 +108,7 @@ func makeNetstack(tb testing.TB, config func(*Impl)) *Impl {
 		Dialer:        dialer,
 		SetSubsystem:  sys.Set,
 		HealthTracker: sys.HealthTracker(),
+		Metrics:       sys.UserMetricsRegistry(),
 	})
 	if err != nil {
 		tb.Fatal(err)

+ 9 - 2
wgengine/userspace.go

@@ -49,6 +49,7 @@ import (
 	"tailscale.com/util/mak"
 	"tailscale.com/util/set"
 	"tailscale.com/util/testenv"
+	"tailscale.com/util/usermetric"
 	"tailscale.com/version"
 	"tailscale.com/wgengine/capture"
 	"tailscale.com/wgengine/filter"
@@ -195,6 +196,9 @@ type Config struct {
 	// HealthTracker, if non-nil, is the health tracker to use.
 	HealthTracker *health.Tracker
 
+	// Metrics, if non-nil, is the usermetrics registry to use.
+	Metrics *usermetric.Registry
+
 	// Dialer is the dialer to use for outbound connections.
 	// If nil, a new Dialer is created.
 	Dialer *tsdial.Dialer
@@ -249,6 +253,8 @@ func NewFakeUserspaceEngine(logf logger.Logf, opts ...any) (Engine, error) {
 			conf.ControlKnobs = v
 		case *health.Tracker:
 			conf.HealthTracker = v
+		case *usermetric.Registry:
+			conf.Metrics = v
 		default:
 			return nil, fmt.Errorf("unknown option type %T", v)
 		}
@@ -289,9 +295,9 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
 
 	var tsTUNDev *tstun.Wrapper
 	if conf.IsTAP {
-		tsTUNDev = tstun.WrapTAP(logf, conf.Tun)
+		tsTUNDev = tstun.WrapTAP(logf, conf.Tun, conf.Metrics)
 	} else {
-		tsTUNDev = tstun.Wrap(logf, conf.Tun)
+		tsTUNDev = tstun.Wrap(logf, conf.Tun, conf.Metrics)
 	}
 	closePool.add(tsTUNDev)
 
@@ -387,6 +393,7 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
 		NoteRecvActivity: e.noteRecvActivity,
 		NetMon:           e.netMon,
 		HealthTracker:    e.health,
+		Metrics:          conf.Metrics,
 		ControlKnobs:     conf.ControlKnobs,
 		OnPortUpdate:     onPortUpdate,
 		PeerByKeyFunc:    e.PeerByKey,

+ 2 - 0
wgengine/userspace_ext_test.go

@@ -22,6 +22,7 @@ func TestIsNetstack(t *testing.T) {
 		wgengine.Config{
 			SetSubsystem:  sys.Set,
 			HealthTracker: sys.HealthTracker(),
+			Metrics:       sys.UserMetricsRegistry(),
 		},
 	)
 	if err != nil {
@@ -72,6 +73,7 @@ func TestIsNetstackRouter(t *testing.T) {
 			conf := tt.conf
 			conf.SetSubsystem = sys.Set
 			conf.HealthTracker = sys.HealthTracker()
+			conf.Metrics = sys.UserMetricsRegistry()
 			e, err := wgengine.NewUserspaceEngine(logger.Discard, conf)
 			if err != nil {
 				t.Fatal(err)

+ 7 - 3
wgengine/userspace_test.go

@@ -25,6 +25,7 @@ import (
 	"tailscale.com/types/key"
 	"tailscale.com/types/netmap"
 	"tailscale.com/types/opt"
+	"tailscale.com/util/usermetric"
 	"tailscale.com/wgengine/router"
 	"tailscale.com/wgengine/wgcfg"
 )
@@ -100,7 +101,8 @@ func nodeViews(v []*tailcfg.Node) []tailcfg.NodeView {
 
 func TestUserspaceEngineReconfig(t *testing.T) {
 	ht := new(health.Tracker)
-	e, err := NewFakeUserspaceEngine(t.Logf, 0, ht)
+	reg := new(usermetric.Registry)
+	e, err := NewFakeUserspaceEngine(t.Logf, 0, ht, reg)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -167,9 +169,10 @@ func TestUserspaceEnginePortReconfig(t *testing.T) {
 	// Keep making a wgengine until we find an unused port
 	var ue *userspaceEngine
 	ht := new(health.Tracker)
+	reg := new(usermetric.Registry)
 	for i := range 100 {
 		attempt := uint16(defaultPort + i)
-		e, err := NewFakeUserspaceEngine(t.Logf, attempt, &knobs, ht)
+		e, err := NewFakeUserspaceEngine(t.Logf, attempt, &knobs, ht, reg)
 		if err != nil {
 			t.Fatal(err)
 		}
@@ -249,7 +252,8 @@ func TestUserspaceEnginePeerMTUReconfig(t *testing.T) {
 	var knobs controlknobs.Knobs
 
 	ht := new(health.Tracker)
-	e, err := NewFakeUserspaceEngine(t.Logf, 0, &knobs, ht)
+	reg := new(usermetric.Registry)
+	e, err := NewFakeUserspaceEngine(t.Logf, 0, &knobs, ht, reg)
 	if err != nil {
 		t.Fatal(err)
 	}

+ 3 - 1
wgengine/watchdog_test.go

@@ -9,6 +9,7 @@ import (
 	"time"
 
 	"tailscale.com/health"
+	"tailscale.com/util/usermetric"
 )
 
 func TestWatchdog(t *testing.T) {
@@ -24,7 +25,8 @@ func TestWatchdog(t *testing.T) {
 	t.Run("default watchdog does not fire", func(t *testing.T) {
 		t.Parallel()
 		ht := new(health.Tracker)
-		e, err := NewFakeUserspaceEngine(t.Logf, 0, ht)
+		reg := new(usermetric.Registry)
+		e, err := NewFakeUserspaceEngine(t.Logf, 0, ht, reg)
 		if err != nil {
 			t.Fatal(err)
 		}