Browse Source

usermetric: add initial user-facing metrics

This commit adds a new usermetric package and wires
up metrics across the tailscale client.

Updates tailscale/corp#22075

Co-authored-by: Anton Tolchanov <[email protected]>
Signed-off-by: Kristoffer Dalby <[email protected]>
Kristoffer Dalby 1 year ago
parent
commit
a2c42d3cd4

+ 6 - 0
client/tailscale/localclient.go

@@ -353,6 +353,12 @@ func (lc *LocalClient) DaemonMetrics(ctx context.Context) ([]byte, error) {
 	return lc.get200(ctx, "/localapi/v0/metrics")
 }
 
+// UserMetrics returns the user metrics in
+// the Prometheus text exposition format.
+func (lc *LocalClient) UserMetrics(ctx context.Context) ([]byte, error) {
+	return lc.get200(ctx, "/localapi/v0/usermetrics")
+}
+
 // IncrementCounter increments the value of a Tailscale daemon's counter
 // metric by the given delta. If the metric has yet to exist, a new counter
 // metric is created and initialized to delta.

+ 6 - 0
client/web/web.go

@@ -283,6 +283,12 @@ func (s *Server) serve(w http.ResponseWriter, r *http.Request) {
 		}
 	}
 
+	if r.URL.Path == "/metrics" {
+		r.URL.Path = "/api/local/v0/usermetrics"
+		s.proxyRequestToLocalAPI(w, r)
+		return
+	}
+
 	if strings.HasPrefix(r.URL.Path, "/api/") {
 		switch {
 		case r.URL.Path == "/api/auth" && r.Method == httpm.GET:

+ 1 - 0
cmd/derper/depaware.txt

@@ -163,6 +163,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa
         tailscale.com/util/syspolicy                                 from tailscale.com/ipn
         tailscale.com/util/syspolicy/internal                        from tailscale.com/util/syspolicy/setting
         tailscale.com/util/syspolicy/setting                         from tailscale.com/util/syspolicy
+        tailscale.com/util/usermetric                                from tailscale.com/health
         tailscale.com/util/vizerror                                  from tailscale.com/tailcfg+
    W 💣 tailscale.com/util/winutil                                   from tailscale.com/hostinfo+
    W 💣 tailscale.com/util/winutil/winenv                            from tailscale.com/hostinfo+

+ 2 - 0
cmd/k8s-operator/depaware.txt

@@ -754,6 +754,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
         tailscale.com/tstime                                         from tailscale.com/cmd/k8s-operator+
         tailscale.com/tstime/mono                                    from tailscale.com/net/tstun+
         tailscale.com/tstime/rate                                    from tailscale.com/derp+
+        tailscale.com/tsweb/varz                                     from tailscale.com/util/usermetric
         tailscale.com/types/appctype                                 from tailscale.com/ipn/ipnlocal
         tailscale.com/types/dnstype                                  from tailscale.com/ipn/ipnlocal+
         tailscale.com/types/empty                                    from tailscale.com/ipn+
@@ -812,6 +813,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
         tailscale.com/util/testenv                                   from tailscale.com/control/controlclient+
         tailscale.com/util/truncate                                  from tailscale.com/logtail
         tailscale.com/util/uniq                                      from tailscale.com/ipn/ipnlocal+
+        tailscale.com/util/usermetric                                from tailscale.com/health+
         tailscale.com/util/vizerror                                  from tailscale.com/tailcfg+
      💣 tailscale.com/util/winutil                                   from tailscale.com/clientupdate+
    W 💣 tailscale.com/util/winutil/authenticode                      from tailscale.com/clientupdate+

+ 3 - 1
cmd/tailscale/depaware.txt

@@ -99,7 +99,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
         tailscale.com/ipn                                            from tailscale.com/client/tailscale+
         tailscale.com/ipn/ipnstate                                   from tailscale.com/client/tailscale+
         tailscale.com/licenses                                       from tailscale.com/client/web+
-        tailscale.com/metrics                                        from tailscale.com/derp
+        tailscale.com/metrics                                        from tailscale.com/derp+
         tailscale.com/net/captivedetection                           from tailscale.com/net/netcheck
         tailscale.com/net/dns/recursive                              from tailscale.com/net/dnsfallback
         tailscale.com/net/dnscache                                   from tailscale.com/control/controlhttp+
@@ -132,6 +132,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
         tailscale.com/tstime                                         from tailscale.com/control/controlhttp+
         tailscale.com/tstime/mono                                    from tailscale.com/tstime/rate
         tailscale.com/tstime/rate                                    from tailscale.com/cmd/tailscale/cli+
+        tailscale.com/tsweb/varz                                     from tailscale.com/util/usermetric
         tailscale.com/types/dnstype                                  from tailscale.com/tailcfg
         tailscale.com/types/empty                                    from tailscale.com/ipn
         tailscale.com/types/ipproto                                  from tailscale.com/net/flowtrack+
@@ -173,6 +174,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
         tailscale.com/util/syspolicy/setting                         from tailscale.com/util/syspolicy
         tailscale.com/util/testenv                                   from tailscale.com/cmd/tailscale/cli
         tailscale.com/util/truncate                                  from tailscale.com/cmd/tailscale/cli
+        tailscale.com/util/usermetric                                from tailscale.com/health
         tailscale.com/util/vizerror                                  from tailscale.com/tailcfg+
      💣 tailscale.com/util/winutil                                   from tailscale.com/clientupdate+
    W 💣 tailscale.com/util/winutil/authenticode                      from tailscale.com/clientupdate

+ 2 - 1
cmd/tailscaled/depaware.txt

@@ -343,7 +343,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
         tailscale.com/tstime                                         from tailscale.com/control/controlclient+
         tailscale.com/tstime/mono                                    from tailscale.com/net/tstun+
         tailscale.com/tstime/rate                                    from tailscale.com/derp+
-        tailscale.com/tsweb/varz                                     from tailscale.com/cmd/tailscaled
+        tailscale.com/tsweb/varz                                     from tailscale.com/cmd/tailscaled+
         tailscale.com/types/appctype                                 from tailscale.com/ipn/ipnlocal
         tailscale.com/types/dnstype                                  from tailscale.com/ipn/ipnlocal+
         tailscale.com/types/empty                                    from tailscale.com/ipn+
@@ -403,6 +403,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
         tailscale.com/util/testenv                                   from tailscale.com/ipn/ipnlocal+
         tailscale.com/util/truncate                                  from tailscale.com/logtail
         tailscale.com/util/uniq                                      from tailscale.com/ipn/ipnlocal+
+        tailscale.com/util/usermetric                                from tailscale.com/health+
         tailscale.com/util/vizerror                                  from tailscale.com/tailcfg+
      💣 tailscale.com/util/winutil                                   from tailscale.com/clientupdate+
    W 💣 tailscale.com/util/winutil/authenticode                      from tailscale.com/clientupdate+

+ 25 - 0
health/health.go

@@ -8,6 +8,7 @@ package health
 import (
 	"context"
 	"errors"
+	"expvar"
 	"fmt"
 	"maps"
 	"net/http"
@@ -25,6 +26,7 @@ import (
 	"tailscale.com/util/mak"
 	"tailscale.com/util/multierr"
 	"tailscale.com/util/set"
+	"tailscale.com/util/usermetric"
 	"tailscale.com/version"
 )
 
@@ -1202,6 +1204,18 @@ func (t *Tracker) ReceiveFuncStats(which ReceiveFunc) *ReceiveFuncStats {
 }
 
 func (t *Tracker) doOnceInit() {
+	metricHealthMessage.Set(metricHealthMessageLabel{
+		Type: "warning",
+	}, expvar.Func(func() any {
+		if t.nil() {
+			return 0
+		}
+		t.mu.Lock()
+		defer t.mu.Unlock()
+		t.updateBuiltinWarnablesLocked()
+		return int64(len(t.stringsLocked()))
+	}))
+
 	for i := range t.MagicSockReceiveFuncs {
 		f := &t.MagicSockReceiveFuncs[i]
 		f.name = (ReceiveFunc(i)).String()
@@ -1232,3 +1246,14 @@ func (t *Tracker) checkReceiveFuncsLocked() {
 		f.missing = true
 	}
 }
+
+type metricHealthMessageLabel struct {
+	// TODO: break down by warnable.severity as well?
+	Type string
+}
+
+var metricHealthMessage = usermetric.NewMultiLabelMap[metricHealthMessageLabel](
+	"tailscaled_health_messages",
+	"gauge",
+	"Number of health messages broken down by type.",
+)

+ 13 - 0
ipn/ipnlocal/local.go

@@ -106,6 +106,7 @@ import (
 	"tailscale.com/util/systemd"
 	"tailscale.com/util/testenv"
 	"tailscale.com/util/uniq"
+	"tailscale.com/util/usermetric"
 	"tailscale.com/version"
 	"tailscale.com/version/distro"
 	"tailscale.com/wgengine"
@@ -117,6 +118,9 @@ import (
 	"tailscale.com/wgengine/wgcfg/nmcfg"
 )
 
+var metricAdvertisedRoutes = usermetric.NewGauge(
+	"tailscaled_advertised_routes", "Number of advertised network routes (e.g. by a subnet router)")
+
 var controlDebugFlags = getControlDebugFlags()
 
 func getControlDebugFlags() []string {
@@ -4646,6 +4650,15 @@ func (b *LocalBackend) applyPrefsToHostinfoLocked(hi *tailcfg.Hostinfo, prefs ip
 	hi.ShieldsUp = prefs.ShieldsUp()
 	hi.AllowsUpdate = envknob.AllowsRemoteUpdate() || prefs.AutoUpdate().Apply.EqualBool(true)
 
+	// count routes without exit node routes
+	var routes int64
+	for _, route := range hi.RoutableIPs {
+		if route.Bits() != 0 {
+			routes++
+		}
+	}
+	metricAdvertisedRoutes.Set(float64(routes))
+
 	var sshHostKeys []string
 	if prefs.RunSSH() && envknob.CanSSHD() {
 		// TODO(bradfitz): this is called with b.mu held. Not ideal.

+ 15 - 0
ipn/localapi/localapi.go

@@ -63,6 +63,8 @@ import (
 	"tailscale.com/util/osuser"
 	"tailscale.com/util/progresstracking"
 	"tailscale.com/util/rands"
+	"tailscale.com/util/testenv"
+	"tailscale.com/util/usermetric"
 	"tailscale.com/version"
 	"tailscale.com/wgengine/magicsock"
 )
@@ -141,6 +143,7 @@ var handler = map[string]localAPIHandler{
 	"update/install":              (*Handler).serveUpdateInstall,
 	"update/progress":             (*Handler).serveUpdateProgress,
 	"upload-client-metrics":       (*Handler).serveUploadClientMetrics,
+	"usermetrics":                 (*Handler).serveUserMetrics,
 	"watch-ipn-bus":               (*Handler).serveWatchIPNBus,
 	"whois":                       (*Handler).serveWhoIs,
 }
@@ -571,6 +574,18 @@ func (h *Handler) serveMetrics(w http.ResponseWriter, r *http.Request) {
 	clientmetric.WritePrometheusExpositionFormat(w)
 }
 
+// TODO(kradalby): Remove this once we have landed on a final set of
+// metrics to export to clients and consider the metrics stable.
+var debugUsermetricsEndpoint = envknob.RegisterBool("TS_DEBUG_USER_METRICS")
+
+func (h *Handler) serveUserMetrics(w http.ResponseWriter, r *http.Request) {
+	if !testenv.InTest() && !debugUsermetricsEndpoint() {
+		http.Error(w, "usermetrics debug flag not enabled", http.StatusForbidden)
+		return
+	}
+	usermetric.Handler(w, r)
+}
+
 func (h *Handler) serveDebug(w http.ResponseWriter, r *http.Request) {
 	if !h.PermitWrite {
 		http.Error(w, "debug access denied", http.StatusForbidden)

+ 27 - 5
metrics/multilabelmap.go

@@ -39,7 +39,7 @@ func NewMultiLabelMap[T comparable](name string, promType, helpText string) *Mul
 		Help: helpText,
 	}
 	var zero T
-	_ = labelString(zero) // panic early if T is invalid
+	_ = LabelString(zero) // panic early if T is invalid
 	expvar.Publish(name, m)
 	return m
 }
@@ -50,8 +50,10 @@ type labelsAndValue[T comparable] struct {
 	val    expvar.Var
 }
 
-// labelString returns a Prometheus-formatted label string for the given key.
-func labelString(k any) string {
+// LabelString returns a Prometheus-formatted label string for the given key.
+// k must be a struct type with scalar fields, as required by MultiLabelMap;
+// LabelString panics if k is not a struct.
+func LabelString(k any) string {
 	rv := reflect.ValueOf(k)
 	t := rv.Type()
 	if t.Kind() != reflect.Struct {
@@ -150,7 +152,7 @@ func (v *MultiLabelMap[T]) Init() *MultiLabelMap[T] {
 //
 // v.mu must be held.
 func (v *MultiLabelMap[T]) addKeyLocked(key T, val expvar.Var) {
-	ls := labelString(key)
+	ls := LabelString(key)
 
 	ent := labelsAndValue[T]{key, ls, val}
 	// Using insertion sort to place key into the already-sorted v.keys.
@@ -209,6 +211,26 @@ func (v *MultiLabelMap[T]) Set(key T, val expvar.Var) {
 	v.m.Store(key, val)
 }
 
+// SetInt sets the *[expvar.Int] value stored under the given map key to val,
+// creating it if it doesn't exist yet.
+// It does nothing if key exists but is of the wrong type.
+func (v *MultiLabelMap[T]) SetInt(key T, val int64) {
+	// Set to Int; ignore otherwise.
+	if iv, ok := v.getOrFill(key, newInt).(*expvar.Int); ok {
+		iv.Set(val)
+	}
+}
+
+// SetFloat sets the *[expvar.Float] value stored under the given map key to val,
+// creating it if it doesn't exist yet.
+// It does nothing if key exists but is of the wrong type.
+func (v *MultiLabelMap[T]) SetFloat(key T, val float64) {
+	// Set to Float; ignore otherwise.
+	if iv, ok := v.getOrFill(key, newFloat).(*expvar.Float); ok {
+		iv.Set(val)
+	}
+}
+
 // Add adds delta to the *[expvar.Int] value stored under the given map key,
 // creating it if it doesn't exist yet.
 // It does nothing if key exists but is of the wrong type.
@@ -234,7 +256,7 @@ func (v *MultiLabelMap[T]) AddFloat(key T, delta float64) {
 // This is not optimized for highly concurrent usage; it's presumed to only be
 // used rarely, at startup.
 func (v *MultiLabelMap[T]) Delete(key T) {
-	ls := labelString(key)
+	ls := LabelString(key)
 
 	v.mu.Lock()
 	defer v.mu.Unlock()

+ 12 - 2
metrics/multilabelmap_test.go

@@ -5,6 +5,7 @@ package metrics
 
 import (
 	"bytes"
+	"expvar"
 	"fmt"
 	"io"
 	"testing"
@@ -22,6 +23,12 @@ func TestMultilabelMap(t *testing.T) {
 	m.Add(L2{"b", "b"}, 3)
 	m.Add(L2{"a", "a"}, 1)
 
+	m.SetFloat(L2{"sf", "sf"}, 3.5)
+	m.SetFloat(L2{"sf", "sf"}, 5.5)
+	m.Set(L2{"sfunc", "sfunc"}, expvar.Func(func() any { return 3 }))
+	m.SetInt(L2{"si", "si"}, 3)
+	m.SetInt(L2{"si", "si"}, 5)
+
 	cur := func() string {
 		var buf bytes.Buffer
 		m.Do(func(kv KeyValue[L2]) {
@@ -33,7 +40,7 @@ func TestMultilabelMap(t *testing.T) {
 		return buf.String()
 	}
 
-	if g, w := cur(), "a/a=1,a/b=2,b/b=3,b/c=4"; g != w {
+	if g, w := cur(), "a/a=1,a/b=2,b/b=3,b/c=4,sf/sf=5.5,sfunc/sfunc=3,si/si=5"; g != w {
 		t.Errorf("got %q; want %q", g, w)
 	}
 
@@ -43,6 +50,9 @@ func TestMultilabelMap(t *testing.T) {
 metricname{foo="a",bar="b"} 2
 metricname{foo="b",bar="b"} 3
 metricname{foo="b",bar="c"} 4
+metricname{foo="sf",bar="sf"} 5.5
+metricname{foo="sfunc",bar="sfunc"} 3
+metricname{foo="si",bar="si"} 5
 `
 	if got := buf.String(); got != want {
 		t.Errorf("promtheus output = %q; want %q", got, want)
@@ -50,7 +60,7 @@ metricname{foo="b",bar="c"} 4
 
 	m.Delete(L2{"b", "b"})
 
-	if g, w := cur(), "a/a=1,a/b=2,b/c=4"; g != w {
+	if g, w := cur(), "a/a=1,a/b=2,b/c=4,sf/sf=5.5,sfunc/sfunc=3,si/si=5"; g != w {
 		t.Errorf("got %q; want %q", g, w)
 	}
 

+ 39 - 1
net/tstun/wrap.go

@@ -34,6 +34,7 @@ import (
 	"tailscale.com/types/key"
 	"tailscale.com/types/logger"
 	"tailscale.com/util/clientmetric"
+	"tailscale.com/util/usermetric"
 	"tailscale.com/wgengine/capture"
 	"tailscale.com/wgengine/filter"
 	"tailscale.com/wgengine/netstack/gro"
@@ -868,6 +869,9 @@ func (t *Wrapper) filterPacketOutboundToWireGuard(p *packet.Parsed, pc *peerConf
 
 	if filt.RunOut(p, t.filterFlags) != filter.Accept {
 		metricPacketOutDropFilter.Add(1)
+		metricOutboundDroppedPacketsTotal.Add(dropPacketLabel{
+			Reason: DropReasonACL,
+		}, 1)
 		return filter.Drop
 	}
 
@@ -876,7 +880,6 @@ func (t *Wrapper) filterPacketOutboundToWireGuard(p *packet.Parsed, pc *peerConf
 			return res
 		}
 	}
-
 	return filter.Accept
 }
 
@@ -1133,6 +1136,9 @@ func (t *Wrapper) filterPacketInboundFromWireGuard(p *packet.Parsed, captHook ca
 
 	if outcome != filter.Accept {
 		metricPacketInDropFilter.Add(1)
+		metricInboundDroppedPacketsTotal.Add(dropPacketLabel{
+			Reason: DropReasonACL,
+		}, 1)
 
 		// Tell them, via TSMP, we're dropping them due to the ACL.
 		// Their host networking stack can translate this into ICMP
@@ -1210,6 +1216,11 @@ func (t *Wrapper) Write(buffs [][]byte, offset int) (int, error) {
 	if len(buffs) > 0 {
 		t.noteActivity()
 		_, err := t.tdevWrite(buffs, offset)
+		if err != nil {
+			metricInboundDroppedPacketsTotal.Add(dropPacketLabel{
+				Reason: DropReasonError,
+			}, int64(len(buffs)))
+		}
 		return len(buffs), err
 	}
 	return 0, nil
@@ -1449,6 +1460,33 @@ var (
 	metricPacketOutDropSelfDisco = clientmetric.NewCounter("tstun_out_to_wg_drop_self_disco")
 )
 
+type DropReason string
+
+const (
+	DropReasonACL   DropReason = "acl"
+	DropReasonError DropReason = "error"
+)
+
+type dropPacketLabel struct {
+	// Reason indicates what we have done with the packet, and has the following values:
+	// - acl (rejected packets because of ACL)
+	// - error (rejected packets because of an error)
+	Reason DropReason
+}
+
+var (
+	metricInboundDroppedPacketsTotal = usermetric.NewMultiLabelMap[dropPacketLabel](
+		"tailscaled_inbound_dropped_packets_total",
+		"counter",
+		"Counts the number of dropped packets received by the node from other peers",
+	)
+	metricOutboundDroppedPacketsTotal = usermetric.NewMultiLabelMap[dropPacketLabel](
+		"tailscaled_outbound_dropped_packets_total",
+		"counter",
+		"Counts the number of packets dropped while being sent to other peers",
+	)
+)
+
 func (t *Wrapper) InstallCaptureHook(cb capture.Callback) {
 	t.captureHook.Store(cb)
 }

+ 22 - 0
net/tstun/wrap_test.go

@@ -315,6 +315,12 @@ func mustHexDecode(s string) []byte {
 }
 
 func TestFilter(t *testing.T) {
+	// Reset the metrics before test. These are global
+	// so the different tests might have affected them.
+	metricInboundDroppedPacketsTotal.SetInt(dropPacketLabel{Reason: DropReasonACL}, 0)
+	metricInboundDroppedPacketsTotal.SetInt(dropPacketLabel{Reason: DropReasonError}, 0)
+	metricOutboundDroppedPacketsTotal.SetInt(dropPacketLabel{Reason: DropReasonACL}, 0)
+
 	chtun, tun := newChannelTUN(t.Logf, true)
 	defer tun.Close()
 
@@ -429,6 +435,22 @@ func TestFilter(t *testing.T) {
 			}
 		})
 	}
+
+	inACL := metricInboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonACL})
+	inError := metricInboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonError})
+	outACL := metricOutboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonACL})
+
+	assertMetricPackets(t, "inACL", "3", inACL.String())
+	assertMetricPackets(t, "inError", "0", inError.String())
+	assertMetricPackets(t, "outACL", "1", outACL.String())
+
+}
+
+func assertMetricPackets(t *testing.T, metricName, want, got string) {
+	t.Helper()
+	if want != got {
+		t.Errorf("%s got unexpected value, got %s, want %s", metricName, got, want)
+	}
 }
 
 func TestAllocs(t *testing.T) {

+ 65 - 0
tsnet/tsnet_test.go

@@ -31,8 +31,10 @@ import (
 	"testing"
 	"time"
 
+	"github.com/google/go-cmp/cmp"
 	"golang.org/x/net/proxy"
 	"tailscale.com/cmd/testwrapper/flakytest"
+	"tailscale.com/health"
 	"tailscale.com/ipn"
 	"tailscale.com/ipn/store/mem"
 	"tailscale.com/net/netns"
@@ -815,3 +817,66 @@ func TestUDPConn(t *testing.T) {
 		t.Errorf("got %q, want world", got)
 	}
 }
+
+func TestUserMetrics(t *testing.T) {
+	tstest.ResourceCheck(t)
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	// testWarnable is a Warnable that is used within this package for testing purposes only.
+	var testWarnable = health.Register(&health.Warnable{
+		Code:     "test-warnable-tsnet",
+		Title:    "Test warnable",
+		Severity: health.SeverityLow,
+		Text: func(args health.Args) string {
+			return args[health.ArgError]
+		},
+	})
+
+	controlURL, c := startControl(t)
+	s1, _, s1PubKey := startServer(t, ctx, controlURL, "s1")
+
+	s1.lb.EditPrefs(&ipn.MaskedPrefs{
+		Prefs: ipn.Prefs{
+			AdvertiseRoutes: []netip.Prefix{
+				netip.MustParsePrefix("192.0.2.0/24"),
+				netip.MustParsePrefix("192.0.3.0/24"),
+			},
+		},
+		AdvertiseRoutesSet: true,
+	})
+	c.SetSubnetRoutes(s1PubKey, []netip.Prefix{netip.MustParsePrefix("192.0.2.0/24")})
+
+	lc1, err := s1.LocalClient()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	ht := s1.lb.HealthTracker()
+	ht.SetUnhealthy(testWarnable, health.Args{"Text": "Hello world 1"})
+
+	metrics1, err := lc1.UserMetrics(ctx)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Note that this test will check for two warnings because the health
+	// tracker will have two warnings: one from the testWarnable, added in
+	// this test, and one because we are running the dev/unstable version
+	// of tailscale.
+	want := `# TYPE tailscaled_advertised_routes gauge
+# HELP tailscaled_advertised_routes Number of advertised network routes (e.g. by a subnet router)
+tailscaled_advertised_routes 2
+# TYPE tailscaled_health_messages gauge
+# HELP tailscaled_health_messages Number of health messages broken down by type.
+tailscaled_health_messages{type="warning"} 2
+# TYPE tailscaled_inbound_dropped_packets_total counter
+# HELP tailscaled_inbound_dropped_packets_total Counts the number of dropped packets received by the node from other peers
+# TYPE tailscaled_outbound_dropped_packets_total counter
+# HELP tailscaled_outbound_dropped_packets_total Counts the number of packets dropped while being sent to other peers
+`
+
+	if diff := cmp.Diff(want, string(metrics1)); diff != "" {
+		t.Fatalf("unexpected metrics (-want +got):\n%s", diff)
+	}
+}

+ 21 - 12
tsweb/varz/varz.go

@@ -273,19 +273,28 @@ type sortedKVs struct {
 //
 // This will evolve over time, or perhaps be replaced.
 func Handler(w http.ResponseWriter, r *http.Request) {
-	w.Header().Set("Content-Type", "text/plain;version=0.0.4;charset=utf-8")
+	ExpvarDoHandler(expvarDo)(w, r)
+}
 
-	s := sortedKVsPool.Get().(*sortedKVs)
-	defer sortedKVsPool.Put(s)
-	s.kvs = s.kvs[:0]
-	expvarDo(func(kv expvar.KeyValue) {
-		s.kvs = append(s.kvs, sortedKV{kv, removeTypePrefixes(kv.Key)})
-	})
-	sort.Slice(s.kvs, func(i, j int) bool {
-		return s.kvs[i].sortKey < s.kvs[j].sortKey
-	})
-	for _, e := range s.kvs {
-		writePromExpVar(w, "", e.KeyValue)
+// ExpvarDoHandler returns a handler like Handler, but takes an expvar.Do-style
+// func to allow the use of alternative containers of metrics, other than the
+// global expvar.Map.
+func ExpvarDoHandler(expvarDoFunc func(f func(expvar.KeyValue))) func(http.ResponseWriter, *http.Request) {
+	return func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/plain;version=0.0.4;charset=utf-8")
+
+		s := sortedKVsPool.Get().(*sortedKVs)
+		defer sortedKVsPool.Put(s)
+		s.kvs = s.kvs[:0]
+		expvarDoFunc(func(kv expvar.KeyValue) {
+			s.kvs = append(s.kvs, sortedKV{kv, removeTypePrefixes(kv.Key)})
+		})
+		sort.Slice(s.kvs, func(i, j int) bool {
+			return s.kvs[i].sortKey < s.kvs[j].sortKey
+		})
+		for _, e := range s.kvs {
+			writePromExpVar(w, "", e.KeyValue)
+		}
 	}
 }
 

+ 84 - 0
util/usermetric/usermetric.go

@@ -0,0 +1,84 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+// Package usermetric provides a container and handler
+// for user-facing metrics.
+package usermetric
+
+import (
+	"expvar"
+	"fmt"
+	"io"
+	"net/http"
+
+	"tailscale.com/metrics"
+	"tailscale.com/tsweb/varz"
+)
+
+var vars expvar.Map
+
+// NewMultiLabelMap creates and registers a new
+// MultiLabelMap[T] variable with the given name and returns it.
+// The variable is registered with the user-facing metrics package.
+//
+// Note that usermetric does not protect against duplicate
+// metric names. It is the caller's responsibility to ensure that
+// the name is unique.
+func NewMultiLabelMap[T comparable](name string, promType, helpText string) *metrics.MultiLabelMap[T] {
+	m := &metrics.MultiLabelMap[T]{
+		Type: promType,
+		Help: helpText,
+	}
+	var zero T
+	_ = metrics.LabelString(zero) // panic early if T is invalid
+	vars.Set(name, m)
+	return m
+}
+
+// Gauge is a gauge metric with no labels.
+type Gauge struct {
+	m    *expvar.Float
+	help string
+}
+
+// NewGauge creates and registers a new gauge metric with the given name and help text.
+func NewGauge(name, help string) *Gauge {
+	g := &Gauge{&expvar.Float{}, help}
+	vars.Set(name, g)
+	return g
+}
+
+// Set sets the gauge to the given value.
+func (g *Gauge) Set(v float64) {
+	g.m.Set(v)
+}
+
+// String returns the string of the underlying expvar.Float.
+// This satisfies the expvar.Var interface.
+func (g *Gauge) String() string {
+	return g.m.String()
+}
+
+// WritePrometheus writes the gauge metric in Prometheus format to the given writer.
+// This satisfies the varz.PrometheusWriter interface.
+func (g *Gauge) WritePrometheus(w io.Writer, name string) {
+	io.WriteString(w, "# TYPE ")
+	io.WriteString(w, name)
+	io.WriteString(w, " gauge\n")
+	if g.help != "" {
+		io.WriteString(w, "# HELP ")
+		io.WriteString(w, name)
+		io.WriteString(w, " ")
+		io.WriteString(w, g.help)
+		io.WriteString(w, "\n")
+	}
+
+	io.WriteString(w, name)
+	fmt.Fprintf(w, " %v\n", g.m.Value())
+}
+
+// Handler is an HTTP handler that serves the user-facing expvars contained
+// in this package, using varz.ExpvarDoHandler.
+func Handler(w http.ResponseWriter, r *http.Request) {
+	varz.ExpvarDoHandler(vars.Do)(w, r)
+}

+ 25 - 0
util/usermetric/usermetric_test.go

@@ -0,0 +1,25 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+package usermetric
+
+import (
+	"bytes"
+	"testing"
+)
+
+func TestGauge(t *testing.T) {
+	g := NewGauge("test_gauge", "This is a test gauge")
+	g.Set(15)
+
+	var buf bytes.Buffer
+	g.WritePrometheus(&buf, "test_gauge")
+	const want = `# TYPE test_gauge gauge
+# HELP test_gauge This is a test gauge
+test_gauge 15
+`
+	if got := buf.String(); got != want {
+		t.Errorf("got %q; want %q", got, want)
+	}
+
+}