Selaa lähdekoodia

health, ipn/ipnlocal: add metrics for various client events (#15828)

updates tailscale/corp#28092

Adds metrics for various client events:
* Enabling an exit node
* Enabling a mullvad exit node
* Enabling a preferred exit node
* Setting WantRunning to true/false
* Requesting a bug report ID
* Profile counts
* Profile deletions
* Captive portal detection

Signed-off-by: Jonathan Nobels <[email protected]>
Jonathan Nobels 10 kuukautta sitten
vanhempi
sitoutus
7d6d2b4c50

+ 12 - 0
health/health.go

@@ -362,6 +362,18 @@ func (t *Tracker) SetMetricsRegistry(reg *usermetric.Registry) {
 	}))
 	}))
 }
 }
 
 
+// IsUnhealthy reports whether the current state is unhealthy because the given
+// warnable is set.
+func (t *Tracker) IsUnhealthy(w *Warnable) bool {
+	if t.nil() {
+		return false
+	}
+	t.mu.Lock()
+	defer t.mu.Unlock()
+	_, exists := t.warnableVal[w]
+	return exists
+}
+
 // SetUnhealthy sets a warningState for the given Warnable with the provided Args, and should be
 // SetUnhealthy sets a warningState for the given Warnable with the provided Args, and should be
 // called when a Warnable becomes unhealthy, or its unhealthy status needs to be updated.
 // called when a Warnable becomes unhealthy, or its unhealthy status needs to be updated.
 // SetUnhealthy takes ownership of args. The args can be nil if no additional information is
 // SetUnhealthy takes ownership of args. The args can be nil if no additional information is

+ 22 - 0
ipn/ipnlocal/local.go

@@ -166,6 +166,8 @@ type watchSession struct {
 	cancel    context.CancelFunc // to shut down the session
 	cancel    context.CancelFunc // to shut down the session
 }
 }
 
 
+var metricCaptivePortalDetected = clientmetric.NewCounter("captiveportal_detected")
+
 // LocalBackend is the glue between the major pieces of the Tailscale
 // LocalBackend is the glue between the major pieces of the Tailscale
 // network software: the cloud control plane (via controlclient), the
 // network software: the cloud control plane (via controlclient), the
 // network data plane (via wgengine), and the user-facing UIs and CLIs
 // network data plane (via wgengine), and the user-facing UIs and CLIs
@@ -2764,6 +2766,9 @@ func (b *LocalBackend) performCaptiveDetection() {
 	b.mu.Unlock()
 	b.mu.Unlock()
 	found := d.Detect(ctx, netMon, dm, preferredDERP)
 	found := d.Detect(ctx, netMon, dm, preferredDERP)
 	if found {
 	if found {
+		if !b.health.IsUnhealthy(captivePortalWarnable) {
+			metricCaptivePortalDetected.Add(1)
+		}
 		b.health.SetUnhealthy(captivePortalWarnable, health.Args{})
 		b.health.SetUnhealthy(captivePortalWarnable, health.Args{})
 	} else {
 	} else {
 		b.health.SetHealthy(captivePortalWarnable)
 		b.health.SetHealthy(captivePortalWarnable)
@@ -4379,9 +4384,11 @@ func (b *LocalBackend) editPrefsLockedOnEntry(mp *ipn.MaskedPrefs, unlock unlock
 		b.egg = true
 		b.egg = true
 		b.goTracker.Go(b.doSetHostinfoFilterServices)
 		b.goTracker.Go(b.doSetHostinfoFilterServices)
 	}
 	}
+
 	p0 := b.pm.CurrentPrefs()
 	p0 := b.pm.CurrentPrefs()
 	p1 := b.pm.CurrentPrefs().AsStruct()
 	p1 := b.pm.CurrentPrefs().AsStruct()
 	p1.ApplyEdits(mp)
 	p1.ApplyEdits(mp)
+
 	if err := b.checkPrefsLocked(p1); err != nil {
 	if err := b.checkPrefsLocked(p1); err != nil {
 		b.logf("EditPrefs check error: %v", err)
 		b.logf("EditPrefs check error: %v", err)
 		return ipn.PrefsView{}, err
 		return ipn.PrefsView{}, err
@@ -4393,9 +4400,23 @@ func (b *LocalBackend) editPrefsLockedOnEntry(mp *ipn.MaskedPrefs, unlock unlock
 	if p1.View().Equals(p0) {
 	if p1.View().Equals(p0) {
 		return stripKeysFromPrefs(p0), nil
 		return stripKeysFromPrefs(p0), nil
 	}
 	}
+
 	b.logf("EditPrefs: %v", mp.Pretty())
 	b.logf("EditPrefs: %v", mp.Pretty())
 	newPrefs := b.setPrefsLockedOnEntry(p1, unlock)
 	newPrefs := b.setPrefsLockedOnEntry(p1, unlock)
 
 
+	// This is recorded here in the EditPrefs path, not the setPrefs path on purpose.
+	// recordForEdit records metrics related to edits and changes, not the final state.
+	// If, in the future, we want to record gauge-metrics related to the state of prefs,
+	// that should be done in the setPrefs path.
+	e := prefsMetricsEditEvent{
+		change:                mp,
+		pNew:                  p1.View(),
+		pOld:                  p0,
+		node:                  b.currentNode(),
+		lastSuggestedExitNode: b.lastSuggestedExitNode,
+	}
+	e.record()
+
 	// Note: don't perform any actions for the new prefs here. Not
 	// Note: don't perform any actions for the new prefs here. Not
 	// every prefs change goes through EditPrefs. Put your actions
 	// every prefs change goes through EditPrefs. Put your actions
 	// in setPrefsLocksOnEntry instead.
 	// in setPrefsLocksOnEntry instead.
@@ -4467,6 +4488,7 @@ func (b *LocalBackend) setPrefsLockedOnEntry(newp *ipn.Prefs, unlock unlockOnce)
 	applySysPolicy(newp, b.lastSuggestedExitNode, b.overrideAlwaysOn)
 	applySysPolicy(newp, b.lastSuggestedExitNode, b.overrideAlwaysOn)
 	// setExitNodeID does likewise. No-op if no exit node resolution is needed.
 	// setExitNodeID does likewise. No-op if no exit node resolution is needed.
 	setExitNodeID(newp, netMap)
 	setExitNodeID(newp, netMap)
+
 	// We do this to avoid holding the lock while doing everything else.
 	// We do this to avoid holding the lock while doing everything else.
 
 
 	oldHi := b.hostinfo
 	oldHi := b.hostinfo

+ 11 - 0
ipn/ipnlocal/node_backend.go

@@ -172,6 +172,17 @@ func (nb *nodeBackend) PeerByID(id tailcfg.NodeID) (_ tailcfg.NodeView, ok bool)
 	return n, ok
 	return n, ok
 }
 }
 
 
+func (nb *nodeBackend) PeerByStableID(id tailcfg.StableNodeID) (_ tailcfg.NodeView, ok bool) {
+	nb.mu.Lock()
+	defer nb.mu.Unlock()
+	for _, n := range nb.peers {
+		if n.StableID() == id {
+			return n, true
+		}
+	}
+	return tailcfg.NodeView{}, false
+}
+
 func (nb *nodeBackend) UserByID(id tailcfg.UserID) (_ tailcfg.UserProfileView, ok bool) {
 func (nb *nodeBackend) UserByID(id tailcfg.UserID) (_ tailcfg.UserProfileView, ok bool) {
 	nb.mu.Lock()
 	nb.mu.Lock()
 	nm := nb.netMap
 	nm := nb.netMap

+ 99 - 0
ipn/ipnlocal/prefs_metrics.go

@@ -0,0 +1,99 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+package ipnlocal
+
+import (
+	"errors"
+
+	"tailscale.com/ipn"
+	"tailscale.com/tailcfg"
+	"tailscale.com/util/clientmetric"
+)
+
+// Counter metrics for edit/change events
+var (
+	// metricExitNodeEnabled is incremented when the user enables an exit node independent of the node's characteristics.
+	metricExitNodeEnabled = clientmetric.NewCounter("prefs_exit_node_enabled")
+	// metricExitNodeEnabledSuggested is incremented when the user enables the suggested exit node.
+	metricExitNodeEnabledSuggested = clientmetric.NewCounter("prefs_exit_node_enabled_suggested")
+	// metricExitNodeEnabledMullvad is incremented when the user enables a Mullvad exit node.
+	metricExitNodeEnabledMullvad = clientmetric.NewCounter("prefs_exit_node_enabled_mullvad")
+	// metricWantRunningEnabled is incremented when WantRunning transitions from false to true.
+	metricWantRunningEnabled = clientmetric.NewCounter("prefs_want_running_enabled")
+	// metricWantRunningDisabled is incremented when WantRunning transitions from true to false.
+	metricWantRunningDisabled = clientmetric.NewCounter("prefs_want_running_disabled")
+)
+
+type exitNodeProperty string
+
+const (
+	exitNodeTypePreferred exitNodeProperty = "suggested" // The exit node is the last suggested exit node
+	exitNodeTypeMullvad   exitNodeProperty = "mullvad"   // The exit node is a Mullvad exit node
+)
+
+// prefsMetricsEditEvent encapsulates information needed to record metrics related
+// to any changes to preferences.
+type prefsMetricsEditEvent struct {
+	change                *ipn.MaskedPrefs     // the preference mask used to update the preferences
+	pNew                  ipn.PrefsView        // new preferences (after ApplyUpdates)
+	pOld                  ipn.PrefsView        // old preferences (before ApplyUpdates)
+	node                  *nodeBackend         // the node the event is associated with
+	lastSuggestedExitNode tailcfg.StableNodeID // the last suggested exit node
+}
+
+// record records changes to preferences as clientmetrics.
+func (e *prefsMetricsEditEvent) record() error {
+	if e.change == nil || e.node == nil {
+		return errors.New("prefsMetricsEditEvent: missing required fields")
+	}
+
+	// Record up/down events.
+	if e.change.WantRunningSet && (e.pNew.WantRunning() != e.pOld.WantRunning()) {
+		if e.pNew.WantRunning() {
+			metricWantRunningEnabled.Add(1)
+		} else {
+			metricWantRunningDisabled.Add(1)
+		}
+	}
+
+	// Record any changes to exit node settings.
+	if e.change.ExitNodeIDSet || e.change.ExitNodeIPSet {
+		if exitNodeTypes, ok := e.exitNodeType(e.pNew.ExitNodeID()); ok {
+			// We have switched to a valid exit node if ok is true.
+			metricExitNodeEnabled.Add(1)
+
+			// We may have some additional characteristics we should also record.
+			for _, t := range exitNodeTypes {
+				switch t {
+				case exitNodeTypePreferred:
+					metricExitNodeEnabledSuggested.Add(1)
+				case exitNodeTypeMullvad:
+					metricExitNodeEnabledMullvad.Add(1)
+				}
+			}
+		}
+	}
+	return nil
+}
+
+// exitNodeTypesLocked returns type of exit node for the given stable ID.
+// An exit node may have multiple type (can be both mullvad and preferred
+// simultaneously for example).
+//
+// This will return ok as true if the supplied stable ID resolves to a known peer,
+// false otherwise.  The caller is responsible for ensuring that the id belongs to
+// an exit node.
+func (e *prefsMetricsEditEvent) exitNodeType(id tailcfg.StableNodeID) (props []exitNodeProperty, isNode bool) {
+	var peer tailcfg.NodeView
+
+	if peer, isNode = e.node.PeerByStableID(id); isNode {
+		if tailcfg.StableNodeID(id) == e.lastSuggestedExitNode {
+			props = append(props, exitNodeTypePreferred)
+		}
+		if peer.IsWireGuardOnly() {
+			props = append(props, exitNodeTypeMullvad)
+		}
+	}
+	return props, isNode
+}

+ 5 - 1
ipn/ipnlocal/profiles.go

@@ -705,7 +705,6 @@ var errProfileAccessDenied = errors.New("profile access denied")
 // This is useful for deleting the last profile. In other cases, it is
 // This is useful for deleting the last profile. In other cases, it is
 // recommended to call [profileManager.SwitchProfile] first.
 // recommended to call [profileManager.SwitchProfile] first.
 func (pm *profileManager) DeleteProfile(id ipn.ProfileID) error {
 func (pm *profileManager) DeleteProfile(id ipn.ProfileID) error {
-	metricDeleteProfile.Add(1)
 	if id == pm.currentProfile.ID() {
 	if id == pm.currentProfile.ID() {
 		return pm.deleteCurrentProfile()
 		return pm.deleteCurrentProfile()
 	}
 	}
@@ -741,6 +740,7 @@ func (pm *profileManager) deleteProfileNoPermCheck(profile ipn.LoginProfileView)
 		return err
 		return err
 	}
 	}
 	delete(pm.knownProfiles, profile.ID())
 	delete(pm.knownProfiles, profile.ID())
+	metricDeleteProfile.Add(1)
 	return pm.writeKnownProfiles()
 	return pm.writeKnownProfiles()
 }
 }
 
 
@@ -781,6 +781,7 @@ func (pm *profileManager) writeKnownProfiles() error {
 	if err != nil {
 	if err != nil {
 		return err
 		return err
 	}
 	}
+	metricProfileCount.Set(int64(len(pm.knownProfiles)))
 	return pm.WriteState(ipn.KnownProfilesStateKey, b)
 	return pm.WriteState(ipn.KnownProfilesStateKey, b)
 }
 }
 
 
@@ -893,6 +894,8 @@ func newProfileManagerWithGOOS(store ipn.StateStore, logf logger.Logf, ht *healt
 		return nil, err
 		return nil, err
 	}
 	}
 
 
+	metricProfileCount.Set(int64(len(knownProfiles)))
+
 	pm := &profileManager{
 	pm := &profileManager{
 		goos:          goos,
 		goos:          goos,
 		store:         store,
 		store:         store,
@@ -961,6 +964,7 @@ var (
 	metricSwitchProfile    = clientmetric.NewCounter("profiles_switch")
 	metricSwitchProfile    = clientmetric.NewCounter("profiles_switch")
 	metricDeleteProfile    = clientmetric.NewCounter("profiles_delete")
 	metricDeleteProfile    = clientmetric.NewCounter("profiles_delete")
 	metricDeleteAllProfile = clientmetric.NewCounter("profiles_delete_all")
 	metricDeleteAllProfile = clientmetric.NewCounter("profiles_delete_all")
+	metricProfileCount     = clientmetric.NewGauge("profiles_count")
 
 
 	metricMigration        = clientmetric.NewCounter("profiles_migration")
 	metricMigration        = clientmetric.NewCounter("profiles_migration")
 	metricMigrationError   = clientmetric.NewCounter("profiles_migration_error")
 	metricMigrationError   = clientmetric.NewCounter("profiles_migration_error")

+ 9 - 8
ipn/localapi/localapi.go

@@ -62,6 +62,13 @@ import (
 	"tailscale.com/wgengine/magicsock"
 	"tailscale.com/wgengine/magicsock"
 )
 )
 
 
+var (
+	metricInvalidRequests   = clientmetric.NewCounter("localapi_invalid_requests")
+	metricDebugMetricsCalls = clientmetric.NewCounter("localapi_debugmetric_requests")
+	metricUserMetricsCalls  = clientmetric.NewCounter("localapi_usermetric_requests")
+	metricBugReportRequests = clientmetric.NewCounter("localapi_bugreport_requests")
+)
+
 type LocalAPIHandler func(*Handler, http.ResponseWriter, *http.Request)
 type LocalAPIHandler func(*Handler, http.ResponseWriter, *http.Request)
 
 
 // handler is the set of LocalAPI handlers, keyed by the part of the
 // handler is the set of LocalAPI handlers, keyed by the part of the
@@ -424,6 +431,8 @@ func (h *Handler) serveBugReport(w http.ResponseWriter, r *http.Request) {
 	// NOTE(andrew): if we have anything else we want to do while recording
 	// NOTE(andrew): if we have anything else we want to do while recording
 	// a bugreport, we can add it here.
 	// a bugreport, we can add it here.
 
 
+	metricBugReportRequests.Add(1)
+
 	// Read from the client; this will also return when the client closes
 	// Read from the client; this will also return when the client closes
 	// the connection.
 	// the connection.
 	var buf [1]byte
 	var buf [1]byte
@@ -2623,14 +2632,6 @@ func (h *Handler) serveShares(w http.ResponseWriter, r *http.Request) {
 	}
 	}
 }
 }
 
 
-var (
-	metricInvalidRequests = clientmetric.NewCounter("localapi_invalid_requests")
-
-	// User-visible LocalAPI endpoints.
-	metricDebugMetricsCalls = clientmetric.NewCounter("localapi_debugmetric_requests")
-	metricUserMetricsCalls  = clientmetric.NewCounter("localapi_usermetric_requests")
-)
-
 // serveSuggestExitNode serves a POST endpoint for returning a suggested exit node.
 // serveSuggestExitNode serves a POST endpoint for returning a suggested exit node.
 func (h *Handler) serveSuggestExitNode(w http.ResponseWriter, r *http.Request) {
 func (h *Handler) serveSuggestExitNode(w http.ResponseWriter, r *http.Request) {
 	if r.Method != "GET" {
 	if r.Method != "GET" {