Browse Source

control/controlclient,health,tailcfg: refactor control health messages (#15839)

* control/controlclient,health,tailcfg: refactor control health messages

Updates tailscale/corp#27759

Signed-off-by: James Sanderson <[email protected]>
Signed-off-by: Paul Scott <[email protected]>
Co-authored-by: Paul Scott <[email protected]>
James 'zofrex' Sanderson 9 months ago
parent
commit
aa8bc23c49

+ 6 - 1
control/controlclient/auto.go

@@ -12,6 +12,7 @@ import (
 	"sync/atomic"
 	"time"
 
+	"tailscale.com/health"
 	"tailscale.com/logtail/backoff"
 	"tailscale.com/net/sockstats"
 	"tailscale.com/tailcfg"
@@ -198,7 +199,11 @@ func NewNoStart(opts Options) (_ *Auto, err error) {
 	c.mapCtx, c.mapCancel = context.WithCancel(context.Background())
 	c.mapCtx = sockstats.WithSockStats(c.mapCtx, sockstats.LabelControlClientAuto, opts.Logf)
 
-	c.unregisterHealthWatch = opts.HealthTracker.RegisterWatcher(direct.ReportHealthChange)
+	c.unregisterHealthWatch = opts.HealthTracker.RegisterWatcher(func(c health.Change) {
+		if c.WarnableChanged {
+			direct.ReportWarnableChange(c.Warnable, c.UnhealthyState)
+		}
+	})
 	return c, nil
 
 }

+ 2 - 2
control/controlclient/direct.go

@@ -1623,9 +1623,9 @@ func postPingResult(start time.Time, logf logger.Logf, c *http.Client, pr *tailc
 	return nil
 }
 
-// ReportHealthChange reports to the control plane a change to this node's
+// ReportWarnableChange reports to the control plane a change to this node's
 // health. w must be non-nil. us can be nil to indicate a healthy state for w.
-func (c *Direct) ReportHealthChange(w *health.Warnable, us *health.UnhealthyState) {
+func (c *Direct) ReportWarnableChange(w *health.Warnable, us *health.UnhealthyState) {
 	if w == health.NetworkStatusWarnable || w == health.IPNStateWarnable || w == health.LoginStateWarnable {
 		// We don't report these. These include things like the network is down
 		// (in which case we can't report anyway) or the user wanted things

+ 21 - 1
control/controlclient/map.go

@@ -6,7 +6,10 @@ package controlclient
 import (
 	"cmp"
 	"context"
+	"crypto/sha256"
+	"encoding/hex"
 	"encoding/json"
+	"io"
 	"maps"
 	"net"
 	"reflect"
@@ -828,6 +831,16 @@ func (ms *mapSession) sortedPeers() []tailcfg.NodeView {
 func (ms *mapSession) netmap() *netmap.NetworkMap {
 	peerViews := ms.sortedPeers()
 
+	// Convert all ms.lastHealth to the new [netmap.NetworkMap.DisplayMessages].
+	var msgs map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage
+	for _, h := range ms.lastHealth {
+		mak.Set(&msgs, tailcfg.DisplayMessageID("control-health-"+strhash(h)), tailcfg.DisplayMessage{
+			Title:    "Coordination server reports an issue",
+			Severity: tailcfg.SeverityMedium,
+			Text:     "The coordination server is reporting a health issue: " + h,
+		})
+	}
+
 	nm := &netmap.NetworkMap{
 		NodeKey:           ms.publicNodeKey,
 		PrivateKey:        ms.privateNodeKey,
@@ -842,7 +855,7 @@ func (ms *mapSession) netmap() *netmap.NetworkMap {
 		SSHPolicy:         ms.lastSSHPolicy,
 		CollectServices:   ms.collectServices,
 		DERPMap:           ms.lastDERPMap,
-		ControlHealth:     ms.lastHealth,
+		DisplayMessages:   msgs,
 		TKAEnabled:        ms.lastTKAInfo != nil && !ms.lastTKAInfo.Disabled,
 	}
 
@@ -868,5 +881,12 @@ func (ms *mapSession) netmap() *netmap.NetworkMap {
 	if DevKnob.ForceProxyDNS() {
 		nm.DNS.Proxied = true
 	}
+
 	return nm
 }
+
+func strhash(h string) string {
+	s := sha256.New()
+	io.WriteString(s, h)
+	return hex.EncodeToString(s.Sum(nil))
+}

+ 28 - 14
control/controlclient/map_test.go

@@ -7,6 +7,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"maps"
 	"net/netip"
 	"reflect"
 	"strings"
@@ -1148,23 +1149,36 @@ func TestNetmapHealthIntegration(t *testing.T) {
 	ht.GotStreamedMapResponse()
 
 	nm := ms.netmapForResponse(&tailcfg.MapResponse{
-		Health: []string{"Test message"},
+		Health: []string{
+			"Test message",
+			"Another message",
+		},
 	})
-	ht.SetControlHealth(nm.ControlHealth)
-
-	state := ht.CurrentState()
-	warning, ok := state.Warnings["control-health"]
+	ht.SetControlHealth(nm.DisplayMessages)
 
-	if !ok {
-		t.Fatal("no warning found in current state with code 'control-health'")
-	}
-	if got, want := warning.Title, "Coordination server reports an issue"; got != want {
-		t.Errorf("warning.Title = %q, want %q", got, want)
+	want := map[health.WarnableCode]health.UnhealthyState{
+		"control-health-c0719e9a8d5d838d861dc6f675c899d2b309a3a65bb9fe6b11e5afcbf9a2c0b1": {
+			WarnableCode: "control-health-c0719e9a8d5d838d861dc6f675c899d2b309a3a65bb9fe6b11e5afcbf9a2c0b1",
+			Title:        "Coordination server reports an issue",
+			Severity:     health.SeverityMedium,
+			Text:         "The coordination server is reporting a health issue: Test message",
+		},
+		"control-health-1dc7017a73a3c55c0d6a8423e3813c7ab6562d9d3064c2ec6ac7822f61b1db9c": {
+			WarnableCode: "control-health-1dc7017a73a3c55c0d6a8423e3813c7ab6562d9d3064c2ec6ac7822f61b1db9c",
+			Title:        "Coordination server reports an issue",
+			Severity:     health.SeverityMedium,
+			Text:         "The coordination server is reporting a health issue: Another message",
+		},
 	}
-	if got, want := warning.Severity, health.SeverityMedium; got != want {
-		t.Errorf("warning.Severity = %s, want %s", got, want)
+
+	got := maps.Clone(ht.CurrentState().Warnings)
+	for k := range got {
+		if !strings.HasPrefix(string(k), "control-health") {
+			delete(got, k)
+		}
 	}
-	if got, want := warning.Text, "The coordination server is reporting an health issue: Test message"; got != want {
-		t.Errorf("warning.Text = %q, want %q", got, want)
+
+	if d := cmp.Diff(want, got); d != "" {
+		t.Fatalf("CurrentStatus().Warnings[\"control-health*\"] different than expected (-want +got)\n%s", d)
 	}
 }

+ 130 - 61
health/health.go

@@ -88,34 +88,35 @@ type Tracker struct {
 	// sysErr maps subsystems to their current error (or nil if the subsystem is healthy)
 	// Deprecated: using Warnables should be preferred
 	sysErr   map[Subsystem]error
-	watchers set.HandleSet[func(*Warnable, *UnhealthyState)] // opt func to run if error state changes
+	watchers set.HandleSet[func(Change)] // opt func to run if error state changes
 	timer    tstime.TimerController
 
 	latestVersion   *tailcfg.ClientVersion // or nil
 	checkForUpdates bool
 	applyUpdates    opt.Bool
 
-	inMapPoll               bool
-	inMapPollSince          time.Time
-	lastMapPollEndedAt      time.Time
-	lastStreamedMapResponse time.Time
-	lastNoiseDial           time.Time
-	derpHomeRegion          int
-	derpHomeless            bool
-	derpRegionConnected     map[int]bool
-	derpRegionHealthProblem map[int]string
-	derpRegionLastFrame     map[int]time.Time
-	derpMap                 *tailcfg.DERPMap // last DERP map from control, could be nil if never received one
-	lastMapRequestHeard     time.Time        // time we got a 200 from control for a MapRequest
-	ipnState                string
-	ipnWantRunning          bool
-	ipnWantRunningLastTrue  time.Time // when ipnWantRunning last changed false -> true
-	anyInterfaceUp          opt.Bool  // empty means unknown (assume true)
-	controlHealth           []string
-	lastLoginErr            error
-	localLogConfigErr       error
-	tlsConnectionErrors     map[string]error // map[ServerName]error
-	metricHealthMessage     *metrics.MultiLabelMap[metricHealthMessageLabel]
+	inMapPoll                   bool
+	inMapPollSince              time.Time
+	lastMapPollEndedAt          time.Time
+	lastStreamedMapResponse     time.Time
+	lastNoiseDial               time.Time
+	derpHomeRegion              int
+	derpHomeless                bool
+	derpRegionConnected         map[int]bool
+	derpRegionHealthProblem     map[int]string
+	derpRegionLastFrame         map[int]time.Time
+	derpMap                     *tailcfg.DERPMap // last DERP map from control, could be nil if never received one
+	lastMapRequestHeard         time.Time        // time we got a 200 from control for a MapRequest
+	ipnState                    string
+	ipnWantRunning              bool
+	ipnWantRunningLastTrue      time.Time                                           // when ipnWantRunning last changed false -> true
+	anyInterfaceUp              opt.Bool                                            // empty means unknown (assume true)
+	lastNotifiedControlMessages map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage // latest control messages processed, kept for change detection
+	controlMessages             map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage // latest control messages received
+	lastLoginErr                error
+	localLogConfigErr           error
+	tlsConnectionErrors         map[string]error // map[ServerName]error
+	metricHealthMessage         *metrics.MultiLabelMap[metricHealthMessageLabel]
 }
 
 func (t *Tracker) now() time.Time {
@@ -207,13 +208,15 @@ func unregister(w *Warnable) {
 // the program.
 type WarnableCode string
 
-// A Warnable is something that we might want to warn the user about, or not. A Warnable is either
-// in an healthy or unhealth state. A Warnable is unhealthy if the Tracker knows about a WarningState
-// affecting the Warnable.
-// In most cases, Warnables are components of the backend (for instance, "DNS" or "Magicsock").
-// Warnables are similar to the Subsystem type previously used in this package, but they provide
-// a unique identifying code for each Warnable, along with more metadata that makes it easier for
-// a GUI to display the Warnable in a user-friendly way.
+// A Warnable is something that we might want to warn the user about, or not. A
+// Warnable is either in a healthy or unhealthy state. A Warnable is unhealthy if
+// the Tracker knows about a WarningState affecting the Warnable.
+//
+// In most cases, Warnables are components of the backend (for instance, "DNS"
+// or "Magicsock"). Warnables are similar to the Subsystem type previously used
+// in this package, but they provide a unique identifying code for each
+// Warnable, along with more metadata that makes it easier for a GUI to display
+// the Warnable in a user-friendly way.
 type Warnable struct {
 	// Code is a string that uniquely identifies this Warnable across the entire Tailscale backend,
 	// and can be mapped to a user-displayable localized string.
@@ -409,12 +412,18 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
 	prevWs := t.warnableVal[w]
 	mak.Set(&t.warnableVal, w, ws)
 	if !ws.Equal(prevWs) {
+
+		change := Change{
+			WarnableChanged: true,
+			Warnable:        w,
+			UnhealthyState:  w.unhealthyState(ws),
+		}
 		for _, cb := range t.watchers {
 			// If the Warnable has been unhealthy for more than its TimeToVisible, the callback should be
 			// executed immediately. Otherwise, the callback should be enqueued to run once the Warnable
 			// becomes visible.
 			if w.IsVisible(ws, t.now) {
-				cb(w, w.unhealthyState(ws))
+				cb(change)
 				continue
 			}
 
@@ -427,7 +436,7 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
 				// Check if the Warnable is still unhealthy, as it could have become healthy between the time
 				// the timer was set for and the time it was executed.
 				if t.warnableVal[w] != nil {
-					cb(w, w.unhealthyState(ws))
+					cb(change)
 					delete(t.pendingVisibleTimers, w)
 				}
 			})
@@ -460,8 +469,23 @@ func (t *Tracker) setHealthyLocked(w *Warnable) {
 		delete(t.pendingVisibleTimers, w)
 	}
 
+	change := Change{
+		WarnableChanged: true,
+		Warnable:        w,
+	}
 	for _, cb := range t.watchers {
-		cb(w, nil)
+		cb(change)
+	}
+}
+
+// notifyWatchersControlChangedLocked calls each watcher to signal that control
+// health messages have changed (and should be fetched via CurrentState).
+func (t *Tracker) notifyWatchersControlChangedLocked() {
+	change := Change{
+		ControlHealthChanged: true,
+	}
+	for _, cb := range t.watchers {
+		cb(change)
 	}
 }
 
@@ -488,23 +512,57 @@ func (t *Tracker) AppendWarnableDebugFlags(base []string) []string {
 	return ret
 }
 
-// RegisterWatcher adds a function that will be called whenever the health state of any Warnable changes.
-// If a Warnable becomes unhealthy or its unhealthy state is updated, the callback will be called with its
-// current Representation.
-// If a Warnable becomes healthy, the callback will be called with ws set to nil.
-// The provided callback function will be executed in its own goroutine. The returned function can be used
-// to unregister the callback.
-func (t *Tracker) RegisterWatcher(cb func(w *Warnable, r *UnhealthyState)) (unregister func()) {
-	return t.registerSyncWatcher(func(w *Warnable, r *UnhealthyState) {
-		go cb(w, r)
+// Change is used to communicate a change to health. This could either be due to
+// a Warnable changing from health to unhealthy (or vice-versa), or because the
+// health messages received from the control-plane have changed.
+//
+// Exactly one *Changed field will be true.
+type Change struct {
+	// ControlHealthChanged indicates it was health messages from the
+	// control-plane server that changed.
+	ControlHealthChanged bool
+
+	// WarnableChanged indicates it was a client Warnable which changed state.
+	WarnableChanged bool
+	// Warnable is whose health changed, as indicated in UnhealthyState.
+	Warnable *Warnable
+	// UnhealthyState is set if the changed Warnable is now unhealthy, or nil
+	// if Warnable is now healthy.
+	UnhealthyState *UnhealthyState
+}
+
+// RegisterWatcher adds a function that will be called its own goroutine
+// whenever the health state of any client [Warnable] or control-plane health
+// messages changes. The returned function can be used to unregister the
+// callback.
+//
+// If a client [Warnable] becomes unhealthy or its unhealthy state is updated,
+// the callback will be called with WarnableChanged set to true and the Warnable
+// and its UnhealthyState:
+//
+//	go cb(Change{WarnableChanged: true, Warnable: w, UnhealthyState: us})
+//
+// If a Warnable becomes healthy, the callback will be called with
+// WarnableChanged set to true, the Warnable set, and UnhealthyState set to nil:
+//
+//	go cb(Change{WarnableChanged: true, Warnable: w, UnhealthyState: nil})
+//
+// If the health messages from the control-plane change, the callback will be
+// called with ControlHealthChanged set to true. Recipients can fetch the set of
+// control-plane health messages by calling [Tracker.CurrentState]:
+//
+//	go cb(Change{ControlHealthChanged: true})
+func (t *Tracker) RegisterWatcher(cb func(Change)) (unregister func()) {
+	return t.registerSyncWatcher(func(c Change) {
+		go cb(c)
 	})
 }
 
 // registerSyncWatcher adds a function that will be called whenever the health
-// state of any Warnable changes. The provided callback function will be
-// executed synchronously. Call RegisterWatcher to register any callbacks that
-// won't return from execution immediately.
-func (t *Tracker) registerSyncWatcher(cb func(w *Warnable, r *UnhealthyState)) (unregister func()) {
+// state changes. The provided callback function will be executed synchronously.
+// Call RegisterWatcher to register any callbacks that won't return from
+// execution immediately.
+func (t *Tracker) registerSyncWatcher(cb func(c Change)) (unregister func()) {
 	if t.nil() {
 		return func() {}
 	}
@@ -512,7 +570,7 @@ func (t *Tracker) registerSyncWatcher(cb func(w *Warnable, r *UnhealthyState)) (
 	t.mu.Lock()
 	defer t.mu.Unlock()
 	if t.watchers == nil {
-		t.watchers = set.HandleSet[func(*Warnable, *UnhealthyState)]{}
+		t.watchers = set.HandleSet[func(Change)]{}
 	}
 	handle := t.watchers.Add(cb)
 	if t.timer == nil {
@@ -659,13 +717,15 @@ func (t *Tracker) updateLegacyErrorWarnableLocked(key Subsystem, err error) {
 	}
 }
 
-func (t *Tracker) SetControlHealth(problems []string) {
+func (t *Tracker) SetControlHealth(problems map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage) {
 	if t.nil() {
 		return
 	}
 	t.mu.Lock()
 	defer t.mu.Unlock()
-	t.controlHealth = problems
+
+	t.controlMessages = problems
+
 	t.selfCheckLocked()
 }
 
@@ -961,11 +1021,11 @@ func (t *Tracker) OverallError() error {
 	return t.multiErrLocked()
 }
 
-// Strings() returns a string array containing the Text of all Warnings
-// currently known to the Tracker. These strings can be presented to the
-// user, although ideally you would use the Code property on each Warning
-// to show a localized version of them instead.
-// This function is here for legacy compatibility purposes and is deprecated.
+// Strings() returns a string array containing the Text of all Warnings and
+// ControlHealth messages currently known to the Tracker. These strings can be
+// presented to the user, although ideally you would use the Code property on
+// each Warning to show a localized version of them instead. This function is
+// here for legacy compatibility purposes and is deprecated.
 func (t *Tracker) Strings() []string {
 	if t.nil() {
 		return nil
@@ -991,6 +1051,19 @@ func (t *Tracker) stringsLocked() []string {
 			result = append(result, w.Text(ws.Args))
 		}
 	}
+
+	warnLen := len(result)
+	for _, c := range t.controlMessages {
+		if c.Title != "" && c.Text != "" {
+			result = append(result, c.Title+": "+c.Text)
+		} else if c.Title != "" {
+			result = append(result, c.Title)
+		} else if c.Text != "" {
+			result = append(result, c.Text)
+		}
+	}
+	sort.Strings(result[warnLen:])
+
 	return result
 }
 
@@ -1171,14 +1244,10 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
 		t.setHealthyLocked(derpRegionErrorWarnable)
 	}
 
-	if len(t.controlHealth) > 0 {
-		for _, s := range t.controlHealth {
-			t.setUnhealthyLocked(controlHealthWarnable, Args{
-				ArgError: s,
-			})
-		}
-	} else {
-		t.setHealthyLocked(controlHealthWarnable)
+	// Check if control health messages have changed
+	if !maps.EqualFunc(t.lastNotifiedControlMessages, t.controlMessages, tailcfg.DisplayMessage.Equal) {
+		t.lastNotifiedControlMessages = t.controlMessages
+		t.notifyWatchersControlChangedLocked()
 	}
 
 	if err := envknob.ApplyDiskConfigError(); err != nil {

+ 142 - 23
health/health_test.go

@@ -5,12 +5,14 @@ package health
 
 import (
 	"fmt"
+	"maps"
 	"reflect"
 	"slices"
 	"strconv"
 	"testing"
 	"time"
 
+	"github.com/google/go-cmp/cmp"
 	"tailscale.com/tailcfg"
 	"tailscale.com/tstest"
 	"tailscale.com/types/opt"
@@ -25,6 +27,7 @@ func TestAppendWarnableDebugFlags(t *testing.T) {
 		w := Register(&Warnable{
 			Code:         WarnableCode(fmt.Sprintf("warnable-code-%d", i)),
 			MapDebugFlag: fmt.Sprint(i),
+			Text:         StaticMessage(""),
 		})
 		defer unregister(w)
 		if i%2 == 0 {
@@ -114,7 +117,9 @@ func TestWatcher(t *testing.T) {
 	becameUnhealthy := make(chan struct{})
 	becameHealthy := make(chan struct{})
 
-	watcherFunc := func(w *Warnable, us *UnhealthyState) {
+	watcherFunc := func(c Change) {
+		w := c.Warnable
+		us := c.UnhealthyState
 		if w != testWarnable {
 			t.Fatalf("watcherFunc was called, but with an unexpected Warnable: %v, want: %v", w, testWarnable)
 		}
@@ -184,7 +189,9 @@ func TestSetUnhealthyWithTimeToVisible(t *testing.T) {
 	becameUnhealthy := make(chan struct{})
 	becameHealthy := make(chan struct{})
 
-	watchFunc := func(w *Warnable, us *UnhealthyState) {
+	watchFunc := func(c Change) {
+		w := c.Warnable
+		us := c.UnhealthyState
 		if w != mw {
 			t.Fatalf("watcherFunc was called, but with an unexpected Warnable: %v, want: %v", w, w)
 		}
@@ -457,21 +464,94 @@ func TestControlHealth(t *testing.T) {
 	ht.SetIPNState("NeedsLogin", true)
 	ht.GotStreamedMapResponse()
 
-	ht.SetControlHealth([]string{"Test message"})
-	state := ht.CurrentState()
-	warning, ok := state.Warnings["control-health"]
+	baseWarns := ht.CurrentState().Warnings
+	baseStrs := ht.Strings()
 
-	if !ok {
-		t.Fatal("no warning found in current state with code 'control-health'")
-	}
-	if got, want := warning.Title, "Coordination server reports an issue"; got != want {
-		t.Errorf("warning.Title = %q, want %q", got, want)
-	}
-	if got, want := warning.Severity, SeverityMedium; got != want {
-		t.Errorf("warning.Severity = %s, want %s", got, want)
-	}
-	if got, want := warning.Text, "The coordination server is reporting an health issue: Test message"; got != want {
-		t.Errorf("warning.Text = %q, want %q", got, want)
+	ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
+		"control-health-test": {
+			Title: "Control health message",
+			Text:  "Extra help",
+		},
+		"control-health-title": {
+			Title: "Control health title only",
+		},
+	})
+
+	t.Run("Warnings", func(t *testing.T) {
+		wantWarns := map[WarnableCode]UnhealthyState{
+			"control-health-test": {
+				WarnableCode: "control-health-test",
+				Severity:     SeverityMedium,
+				Title:        "Control health message",
+				Text:         "Extra help",
+			},
+			"control-health-title": {
+				WarnableCode: "control-health-title",
+				Severity:     SeverityMedium,
+				Title:        "Control health title only",
+			},
+		}
+		state := ht.CurrentState()
+		gotWarns := maps.Clone(state.Warnings)
+		for k := range gotWarns {
+			if _, inBase := baseWarns[k]; inBase {
+				delete(gotWarns, k)
+			}
+		}
+		if diff := cmp.Diff(wantWarns, gotWarns); diff != "" {
+			t.Fatalf(`CurrentState().Warnings["control-health-*"] wrong (-want +got):\n%s`, diff)
+		}
+	})
+
+	t.Run("Strings()", func(t *testing.T) {
+		wantStrs := []string{
+			"Control health message: Extra help",
+			"Control health title only",
+		}
+		var gotStrs []string
+		for _, s := range ht.Strings() {
+			if !slices.Contains(baseStrs, s) {
+				gotStrs = append(gotStrs, s)
+			}
+		}
+		if diff := cmp.Diff(wantStrs, gotStrs); diff != "" {
+			t.Fatalf(`Strings() wrong (-want +got):\n%s`, diff)
+		}
+	})
+
+	t.Run("tailscaled_health_messages", func(t *testing.T) {
+		var r usermetric.Registry
+		ht.SetMetricsRegistry(&r)
+
+		got := ht.metricHealthMessage.Get(metricHealthMessageLabel{
+			Type: MetricLabelWarning,
+		}).String()
+		want := strconv.Itoa(
+			2 + // from SetControlHealth
+				len(baseStrs),
+		)
+		if got != want {
+			t.Errorf("metricsHealthMessage.Get(warning) = %q, want %q", got, want)
+		}
+	})
+}
+
+func TestControlHealthNotifiesOnSet(t *testing.T) {
+	ht := Tracker{}
+	ht.SetIPNState("NeedsLogin", true)
+	ht.GotStreamedMapResponse()
+
+	gotNotified := false
+	ht.registerSyncWatcher(func(_ Change) {
+		gotNotified = true
+	})
+
+	ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
+		"test": {},
+	})
+
+	if !gotNotified {
+		t.Errorf("watcher did not get called, want it to be called")
 	}
 }
 
@@ -480,12 +560,45 @@ func TestControlHealthNotifiesOnChange(t *testing.T) {
 	ht.SetIPNState("NeedsLogin", true)
 	ht.GotStreamedMapResponse()
 
+	ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
+		"test-1": {},
+	})
+
 	gotNotified := false
-	ht.registerSyncWatcher(func(_ *Warnable, _ *UnhealthyState) {
+	ht.registerSyncWatcher(func(_ Change) {
 		gotNotified = true
 	})
 
-	ht.SetControlHealth([]string{"Test message"})
+	ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
+		"test-2": {},
+	})
+
+	if !gotNotified {
+		t.Errorf("watcher did not get called, want it to be called")
+	}
+}
+
+func TestControlHealthNotifiesOnDetailsChange(t *testing.T) {
+	ht := Tracker{}
+	ht.SetIPNState("NeedsLogin", true)
+	ht.GotStreamedMapResponse()
+
+	ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
+		"test-1": {
+			Title: "Title",
+		},
+	})
+
+	gotNotified := false
+	ht.registerSyncWatcher(func(_ Change) {
+		gotNotified = true
+	})
+
+	ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
+		"test-1": {
+			Title: "Updated title",
+		},
+	})
 
 	if !gotNotified {
 		t.Errorf("watcher did not get called, want it to be called")
@@ -498,16 +611,20 @@ func TestControlHealthNoNotifyOnUnchanged(t *testing.T) {
 	ht.GotStreamedMapResponse()
 
 	// Set up an existing control health issue
-	ht.SetControlHealth([]string{"Test message"})
+	ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
+		"test": {},
+	})
 
 	// Now register our watcher
 	gotNotified := false
-	ht.registerSyncWatcher(func(_ *Warnable, _ *UnhealthyState) {
+	ht.registerSyncWatcher(func(_ Change) {
 		gotNotified = true
 	})
 
 	// Send the same control health message again - should not notify
-	ht.SetControlHealth([]string{"Test message"})
+	ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
+		"test": {},
+	})
 
 	if gotNotified {
 		t.Errorf("watcher got called, want it to not be called")
@@ -519,11 +636,13 @@ func TestControlHealthIgnoredOutsideMapPoll(t *testing.T) {
 	ht.SetIPNState("NeedsLogin", true)
 
 	gotNotified := false
-	ht.registerSyncWatcher(func(_ *Warnable, _ *UnhealthyState) {
+	ht.registerSyncWatcher(func(_ Change) {
 		gotNotified = true
 	})
 
-	ht.SetControlHealth([]string{"Test message"})
+	ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
+		"control-health": {},
+	})
 
 	state := ht.CurrentState()
 	_, ok := state.Warnings["control-health"]

+ 27 - 1
health/state.go

@@ -5,6 +5,8 @@ package health
 
 import (
 	"time"
+
+	"tailscale.com/tailcfg"
 )
 
 // State contains the health status of the backend, and is
@@ -21,7 +23,8 @@ type State struct {
 }
 
 // UnhealthyState contains information to be shown to the user to inform them
-// that a Warnable is currently unhealthy.
+// that a [Warnable] is currently unhealthy or [tailcfg.DisplayMessage] is being
+// sent from the control-plane.
 type UnhealthyState struct {
 	WarnableCode        WarnableCode
 	Severity            Severity
@@ -98,11 +101,34 @@ func (t *Tracker) CurrentState() *State {
 		wm[w.Code] = *w.unhealthyState(ws)
 	}
 
+	for id, msg := range t.lastNotifiedControlMessages {
+		code := WarnableCode(id)
+		wm[code] = UnhealthyState{
+			WarnableCode:        code,
+			Severity:            severityFromTailcfg(msg.Severity),
+			Title:               msg.Title,
+			Text:                msg.Text,
+			ImpactsConnectivity: msg.ImpactsConnectivity,
+			// TODO(tailscale/corp#27759): DependsOn?
+		}
+	}
+
 	return &State{
 		Warnings: wm,
 	}
 }
 
+func severityFromTailcfg(s tailcfg.DisplayMessageSeverity) Severity {
+	switch s {
+	case tailcfg.SeverityHigh:
+		return SeverityHigh
+	case tailcfg.SeverityLow:
+		return SeverityLow
+	default:
+		return SeverityMedium
+	}
+}
+
 // isEffectivelyHealthyLocked reports whether w is effectively healthy.
 // That means it's either actually healthy or it has a dependency that
 // that's unhealthy, so we should treat w as healthy to not spam users

+ 0 - 10
health/warnings.go

@@ -238,16 +238,6 @@ var applyDiskConfigWarnable = Register(&Warnable{
 	},
 })
 
-// controlHealthWarnable is a Warnable that warns the user that the coordination server is reporting an health issue.
-var controlHealthWarnable = Register(&Warnable{
-	Code:     "control-health",
-	Title:    "Coordination server reports an issue",
-	Severity: SeverityMedium,
-	Text: func(args Args) string {
-		return fmt.Sprintf("The coordination server is reporting an health issue: %v", args[ArgError])
-	},
-})
-
 // warmingUpWarnableDuration is the duration for which the warmingUpWarnable is reported by the backend after the user
 // has changed ipnWantRunning to true from false.
 const warmingUpWarnableDuration = 5 * time.Second

+ 10 - 6
ipn/ipnlocal/local.go

@@ -933,11 +933,15 @@ func (b *LocalBackend) linkChange(delta *netmon.ChangeDelta) {
 	}
 }
 
-func (b *LocalBackend) onHealthChange(w *health.Warnable, us *health.UnhealthyState) {
-	if us == nil {
-		b.logf("health(warnable=%s): ok", w.Code)
-	} else {
-		b.logf("health(warnable=%s): error: %s", w.Code, us.Text)
+func (b *LocalBackend) onHealthChange(change health.Change) {
+	if change.WarnableChanged {
+		w := change.Warnable
+		us := change.UnhealthyState
+		if us == nil {
+			b.logf("health(warnable=%s): ok", w.Code)
+		} else {
+			b.logf("health(warnable=%s): error: %s", w.Code, us.Text)
+		}
 	}
 
 	// Whenever health changes, send the current health state to the frontend.
@@ -5826,7 +5830,7 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) {
 	b.pauseOrResumeControlClientLocked()
 
 	if nm != nil {
-		b.health.SetControlHealth(nm.ControlHealth)
+		b.health.SetControlHealth(nm.DisplayMessages)
 	} else {
 		b.health.SetControlHealth(nil)
 	}

+ 51 - 1
tailcfg/tailcfg.go

@@ -2028,7 +2028,7 @@ type MapResponse struct {
 	// plane's perspective. A nil value means no change from the previous
 	// MapResponse. A non-nil 0-length slice restores the health to good (no
 	// known problems). A non-zero length slice are the list of problems that
-	// the control place sees.
+	// the control plane sees.
 	//
 	// Note that this package's type, due its use of a slice and omitempty, is
 	// unable to marshal a zero-length non-nil slice. The control server needs
@@ -2078,6 +2078,56 @@ type MapResponse struct {
 	DefaultAutoUpdate opt.Bool `json:",omitempty"`
 }
 
+// DisplayMessage represents a health state of the node from the control plane's
+// perspective. It is deliberately similar to health.Warnable as both get
+// converted into health.UnhealthyState to be sent to the GUI.
+type DisplayMessage struct {
+	// Title is a string that the GUI uses as title for this message. The title
+	// should be short and fit in a single line.
+	Title string
+
+	// Text is an extended string that the GUI will display to the user.
+	Text string
+
+	// Severity is the severity of the DisplayMessage, which the GUI can use to
+	// determine how to display it. Maps to health.Severity.
+	Severity DisplayMessageSeverity
+
+	// ImpactsConnectivity is whether the health problem will impact the user's
+	// ability to connect to the Internet or other nodes on the tailnet, which
+	// the GUI can use to determine how to display it.
+	ImpactsConnectivity bool `json:",omitempty"`
+}
+
+// DisplayMessageID is a string that uniquely identifies the kind of health
+// issue (e.g. "session-expired").
+type DisplayMessageID string
+
+// Equal returns true iff all fields are equal.
+func (m DisplayMessage) Equal(o DisplayMessage) bool {
+	return m.Title == o.Title &&
+		m.Text == o.Text &&
+		m.Severity == o.Severity &&
+		m.ImpactsConnectivity == o.ImpactsConnectivity
+}
+
+// DisplayMessageSeverity represents how serious a [DisplayMessage] is. Analogous
+// to health.Severity.
+type DisplayMessageSeverity string
+
+const (
+	// SeverityHigh is the highest severity level, used for critical errors that need immediate attention.
+	// On platforms where the client GUI can deliver notifications, a SeverityHigh message will trigger
+	// a modal notification.
+	SeverityHigh DisplayMessageSeverity = "high"
+	// SeverityMedium is used for errors that are important but not critical. This won't trigger a modal
+	// notification, however it will be displayed in a more visible way than a SeverityLow message.
+	SeverityMedium DisplayMessageSeverity = "medium"
+	// SeverityLow is used for less important notices that don't need immediate attention. The user will
+	// have to go to a Settings window, or another "hidden" GUI location to see these messages.
+	SeverityLow DisplayMessageSeverity = "low"
+)
+
 // ClientVersion is information about the latest client version that's available
 // for the client (and whether they're already running it).
 //

+ 76 - 0
tailcfg/tailcfg_test.go

@@ -878,3 +878,79 @@ func TestCheckTag(t *testing.T) {
 		})
 	}
 }
+
+func TestDisplayMessageEqual(t *testing.T) {
+	base := DisplayMessage{
+		Title:               "title",
+		Text:                "text",
+		Severity:            SeverityHigh,
+		ImpactsConnectivity: false,
+	}
+
+	type test struct {
+		name      string
+		value     DisplayMessage
+		wantEqual bool
+	}
+
+	for _, test := range []test{
+		{
+			name: "same",
+			value: DisplayMessage{
+				Title:               "title",
+				Text:                "text",
+				Severity:            SeverityHigh,
+				ImpactsConnectivity: false,
+			},
+			wantEqual: true,
+		},
+		{
+			name: "different-title",
+			value: DisplayMessage{
+				Title:               "different title",
+				Text:                "text",
+				Severity:            SeverityHigh,
+				ImpactsConnectivity: false,
+			},
+			wantEqual: false,
+		},
+		{
+			name: "different-text",
+			value: DisplayMessage{
+				Title:               "title",
+				Text:                "different text",
+				Severity:            SeverityHigh,
+				ImpactsConnectivity: false,
+			},
+			wantEqual: false,
+		},
+		{
+			name: "different-severity",
+			value: DisplayMessage{
+				Title:               "title",
+				Text:                "text",
+				Severity:            SeverityMedium,
+				ImpactsConnectivity: false,
+			},
+			wantEqual: false,
+		},
+		{
+			name: "different-impactsConnectivity",
+			value: DisplayMessage{
+				Title:               "title",
+				Text:                "text",
+				Severity:            SeverityHigh,
+				ImpactsConnectivity: true,
+			},
+			wantEqual: false,
+		},
+	} {
+		t.Run(test.name, func(t *testing.T) {
+			got := base.Equal(test.value)
+
+			if got != test.wantEqual {
+				t.Errorf("Equal: got %t, want %t", got, test.wantEqual)
+			}
+		})
+	}
+}

+ 2 - 2
types/netmap/netmap.go

@@ -54,12 +54,12 @@ type NetworkMap struct {
 	// between updates and should not be modified.
 	DERPMap *tailcfg.DERPMap
 
-	// ControlHealth are the list of health check problems for this
+	// DisplayMessages are the list of health check problems for this
 	// node from the perspective of the control plane.
 	// If empty, there are no known problems from the control plane's
 	// point of view, but the node might know about its own health
 	// check problems.
-	ControlHealth []string
+	DisplayMessages map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage
 
 	// TKAEnabled indicates whether the tailnet key authority should be
 	// enabled, from the perspective of the control plane.