Browse Source

health: add warming-up warnable (#12553)

Andrea Gottardo 1 year ago
parent
commit
6e55d8f6a1
4 changed files with 64 additions and 6 deletions
  1. 34 1
      health/health.go
  2. 7 4
      health/health_test.go
  3. 8 1
      health/state.go
  4. 15 0
      health/warnings.go

+ 34 - 1
health/health.go

@@ -92,7 +92,8 @@ type Tracker struct {
 	lastMapRequestHeard     time.Time        // time we got a 200 from control for a MapRequest
 	ipnState                string
 	ipnWantRunning          bool
-	anyInterfaceUp          opt.Bool // empty means unknown (assume true)
+	ipnWantRunningLastTrue  time.Time // when ipnWantRunning last changed false -> true
+	anyInterfaceUp          opt.Bool  // empty means unknown (assume true)
 	udp4Unbound             bool
 	controlHealth           []string
 	lastLoginErr            error
@@ -705,7 +706,29 @@ func (t *Tracker) SetIPNState(state string, wantRunning bool) {
 	t.mu.Lock()
 	defer t.mu.Unlock()
 	t.ipnState = state
+	prevWantRunning := t.ipnWantRunning
 	t.ipnWantRunning = wantRunning
+
+	if state == "Running" {
+		// Any time we are told the backend is Running (control+DERP are connected), the Warnable
+		// should be set to healthy, no matter if 5 seconds have passed or not.
+		t.setHealthyLocked(warmingUpWarnable)
+	} else if wantRunning && !prevWantRunning && t.ipnWantRunningLastTrue.IsZero() {
+		// The first time we see wantRunning=true and it used to be false, it means the user requested
+		// the backend to start. We store this timestamp and use it to silence some warnings that are
+		// expected during startup.
+		t.ipnWantRunningLastTrue = time.Now()
+		t.setUnhealthyLocked(warmingUpWarnable, nil)
+		time.AfterFunc(warmingUpWarnableDuration, func() {
+			t.mu.Lock()
+			t.updateWarmingUpWarnableLocked()
+			t.mu.Unlock()
+		})
+	} else if !wantRunning {
+		// Reset the timer when the user decides to stop the backend.
+		t.ipnWantRunningLastTrue = time.Time{}
+	}
+
 	t.selfCheckLocked()
 }
 
@@ -858,6 +881,8 @@ var fakeErrForTesting = envknob.RegisterString("TS_DEBUG_FAKE_HEALTH_ERROR")
 // updateBuiltinWarnablesLocked performs a number of checks on the state of the backend,
 // and adds/removes Warnings from the Tracker as needed.
 func (t *Tracker) updateBuiltinWarnablesLocked() {
+	t.updateWarmingUpWarnableLocked()
+
 	if t.checkForUpdates {
 		if cv := t.latestVersion; cv != nil && !cv.RunningLatest && cv.LatestVersion != "" {
 			if cv.UrgentSecurityUpdate {
@@ -1037,6 +1062,14 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
 	}
 }
 
+// updateWarmingUpWarnableLocked ensures the warmingUpWarnable is healthy if wantRunning has been set to true
+// for more than warmingUpWarnableDuration.
+func (t *Tracker) updateWarmingUpWarnableLocked() {
+	if !t.ipnWantRunningLastTrue.IsZero() && time.Now().After(t.ipnWantRunningLastTrue.Add(warmingUpWarnableDuration)) {
+		t.setHealthyLocked(warmingUpWarnable)
+	}
+}
+
 // ReceiveFuncStats tracks the calls made to a wireguard-go receive func.
 type ReceiveFuncStats struct {
 	// name is the name of the receive func.

+ 7 - 4
health/health_test.go

@@ -6,6 +6,7 @@ package health
 import (
 	"fmt"
 	"reflect"
+	"slices"
 	"testing"
 	"time"
 )
@@ -199,15 +200,17 @@ func TestCheckDependsOnAppearsInUnhealthyState(t *testing.T) {
 	if !ok {
 		t.Fatalf("Expected an UnhealthyState for w1, got nothing")
 	}
-	if len(us1.DependsOn) != 0 {
-		t.Fatalf("Expected no DependsOn in the unhealthy state, got: %v", us1.DependsOn)
+	wantDependsOn := []WarnableCode{warmingUpWarnable.Code}
+	if !reflect.DeepEqual(us1.DependsOn, wantDependsOn) {
+		t.Fatalf("Expected DependsOn = %v in the unhealthy state, got: %v", wantDependsOn, us1.DependsOn)
 	}
 	ht.SetUnhealthy(w2, Args{ArgError: "w2 is also unhealthy now"})
 	us2, ok := ht.CurrentState().Warnings[w2.Code]
 	if !ok {
 		t.Fatalf("Expected an UnhealthyState for w2, got nothing")
 	}
-	if !reflect.DeepEqual(us2.DependsOn, []WarnableCode{w1.Code}) {
-		t.Fatalf("Expected DependsOn = [w1.Code] in the unhealthy state, got: %v", us2.DependsOn)
+	wantDependsOn = slices.Concat([]WarnableCode{w1.Code}, wantDependsOn)
+	if !reflect.DeepEqual(us2.DependsOn, wantDependsOn) {
+		t.Fatalf("Expected DependsOn = %v in the unhealthy state, got: %v", wantDependsOn, us2.DependsOn)
 	}
 }

+ 8 - 1
health/state.go

@@ -41,11 +41,18 @@ func (w *Warnable) unhealthyState(ws *warningState) *UnhealthyState {
 		text = w.Text(Args{})
 	}
 
-	dependsOnWarnableCodes := make([]WarnableCode, len(w.DependsOn))
+	dependsOnWarnableCodes := make([]WarnableCode, len(w.DependsOn), len(w.DependsOn)+1)
 	for i, d := range w.DependsOn {
 		dependsOnWarnableCodes[i] = d.Code
 	}
 
+	if w != warmingUpWarnable {
+		// Here we tell the frontend that all Warnables depend on warmingUpWarnable. GUIs will silence all warnings until all
+		// their dependencies are healthy. This is a special case to prevent the GUI from showing a bunch of warnings when
+		// the backend is still warming up.
+		dependsOnWarnableCodes = append(dependsOnWarnableCodes, warmingUpWarnable.Code)
+	}
+
 	return &UnhealthyState{
 		WarnableCode: w.Code,
 		Severity:     w.Severity,

+ 15 - 0
health/warnings.go

@@ -5,6 +5,7 @@ package health
 
 import (
 	"fmt"
+	"time"
 )
 
 /**
@@ -212,3 +213,17 @@ var controlHealthWarnable = Register(&Warnable{
 		return fmt.Sprintf("The coordination server is reporting an health issue: %v", args[ArgError])
 	},
 })
+
+// warmingUpWarnableDuration is the duration for which the warmingUpWarnable is reported by the backend after the user
+// has changed ipnWantRunning to true from false.
+const warmingUpWarnableDuration = 5 * time.Second
+
+// warmingUpWarnable is a Warnable that is reported by the backend when it is starting up, for a maximum time of
+// warmingUpWarnableDuration. The GUIs use the presence of this Warnable to prevent showing any other warnings until
+// the backend is fully started.
+var warmingUpWarnable = Register(&Warnable{
+	Code:     "warming-up",
+	Title:    "Tailscale is starting",
+	Severity: SeverityLow,
+	Text:     StaticMessage("Tailscale is starting. Please wait."),
+})