Browse Source

cmd/systray: handle reconnects to IPN bus (#13386)

When tailscaled restarts and our watch connection goes down, we get
stuck in an infinite loop printing `ipnbus error: EOF` (which ended up
consuming all the disk space on my laptop via the log file). Instead,
handle errors in `watchIPNBus` and reconnect after a short delay.

Updates #1708

Signed-off-by: Andrew Lytvynov <[email protected]>
Andrew Lytvynov 1 year ago
parent
commit
e7a6e7930f
1 changed file with 21 additions and 3 deletions
  1. 21 3
      cmd/systray/systray.go

+ 21 - 3
cmd/systray/systray.go

@@ -8,6 +8,7 @@ package main
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"io"
 	"log"
@@ -180,19 +181,36 @@ func (menu *Menu) eventLoop(ctx context.Context) {
 // watchIPNBus subscribes to the tailscale event bus and sends state updates to chState.
 // This method does not return.
 func watchIPNBus(ctx context.Context) {
+	for {
+		if err := watchIPNBusInner(ctx); err != nil {
+			log.Println(err)
+			if errors.Is(err, context.Canceled) {
+				// If the context got canceled, we will never be able to
+				// reconnect to IPN bus, so exit the process.
+				log.Fatalf("watchIPNBus: %v", err)
+			}
+		}
+		// If our watch connection breaks, wait a bit before reconnecting. No
+		// reason to spam the logs if e.g. tailscaled is restarting or goes
+		// down.
+		time.Sleep(3 * time.Second)
+	}
+}
+
+func watchIPNBusInner(ctx context.Context) error {
 	watcher, err := localClient.WatchIPNBus(ctx, ipn.NotifyInitialState|ipn.NotifyNoPrivateKeys)
 	if err != nil {
-		log.Printf("watching ipn bus: %v", err)
+		return fmt.Errorf("watching ipn bus: %w", err)
 	}
 	defer watcher.Close()
 	for {
 		select {
 		case <-ctx.Done():
-			return
+			return nil
 		default:
 			n, err := watcher.Next()
 			if err != nil {
-				log.Printf("ipnbus error: %v", err)
+				return fmt.Errorf("ipnbus error: %w", err)
 			}
 			if n.State != nil {
 				chState <- *n.State