Browse Source

cmd/tailscale,controlclient,ipnlocal: fix 'up', deflake tests more

The CLI's "up" is kinda chaotic and LocalBackend.Start is kinda
chaotic and they both need to be redone/deleted (respectively), but
this fixes some buggy behavior meanwhile. We were previously calling
StartLoginInteractive (to start the controlclient's RegisterRequest)
redundantly in some cases, causing test flakes depending on timing and
up's weird state machine.

We only need to call StartLoginInteractive in the client if Start itself
doesn't. But Start doesn't tell us that. So cheat a bit and a put the
information about whether there's a current NodeKey in the ipn.Status.
It used to be accessible over LocalAPI via GetPrefs as a private key but
we removed that for security. But a bool is fine.

So then only call StartLoginInteractive if that bool is false and don't
do it in the WatchIPNBus loop.

Fixes #12028
Updates #12042

Change-Id: I0923c3f704a9d6afd825a858eb9a63ca7c1df294
Signed-off-by: Brad Fitzpatrick <[email protected]>
Brad Fitzpatrick 1 year ago
parent
commit
e968b0ecd7

+ 22 - 9
cmd/tailscale/cli/up.go

@@ -410,6 +410,11 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
 	// printAuthURL reports whether we should print out the
 	// provided auth URL from an IPN notify.
 	printAuthURL := func(url string) bool {
+		if url == "" {
+			// Probably unnecessary but we used to have a bug where tailscaled
+			// could send an empty URL over the IPN bus. ~Harmless to keep.
+			return false
+		}
 		if upArgs.authKeyOrFile != "" {
 			// Issue 1755: when using an authkey, don't
 			// show an authURL that might still be pending
@@ -527,8 +532,11 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
 		if err != nil {
 			return err
 		}
-		if upArgs.forceReauth {
-			localClient.StartLoginInteractive(ctx)
+		if upArgs.forceReauth || !st.HaveNodeKey {
+			err := localClient.StartLoginInteractive(ctx)
+			if err != nil {
+				return err
+			}
 		}
 	}
 
@@ -540,6 +548,8 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
 
 	go func() {
 		var printed bool // whether we've yet printed anything to stdout or stderr
+		var lastURLPrinted string
+
 		for {
 			n, err := watcher.Next()
 			if err != nil {
@@ -552,8 +562,6 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
 			}
 			if s := n.State; s != nil {
 				switch *s {
-				case ipn.NeedsLogin:
-					localClient.StartLoginInteractive(ctx)
 				case ipn.NeedsMachineAuth:
 					printed = true
 					if env.upArgs.json {
@@ -576,12 +584,17 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
 					cancelWatch()
 				}
 			}
-			if url := n.BrowseToURL; url != nil && printAuthURL(*url) {
+			if url := n.BrowseToURL; url != nil {
+				authURL := *url
+				if !printAuthURL(authURL) || authURL == lastURLPrinted {
+					continue
+				}
 				printed = true
+				lastURLPrinted = authURL
 				if upArgs.json {
-					js := &upOutputJSON{AuthURL: *url, BackendState: st.BackendState}
+					js := &upOutputJSON{AuthURL: authURL, BackendState: st.BackendState}
 
-					q, err := qrcode.New(*url, qrcode.Medium)
+					q, err := qrcode.New(authURL, qrcode.Medium)
 					if err == nil {
 						png, err := q.PNG(128)
 						if err == nil {
@@ -596,9 +609,9 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
 						outln(string(data))
 					}
 				} else {
-					fmt.Fprintf(Stderr, "\nTo authenticate, visit:\n\n\t%s\n\n", *url)
+					fmt.Fprintf(Stderr, "\nTo authenticate, visit:\n\n\t%s\n\n", authURL)
 					if upArgs.qr {
-						q, err := qrcode.New(*url, qrcode.Medium)
+						q, err := qrcode.New(authURL, qrcode.Medium)
 						if err != nil {
 							log.Printf("QR code error: %v", err)
 						} else {

+ 14 - 3
control/controlclient/auto.go

@@ -348,9 +348,14 @@ func (c *Auto) authRoutine() {
 			continue
 		}
 		if url != "" {
-			// goal.url ought to be empty here.
-			// However, not all control servers get this right,
-			// and logging about it here just generates noise.
+			// goal.url ought to be empty here. However, not all control servers
+			// get this right, and logging about it here just generates noise.
+			//
+			// TODO(bradfitz): I don't follow that comment. Our own testcontrol
+			// used by tstest/integration hits this path, in fact.
+			if c.direct.panicOnUse {
+				panic("tainted client")
+			}
 			c.mu.Lock()
 			c.urlToVisit = url
 			c.loginGoal = &LoginGoal{
@@ -615,6 +620,9 @@ func (c *Auto) Login(t *tailcfg.Oauth2Token, flags LoginFlags) {
 	if c.closed {
 		return
 	}
+	if c.direct != nil && c.direct.panicOnUse {
+		panic("tainted client")
+	}
 	c.wantLoggedIn = true
 	c.loginGoal = &LoginGoal{
 		token: t,
@@ -632,6 +640,9 @@ func (c *Auto) Logout(ctx context.Context) error {
 	c.wantLoggedIn = false
 	c.loginGoal = nil
 	closed := c.closed
+	if c.direct != nil && c.direct.panicOnUse {
+		panic("tainted client")
+	}
 	c.mu.Unlock()
 
 	if closed {

+ 17 - 1
control/controlclient/direct.go

@@ -80,6 +80,7 @@ type Direct struct {
 	onClientVersion            func(*tailcfg.ClientVersion) // or nil
 	onControlTime              func(time.Time)              // or nil
 	onTailnetDefaultAutoUpdate func(bool)                   // or nil
+	panicOnUse                 bool                         // if true, panic if client is used (for testing)
 
 	dialPlan ControlDialPlanner // can be nil
 
@@ -310,6 +311,9 @@ func NewDirect(opts Options) (*Direct, error) {
 		}
 		c.serverNoiseKey = key.NewMachine().Public() // prevent early error before hitting test client
 	}
+	if strings.Contains(opts.ServerURL, "controlplane.tailscale.com") && envknob.Bool("TS_PANIC_IF_HIT_MAIN_CONTROL") {
+		c.panicOnUse = true
+	}
 	return c, nil
 }
 
@@ -399,7 +403,7 @@ func (c *Direct) TryLogout(ctx context.Context) error {
 
 func (c *Direct) TryLogin(ctx context.Context, t *tailcfg.Oauth2Token, flags LoginFlags) (url string, err error) {
 	if strings.Contains(c.serverURL, "controlplane.tailscale.com") && envknob.Bool("TS_PANIC_IF_HIT_MAIN_CONTROL") {
-		panic("[unexpected] controlclient: TryLogin called on " + c.serverURL)
+		panic(fmt.Sprintf("[unexpected] controlclient: TryLogin called on %s; tainted=%v", c.serverURL, c.panicOnUse))
 	}
 	c.logf("[v1] direct.TryLogin(token=%v, flags=%v)", t != nil, flags)
 	return c.doLoginOrRegen(ctx, loginOpt{Token: t, Flags: flags})
@@ -462,6 +466,9 @@ func (c *Direct) hostInfoLocked() *tailcfg.Hostinfo {
 }
 
 func (c *Direct) doLogin(ctx context.Context, opt loginOpt) (mustRegen bool, newURL string, nks tkatype.MarshaledSignature, err error) {
+	if c.panicOnUse {
+		panic("tainted client")
+	}
 	c.mu.Lock()
 	persist := c.persist.AsStruct()
 	tryingNewKey := c.tryingNewKey
@@ -835,6 +842,9 @@ const watchdogTimeout = 120 * time.Second
 //
 // If nu is nil, OmitPeers will be set to true.
 func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu NetmapUpdater) error {
+	if c.panicOnUse {
+		panic("tainted client")
+	}
 	if isStreaming && nu == nil {
 		panic("cb must be non-nil if isStreaming is true")
 	}
@@ -1531,6 +1541,9 @@ func (c *Direct) SetDNS(ctx context.Context, req *tailcfg.SetDNSRequest) (err er
 }
 
 func (c *Direct) DoNoiseRequest(req *http.Request) (*http.Response, error) {
+	if c.panicOnUse {
+		panic("tainted client")
+	}
 	nc, err := c.getNoiseClient()
 	if err != nil {
 		return nil, err
@@ -1627,6 +1640,9 @@ func (c *Direct) ReportHealthChange(sys health.Subsystem, sysErr error) {
 	if !ok {
 		return
 	}
+	if c.panicOnUse {
+		panic("tainted client")
+	}
 	req := &tailcfg.HealthChangeRequest{
 		Subsys:  string(sys),
 		NodeKey: nodeKey,

+ 4 - 0
ipn/ipnlocal/local.go

@@ -790,6 +790,7 @@ func (b *LocalBackend) UpdateStatus(sb *ipnstate.StatusBuilder) {
 			s.ClientVersion = b.lastClientVersion
 		}
 		s.Health = b.health.AppendWarnings(s.Health)
+		s.HaveNodeKey = b.hasNodeKeyLocked()
 
 		// TODO(bradfitz): move this health check into a health.Warnable
 		// and remove from here.
@@ -4605,6 +4606,9 @@ func (b *LocalBackend) resetControlClientLocked() controlclient.Client {
 		return nil
 	}
 
+	b.authURL = ""
+	b.authURLSticky = ""
+
 	// When we clear the control client, stop any outstanding netmap expiry
 	// timer; synthesizing a new netmap while we don't have a control
 	// client will break things.

+ 3 - 0
ipn/ipnstate/ipnstate.go

@@ -41,6 +41,9 @@ type Status struct {
 	//  "Starting", "Running".
 	BackendState string
 
+	// HaveNodeKey is whether the current profile has a node key configured.
+	HaveNodeKey bool `json:",omitempty"`
+
 	AuthURL      string       // current URL provided by control to authorize client
 	TailscaleIPs []netip.Addr // Tailscale IP(s) assigned to this node
 	Self         *PeerStatus

+ 25 - 7
tstest/integration/integration_test.go

@@ -259,7 +259,7 @@ func TestStateSavedOnStart(t *testing.T) {
 	n1.MustDown()
 
 	// And change the hostname to something:
-	if err := n1.Tailscale("up", "--login-server="+n1.env.ControlServer.URL, "--hostname=foo").Run(); err != nil {
+	if err := n1.Tailscale("up", "--login-server="+n1.env.controlURL(), "--hostname=foo").Run(); err != nil {
 		t.Fatalf("up: %v", err)
 	}
 
@@ -289,9 +289,9 @@ func TestOneNodeUpAuth(t *testing.T) {
 	st := n1.MustStatus()
 	t.Logf("Status: %s", st.BackendState)
 
-	t.Logf("Running up --login-server=%s ...", env.ControlServer.URL)
+	t.Logf("Running up --login-server=%s ...", env.controlURL())
 
-	cmd := n1.Tailscale("up", "--login-server="+env.ControlServer.URL)
+	cmd := n1.Tailscale("up", "--login-server="+env.controlURL())
 	var authCountAtomic int32
 	cmd.Stdout = &authURLParserWriter{fn: func(urlStr string) error {
 		if env.Control.CompleteAuth(urlStr) {
@@ -1069,7 +1069,7 @@ func TestAutoUpdateDefaults(t *testing.T) {
 				// Should not be changed even if sent "true" later.
 				sendAndCheckDefault(t, n, true, false)
 				// But can be changed explicitly by the user.
-				if out, err := n.Tailscale("set", "--auto-update").CombinedOutput(); err != nil {
+				if out, err := n.TailscaleForOutput("set", "--auto-update").CombinedOutput(); err != nil {
 					t.Fatalf("failed to enable auto-update on node: %v\noutput: %s", err, out)
 				}
 				sendAndCheckDefault(t, n, false, true)
@@ -1083,7 +1083,7 @@ func TestAutoUpdateDefaults(t *testing.T) {
 				// Should not be changed even if sent "false" later.
 				sendAndCheckDefault(t, n, false, true)
 				// But can be changed explicitly by the user.
-				if out, err := n.Tailscale("set", "--auto-update=false").CombinedOutput(); err != nil {
+				if out, err := n.TailscaleForOutput("set", "--auto-update=false").CombinedOutput(); err != nil {
 					t.Fatalf("failed to disable auto-update on node: %v\noutput: %s", err, out)
 				}
 				sendAndCheckDefault(t, n, true, false)
@@ -1093,7 +1093,7 @@ func TestAutoUpdateDefaults(t *testing.T) {
 			desc: "user-sets-first",
 			run: func(t *testing.T, n *testNode) {
 				// User sets auto-update first, before receiving defaults.
-				if out, err := n.Tailscale("set", "--auto-update=false").CombinedOutput(); err != nil {
+				if out, err := n.TailscaleForOutput("set", "--auto-update=false").CombinedOutput(); err != nil {
 					t.Fatalf("failed to disable auto-update on node: %v\noutput: %s", err, out)
 				}
 				// Defaults sent from control should be ignored.
@@ -1135,6 +1135,16 @@ type testEnv struct {
 	TrafficTrapServer *httptest.Server
 }
 
+// controlURL returns e.ControlServer.URL, panicking if it's the empty string,
+// which it should never be in tests.
+func (e *testEnv) controlURL() string {
+	s := e.ControlServer.URL
+	if s == "" {
+		panic("control server not set")
+	}
+	return s
+}
+
 type testEnvOpt interface {
 	modifyTestEnv(*testEnv)
 }
@@ -1183,6 +1193,7 @@ func newTestEnv(t testing.TB, opts ...testEnvOpt) *testEnv {
 		e.TrafficTrapServer.Close()
 		e.ControlServer.Close()
 	})
+	t.Logf("control URL: %v", e.controlURL())
 	return e
 }
 
@@ -1445,7 +1456,7 @@ func (n *testNode) MustUp(extraArgs ...string) {
 	t.Helper()
 	args := []string{
 		"up",
-		"--login-server=" + n.env.ControlServer.URL,
+		"--login-server=" + n.env.controlURL(),
 		"--reset",
 	}
 	args = append(args, extraArgs...)
@@ -1585,6 +1596,13 @@ func (n *testNode) AwaitNeedsLogin() {
 	}
 }
 
+func (n *testNode) TailscaleForOutput(arg ...string) *exec.Cmd {
+	cmd := n.Tailscale(arg...)
+	cmd.Stdout = nil
+	cmd.Stderr = nil
+	return cmd
+}
+
 // Tailscale returns a command that runs the tailscale CLI with the provided arguments.
 // It does not start the process.
 func (n *testNode) Tailscale(arg ...string) *exec.Cmd {