Просмотр исходного кода

ipn/ipnlocal: acquire b.mu once in Start

We used to Lock, Unlock, Lock, Unlock quite a few
times in Start resulting in all sorts of weird race
conditions. Simplify it all and only Lock/Unlock once.

Updates #11649

Signed-off-by: Maisem Ali <[email protected]>
Maisem Ali 1 год назад
Родитель
Сommit
32bc596062
3 измененных файлов с 60 добавлено и 44 удалено
  1. 2 1
      control/controlclient/client.go
  2. 51 29
      ipn/ipnlocal/local.go
  3. 7 14
      ipn/ipnlocal/state_test.go

+ 2 - 1
control/controlclient/client.go

@@ -43,7 +43,8 @@ type Client interface {
 	// Login begins an interactive or non-interactive login process.
 	// Client will eventually call the Status callback with either a
 	// LoginFinished flag (on success) or an auth URL (if further
-	// interaction is needed).
+	// interaction is needed). It merely sets the process in motion,
+	// and doesn't wait for it to complete.
 	Login(*tailcfg.Oauth2Token, LoginFlags)
 	// Logout starts a synchronous logout process. It doesn't return
 	// until the logout operation has been completed.

+ 51 - 29
ipn/ipnlocal/local.go

@@ -230,7 +230,8 @@ type LocalBackend struct {
 	ccGen          clientGen          // function for producing controlclient; lazily populated
 	sshServer      SSHServer          // or nil, initialized lazily.
 	appConnector   *appc.AppConnector // or nil, initialized when configured.
-	notify         func(ipn.Notify)
+	// notifyCancel cancels notifications to the current SetNotifyCallback.
+	notifyCancel   context.CancelFunc
 	cc             controlclient.Client
 	ccAuto         *controlclient.Auto // if cc is of type *controlclient.Auto
 	machinePrivKey key.MachinePrivate
@@ -710,6 +711,9 @@ func (b *LocalBackend) Shutdown() {
 		b.debugSink.Close()
 		b.debugSink = nil
 	}
+	if b.notifyCancel != nil {
+		b.notifyCancel()
+	}
 	b.mu.Unlock()
 	b.webClientShutdown()
 
@@ -1557,10 +1561,26 @@ func endpointsEqual(x, y []tailcfg.Endpoint) bool {
 	return true
 }
 
+// SetNotifyCallback sets the function to call when the backend has something to
+// notify the frontend about. Only one callback can be set at a time, so calling
+// this function will replace the previous callback.
 func (b *LocalBackend) SetNotifyCallback(notify func(ipn.Notify)) {
+	ctx, cancel := context.WithCancel(b.ctx)
 	b.mu.Lock()
-	defer b.mu.Unlock()
-	b.notify = notify
+	prevCancel := b.notifyCancel
+	b.notifyCancel = cancel
+	b.mu.Unlock()
+	if prevCancel != nil {
+		prevCancel()
+	}
+
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go b.WatchNotifications(ctx, 0, wg.Done, func(n *ipn.Notify) bool {
+		notify(*n)
+		return true
+	})
+	wg.Wait()
 }
 
 // SetHTTPTestClient sets an alternate HTTP client to use with
@@ -1806,7 +1826,6 @@ func (b *LocalBackend) Start(opts ipn.Options) error {
 		tkaHead = string(head)
 	}
 	confWantRunning := b.conf != nil && wantRunning
-	unlock.UnlockEarly()
 
 	if endpoints != nil {
 		cc.UpdateEndpoints(endpoints)
@@ -1815,16 +1834,23 @@ func (b *LocalBackend) Start(opts ipn.Options) error {
 
 	blid := b.backendLogID.String()
 	b.logf("Backend: logs: be:%v fe:%v", blid, opts.FrontendLogID)
-	b.send(ipn.Notify{BackendLogID: &blid})
-	b.send(ipn.Notify{Prefs: &prefs})
+	b.sendLocked(ipn.Notify{
+		BackendLogID: &blid,
+		Prefs:        &prefs,
+	})
 
-	if !loggedOut && (b.hasNodeKey() || confWantRunning) {
-		// Even if !WantRunning, we should verify our key, if there
-		// is one. If you want tailscaled to be completely idle,
-		// use logout instead.
+	if !loggedOut && (b.hasNodeKeyLocked() || confWantRunning) {
+		// If we know that we're either logged in or meant to be
+		// running, tell the controlclient that it should also assume
+		// that we need to be logged in.
+		//
+		// Without this, the state machine transitions to "NeedsLogin" implying
+		// that user interaction is required, which is not the case and can
+		// regress tsnet.Server restarts.
 		cc.Login(nil, controlclient.LoginDefault)
 	}
-	b.stateMachine()
+	b.stateMachineLockedOnEntry(unlock)
+
 	return nil
 }
 
@@ -2390,6 +2416,13 @@ func (b *LocalBackend) DebugPickNewDERP() error {
 //
 // b.mu must not be held.
 func (b *LocalBackend) send(n ipn.Notify) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	b.sendLocked(n)
+}
+
+// sendLocked is like send, but assumes b.mu is already held.
+func (b *LocalBackend) sendLocked(n ipn.Notify) {
 	if n.Prefs != nil {
 		n.Prefs = ptr.To(stripKeysFromPrefs(*n.Prefs))
 	}
@@ -2397,8 +2430,6 @@ func (b *LocalBackend) send(n ipn.Notify) {
 		n.Version = version.Long()
 	}
 
-	b.mu.Lock()
-	notifyFunc := b.notify
 	apiSrv := b.peerAPIServer
 	if mayDeref(apiSrv).taildrop.HasFilesWaiting() {
 		n.FilesWaiting = &empty.Message{}
@@ -2411,12 +2442,6 @@ func (b *LocalBackend) send(n ipn.Notify) {
 			// Drop the notification if the channel is full.
 		}
 	}
-
-	b.mu.Unlock()
-
-	if notifyFunc != nil {
-		notifyFunc(n)
-	}
 }
 
 func (b *LocalBackend) sendFileNotify() {
@@ -2426,9 +2451,8 @@ func (b *LocalBackend) sendFileNotify() {
 	for _, wakeWaiter := range b.fileWaiters {
 		wakeWaiter()
 	}
-	notifyFunc := b.notify
 	apiSrv := b.peerAPIServer
-	if notifyFunc == nil || apiSrv == nil {
+	if apiSrv == nil {
 		b.mu.Unlock()
 		return
 	}
@@ -4376,14 +4400,6 @@ func (b *LocalBackend) enterStateLockedOnEntry(newState ipn.State, unlock unlock
 	}
 }
 
-// hasNodeKey reports whether a non-zero node key is present in the current
-// prefs.
-func (b *LocalBackend) hasNodeKey() bool {
-	b.mu.Lock()
-	defer b.mu.Unlock()
-	return b.hasNodeKeyLocked()
-}
-
 func (b *LocalBackend) hasNodeKeyLocked() bool {
 	// we can't use b.Prefs(), because it strips the keys, oops!
 	p := b.pm.CurrentPrefs()
@@ -4481,6 +4497,12 @@ func (b *LocalBackend) nextStateLocked() ipn.State {
 // Or maybe just call the state machine from fewer places.
 func (b *LocalBackend) stateMachine() {
 	unlock := b.lockAndGetUnlock()
+	b.stateMachineLockedOnEntry(unlock)
+}
+
+// stateMachineLockedOnEntry is like stateMachine but requires b.mu be held to
+// call it, but it unlocks b.mu when done (via unlock, a once func).
+func (b *LocalBackend) stateMachineLockedOnEntry(unlock unlockOnce) {
 	b.enterStateLockedOnEntry(b.nextStateLocked(), unlock)
 }
 

+ 7 - 14
ipn/ipnlocal/state_test.go

@@ -97,7 +97,6 @@ type mockControl struct {
 	paused atomic.Bool
 
 	mu          sync.Mutex
-	machineKey  key.MachinePrivate
 	persist     *persist.Persist
 	calls       []string
 	authBlocked bool
@@ -134,12 +133,6 @@ func (cc *mockControl) populateKeys() (newKeys bool) {
 	cc.mu.Lock()
 	defer cc.mu.Unlock()
 
-	if cc.machineKey.IsZero() {
-		cc.logf("Copying machineKey.")
-		cc.machineKey, _ = cc.opts.GetMachinePrivateKey()
-		newKeys = true
-	}
-
 	if cc.persist == nil {
 		cc.persist = &persist.Persist{}
 	}
@@ -831,7 +824,7 @@ func TestStateMachine(t *testing.T) {
 	// The last test case is the most common one: restarting when both
 	// logged in and WantRunning.
 	t.Logf("\n\nStart5")
-	notifies.expect(2)
+	notifies.expect(1)
 	c.Assert(b.Start(ipn.Options{}), qt.IsNil)
 	{
 		// NOTE: cc.Shutdown() is correct here, since we didn't call
@@ -839,27 +832,27 @@ func TestStateMachine(t *testing.T) {
 		previousCC.assertShutdown(false)
 		cc.assertCalls("New", "Login")
 
-		nn := notifies.drain(2)
+		nn := notifies.drain(1)
 		cc.assertCalls()
 		c.Assert(nn[0].Prefs, qt.IsNotNil)
 		c.Assert(nn[0].Prefs.LoggedOut(), qt.IsFalse)
 		c.Assert(nn[0].Prefs.WantRunning(), qt.IsTrue)
-		c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
+		c.Assert(b.State(), qt.Equals, ipn.NoState)
 	}
 
 	// Control server accepts our valid key from before.
 	t.Logf("\n\nLoginFinished5")
-	notifies.expect(2)
+	notifies.expect(1)
 	cc.send(nil, "", true, &netmap.NetworkMap{
 		SelfNode: (&tailcfg.Node{MachineAuthorized: true}).View(),
 	})
 	{
-		nn := notifies.drain(2)
+		nn := notifies.drain(1)
 		cc.assertCalls()
 		// NOTE: No LoginFinished message since no interactive
 		// login was needed.
-		c.Assert(nn[1].State, qt.IsNotNil)
-		c.Assert(ipn.Starting, qt.Equals, *nn[1].State)
+		c.Assert(nn[0].State, qt.IsNotNil)
+		c.Assert(ipn.Starting, qt.Equals, *nn[0].State)
 		// NOTE: No prefs change this time. WantRunning stays true.
 		// We were in Starting in the first place, so that doesn't
 		// change either.