Parcourir la source

appc: fix a deadlock in route advertisements (#15031)

`routeAdvertiser` is the `iplocal.LocalBackend`. Calls to
`Advertise/UnadvertiseRoute` end up calling `EditPrefs` which in turn
calls `authReconfig` which finally calls `readvertiseAppConnectorRoutes`
which calls `AppConnector.DomainRoutes` and gets stuck on a mutex that
was already held when `routeAdvertiser` was called.

Make all calls to `routeAdvertiser` in `app.AppConnector` go through the
execqueue instead as a short-term fix.

Updates tailscale/corp#25965

Signed-off-by: Andrew Lytvynov <[email protected]>
Co-authored-by: Irbe Krumina <[email protected]>
Andrew Lytvynov il y a 1 an
Parent
commit
ec5f04b274
3 fichiers modifiés avec 84 ajouts et 11 suppressions
  1. 13 11
      appc/appconnector.go
  2. 58 0
      appc/appconnector_test.go
  3. 13 0
      appc/appctest/appctest.go

+ 13 - 11
appc/appconnector.go

@@ -289,9 +289,11 @@ func (e *AppConnector) updateDomains(domains []string) {
 				toRemove = append(toRemove, netip.PrefixFrom(a, a.BitLen()))
 			}
 		}
-		if err := e.routeAdvertiser.UnadvertiseRoute(toRemove...); err != nil {
-			e.logf("failed to unadvertise routes on domain removal: %v: %v: %v", slicesx.MapKeys(oldDomains), toRemove, err)
-		}
+		e.queue.Add(func() {
+			if err := e.routeAdvertiser.UnadvertiseRoute(toRemove...); err != nil {
+				e.logf("failed to unadvertise routes on domain removal: %v: %v: %v", slicesx.MapKeys(oldDomains), toRemove, err)
+			}
+		})
 	}
 
 	e.logf("handling domains: %v and wildcards: %v", slicesx.MapKeys(e.domains), e.wildcards)
@@ -310,11 +312,6 @@ func (e *AppConnector) updateRoutes(routes []netip.Prefix) {
 		return
 	}
 
-	if err := e.routeAdvertiser.AdvertiseRoute(routes...); err != nil {
-		e.logf("failed to advertise routes: %v: %v", routes, err)
-		return
-	}
-
 	var toRemove []netip.Prefix
 
 	// If we're storing routes and know e.controlRoutes is a good
@@ -338,9 +335,14 @@ nextRoute:
 		}
 	}
 
-	if err := e.routeAdvertiser.UnadvertiseRoute(toRemove...); err != nil {
-		e.logf("failed to unadvertise routes: %v: %v", toRemove, err)
-	}
+	e.queue.Add(func() {
+		if err := e.routeAdvertiser.AdvertiseRoute(routes...); err != nil {
+			e.logf("failed to advertise routes: %v: %v", routes, err)
+		}
+		if err := e.routeAdvertiser.UnadvertiseRoute(toRemove...); err != nil {
+			e.logf("failed to unadvertise routes: %v: %v", toRemove, err)
+		}
+	})
 
 	e.controlRoutes = routes
 	if err := e.storeRoutesLocked(); err != nil {

+ 58 - 0
appc/appconnector_test.go

@@ -8,6 +8,7 @@ import (
 	"net/netip"
 	"reflect"
 	"slices"
+	"sync/atomic"
 	"testing"
 	"time"
 
@@ -86,6 +87,7 @@ func TestUpdateRoutes(t *testing.T) {
 
 		routes := []netip.Prefix{netip.MustParsePrefix("192.0.2.0/24"), netip.MustParsePrefix("192.0.0.1/32")}
 		a.updateRoutes(routes)
+		a.Wait(ctx)
 
 		slices.SortFunc(rc.Routes(), prefixCompare)
 		rc.SetRoutes(slices.Compact(rc.Routes()))
@@ -105,6 +107,7 @@ func TestUpdateRoutes(t *testing.T) {
 }
 
 func TestUpdateRoutesUnadvertisesContainedRoutes(t *testing.T) {
+	ctx := context.Background()
 	for _, shouldStore := range []bool{false, true} {
 		rc := &appctest.RouteCollector{}
 		var a *AppConnector
@@ -117,6 +120,7 @@ func TestUpdateRoutesUnadvertisesContainedRoutes(t *testing.T) {
 		rc.SetRoutes([]netip.Prefix{netip.MustParsePrefix("192.0.2.1/32")})
 		routes := []netip.Prefix{netip.MustParsePrefix("192.0.2.0/24")}
 		a.updateRoutes(routes)
+		a.Wait(ctx)
 
 		if !slices.EqualFunc(routes, rc.Routes(), prefixEqual) {
 			t.Fatalf("got %v, want %v", rc.Routes(), routes)
@@ -636,3 +640,57 @@ func TestMetricBucketsAreSorted(t *testing.T) {
 		t.Errorf("metricStoreRoutesNBuckets must be in order")
 	}
 }
+
+// TestUpdateRoutesDeadlock is a regression test for a deadlock in
+// LocalBackend<->AppConnector interaction. When using real LocalBackend as the
+// routeAdvertiser, calls to Advertise/UnadvertiseRoutes can end up calling
+// back into AppConnector via authReconfig. If everything is called
+// synchronously, this results in a deadlock on AppConnector.mu.
+func TestUpdateRoutesDeadlock(t *testing.T) {
+	ctx := context.Background()
+	rc := &appctest.RouteCollector{}
+	a := NewAppConnector(t.Logf, rc, &RouteInfo{}, fakeStoreRoutes)
+
+	advertiseCalled := new(atomic.Bool)
+	unadvertiseCalled := new(atomic.Bool)
+	rc.AdvertiseCallback = func() {
+		// Call something that requires a.mu to be held.
+		a.DomainRoutes()
+		advertiseCalled.Store(true)
+	}
+	rc.UnadvertiseCallback = func() {
+		// Call something that requires a.mu to be held.
+		a.DomainRoutes()
+		unadvertiseCalled.Store(true)
+	}
+
+	a.updateDomains([]string{"example.com"})
+	a.Wait(ctx)
+
+	// Trigger rc.AdveriseRoute.
+	a.updateRoutes(
+		[]netip.Prefix{
+			netip.MustParsePrefix("127.0.0.1/32"),
+			netip.MustParsePrefix("127.0.0.2/32"),
+		},
+	)
+	a.Wait(ctx)
+	// Trigger rc.UnadveriseRoute.
+	a.updateRoutes(
+		[]netip.Prefix{
+			netip.MustParsePrefix("127.0.0.1/32"),
+		},
+	)
+	a.Wait(ctx)
+
+	if !advertiseCalled.Load() {
+		t.Error("AdvertiseRoute was not called")
+	}
+	if !unadvertiseCalled.Load() {
+		t.Error("UnadvertiseRoute was not called")
+	}
+
+	if want := []netip.Prefix{netip.MustParsePrefix("127.0.0.1/32")}; !slices.Equal(slices.Compact(rc.Routes()), want) {
+		t.Fatalf("got %v, want %v", rc.Routes(), want)
+	}
+}

+ 13 - 0
appc/appctest/appctest.go

@@ -11,12 +11,22 @@ import (
 
 // RouteCollector is a test helper that collects the list of routes advertised
 type RouteCollector struct {
+	// AdvertiseCallback (optional) is called synchronously from
+	// AdvertiseRoute.
+	AdvertiseCallback func()
+	// UnadvertiseCallback (optional) is called synchronously from
+	// UnadvertiseRoute.
+	UnadvertiseCallback func()
+
 	routes        []netip.Prefix
 	removedRoutes []netip.Prefix
 }
 
 func (rc *RouteCollector) AdvertiseRoute(pfx ...netip.Prefix) error {
 	rc.routes = append(rc.routes, pfx...)
+	if rc.AdvertiseCallback != nil {
+		rc.AdvertiseCallback()
+	}
 	return nil
 }
 
@@ -30,6 +40,9 @@ func (rc *RouteCollector) UnadvertiseRoute(toRemove ...netip.Prefix) error {
 			rc.removedRoutes = append(rc.removedRoutes, r)
 		}
 	}
+	if rc.UnadvertiseCallback != nil {
+		rc.UnadvertiseCallback()
+	}
 	return nil
 }