소스 검색

ipn/ipnlocal: handle auto value for ExitNodeID syspolicy (#12512)

Updates tailscale/corp#19681

Signed-off-by: Claire Wang <[email protected]>
Claire Wang 1 년 전
부모
커밋
8965e87fa8
2개의 파일이 변경됨: 369개의 추가, 24개의 삭제
  1. 69 13
      ipn/ipnlocal/local.go
  2. 300 11
      ipn/ipnlocal/local_test.go

+ 69 - 13
ipn/ipnlocal/local.go

@@ -338,6 +338,9 @@ type LocalBackend struct {
 	// lastSuggestedExitNode stores the last suggested exit node suggestion to
 	// avoid unnecessary churn between multiple equally-good options.
 	lastSuggestedExitNode tailcfg.StableNodeID
+
+	// refreshAutoExitNode indicates if the exit node should be recomputed when the next netcheck report is available.
+	refreshAutoExitNode bool
 }
 
 // HealthTracker returns the health tracker for the backend.
@@ -640,7 +643,9 @@ func (b *LocalBackend) linkChange(delta *netmon.ChangeDelta) {
 	hadPAC := b.prevIfState.HasPAC()
 	b.prevIfState = ifst
 	b.pauseOrResumeControlClientLocked()
-
+	if delta.Major && shouldAutoExitNode() {
+		b.refreshAutoExitNode = true
+	}
 	// If the PAC-ness of the network changed, reconfig wireguard+route to
 	// add/remove subnets.
 	if hadPAC != ifst.HasPAC() {
@@ -1215,7 +1220,7 @@ func (b *LocalBackend) SetControlClientStatus(c controlclient.Client, st control
 		prefs.WantRunning = true
 		prefs.LoggedOut = false
 	}
-	if setExitNodeID(prefs, st.NetMap) {
+	if setExitNodeID(prefs, st.NetMap, b.lastSuggestedExitNode) {
 		prefsChanged = true
 	}
 	if applySysPolicy(prefs) {
@@ -1418,9 +1423,8 @@ func (b *LocalBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo
 			b.send(*notify)
 		}
 	}()
-
-	b.mu.Lock()
-	defer b.mu.Unlock()
+	unlock := b.lockAndGetUnlock()
+	defer unlock()
 	if !b.updateNetmapDeltaLocked(muts) {
 		return false
 	}
@@ -1428,8 +1432,14 @@ func (b *LocalBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo
 	if b.netMap != nil && mutationsAreWorthyOfTellingIPNBus(muts) {
 		nm := ptr.To(*b.netMap) // shallow clone
 		nm.Peers = make([]tailcfg.NodeView, 0, len(b.peers))
+		shouldAutoExitNode := shouldAutoExitNode()
 		for _, p := range b.peers {
 			nm.Peers = append(nm.Peers, p)
+			// If the auto exit node currently set goes offline, find another auto exit node.
+			if shouldAutoExitNode && b.pm.prefs.ExitNodeID() == p.StableID() && p.Online() != nil && !*p.Online() {
+				b.setAutoExitNodeIDLockedOnEntry(unlock)
+				return false
+			}
 		}
 		slices.SortFunc(nm.Peers, func(a, b tailcfg.NodeView) int {
 			return cmp.Compare(a.ID(), b.ID())
@@ -1491,9 +1501,14 @@ func (b *LocalBackend) updateNetmapDeltaLocked(muts []netmap.NodeMutation) (hand
 
 // setExitNodeID updates prefs to reference an exit node by ID, rather
 // than by IP. It returns whether prefs was mutated.
-func setExitNodeID(prefs *ipn.Prefs, nm *netmap.NetworkMap) (prefsChanged bool) {
+func setExitNodeID(prefs *ipn.Prefs, nm *netmap.NetworkMap, lastSuggestedExitNode tailcfg.StableNodeID) (prefsChanged bool) {
 	if exitNodeIDStr, _ := syspolicy.GetString(syspolicy.ExitNodeID, ""); exitNodeIDStr != "" {
 		exitNodeID := tailcfg.StableNodeID(exitNodeIDStr)
+		if shouldAutoExitNode() && lastSuggestedExitNode != "" {
+			exitNodeID = lastSuggestedExitNode
+		}
+		// Note: when exitNodeIDStr == "auto:any" && lastSuggestedExitNode == "", then exitNodeID is now "auto:any" which will never match a peer's node ID.
+		// When there is no peer matching the node ID, traffic will blackhole, preventing accidental non-exit-node usage when a policy is in effect that requires an exit node.
 		changed := prefs.ExitNodeID != exitNodeID || prefs.ExitNodeIP.IsValid()
 		prefs.ExitNodeID = exitNodeID
 		prefs.ExitNodeIP = netip.Addr{}
@@ -3357,7 +3372,7 @@ func (b *LocalBackend) setPrefsLockedOnEntry(newp *ipn.Prefs, unlock unlockOnce)
 	// setExitNodeID returns whether it updated b.prefs, but
 	// everything in this function treats b.prefs as completely new
 	// anyway. No-op if no exit node resolution is needed.
-	setExitNodeID(newp, netMap)
+	setExitNodeID(newp, netMap, b.lastSuggestedExitNode)
 	// applySysPolicy does likewise so we can also ignore its return value.
 	applySysPolicy(newp)
 	// We do this to avoid holding the lock while doing everything else.
@@ -4850,12 +4865,44 @@ func (b *LocalBackend) Logout(ctx context.Context) error {
 func (b *LocalBackend) setNetInfo(ni *tailcfg.NetInfo) {
 	b.mu.Lock()
 	cc := b.cc
+	// Read and clear the pending-refresh flag while b.mu is held, so the
+	// refresh below runs at most once per request.
+	refresh := b.refreshAutoExitNode
+	b.refreshAutoExitNode = false
 	b.mu.Unlock()
 
 	if cc == nil {
 		return
 	}
 	cc.SetNetInfo(ni)
+	// A refresh was requested (linkChange sets the flag on a major link
+	// change while the auto exit node policy is active): recompute the
+	// exit node now.
+	if refresh {
+		unlock := b.lockAndGetUnlock()
+		// unlock is also handed to the callee, which releases it itself.
+		// NOTE(review): the deferred call here presumably relies on unlock
+		// being safe to call more than once — confirm against unlockOnce.
+		defer unlock()
+		b.setAutoExitNodeIDLockedOnEntry(unlock)
+	}
+}
+
+// setAutoExitNodeIDLockedOnEntry asks suggestExitNodeLocked for an exit node
+// suggestion and applies the suggested ID to the current prefs through
+// editPrefsLockedOnEntry. Failures are logged rather than returned.
+//
+// Callers in this file acquire the lock with lockAndGetUnlock and pass the
+// resulting unlock in; unlock is invoked (via defer) on every return path.
+func (b *LocalBackend) setAutoExitNodeIDLockedOnEntry(unlock unlockOnce) {
+	defer unlock()
+
+	prefs := b.pm.CurrentPrefs()
+	if !prefs.Valid() {
+		b.logf("[unexpected]: received tailnet exit node ID pref change callback but current prefs are nil")
+		return
+	}
+	prefsClone := prefs.AsStruct()
+	newSuggestion, err := b.suggestExitNodeLocked()
+	if err != nil {
+		b.logf("setAutoExitNodeID: %v", err)
+		return
+	}
+	prefsClone.ExitNodeID = newSuggestion.ID
+	// Only ExitNodeID is flagged as set in the mask passed to
+	// editPrefsLockedOnEntry.
+	_, err = b.editPrefsLockedOnEntry(&ipn.MaskedPrefs{
+		Prefs:         *prefsClone,
+		ExitNodeIDSet: true,
+	}, unlock)
+	if err != nil {
+		b.logf("setAutoExitNodeID: failed to apply exit node ID preference: %v", err)
+		return
+	}
 }
 
 // setNetMapLocked updates the LocalBackend state to reflect the newly
@@ -6526,30 +6573,33 @@ func mayDeref[T any](p *T) (v T) {
 var ErrNoPreferredDERP = errors.New("no preferred DERP, try again later")
 var ErrCannotSuggestExitNode = errors.New("unable to suggest an exit node, try again later")
 
-// SuggestExitNode computes a suggestion based on the current netmap and last netcheck report. If
+// suggestExitNodeLocked computes a suggestion based on the current netmap and last netcheck report. If
 // there are multiple equally good options, one is selected at random, so the result is not stable. To be
 // eligible for consideration, the peer must have NodeAttrSuggestExitNode in its CapMap.
 //
 // Currently, peers with a DERP home are preferred over those without (typically this means Mullvad).
 // Peers are selected based on having a DERP home that is the lowest latency to this device. For peers
 // without a DERP home, we look for geographic proximity to this device's DERP home.
-func (b *LocalBackend) SuggestExitNode() (response apitype.ExitNodeSuggestionResponse, err error) {
-	b.mu.Lock()
+// b.mu must be held.
+func (b *LocalBackend) suggestExitNodeLocked() (response apitype.ExitNodeSuggestionResponse, err error) {
 	lastReport := b.MagicConn().GetLastNetcheckReport(b.ctx)
 	netMap := b.netMap
 	prevSuggestion := b.lastSuggestedExitNode
-	b.mu.Unlock()
 
 	res, err := suggestExitNode(lastReport, netMap, prevSuggestion, randomRegion, randomNode, getAllowedSuggestions())
 	if err != nil {
 		return res, err
 	}
-	b.mu.Lock()
 	b.lastSuggestedExitNode = res.ID
-	b.mu.Unlock()
 	return res, err
 }
 
+// SuggestExitNode is the exported entry point for exit node suggestions. It
+// acquires b.mu and returns the result of suggestExitNodeLocked.
+func (b *LocalBackend) SuggestExitNode() (response apitype.ExitNodeSuggestionResponse, err error) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	return b.suggestExitNodeLocked()
+}
+
 // selectRegionFunc returns a DERP region from the slice of candidate regions.
 // The value is returned, not the slice index.
 type selectRegionFunc func(views.Slice[int]) int
@@ -6788,6 +6838,12 @@ func longLatDistance(fromLat, fromLong, toLat, toLong float64) float64 {
 	return earthRadiusMeters * c
 }
 
+// shouldAutoExitNode reports whether the syspolicy.ExitNodeID string policy is
+// exactly "auto:any". The error from syspolicy.GetString is ignored; "" is
+// passed as the default value.
+func shouldAutoExitNode() bool {
+	exitNodeIDStr, _ := syspolicy.GetString(syspolicy.ExitNodeID, "")
+	return exitNodeIDStr == "auto:any"
+}
+
 // startAutoUpdate triggers an auto-update attempt. The actual update happens
 // asynchronously. If another update is in progress, an error is returned.
 func (b *LocalBackend) startAutoUpdate(logPrefix string) (retErr error) {

+ 300 - 11
ipn/ipnlocal/local_test.go

@@ -35,6 +35,7 @@ import (
 	"tailscale.com/net/netcheck"
 	"tailscale.com/net/netmon"
 	"tailscale.com/net/tsaddr"
+	"tailscale.com/net/tsdial"
 	"tailscale.com/tailcfg"
 	"tailscale.com/tsd"
 	"tailscale.com/tstest"
@@ -1647,16 +1648,17 @@ func (h *mockSyspolicyHandler) ReadStringArray(key string) ([]string, error) {
 func TestSetExitNodeIDPolicy(t *testing.T) {
 	pfx := netip.MustParsePrefix
 	tests := []struct {
-		name           string
-		exitNodeIPKey  bool
-		exitNodeIDKey  bool
-		exitNodeID     string
-		exitNodeIP     string
-		prefs          *ipn.Prefs
-		exitNodeIPWant string
-		exitNodeIDWant string
-		prefsChanged   bool
-		nm             *netmap.NetworkMap
+		name                  string
+		exitNodeIPKey         bool
+		exitNodeIDKey         bool
+		exitNodeID            string
+		exitNodeIP            string
+		prefs                 *ipn.Prefs
+		exitNodeIPWant        string
+		exitNodeIDWant        string
+		prefsChanged          bool
+		nm                    *netmap.NetworkMap
+		lastSuggestedExitNode tailcfg.StableNodeID
 	}{
 		{
 			name:           "ExitNodeID key is set",
@@ -1835,6 +1837,21 @@ func TestSetExitNodeIDPolicy(t *testing.T) {
 				},
 			},
 		},
+		{
+			name:                  "ExitNodeID key is set to auto and last suggested exit node is populated",
+			exitNodeIDKey:         true,
+			exitNodeID:            "auto:any",
+			lastSuggestedExitNode: "123",
+			exitNodeIDWant:        "123",
+			prefsChanged:          true,
+		},
+		{
+			name:           "ExitNodeID key is set to auto and last suggested exit node is not populated",
+			exitNodeIDKey:  true,
+			exitNodeID:     "auto:any",
+			prefsChanged:   true,
+			exitNodeIDWant: "auto:any",
+		},
 	}
 
 	for _, test := range tests {
@@ -1864,7 +1881,8 @@ func TestSetExitNodeIDPolicy(t *testing.T) {
 			pm.prefs = test.prefs.View()
 			b.netMap = test.nm
 			b.pm = pm
-			changed := setExitNodeID(b.pm.prefs.AsStruct(), test.nm)
+			b.lastSuggestedExitNode = test.lastSuggestedExitNode
+			changed := setExitNodeID(b.pm.prefs.AsStruct(), test.nm, tailcfg.StableNodeID(test.lastSuggestedExitNode))
 			b.SetPrefsForTest(pm.CurrentPrefs().AsStruct())
 
 			if got := b.pm.prefs.ExitNodeID(); got != tailcfg.StableNodeID(test.exitNodeIDWant) {
@@ -1885,6 +1903,222 @@ func TestSetExitNodeIDPolicy(t *testing.T) {
 	}
 }
 
+// TestUpdateNetmapDeltaAutoExitNode exercises UpdateNetmapDelta under the
+// "auto:any" ExitNodeID policy with two suggested exit node peers. For each
+// case it checks both the value returned by UpdateNetmapDelta and the
+// ExitNodeID pref afterwards: once when the currently selected exit node goes
+// offline, and once when only the other peer goes offline.
+func TestUpdateNetmapDeltaAutoExitNode(t *testing.T) {
+	peer1 := makePeer(1, withCap(26), withSuggest(), withExitRoutes())
+	peer2 := makePeer(2, withCap(26), withSuggest(), withExitRoutes())
+	derpMap := &tailcfg.DERPMap{
+		Regions: map[int]*tailcfg.DERPRegion{
+			1: {
+				Nodes: []*tailcfg.DERPNode{
+					{
+						Name:     "t1",
+						RegionID: 1,
+					},
+				},
+			},
+			2: {
+				Nodes: []*tailcfg.DERPNode{
+					{
+						Name:     "t2",
+						RegionID: 2,
+					},
+				},
+			},
+		},
+	}
+	report := &netcheck.Report{
+		RegionLatency: map[int]time.Duration{
+			1: 10 * time.Millisecond,
+			2: 5 * time.Millisecond,
+			3: 30 * time.Millisecond,
+		},
+		PreferredDERP: 2,
+	}
+	tests := []struct {
+		name                      string
+		lastSuggestedExitNode     tailcfg.StableNodeID
+		netmap                    *netmap.NetworkMap
+		muts                      []*tailcfg.PeerChange
+		exitNodeIDWant            tailcfg.StableNodeID
+		updateNetmapDeltaResponse bool
+		report                    *netcheck.Report
+	}{
+		{
+			name:                  "selected auto exit node goes offline",
+			lastSuggestedExitNode: peer1.StableID(),
+			netmap: &netmap.NetworkMap{
+				Peers: []tailcfg.NodeView{
+					peer1,
+					peer2,
+				},
+				DERPMap: derpMap,
+			},
+			muts: []*tailcfg.PeerChange{
+				{
+					NodeID: 1,
+					Online: ptr.To(false),
+				},
+				{
+					NodeID: 2,
+					Online: ptr.To(true),
+				},
+			},
+			exitNodeIDWant:            peer2.StableID(),
+			updateNetmapDeltaResponse: false,
+			report:                    report,
+		},
+		{
+			name:                  "other exit node goes offline doesn't change selected auto exit node that's still online",
+			lastSuggestedExitNode: peer2.StableID(),
+			netmap: &netmap.NetworkMap{
+				Peers: []tailcfg.NodeView{
+					peer1,
+					peer2,
+				},
+				DERPMap: derpMap,
+			},
+			muts: []*tailcfg.PeerChange{
+				{
+					NodeID: 1,
+					Online: ptr.To(false),
+				},
+				{
+					NodeID: 2,
+					Online: ptr.To(true),
+				},
+			},
+			exitNodeIDWant:            peer2.StableID(),
+			updateNetmapDeltaResponse: true,
+			report:                    report,
+		},
+	}
+	// Install the "auto:any" policy once; it applies to every subtest.
+	msh := &mockSyspolicyHandler{
+		t: t,
+		stringPolicies: map[syspolicy.Key]*string{
+			syspolicy.ExitNodeID: ptr.To("auto:any"),
+		},
+	}
+	syspolicy.SetHandlerForTest(t, msh)
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			b := newTestLocalBackend(t)
+			b.netMap = tt.netmap
+			b.updatePeersFromNetmapLocked(b.netMap)
+			b.lastSuggestedExitNode = tt.lastSuggestedExitNode
+			b.sys.MagicSock.Get().SetLastNetcheckReportForTest(b.ctx, tt.report)
+			b.SetPrefsForTest(b.pm.CurrentPrefs().AsStruct())
+			someTime := time.Unix(123, 0)
+			muts, ok := netmap.MutationsFromMapResponse(&tailcfg.MapResponse{
+				PeersChangedPatch: tt.muts,
+			}, someTime)
+			if !ok {
+				t.Fatal("netmap.MutationsFromMapResponse failed")
+			}
+			// Precondition: with the auto policy active, SetPrefsForTest above
+			// is expected to have set the exit node to lastSuggestedExitNode.
+			if b.pm.prefs.ExitNodeID() != tt.lastSuggestedExitNode {
+				t.Fatalf("did not set exit node ID to last suggested exit node despite auto policy")
+			}
+
+			got := b.UpdateNetmapDelta(muts)
+			if got != tt.updateNetmapDeltaResponse {
+				t.Fatalf("got %v expected %v from UpdateNetmapDelta", got, tt.updateNetmapDeltaResponse)
+			}
+			if b.pm.prefs.ExitNodeID() != tt.exitNodeIDWant {
+				t.Fatalf("did not get expected exit node id after UpdateNetmapDelta")
+			}
+		})
+	}
+}
+
+// TestAutoExitNodeSetNetInfoCallback checks that setNetInfo recomputes the
+// exit node when refreshAutoExitNode is set. The backend starts with peer1 as
+// the last suggestion under the "auto:any" policy; after a netcheck report
+// with PreferredDERP 2 is installed and setNetInfo is called, the ExitNodeID
+// pref is expected to be peer2, which was created withDERP(2).
+func TestAutoExitNodeSetNetInfoCallback(t *testing.T) {
+	b := newTestLocalBackend(t)
+	hi := hostinfo.New()
+	ni := tailcfg.NetInfo{LinkType: "wired"}
+	hi.NetInfo = &ni
+	b.hostinfo = hi
+	k := key.NewMachine()
+	var cc *mockControl
+	opts := controlclient.Options{
+		ServerURL: "https://example.com",
+		GetMachinePrivateKey: func() (key.MachinePrivate, error) {
+			return k, nil
+		},
+		Dialer: tsdial.NewDialer(netmon.NewStatic()),
+		Logf:   b.logf,
+	}
+	cc = newClient(t, opts)
+	// setNetInfo returns early when b.cc is nil, so a control client must be
+	// installed for the refresh path to run.
+	b.cc = cc
+	msh := &mockSyspolicyHandler{
+		t: t,
+		stringPolicies: map[syspolicy.Key]*string{
+			syspolicy.ExitNodeID: ptr.To("auto:any"),
+		},
+	}
+	syspolicy.SetHandlerForTest(t, msh)
+	peer1 := makePeer(1, withCap(26), withDERP(3), withSuggest(), withExitRoutes())
+	peer2 := makePeer(2, withCap(26), withDERP(2), withSuggest(), withExitRoutes())
+	selfNode := tailcfg.Node{
+		Addresses: []netip.Prefix{
+			netip.MustParsePrefix("100.64.1.1/32"),
+			netip.MustParsePrefix("fe70::1/128"),
+		},
+		DERP: "127.3.3.40:2",
+	}
+	defaultDERPMap := &tailcfg.DERPMap{
+		Regions: map[int]*tailcfg.DERPRegion{
+			1: {
+				Nodes: []*tailcfg.DERPNode{
+					{
+						Name:     "t1",
+						RegionID: 1,
+					},
+				},
+			},
+			2: {
+				Nodes: []*tailcfg.DERPNode{
+					{
+						Name:     "t2",
+						RegionID: 2,
+					},
+				},
+			},
+			3: {
+				Nodes: []*tailcfg.DERPNode{
+					{
+						Name:     "t3",
+						RegionID: 3,
+					},
+				},
+			},
+		},
+	}
+	b.netMap = &netmap.NetworkMap{
+		SelfNode: selfNode.View(),
+		Peers: []tailcfg.NodeView{
+			peer1,
+			peer2,
+		},
+		DERPMap: defaultDERPMap,
+	}
+	b.lastSuggestedExitNode = peer1.StableID()
+	b.SetPrefsForTest(b.pm.CurrentPrefs().AsStruct())
+	if eid := b.Prefs().ExitNodeID(); eid != peer1.StableID() {
+		t.Errorf("got initial exit node %v, want %v", eid, peer1.StableID())
+	}
+	// Request a refresh directly; setNetInfo reads and clears this flag.
+	b.refreshAutoExitNode = true
+	b.sys.MagicSock.Get().SetLastNetcheckReportForTest(b.ctx, &netcheck.Report{
+		RegionLatency: map[int]time.Duration{
+			1: 10 * time.Millisecond,
+			2: 5 * time.Millisecond,
+			3: 30 * time.Millisecond,
+		},
+		PreferredDERP: 2,
+	})
+	b.setNetInfo(&ni)
+	if eid := b.Prefs().ExitNodeID(); eid != peer2.StableID() {
+		t.Errorf("got final exit node %v, want %v", eid, peer2.StableID())
+	}
+}
+
 func TestApplySysPolicy(t *testing.T) {
 	tests := []struct {
 		name           string
@@ -2796,6 +3030,12 @@ func withSuggest() peerOptFunc {
 	}
 }
 
+// withCap returns a peerOptFunc that sets the node's Cap field to version.
+func withCap(version tailcfg.CapabilityVersion) peerOptFunc {
+	return func(n *tailcfg.Node) {
+		n.Cap = version
+	}
+}
+
 func deterministicRegionForTest(t testing.TB, want views.Slice[int], use int) selectRegionFunc {
 	t.Helper()
 
@@ -3473,6 +3713,55 @@ func TestMinLatencyDERPregion(t *testing.T) {
 	}
 }
 
+// TestShouldAutoExitNode verifies that shouldAutoExitNode returns true for the
+// ExitNodeID policy value "auto:any" and false for "foo", "auto:", "auto" and
+// "auto:foo".
+func TestShouldAutoExitNode(t *testing.T) {
+	tests := []struct {
+		name                  string
+		exitNodeIDPolicyValue string
+		expectedBool          bool
+	}{
+		{
+			name:                  "auto:any",
+			exitNodeIDPolicyValue: "auto:any",
+			expectedBool:          true,
+		},
+		{
+			name:                  "no auto prefix",
+			exitNodeIDPolicyValue: "foo",
+			expectedBool:          false,
+		},
+		{
+			name:                  "auto prefix but empty suffix",
+			exitNodeIDPolicyValue: "auto:",
+			expectedBool:          false,
+		},
+		{
+			name:                  "auto prefix no colon",
+			exitNodeIDPolicyValue: "auto",
+			expectedBool:          false,
+		},
+		{
+			name:                  "auto prefix invalid suffix",
+			exitNodeIDPolicyValue: "auto:foo",
+			expectedBool:          false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Each subtest installs its own policy handler carrying the value
+			// under test.
+			msh := &mockSyspolicyHandler{
+				t: t,
+				stringPolicies: map[syspolicy.Key]*string{
+					syspolicy.ExitNodeID: ptr.To(tt.exitNodeIDPolicyValue),
+				},
+			}
+			syspolicy.SetHandlerForTest(t, msh)
+			got := shouldAutoExitNode()
+			if got != tt.expectedBool {
+				t.Fatalf("expected %v got %v for %v policy value", tt.expectedBool, got, tt.exitNodeIDPolicyValue)
+			}
+		})
+	}
+}
+
 func TestEnableAutoUpdates(t *testing.T) {
 	lb := newTestLocalBackend(t)