Browse Source

wgengine/netstack: add local tailscale service IPs to route and terminate locally (#18461)

* wgengine/netstack: add local tailscale service IPs to route and terminate locally

This commit adds the Tailscale service IPs that are served locally to the OS
routes, and intercepts packets destined for them so that the traffic terminates
locally without affecting HA traffic.

Fixes tailscale/corp#34048

Signed-off-by: KevinLiang10 <[email protected]>

* fix test

Signed-off-by: KevinLiang10 <[email protected]>

* add ready field to avoid accessing lb before netstack starts

Signed-off-by: KevinLiang10 <[email protected]>

* wgengine/netstack: store values from lb to avoid acquiring a lock

Signed-off-by: KevinLiang10 <[email protected]>

* add active services to netstack on start with stored prefs.

Signed-off-by: KevinLiang10 <[email protected]>

* fix comments

Signed-off-by: KevinLiang10 <[email protected]>

* update comments

Signed-off-by: KevinLiang10 <[email protected]>

---------

Signed-off-by: KevinLiang10 <[email protected]>
KevinLiang10 1 month ago
parent
commit
03461ea7fb
5 changed files with 205 additions and 10 deletions
  1. 45 3
      ipn/ipnlocal/local.go
  2. 24 1
      ipn/ipnlocal/local_test.go
  3. 3 0
      tsd/tsd.go
  4. 75 2
      wgengine/netstack/netstack.go
  5. 58 4
      wgengine/netstack/netstack_test.go

+ 45 - 3
ipn/ipnlocal/local.go

@@ -922,6 +922,22 @@ func (b *LocalBackend) setStateLocked(state ipn.State) {
 	}
 }
 
+func (b *LocalBackend) IPServiceMappings() netmap.IPServiceMappings {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	return b.ipVIPServiceMap
+}
+
+func (b *LocalBackend) SetIPServiceMappingsForTest(m netmap.IPServiceMappings) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	testenv.AssertInTest()
+	b.ipVIPServiceMap = m
+	if ns, ok := b.sys.Netstack.GetOK(); ok {
+		ns.UpdateIPServiceMappings(m)
+	}
+}
+
 // setConfigLocked uses the provided config to update the backend's prefs
 // and other state.
 func (b *LocalBackend) setConfigLocked(conf *conffile.Config) error {
@@ -4502,6 +4518,12 @@ func (b *LocalBackend) onEditPrefsLocked(_ ipnauth.Actor, mp *ipn.MaskedPrefs, o
 		}
 	}
 
+	if mp.AdvertiseServicesSet {
+		if ns, ok := b.sys.Netstack.GetOK(); ok {
+			ns.UpdateActiveVIPServices(newPrefs.AdvertiseServices())
+		}
+	}
+
 	// This is recorded here in the EditPrefs path, not the setPrefs path on purpose.
 	// recordForEdit records metrics related to edits and changes, not the final state.
 	// If, in the future, we want to record gauge-metrics related to the state of prefs,
@@ -5125,7 +5147,7 @@ func (b *LocalBackend) authReconfigLocked() {
 	}
 
 	oneCGNATRoute := shouldUseOneCGNATRoute(b.logf, b.sys.NetMon.Get(), b.sys.ControlKnobs(), version.OS())
-	rcfg := b.routerConfigLocked(cfg, prefs, oneCGNATRoute)
+	rcfg := b.routerConfigLocked(cfg, prefs, nm, oneCGNATRoute)
 
 	err = b.e.Reconfig(cfg, rcfg, dcfg)
 	if err == wgengine.ErrNoChanges {
@@ -5500,7 +5522,7 @@ func peerRoutes(logf logger.Logf, peers []wgcfg.Peer, cgnatThreshold int, routeA
 // routerConfig produces a router.Config from a wireguard config and IPN prefs.
 //
 // b.mu must be held.
-func (b *LocalBackend) routerConfigLocked(cfg *wgcfg.Config, prefs ipn.PrefsView, oneCGNATRoute bool) *router.Config {
+func (b *LocalBackend) routerConfigLocked(cfg *wgcfg.Config, prefs ipn.PrefsView, nm *netmap.NetworkMap, oneCGNATRoute bool) *router.Config {
 	singleRouteThreshold := 10_000
 	if oneCGNATRoute {
 		singleRouteThreshold = 1
@@ -5585,11 +5607,23 @@ func (b *LocalBackend) routerConfigLocked(cfg *wgcfg.Config, prefs ipn.PrefsView
 		}
 	}
 
+	// Get the VIPs for the VIP services this node hosts. We add all locally served VIPs to the routes so that
+	// these connections are terminated locally in netstack instead of being routed to a peer.
+	vipServiceIPs := nm.GetIPVIPServiceMap()
+	v4, v6 := false, false
+
 	if slices.ContainsFunc(rs.LocalAddrs, tsaddr.PrefixIs4) {
 		rs.Routes = append(rs.Routes, netip.PrefixFrom(tsaddr.TailscaleServiceIP(), 32))
+		v4 = true
 	}
 	if slices.ContainsFunc(rs.LocalAddrs, tsaddr.PrefixIs6) {
 		rs.Routes = append(rs.Routes, netip.PrefixFrom(tsaddr.TailscaleServiceIPv6(), 128))
+		v6 = true
+	}
+	for vip := range vipServiceIPs {
+		if (vip.Is4() && v4) || (vip.Is6() && v6) {
+			rs.Routes = append(rs.Routes, netip.PrefixFrom(vip, vip.BitLen()))
+		}
 	}
 
 	return rs
@@ -6267,7 +6301,15 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) {
 
 	b.setTCPPortsInterceptedFromNetmapAndPrefsLocked(b.pm.CurrentPrefs())
 	if buildfeatures.HasServe {
-		b.ipVIPServiceMap = nm.GetIPVIPServiceMap()
+		m := nm.GetIPVIPServiceMap()
+		b.ipVIPServiceMap = m
+		if ns, ok := b.sys.Netstack.GetOK(); ok {
+			ns.UpdateIPServiceMappings(m)
+			// In case the prefs were reloaded from the Profile Manager but didn't change,
+			// we still need to load the active VIP services into netstack.
+			ns.UpdateActiveVIPServices(b.pm.CurrentPrefs().AdvertiseServices())
+		}
+
 	}
 
 	if !oldSelf.Equal(nm.SelfNodeOrZero()) {

+ 24 - 1
ipn/ipnlocal/local_test.go

@@ -7430,8 +7430,31 @@ func TestRouteAllDisabled(t *testing.T) {
 			cfg := &wgcfg.Config{
 				Peers: tt.peers,
 			}
+			ServiceIPMappings := tailcfg.ServiceIPMappings{
+				"svc:test-service": []netip.Addr{
+					netip.MustParseAddr("100.64.1.2"),
+					netip.MustParseAddr("fd7a:abcd:1234::1"),
+				},
+			}
+			svcIPMapJSON, err := json.Marshal(ServiceIPMappings)
+			if err != nil {
+				t.Fatalf("failed to marshal ServiceIPMappings: %v", err)
+			}
+			nm := &netmap.NetworkMap{
+				SelfNode: (&tailcfg.Node{
+					Name: "test-node",
+					Addresses: []netip.Prefix{
+						pp("100.64.1.1/32"),
+					},
+					CapMap: tailcfg.NodeCapMap{
+						tailcfg.NodeAttrServiceHost: []tailcfg.RawMessage{
+							tailcfg.RawMessage(svcIPMapJSON),
+						},
+					},
+				}).View(),
+			}
 
-			rcfg := lb.routerConfigLocked(cfg, prefs.View(), false)
+			rcfg := lb.routerConfigLocked(cfg, prefs.View(), nm, false)
 			for _, p := range rcfg.Routes {
 				found := false
 				for _, r := range tt.wantEndpoints {

+ 3 - 0
tsd/tsd.go

@@ -32,6 +32,7 @@ import (
 	"tailscale.com/net/tstun"
 	"tailscale.com/proxymap"
 	"tailscale.com/types/netmap"
+	"tailscale.com/types/views"
 	"tailscale.com/util/eventbus"
 	"tailscale.com/util/syspolicy/policyclient"
 	"tailscale.com/util/usermetric"
@@ -111,6 +112,8 @@ type LocalBackend = any
 type NetstackImpl interface {
 	Start(LocalBackend) error
 	UpdateNetstackIPs(*netmap.NetworkMap)
+	UpdateIPServiceMappings(netmap.IPServiceMappings)
+	UpdateActiveVIPServices(views.Slice[string])
 }
 
 // Set is a convenience method to set a subsystem value.

+ 75 - 2
wgengine/netstack/netstack.go

@@ -51,6 +51,7 @@ import (
 	"tailscale.com/types/logger"
 	"tailscale.com/types/netmap"
 	"tailscale.com/types/nettype"
+	"tailscale.com/types/views"
 	"tailscale.com/util/clientmetric"
 	"tailscale.com/util/set"
 	"tailscale.com/version"
@@ -200,6 +201,10 @@ type Impl struct {
 	lb        *ipnlocal.LocalBackend // or nil
 	dns       *dns.Manager
 
+	// Before Start is called, there can be IPv6 Neighbor Discovery packets from the
+	// OS landing on netstack. We need to drop those packets until Start.
+	ready atomic.Bool // set to true once Start has been called
+
 	// loopbackPort, if non-nil, will enable Impl to loop back (dnat to
 	// <address-family-loopback>:loopbackPort) TCP & UDP flows originally
 	// destined to serviceIP{v6}:loopbackPort.
@@ -216,6 +221,10 @@ type Impl struct {
 
 	atomicIsVIPServiceIPFunc syncs.AtomicValue[func(netip.Addr) bool]
 
+	atomicIPVIPServiceMap syncs.AtomicValue[netmap.IPServiceMappings]
+	// Stored as a set of service names for faster lookup.
+	atomicActiveVIPServices syncs.AtomicValue[set.Set[tailcfg.ServiceName]]
+
 	// forwardDialFunc, if non-nil, is the net.Dialer.DialContext-style
 	// function that is used to make outgoing connections when forwarding a
 	// TCP connection to another host (e.g. in subnet router mode).
@@ -608,6 +617,9 @@ func (ns *Impl) Start(b LocalBackend) error {
 	ns.ipstack.SetTransportProtocolHandler(tcp.ProtocolNumber, ns.wrapTCPProtocolHandler(tcpFwd.HandlePacket))
 	ns.ipstack.SetTransportProtocolHandler(udp.ProtocolNumber, ns.wrapUDPProtocolHandler(udpFwd.HandlePacket))
 	go ns.inject()
+	if ns.ready.Swap(true) {
+		panic("already started")
+	}
 	return nil
 }
 
@@ -765,6 +777,25 @@ func (ns *Impl) UpdateNetstackIPs(nm *netmap.NetworkMap) {
 	}
 }
 
+// UpdateIPServiceMappings updates the IPServiceMappings when there is a change
+// in this value in localbackend. This is usually triggered from a netmap update.
+func (ns *Impl) UpdateIPServiceMappings(mappings netmap.IPServiceMappings) {
+	ns.mu.Lock()
+	defer ns.mu.Unlock()
+	ns.atomicIPVIPServiceMap.Store(mappings)
+}
+
+// UpdateActiveVIPServices updates the set of active VIP service names.
+func (ns *Impl) UpdateActiveVIPServices(activeServices views.Slice[string]) {
+	ns.mu.Lock()
+	defer ns.mu.Unlock()
+	activeServicesSet := make(set.Set[tailcfg.ServiceName], activeServices.Len())
+	for _, s := range activeServices.All() {
+		activeServicesSet.Add(tailcfg.AsServiceName(s))
+	}
+	ns.atomicActiveVIPServices.Store(activeServicesSet)
+}
+
 func (ns *Impl) isLoopbackPort(port uint16) bool {
 	if ns.loopbackPort != nil && int(port) == *ns.loopbackPort {
 		return true
@@ -775,13 +806,15 @@ func (ns *Impl) isLoopbackPort(port uint16) bool {
 // handleLocalPackets is hooked into the tun datapath for packets leaving
 // the host and arriving at tailscaled. This method returns filter.DropSilently
 // to intercept a packet for handling, for instance traffic to quad-100.
+// Caution: can be called before Start
 func (ns *Impl) handleLocalPackets(p *packet.Parsed, t *tstun.Wrapper, gro *gro.GRO) (filter.Response, *gro.GRO) {
-	if ns.ctx.Err() != nil {
+	if !ns.ready.Load() || ns.ctx.Err() != nil {
 		return filter.DropSilently, gro
 	}
 
 	// Determine if we care about this local packet.
 	dst := p.Dst.Addr()
+	serviceName, isVIPServiceIP := ns.atomicIPVIPServiceMap.Load()[dst]
 	switch {
 	case dst == serviceIP || dst == serviceIPv6:
 		// We want to intercept some traffic to the "service IP" (e.g.
@@ -798,6 +831,25 @@ func (ns *Impl) handleLocalPackets(p *packet.Parsed, t *tstun.Wrapper, gro *gro.
 				return filter.Accept, gro
 			}
 		}
+	case isVIPServiceIP:
+		// Load the set of active VIP services. The IPVIPServiceMap also contains
+		// the IPs of inactive services that this node hosts, so we need to check
+		// whether the service is active before intercepting the packet.
+		activeServices := ns.atomicActiveVIPServices.Load()
+		if !activeServices.Contains(serviceName) {
+			// Other host might have the service active, so we let the packet go through.
+			return filter.Accept, gro
+		}
+		if p.IPProto != ipproto.TCP {
+			// We currently only support VIP services over TCP. If the service is in Tun mode,
+			// it's up to the service host to set up local packet handling, which shouldn't
+			// arrive here.
+			return filter.DropSilently, gro
+		}
+		if debugNetstack() {
+			ns.logf("netstack: intercepting local VIP service packet: proto=%v dst=%v src=%v",
+				p.IPProto, p.Dst, p.Src)
+		}
 	case viaRange.Contains(dst):
 		// We need to handle 4via6 packets leaving the host if the via
 		// route is for this host; otherwise the packet will be dropped
@@ -1009,12 +1061,32 @@ func (ns *Impl) shouldSendToHost(pkt *stack.PacketBuffer) bool {
 			return true
 		}
 
+		if ns.isVIPServiceIP(srcIP) {
+			dstIP := netip.AddrFrom4(v.DestinationAddress().As4())
+			if ns.isLocalIP(dstIP) {
+				if debugNetstack() {
+					ns.logf("netstack: sending VIP service packet to host: src=%v dst=%v", srcIP, dstIP)
+				}
+				return true
+			}
+		}
+
 	case header.IPv6:
 		srcIP := netip.AddrFrom16(v.SourceAddress().As16())
 		if srcIP == serviceIPv6 {
 			return true
 		}
 
+		if ns.isVIPServiceIP(srcIP) {
+			dstIP := netip.AddrFrom16(v.DestinationAddress().As16())
+			if ns.isLocalIP(dstIP) {
+				if debugNetstack() {
+					ns.logf("netstack: sending VIP service packet to host: src=%v dst=%v", srcIP, dstIP)
+				}
+				return true
+			}
+		}
+
 		if viaRange.Contains(srcIP) {
 			// Only send to the host if this 4via6 route is
 			// something this node handles.
@@ -1233,8 +1305,9 @@ func (ns *Impl) userPing(dstIP netip.Addr, pingResPkt []byte, direction userPing
 // continue normally (typically being delivered to the host networking stack),
 // whereas returning filter.DropSilently is done when netstack intercepts the
 // packet and no further processing towards to host should be done.
+// Caution: can be called before Start
 func (ns *Impl) injectInbound(p *packet.Parsed, t *tstun.Wrapper, gro *gro.GRO) (filter.Response, *gro.GRO) {
-	if ns.ctx.Err() != nil {
+	if !ns.ready.Load() || ns.ctx.Err() != nil {
 		return filter.DropSilently, gro
 	}
 

+ 58 - 4
wgengine/netstack/netstack_test.go

@@ -31,6 +31,7 @@ import (
 	"tailscale.com/tstest"
 	"tailscale.com/types/ipproto"
 	"tailscale.com/types/logid"
+	"tailscale.com/types/netmap"
 	"tailscale.com/wgengine"
 	"tailscale.com/wgengine/filter"
 )
@@ -125,6 +126,7 @@ func makeNetstack(tb testing.TB, config func(*Impl)) *Impl {
 		tb.Fatal(err)
 	}
 	tb.Cleanup(func() { ns.Close() })
+	sys.Set(ns)
 
 	lb, err := ipnlocal.NewLocalBackend(logf, logid.PublicID{}, sys, 0)
 	if err != nil {
@@ -741,13 +743,20 @@ func TestHandleLocalPackets(t *testing.T) {
 		// fd7a:115c:a1e0:b1a:0:7:a01:100/120
 		netip.MustParsePrefix("fd7a:115c:a1e0:b1a:0:7:a01:100/120"),
 	}
+	prefs.AdvertiseServices = []string{"svc:test-service"}
 	_, err := impl.lb.EditPrefs(&ipn.MaskedPrefs{
-		Prefs:              *prefs,
-		AdvertiseRoutesSet: true,
+		Prefs:                *prefs,
+		AdvertiseRoutesSet:   true,
+		AdvertiseServicesSet: true,
 	})
 	if err != nil {
 		t.Fatalf("EditPrefs: %v", err)
 	}
+	IPServiceMap := netmap.IPServiceMappings{
+		netip.MustParseAddr("100.99.55.111"):        "svc:test-service",
+		netip.MustParseAddr("fd7a:115c:a1e0::abcd"): "svc:test-service",
+	}
+	impl.lb.SetIPServiceMappingsForTest(IPServiceMap)
 
 	t.Run("ShouldHandleServiceIP", func(t *testing.T) {
 		pkt := &packet.Parsed{
@@ -784,6 +793,19 @@ func TestHandleLocalPackets(t *testing.T) {
 			t.Errorf("got filter outcome %v, want filter.DropSilently", resp)
 		}
 	})
+	t.Run("ShouldHandleLocalTailscaleServices", func(t *testing.T) {
+		pkt := &packet.Parsed{
+			IPVersion: 4,
+			IPProto:   ipproto.TCP,
+			Src:       netip.MustParseAddrPort("127.0.0.1:9999"),
+			Dst:       netip.MustParseAddrPort("100.99.55.111:80"),
+			TCPFlags:  packet.TCPSyn,
+		}
+		resp, _ := impl.handleLocalPackets(pkt, impl.tundev, nil)
+		if resp != filter.DropSilently {
+			t.Errorf("got filter outcome %v, want filter.DropSilently", resp)
+		}
+	})
 	t.Run("OtherNonHandled", func(t *testing.T) {
 		pkt := &packet.Parsed{
 			IPVersion: 6,
@@ -809,8 +831,10 @@ func TestHandleLocalPackets(t *testing.T) {
 
 func TestShouldSendToHost(t *testing.T) {
 	var (
-		selfIP4 = netip.MustParseAddr("100.64.1.2")
-		selfIP6 = netip.MustParseAddr("fd7a:115c:a1e0::123")
+		selfIP4             = netip.MustParseAddr("100.64.1.2")
+		selfIP6             = netip.MustParseAddr("fd7a:115c:a1e0::123")
+		tailscaleServiceIP4 = netip.MustParseAddr("100.99.55.111")
+		tailscaleServiceIP6 = netip.MustParseAddr("fd7a:115c:a1e0::abcd")
 	)
 
 	makeTestNetstack := func(tb testing.TB) *Impl {
@@ -820,6 +844,9 @@ func TestShouldSendToHost(t *testing.T) {
 			impl.atomicIsLocalIPFunc.Store(func(addr netip.Addr) bool {
 				return addr == selfIP4 || addr == selfIP6
 			})
+			impl.atomicIsVIPServiceIPFunc.Store(func(addr netip.Addr) bool {
+				return addr == tailscaleServiceIP4 || addr == tailscaleServiceIP6
+			})
 		})
 
 		prefs := ipn.NewPrefs()
@@ -919,6 +946,33 @@ func TestShouldSendToHost(t *testing.T) {
 			dst:  netip.MustParseAddrPort("[fd7a:115:a1e0::99]:7777"),
 			want: false,
 		},
+		// After accessing the Tailscale service from host, replies from Tailscale Service IPs
+		// to the local Tailscale IPs should be sent to the host.
+		{
+			name: "from_service_ip_to_local_ip",
+			src:  netip.AddrPortFrom(tailscaleServiceIP4, 80),
+			dst:  netip.AddrPortFrom(selfIP4, 12345),
+			want: true,
+		},
+		{
+			name: "from_service_ip_to_local_ip_v6",
+			src:  netip.AddrPortFrom(tailscaleServiceIP6, 80),
+			dst:  netip.AddrPortFrom(selfIP6, 12345),
+			want: true,
+		},
+		// Traffic from Tailscale Service IPs to remote IPs should be sent over WireGuard.
+		{
+			name: "from_service_ip_to_remote",
+			src:  netip.AddrPortFrom(tailscaleServiceIP4, 80),
+			dst:  netip.MustParseAddrPort("173.201.32.56:54321"),
+			want: false,
+		},
+		{
+			name: "from_service_ip_to_remote_v6",
+			src:  netip.AddrPortFrom(tailscaleServiceIP6, 80),
+			dst:  netip.MustParseAddrPort("[2001:4860:4860::8888]:54321"),
+			want: false,
+		},
 	}
 
 	for _, tt := range testCases {