Browse Source

net/dns,ipn/ipnlocal: add nodecap to resolve subdomains (#18258)

This adds a new node capability 'dns-subdomain-resolve' that signals
that all of a host's subdomains should resolve to the host's own IPs.
It allows wildcard matching on any node marked with this capability.

This change also includes a util/dnsname utility function that lets
us access the parent of a fully qualified domain name. MagicDNS uses
this function to walk up the parent chain of a queried name until it
finds a matching real node name.

One important thing to observe is that, in this context, a subdomain
can have multiple sub labels. This means that for a given node named
machine, both my.machine and be.my.machine will be a positive match.

Updates #1196

Signed-off-by: Fernando Serboncini <[email protected]>
Fernando Serboncini 1 month ago
parent
commit
f48cd46662

+ 33 - 0
ipn/ipnlocal/dnsconfig_test.go

@@ -106,6 +106,39 @@ func TestDNSConfigForNetmap(t *testing.T) {
 				},
 			},
 		},
+		{
+			name: "subdomain_resolve_capability",
+			nm: &netmap.NetworkMap{
+				SelfNode: (&tailcfg.Node{
+					Name:      "myname.net.",
+					Addresses: ipps("100.101.101.101"),
+				}).View(),
+				AllCaps: set.SetOf([]tailcfg.NodeCapability{tailcfg.NodeAttrDNSSubdomainResolve}),
+			},
+			peers: nodeViews([]*tailcfg.Node{
+				{
+					ID:        1,
+					Name:      "peer-with-cap.net.",
+					Addresses: ipps("100.102.0.1"),
+					CapMap:    tailcfg.NodeCapMap{tailcfg.NodeAttrDNSSubdomainResolve: nil},
+				},
+				{
+					ID:        2,
+					Name:      "peer-without-cap.net.",
+					Addresses: ipps("100.102.0.2"),
+				},
+			}),
+			prefs: &ipn.Prefs{},
+			want: &dns.Config{
+				Routes: map[dnsname.FQDN][]*dnstype.Resolver{},
+				Hosts: map[dnsname.FQDN][]netip.Addr{
+					"myname.net.":           ips("100.101.101.101"),
+					"peer-with-cap.net.":    ips("100.102.0.1"),
+					"peer-without-cap.net.": ips("100.102.0.2"),
+				},
+				SubdomainHosts: set.Of[dnsname.FQDN]("myname.net.", "peer-with-cap.net."),
+			},
+		},
 		{
 			// An ephemeral node with only an IPv6 address
 			// should get IPv6 records for all its peers,

+ 12 - 0
ipn/ipnlocal/node_backend.go

@@ -751,8 +751,20 @@ func dnsConfigForNetmap(nm *netmap.NetworkMap, peers map[tailcfg.NodeID]tailcfg.
 		dcfg.Hosts[fqdn] = ips
 	}
 	set(nm.SelfName(), nm.GetAddresses())
+	if nm.AllCaps.Contains(tailcfg.NodeAttrDNSSubdomainResolve) {
+		if fqdn, err := dnsname.ToFQDN(nm.SelfName()); err == nil {
+			dcfg.SubdomainHosts.Make()
+			dcfg.SubdomainHosts.Add(fqdn)
+		}
+	}
 	for _, peer := range peers {
 		set(peer.Name(), peer.Addresses())
+		if peer.CapMap().Contains(tailcfg.NodeAttrDNSSubdomainResolve) {
+			if fqdn, err := dnsname.ToFQDN(peer.Name()); err == nil {
+				dcfg.SubdomainHosts.Make()
+				dcfg.SubdomainHosts.Add(fqdn)
+			}
+		}
 	}
 	for _, rec := range nm.DNS.ExtraRecords {
 		switch rec.Type {

+ 6 - 0
net/dns/config.go

@@ -21,6 +21,7 @@ import (
 	"tailscale.com/net/tsaddr"
 	"tailscale.com/types/dnstype"
 	"tailscale.com/util/dnsname"
+	"tailscale.com/util/set"
 )
 
 // Config is a DNS configuration.
@@ -48,6 +49,11 @@ type Config struct {
 	// it to resolve, you also need to add appropriate routes to
 	// Routes.
 	Hosts map[dnsname.FQDN][]netip.Addr
+	// SubdomainHosts is a set of FQDNs from Hosts that should also
+	// resolve subdomain queries to the same IPs. For example, if
+	// "node.tailnet.ts.net" is in SubdomainHosts, then queries for
+	// "anything.node.tailnet.ts.net" will resolve to node's IPs.
+	SubdomainHosts set.Set[dnsname.FQDN]
 	// OnlyIPv6, if true, uses the IPv6 service IP (for MagicDNS)
 	// instead of the IPv4 version (100.100.100.100).
 	OnlyIPv6 bool

+ 4 - 0
net/dns/dns_clone.go

@@ -6,10 +6,12 @@
 package dns
 
 import (
+	"maps"
 	"net/netip"
 
 	"tailscale.com/types/dnstype"
 	"tailscale.com/util/dnsname"
+	"tailscale.com/util/set"
 )
 
 // Clone makes a deep copy of Config.
@@ -43,6 +45,7 @@ func (src *Config) Clone() *Config {
 			dst.Hosts[k] = append([]netip.Addr{}, src.Hosts[k]...)
 		}
 	}
+	dst.SubdomainHosts = maps.Clone(src.SubdomainHosts)
 	return dst
 }
 
@@ -52,6 +55,7 @@ var _ConfigCloneNeedsRegeneration = Config(struct {
 	Routes           map[dnsname.FQDN][]*dnstype.Resolver
 	SearchDomains    []dnsname.FQDN
 	Hosts            map[dnsname.FQDN][]netip.Addr
+	SubdomainHosts   set.Set[dnsname.FQDN]
 	OnlyIPv6         bool
 }{})
 

+ 10 - 0
net/dns/dns_view.go

@@ -15,6 +15,7 @@ import (
 	"tailscale.com/types/dnstype"
 	"tailscale.com/types/views"
 	"tailscale.com/util/dnsname"
+	"tailscale.com/util/set"
 )
 
 //go:generate go run tailscale.com/cmd/cloner  -clonefunc=true -type=Config
@@ -123,6 +124,14 @@ func (v ConfigView) Hosts() views.MapSlice[dnsname.FQDN, netip.Addr] {
 	return views.MapSliceOf(v.ж.Hosts)
 }
 
+// SubdomainHosts is a set of FQDNs from Hosts that should also
+// resolve subdomain queries to the same IPs. For example, if
+// "node.tailnet.ts.net" is in SubdomainHosts, then queries for
+// "anything.node.tailnet.ts.net" will resolve to node's IPs.
+func (v ConfigView) SubdomainHosts() views.Map[dnsname.FQDN, struct{}] {
+	return views.MapOf(v.ж.SubdomainHosts)
+}
+
 // OnlyIPv6, if true, uses the IPv6 service IP (for MagicDNS)
 // instead of the IPv4 version (100.100.100.100).
 func (v ConfigView) OnlyIPv6() bool           { return v.ж.OnlyIPv6 }
@@ -134,5 +143,6 @@ var _ConfigViewNeedsRegeneration = Config(struct {
 	Routes           map[dnsname.FQDN][]*dnstype.Resolver
 	SearchDomains    []dnsname.FQDN
 	Hosts            map[dnsname.FQDN][]netip.Addr
+	SubdomainHosts   set.Set[dnsname.FQDN]
 	OnlyIPv6         bool
 }{})

+ 1 - 0
net/dns/manager.go

@@ -291,6 +291,7 @@ func (m *Manager) compileConfig(cfg Config) (rcfg resolver.Config, ocfg OSConfig
 	// authoritative suffixes, even if we don't propagate MagicDNS to
 	// the OS.
 	rcfg.Hosts = cfg.Hosts
+	rcfg.SubdomainHosts = cfg.SubdomainHosts
 	routes := map[dnsname.FQDN][]*dnstype.Resolver{} // assigned conditionally to rcfg.Routes below.
 	var propagateHostsToOS bool
 	for suffix, resolvers := range cfg.Routes {

+ 22 - 4
net/dns/resolver/tsdns.go

@@ -39,6 +39,7 @@ import (
 	"tailscale.com/util/clientmetric"
 	"tailscale.com/util/cloudenv"
 	"tailscale.com/util/dnsname"
+	"tailscale.com/util/set"
 )
 
 const dnsSymbolicFQDN = "magicdns.localhost-tailscale-daemon."
@@ -79,6 +80,12 @@ type Config struct {
 	// LocalDomains is a list of DNS name suffixes that should not be
 	// routed to upstream resolvers.
 	LocalDomains []dnsname.FQDN
+	// SubdomainHosts is a set of FQDNs from Hosts that should also
+	// resolve subdomain queries to the same IPs. If a query like
+	// "sub.node.tailnet.ts.net" doesn't match Hosts directly, and
+	// "node.tailnet.ts.net" is in SubdomainHosts, the query resolves
+	// to the IPs for "node.tailnet.ts.net".
+	SubdomainHosts set.Set[dnsname.FQDN]
 }
 
 // WriteToBufioWriter write a debug version of c for logs to w, omitting
@@ -214,10 +221,11 @@ type Resolver struct {
 	closed chan struct{}
 
 	// mu guards the following fields from being updated while used.
-	mu           syncs.Mutex
-	localDomains []dnsname.FQDN
-	hostToIP     map[dnsname.FQDN][]netip.Addr
-	ipToHost     map[netip.Addr]dnsname.FQDN
+	mu             syncs.Mutex
+	localDomains   []dnsname.FQDN
+	hostToIP       map[dnsname.FQDN][]netip.Addr
+	ipToHost       map[netip.Addr]dnsname.FQDN
+	subdomainHosts set.Set[dnsname.FQDN]
 }
 
 type ForwardLinkSelector interface {
@@ -278,6 +286,7 @@ func (r *Resolver) SetConfig(cfg Config) error {
 	r.localDomains = cfg.LocalDomains
 	r.hostToIP = cfg.Hosts
 	r.ipToHost = reverse
+	r.subdomainHosts = cfg.SubdomainHosts
 	return nil
 }
 
@@ -642,9 +651,18 @@ func (r *Resolver) resolveLocal(domain dnsname.FQDN, typ dns.Type) (netip.Addr,
 	r.mu.Lock()
 	hosts := r.hostToIP
 	localDomains := r.localDomains
+	subdomainHosts := r.subdomainHosts
 	r.mu.Unlock()
 
 	addrs, found := hosts[domain]
+	if !found {
+		for parent := domain.Parent(); parent != ""; parent = parent.Parent() {
+			if subdomainHosts.Contains(parent) {
+				addrs, found = hosts[parent]
+				break
+			}
+		}
+	}
 	if !found {
 		for _, suffix := range localDomains {
 			if suffix.Contains(domain) {

+ 51 - 0
net/dns/resolver/tsdns_test.go

@@ -32,6 +32,7 @@ import (
 	"tailscale.com/types/logger"
 	"tailscale.com/util/dnsname"
 	"tailscale.com/util/eventbus/eventbustest"
+	"tailscale.com/util/set"
 )
 
 var (
@@ -429,6 +430,56 @@ func TestResolveLocal(t *testing.T) {
 	}
 }
 
+func TestResolveLocalSubdomain(t *testing.T) {
+	r := newResolver(t)
+	defer r.Close()
+
+	// Configure with SubdomainHosts set for test1.ipn.dev
+	cfg := Config{
+		Hosts: map[dnsname.FQDN][]netip.Addr{
+			"test1.ipn.dev.": {testipv4},
+			"test2.ipn.dev.": {testipv6},
+		},
+		LocalDomains:   []dnsname.FQDN{"ipn.dev."},
+		SubdomainHosts: set.Of[dnsname.FQDN]("test1.ipn.dev."),
+	}
+	r.SetConfig(cfg)
+
+	tests := []struct {
+		name  string
+		qname dnsname.FQDN
+		qtype dns.Type
+		ip    netip.Addr
+		code  dns.RCode
+	}{
+		// Exact matches still work
+		{"exact-ipv4", "test1.ipn.dev.", dns.TypeA, testipv4, dns.RCodeSuccess},
+		{"exact-ipv6", "test2.ipn.dev.", dns.TypeAAAA, testipv6, dns.RCodeSuccess},
+
+		// Subdomain of test1 resolves (test1 has SubdomainHosts set)
+		{"subdomain-ipv4", "foo.test1.ipn.dev.", dns.TypeA, testipv4, dns.RCodeSuccess},
+		{"subdomain-deep", "bar.foo.test1.ipn.dev.", dns.TypeA, testipv4, dns.RCodeSuccess}, // Multi-level subdomain
+
+		// Subdomain of test2 does NOT resolve (test2 lacks SubdomainHosts)
+		{"subdomain-no-cap", "foo.test2.ipn.dev.", dns.TypeAAAA, netip.Addr{}, dns.RCodeNameError},
+
+		// Non-existent parent still returns NXDOMAIN
+		{"subdomain-no-parent", "foo.test3.ipn.dev.", dns.TypeA, netip.Addr{}, dns.RCodeNameError},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ip, code := r.resolveLocal(tt.qname, tt.qtype)
+			if code != tt.code {
+				t.Errorf("code = %v; want %v", code, tt.code)
+			}
+			if ip != tt.ip {
+				t.Errorf("ip = %v; want %v", ip, tt.ip)
+			}
+		})
+	}
+}
+
 func TestResolveLocalReverse(t *testing.T) {
 	r := newResolver(t)
 	defer r.Close()

+ 7 - 0
tailcfg/tailcfg.go

@@ -2707,6 +2707,13 @@ const (
 	// server to answer AAAA queries about its peers. See tailscale/tailscale#1152.
 	NodeAttrMagicDNSPeerAAAA NodeCapability = "magicdns-aaaa"
 
+	// NodeAttrDNSSubdomainResolve, when set on Self or a Peer node, indicates
+	// that the subdomains of that node's MagicDNS name should resolve to the
+	// same IP addresses as the node itself.
+	// For example, if node "myserver.tailnet.ts.net" has this capability,
+	// then "anything.myserver.tailnet.ts.net" will resolve to myserver's IPs.
+	NodeAttrDNSSubdomainResolve NodeCapability = "dns-subdomain-resolve"
+
 	// NodeAttrTrafficSteering configures the node to use the traffic
 	// steering subsystem for via routes. See tailscale/corp#29966.
 	NodeAttrTrafficSteering NodeCapability = "traffic-steering"

+ 12 - 0
util/dnsname/dnsname.go

@@ -94,6 +94,18 @@ func (f FQDN) Contains(other FQDN) bool {
 	return strings.HasSuffix(other.WithTrailingDot(), cmp)
 }
 
+// Parent returns the parent domain by stripping the first label.
+// For "foo.bar.baz.", it returns "bar.baz."
+// It returns an empty FQDN for root or single-label domains.
+func (f FQDN) Parent() FQDN {
+	s := f.WithTrailingDot()
+	_, rest, ok := strings.Cut(s, ".")
+	if !ok || rest == "" {
+		return ""
+	}
+	return FQDN(rest)
+}
+
 // ValidLabel reports whether label is a valid DNS label. All errors are
 // [vizerror.Error].
 func ValidLabel(label string) error {

+ 28 - 0
util/dnsname/dnsname_test.go

@@ -123,6 +123,34 @@ func TestFQDNContains(t *testing.T) {
 	}
 }
 
+func TestFQDNParent(t *testing.T) {
+	tests := []struct {
+		in   string
+		want FQDN
+	}{
+		{"", ""},
+		{".", ""},
+		{"com.", ""},
+		{"foo.com.", "com."},
+		{"www.foo.com.", "foo.com."},
+		{"a.b.c.d.", "b.c.d."},
+		{"sub.node.tailnet.ts.net.", "node.tailnet.ts.net."},
+	}
+
+	for _, test := range tests {
+		t.Run(test.in, func(t *testing.T) {
+			in, err := ToFQDN(test.in)
+			if err != nil {
+				t.Fatalf("ToFQDN(%q): %v", test.in, err)
+			}
+			got := in.Parent()
+			if got != test.want {
+				t.Errorf("ToFQDN(%q).Parent() = %q, want %q", test.in, got, test.want)
+			}
+		})
+	}
+}
+
 func TestSanitizeLabel(t *testing.T) {
 	tests := []struct {
 		name string