ソースを参照

ipn/ipnlocal, net/dns*, util/cloudenv: add AWS DNS support

And remove the GCP special-casing from ipn/ipnlocal; do it only in the
forwarder for *.internal.

Fixes #4980
Fixes #4981

Change-Id: I5c481e96d91f3d51d274a80fbd37c38f16dfa5cb
Signed-off-by: Brad Fitzpatrick <[email protected]>
Brad Fitzpatrick 3 年 前
コミット
aa37aece9c

+ 1 - 1
cmd/tailscaled/depaware.txt

@@ -260,7 +260,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
         tailscale.com/types/structs                                  from tailscale.com/control/controlclient+
         tailscale.com/types/views                                    from tailscale.com/ipn/ipnlocal+
         tailscale.com/util/clientmetric                              from tailscale.com/control/controlclient+
-        tailscale.com/util/cloudenv                                  from tailscale.com/ipn/ipnlocal+
+        tailscale.com/util/cloudenv                                  from tailscale.com/net/dns/resolver+
   LW    tailscale.com/util/cmpver                                    from tailscale.com/net/dns+
      💣 tailscale.com/util/deephash                                  from tailscale.com/ipn/ipnlocal+
         tailscale.com/util/dnsname                                   from tailscale.com/hostinfo+

+ 1 - 37
ipn/ipnlocal/dnsconfig_test.go

@@ -306,42 +306,6 @@ func TestDNSConfigForNetmap(t *testing.T) {
 				Routes: map[dnsname.FQDN][]*dnstype.Resolver{},
 			},
 		},
-		{
-			name: "google_cloud",
-			nm: &netmap.NetworkMap{
-				DNS: tailcfg.DNSConfig{},
-			},
-			cloud: cloudenv.GCP,
-			prefs: &ipn.Prefs{
-				CorpDNS: true,
-			},
-			want: &dns.Config{
-				Hosts: map[dnsname.FQDN][]netaddr.IP{},
-				Routes: map[dnsname.FQDN][]*dnstype.Resolver{
-					"internal.": []*dnstype.Resolver{{Addr: cloudenv.GoogleMetadataAndDNSIP}},
-				},
-			},
-		},
-		{
-			name: "google_cloud_with_exiting_internal",
-			nm: &netmap.NetworkMap{
-				DNS: tailcfg.DNSConfig{
-					Routes: map[string][]*dnstype.Resolver{
-						".internal": []*dnstype.Resolver{{Addr: "1.2.3.4"}},
-					},
-				},
-			},
-			cloud: cloudenv.GCP,
-			prefs: &ipn.Prefs{
-				CorpDNS: true,
-			},
-			want: &dns.Config{
-				Hosts: map[dnsname.FQDN][]netaddr.IP{},
-				Routes: map[dnsname.FQDN][]*dnstype.Resolver{
-					"internal.": []*dnstype.Resolver{{Addr: "1.2.3.4"}},
-				},
-			},
-		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
@@ -350,7 +314,7 @@ func TestDNSConfigForNetmap(t *testing.T) {
 				verOS = "linux"
 			}
 			var log tstest.MemLogger
-			got := dnsConfigForNetmap(tt.nm, tt.prefs, log.Logf, verOS, tt.cloud)
+			got := dnsConfigForNetmap(tt.nm, tt.prefs, log.Logf, verOS)
 			if !reflect.DeepEqual(got, tt.want) {
 				gotj, _ := json.MarshalIndent(got, "", "\t")
 				wantj, _ := json.MarshalIndent(tt.want, "", "\t")

+ 2 - 15
ipn/ipnlocal/local.go

@@ -48,7 +48,6 @@ import (
 	"tailscale.com/types/persist"
 	"tailscale.com/types/preftype"
 	"tailscale.com/types/views"
-	"tailscale.com/util/cloudenv"
 	"tailscale.com/util/deephash"
 	"tailscale.com/util/dnsname"
 	"tailscale.com/util/multierr"
@@ -2222,7 +2221,7 @@ func (b *LocalBackend) authReconfig() {
 	}
 
 	rcfg := b.routerConfig(cfg, prefs, oneCGNATRoute)
-	dcfg := dnsConfigForNetmap(nm, prefs, b.logf, version.OS(), cloudenv.Get())
+	dcfg := dnsConfigForNetmap(nm, prefs, b.logf, version.OS())
 
 	err = b.e.Reconfig(cfg, rcfg, dcfg, nm.Debug)
 	if err == wgengine.ErrNoChanges {
@@ -2238,7 +2237,7 @@ func (b *LocalBackend) authReconfig() {
 //
 // The versionOS is a Tailscale-style version ("iOS", "macOS") and not
 // a runtime.GOOS.
-func dnsConfigForNetmap(nm *netmap.NetworkMap, prefs *ipn.Prefs, logf logger.Logf, versionOS string, cloud cloudenv.Cloud) *dns.Config {
+func dnsConfigForNetmap(nm *netmap.NetworkMap, prefs *ipn.Prefs, logf logger.Logf, versionOS string) *dns.Config {
 	dcfg := &dns.Config{
 		Routes: map[dnsname.FQDN][]*dnstype.Resolver{},
 		Hosts:  map[dnsname.FQDN][]netaddr.IP{},
@@ -2327,18 +2326,6 @@ func dnsConfigForNetmap(nm *netmap.NetworkMap, prefs *ipn.Prefs, logf logger.Log
 		}
 	}
 
-	// If we're running on Google Cloud Platform, add a DNS route for its
-	// *.internal DNS names to its metadata DNS IP, unless the tailnet already
-	// defines one. This is especially important on their standard VM images
-	// that don't included systemd-resolved, so we were effectively breaking
-	// their *.internal DNS names previously when the tailnet had explicit DNS
-	// servers set ("override local DNS" checked).
-	if cloud == cloudenv.GCP {
-		if _, ok := dcfg.Routes["internal."]; !ok {
-			dcfg.Routes["internal."] = []*dnstype.Resolver{{Addr: cloudenv.GoogleMetadataAndDNSIP}}
-		}
-	}
-
 	addDefault := func(resolvers []*dnstype.Resolver) {
 		for _, r := range resolvers {
 			dcfg.DefaultResolvers = append(dcfg.DefaultResolvers, r)

+ 52 - 27
net/dns/resolver/forwarder.go

@@ -198,6 +198,16 @@ type forwarder struct {
 	// routes are per-suffix resolvers to use, with
 	// the most specific routes first.
 	routes []route
+	// cloudHostFallback are last resort resolvers to use if no per-suffix
+	// resolver matches. These are only populated on cloud hosts where the
+	// platform provides a well-known recursive resolver.
+	//
+	// That is, if we're running on GCP or AWS where there's always a well-known
+	// IP of a recursive resolver, return that rather than having callers return
+	// errNoUpstreams. This fixes both normal 100.100.100.100 resolution when
+	// /etc/resolv.conf is missing/corrupt, and the peerapi ExitDNS stub
+	// resolver lookup.
+	cloudHostFallback []resolverAndDelay
 }
 
 func init() {
@@ -297,18 +307,52 @@ func resolversWithDelays(resolvers []*dnstype.Resolver) []resolverAndDelay {
 	return rr
 }
 
+var (
+	cloudResolversOnce sync.Once
+	cloudResolversLazy []resolverAndDelay
+)
+
+func cloudResolvers() []resolverAndDelay {
+	cloudResolversOnce.Do(func() {
+		if ip := cloudenv.Get().ResolverIP(); ip != "" {
+			cloudResolver := []*dnstype.Resolver{{Addr: ip}}
+			cloudResolversLazy = resolversWithDelays(cloudResolver)
+		}
+	})
+	return cloudResolversLazy
+}
+
 // setRoutes sets the routes to use for DNS forwarding. It's called by
 // Resolver.SetConfig on reconfig.
 //
 // The memory referenced by routesBySuffix should not be modified.
 func (f *forwarder) setRoutes(routesBySuffix map[dnsname.FQDN][]*dnstype.Resolver) {
 	routes := make([]route, 0, len(routesBySuffix))
+
+	cloudHostFallback := cloudResolvers()
 	for suffix, rs := range routesBySuffix {
-		routes = append(routes, route{
-			Suffix:    suffix,
-			Resolvers: resolversWithDelays(rs),
-		})
+		if suffix == "." && len(rs) == 0 && len(cloudHostFallback) > 0 {
+			routes = append(routes, route{
+				Suffix:    suffix,
+				Resolvers: cloudHostFallback,
+			})
+		} else {
+			routes = append(routes, route{
+				Suffix:    suffix,
+				Resolvers: resolversWithDelays(rs),
+			})
+		}
+	}
+
+	if cloudenv.Get().HasInternalTLD() && len(cloudHostFallback) > 0 {
+		if _, ok := routesBySuffix["internal."]; !ok {
+			routes = append(routes, route{
+				Suffix:    "internal.",
+				Resolvers: cloudHostFallback,
+			})
+		}
 	}
+
 	// Sort from longest prefix to shortest.
 	sort.Slice(routes, func(i, j int) bool {
 		return routes[i].Suffix.NumLabels() > routes[j].Suffix.NumLabels()
@@ -317,6 +361,7 @@ func (f *forwarder) setRoutes(routesBySuffix map[dnsname.FQDN][]*dnstype.Resolve
 	f.mu.Lock()
 	defer f.mu.Unlock()
 	f.routes = routes
+	f.cloudHostFallback = cloudHostFallback
 }
 
 var stdNetPacketListener packetListener = new(net.ListenConfig)
@@ -561,38 +606,18 @@ func (f *forwarder) sendUDP(ctx context.Context, fq *forwardQuery, rr resolverAn
 	return out, nil
 }
 
-// gcpResolverFallback is the fallback resolver for Google Cloud.
-var gcpResolverFallback = []resolverAndDelay{{name: &dnstype.Resolver{Addr: cloudenv.GoogleMetadataAndDNSIP}}}
-
 // resolvers returns the resolvers to use for domain.
 func (f *forwarder) resolvers(domain dnsname.FQDN) []resolverAndDelay {
 	f.mu.Lock()
 	routes := f.routes
+	cloudHostFallback := f.cloudHostFallback
 	f.mu.Unlock()
-	var ret []resolverAndDelay
-	var matchedSuffix dnsname.FQDN
 	for _, route := range routes {
 		if route.Suffix == "." || route.Suffix.Contains(domain) {
-			ret = route.Resolvers
-			matchedSuffix = route.Suffix
-			break
+			return route.Resolvers
 		}
 	}
-
-	if len(ret) == 0 && cloudenv.Get() == cloudenv.GCP && (matchedSuffix == "" || matchedSuffix == ".") {
-		// If we're running on GCP where there's always a well-known IP of a
-		// recursive resolver, return that rather than having callers return
-		// errNoUpstreams. This fixes both normal 100.100.100.100 resolution
-		// when /etc/resolv.conf is missing/corrupt, and the peerapi ExitDNS
-		// stub resolver lookup.
-		//
-		// But we only do this if no route matched (matchedSuffix == "") or
-		// if we had no resolvers for the top-level route (matchedSuffix == ".").
-		// If they had an explicit empty route that we matched, don't do the auto
-		// fallback in that case.
-		ret = gcpResolverFallback
-	}
-	return ret
+	return cloudHostFallback // or nil if no fallback
 }
 
 // forwardQuery is information and state about a forwarded DNS query that's

+ 4 - 0
net/dns/resolver/tsdns.go

@@ -31,6 +31,7 @@ import (
 	"tailscale.com/types/dnstype"
 	"tailscale.com/types/logger"
 	"tailscale.com/util/clientmetric"
+	"tailscale.com/util/cloudenv"
 	"tailscale.com/util/dnsname"
 	"tailscale.com/wgengine/monitor"
 )
@@ -97,6 +98,9 @@ func (c *Config) WriteToBufioWriter(w *bufio.Writer) {
 	if arpa > 0 {
 		fmt.Fprintf(w, "+%darpa", arpa)
 	}
+	if c := cloudenv.Get(); c != "" {
+		fmt.Fprintf(w, ", cloud=%q", string(c))
+	}
 	w.WriteString("}")
 }
 

+ 3 - 2
net/dnscache/dnscache.go

@@ -119,14 +119,15 @@ func (r *Resolver) cloudHostResolver() (v *net.Resolver, ok bool) {
 		// which supports net.Resolver.PreferGo on Windows.
 		return nil, false
 	}
-	if cloudenv.Get() != cloudenv.GCP {
+	ip := cloudenv.Get().ResolverIP()
+	if ip == "" {
 		return nil, false
 	}
 	return &net.Resolver{
 		PreferGo: true,
 		Dial: func(ctx context.Context, network, address string) (net.Conn, error) {
 			var d net.Dialer
-			return d.DialContext(ctx, network, net.JoinHostPort(cloudenv.GoogleMetadataAndDNSIP, "53"))
+			return d.DialContext(ctx, network, net.JoinHostPort(ip, "53"))
 		},
 	}, true
 }

+ 40 - 0
util/cloudenv/cloudenv.go

@@ -6,6 +6,9 @@
 package cloudenv
 
 import (
+	"os"
+	"runtime"
+	"strings"
 	"sync/atomic"
 
 	gcpmetadata "cloud.google.com/go/compute/metadata"
@@ -15,14 +18,41 @@ import (
 // It's also the *.internal DNS server, and proxies to 8.8.8.8.
 const GoogleMetadataAndDNSIP = "169.254.169.254"
 
+// AWSResolverIP is the IP address of the AWS DNS server.
+// See https://docs.aws.amazon.com/vpc/latest/userguide/vpc-dns.html
+const AWSResolverIP = "169.254.169.253"
+
 // Cloud is a recognized cloud environment with properties that
 // Tailscale can specialize for in places.
 type Cloud string
 
 const (
 	GCP = Cloud("gcp") // Google Cloud
+	AWS = Cloud("aws") // Amazon Web Services (EC2 in particular)
 )
 
+// ResolverIP returns the cloud host's recursive DNS server or the
+// empty string if not available.
+func (c Cloud) ResolverIP() string {
+	switch c {
+	case GCP:
+		return GoogleMetadataAndDNSIP
+	case AWS:
+		return AWSResolverIP
+	}
+	return ""
+}
+
+// HasInternalTLD reports whether c is a cloud environment
+// whose ResolverIP serves *.internal records.
+func (c Cloud) HasInternalTLD() bool {
+	switch c {
+	case GCP, AWS:
+		return true
+	}
+	return false
+}
+
 var cloudAtomic atomic.Value // of Cloud
 
 // Get returns the current cloud, or the empty string if unknown.
@@ -37,6 +67,16 @@ func Get() Cloud {
 }
 
 func getCloud() Cloud {
+	// TODO(bradfitz): also detect AWS on Windows, etc. Just try to hit the metadata server
+	// and see if it's there? But it might be turned off. Do some small-timeout DNS request
+	// to 169.254.169.253 and see if it replies? But which DNS request?
+	if runtime.GOOS == "linux" {
+		biosVendorB, _ := os.ReadFile("/sys/class/dmi/id/bios_vendor")
+		biosVendor := strings.TrimSpace(string(biosVendorB))
+		if biosVendor == "Amazon EC2" || strings.HasSuffix(biosVendor, ".amazon") {
+			return AWS
+		}
+	}
 	if gcpmetadata.OnGCE() {
 		return GCP
 	}