Browse Source

net/dns: retrample resolv.conf when another process has trampled it (#18069)

When using the resolv.conf file for setting DNS, it is possible that
some other services will trample the file and overwrite the DNS server
we set. Experiments have shown this to be a racy error depending on how
quickly processes start.

Attempt to write our configuration back (re-trample the file) a limited
number of times if the file is changed.

Updates #16635

Signed-off-by: Claus Lensbøl <[email protected]>
Claus Lensbøl 2 months ago
parent
commit
1dfdee8521

+ 2 - 2
cmd/tailscaled/tailscaled.go

@@ -772,7 +772,7 @@ func tryEngine(logf logger.Logf, sys *tsd.System, name string) (onlyNetstack boo
 			// configuration being unavailable (from the noop
 			// manager). More in Issue 4017.
 			// TODO(bradfitz): add a Synology-specific DNS manager.
-			conf.DNS, err = dns.NewOSConfigurator(logf, sys.HealthTracker.Get(), sys.PolicyClientOrDefault(), sys.ControlKnobs(), "") // empty interface name
+			conf.DNS, err = dns.NewOSConfigurator(logf, sys.HealthTracker.Get(), sys.Bus.Get(), sys.PolicyClientOrDefault(), sys.ControlKnobs(), "") // empty interface name
 			if err != nil {
 				return false, fmt.Errorf("dns.NewOSConfigurator: %w", err)
 			}
@@ -806,7 +806,7 @@ func tryEngine(logf logger.Logf, sys *tsd.System, name string) (onlyNetstack boo
 			return false, fmt.Errorf("creating router: %w", err)
 		}
 
-		d, err := dns.NewOSConfigurator(logf, sys.HealthTracker.Get(), sys.PolicyClientOrDefault(), sys.ControlKnobs(), devName)
+		d, err := dns.NewOSConfigurator(logf, sys.HealthTracker.Get(), sys.Bus.Get(), sys.PolicyClientOrDefault(), sys.ControlKnobs(), devName)
 		if err != nil {
 			dev.Close()
 			r.Close()

+ 35 - 3
net/dns/direct.go

@@ -21,6 +21,7 @@ import (
 	"slices"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"time"
 
 	"tailscale.com/feature"
@@ -29,6 +30,7 @@ import (
 	"tailscale.com/net/tsaddr"
 	"tailscale.com/types/logger"
 	"tailscale.com/util/dnsname"
+	"tailscale.com/util/eventbus"
 	"tailscale.com/version/distro"
 )
 
@@ -135,6 +137,11 @@ type directManager struct {
 	// but is better than having non-functioning DNS.
 	renameBroken bool
 
+	trampleCount  atomic.Int64
+	trampleTimer  *time.Timer
+	eventClient   *eventbus.Client
+	trampleDNSPub *eventbus.Publisher[TrampleDNS]
+
 	ctx      context.Context    // valid until Close
 	ctxClose context.CancelFunc // closes ctx
 
@@ -145,11 +152,13 @@ type directManager struct {
 }
 
 //lint:ignore U1000 used in manager_{freebsd,openbsd}.go
-func newDirectManager(logf logger.Logf, health *health.Tracker) *directManager {
-	return newDirectManagerOnFS(logf, health, directFS{})
+func newDirectManager(logf logger.Logf, health *health.Tracker, bus *eventbus.Bus) *directManager {
+	return newDirectManagerOnFS(logf, health, bus, directFS{})
 }
 
-func newDirectManagerOnFS(logf logger.Logf, health *health.Tracker, fs wholeFileFS) *directManager {
+var trampleWatchDuration = 5 * time.Second
+
+func newDirectManagerOnFS(logf logger.Logf, health *health.Tracker, bus *eventbus.Bus, fs wholeFileFS) *directManager {
 	ctx, cancel := context.WithCancel(context.Background())
 	m := &directManager{
 		logf:     logf,
@@ -158,6 +167,13 @@ func newDirectManagerOnFS(logf logger.Logf, health *health.Tracker, fs wholeFile
 		ctx:      ctx,
 		ctxClose: cancel,
 	}
+	if bus != nil {
+		m.eventClient = bus.Client("dns.directManager")
+		m.trampleDNSPub = eventbus.Publish[TrampleDNS](m.eventClient)
+	}
+	m.trampleTimer = time.AfterFunc(trampleWatchDuration, func() {
+		m.trampleCount.Store(0)
+	})
 	go m.runFileWatcher()
 	return m
 }
@@ -481,10 +497,26 @@ func (m *directManager) checkForFileTrample() {
 	}
 	m.logf("trample: resolv.conf changed from what we expected. did some other program interfere? current contents: %q", show)
 	m.health.SetUnhealthy(resolvTrampleWarnable, nil)
+	if m.trampleDNSPub != nil {
+		n := m.trampleCount.Add(1)
+
+		if n < 10 {
+			m.trampleDNSPub.Publish(TrampleDNS{
+				LastTrample:       time.Now(),
+				TramplesInTimeout: n,
+			})
+			m.trampleTimer.Reset(trampleWatchDuration)
+		} else {
+			m.logf("trample: resolv.conf overwritten %d times, no longer attempting to replace it.", n)
+		}
+	}
 }
 
 func (m *directManager) Close() error {
 	m.ctxClose()
+	if m.eventClient != nil {
+		m.eventClient.Close()
+	}
 
 	// We used to keep a file for the tailscale config and symlinked
 	// to it, but then we stopped because /etc/resolv.conf being a

+ 109 - 0
net/dns/direct_linux_test.go

@@ -0,0 +1,109 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+//go:build linux
+
+package dns
+
+import (
+	"context"
+	"fmt"
+	"net/netip"
+	"os"
+	"path/filepath"
+	"testing"
+	"testing/synctest"
+
+	"github.com/illarion/gonotify/v3"
+
+	"tailscale.com/util/dnsname"
+	"tailscale.com/util/eventbus/eventbustest"
+)
+
+func TestDNSTrampleRecovery(t *testing.T) {
+	HookWatchFile.Set(watchFile)
+	synctest.Test(t, func(t *testing.T) {
+		tmp := t.TempDir()
+		if err := os.MkdirAll(filepath.Join(tmp, "etc"), 0700); err != nil {
+			t.Fatal(err)
+		}
+		const resolvPath = "/etc/resolv.conf"
+		fs := directFS{prefix: tmp}
+		readFile := func(t *testing.T, path string) string {
+			t.Helper()
+			b, err := fs.ReadFile(path)
+			if err != nil {
+				t.Errorf("Reading DNS config: %v", err)
+			}
+			return string(b)
+		}
+
+		bus := eventbustest.NewBus(t)
+		eventbustest.LogAllEvents(t, bus)
+		m := newDirectManagerOnFS(t.Logf, nil, bus, fs)
+		defer m.Close()
+
+		if err := m.SetDNS(OSConfig{
+			Nameservers:   []netip.Addr{netip.MustParseAddr("8.8.8.8"), netip.MustParseAddr("8.8.4.4")},
+			SearchDomains: []dnsname.FQDN{"ts.net.", "ts-dns.test."},
+			MatchDomains:  []dnsname.FQDN{"ignored."},
+		}); err != nil {
+			t.Fatal(err)
+		}
+
+		const want = `# resolv.conf(5) file generated by tailscale
+# For more info, see https://tailscale.com/s/resolvconf-overwrite
+# DO NOT EDIT THIS FILE BY HAND -- CHANGES WILL BE OVERWRITTEN
+
+nameserver 8.8.8.8
+nameserver 8.8.4.4
+search ts.net ts-dns.test
+`
+		if got := readFile(t, resolvPath); got != want {
+			t.Fatalf("resolv.conf:\n%s, want:\n%s", got, want)
+		}
+
+		tw := eventbustest.NewWatcher(t, bus)
+
+		const trample = "Hvem er det som tramper på min bro?"
+		if err := fs.WriteFile(resolvPath, []byte(trample), 0644); err != nil {
+			t.Fatal(err)
+		}
+		synctest.Wait()
+
+		if err := eventbustest.Expect(tw, eventbustest.Type[TrampleDNS]()); err != nil {
+			t.Errorf("did not see trample event: %s", err)
+		}
+	})
+}
+
+// watchFile is largely copied from linuxtrample, but cancels the context
+// right after the first call to cb() (the first trample) to end the test.
+func watchFile(ctx context.Context, dir, filename string, cb func()) error {
+	ctx, cancel := context.WithCancel(ctx)
+	defer cancel()
+
+	const events = gonotify.IN_ATTRIB |
+		gonotify.IN_CLOSE_WRITE |
+		gonotify.IN_CREATE |
+		gonotify.IN_DELETE |
+		gonotify.IN_MODIFY |
+		gonotify.IN_MOVE
+
+	watcher, err := gonotify.NewDirWatcher(ctx, events, dir)
+	if err != nil {
+		return fmt.Errorf("NewDirWatcher: %w", err)
+	}
+
+	for {
+		select {
+		case event := <-watcher.C:
+			if event.Name == filename {
+				cb()
+				cancel()
+			}
+		case <-ctx.Done():
+			return ctx.Err()
+		}
+	}
+}

+ 39 - 10
net/dns/manager.go

@@ -55,6 +55,8 @@ type Manager struct {
 	logf   logger.Logf
 	health *health.Tracker
 
+	eventClient *eventbus.Client
+
 	activeQueriesAtomic int32
 
 	ctx       context.Context    // good until Down
@@ -69,10 +71,10 @@ type Manager struct {
 	config *Config    // Tracks the last viable DNS configuration set by Set.  nil on failures other than compilation failures or if set has never been called.
 }
 
-// NewManagers created a new manager from the given config.
+// NewManager creates a new manager from the given config.
 //
 // knobs may be nil.
-func NewManager(logf logger.Logf, oscfg OSConfigurator, health *health.Tracker, dialer *tsdial.Dialer, linkSel resolver.ForwardLinkSelector, knobs *controlknobs.Knobs, goos string) *Manager {
+func NewManager(logf logger.Logf, oscfg OSConfigurator, health *health.Tracker, dialer *tsdial.Dialer, linkSel resolver.ForwardLinkSelector, knobs *controlknobs.Knobs, goos string, bus *eventbus.Bus) *Manager {
 	if !buildfeatures.HasDNS {
 		return nil
 	}
@@ -96,6 +98,20 @@ func NewManager(logf logger.Logf, oscfg OSConfigurator, health *health.Tracker,
 		goos:     goos,
 	}
 
+	m.eventClient = bus.Client("dns.Manager")
+	eventbus.SubscribeFunc(m.eventClient, func(trample TrampleDNS) {
+		m.mu.Lock()
+		defer m.mu.Unlock()
+		if m.config == nil {
+			m.logf("resolve.conf was trampled, but there is no DNS config")
+			return
+		}
+		m.logf("resolve.conf was trampled, setting existing config again")
+		if err := m.setLocked(*m.config); err != nil {
+			m.logf("error setting DNS config: %s", err)
+		}
+	})
+
 	m.ctx, m.ctxCancel = context.WithCancel(context.Background())
 	m.logf("using %T", m.os)
 	return m
@@ -178,9 +194,7 @@ func (m *Manager) setLocked(cfg Config) error {
 		m.config = nil
 		return err
 	}
-	if err := m.os.SetDNS(ocfg); err != nil {
-		m.config = nil
-		m.health.SetUnhealthy(osConfigurationSetWarnable, health.Args{health.ArgError: err.Error()})
+	if err := m.setDNSLocked(ocfg); err != nil {
 		return err
 	}
 
@@ -190,6 +204,15 @@ func (m *Manager) setLocked(cfg Config) error {
 	return nil
 }
 
+func (m *Manager) setDNSLocked(ocfg OSConfig) error {
+	if err := m.os.SetDNS(ocfg); err != nil {
+		m.config = nil
+		m.health.SetUnhealthy(osConfigurationSetWarnable, health.Args{health.ArgError: err.Error()})
+		return err
+	}
+	return nil
+}
+
 // compileHostEntries creates a list of single-label resolutions possible
 // from the configured hosts and search domains.
 // The entries are compiled in the order of the search domains, then the hosts.
@@ -457,6 +480,13 @@ const (
 	maxReqSizeTCP = 4096
 )
 
+// TrampleDNS is an event indicating we detected that DNS config was
+// overwritten by another process.
+type TrampleDNS struct {
+	LastTrample       time.Time
+	TramplesInTimeout int64
+}
+
 // dnsTCPSession services DNS requests sent over TCP.
 type dnsTCPSession struct {
 	m *Manager
@@ -585,6 +615,7 @@ func (m *Manager) Down() error {
 	if err := m.os.Close(); err != nil {
 		return err
 	}
+	m.eventClient.Close()
 	m.resolver.Close()
 	return nil
 }
@@ -605,7 +636,7 @@ func CleanUp(logf logger.Logf, netMon *netmon.Monitor, bus *eventbus.Bus, health
 	if !buildfeatures.HasDNS {
 		return
 	}
-	oscfg, err := NewOSConfigurator(logf, health, policyclient.Get(), nil, interfaceName)
+	oscfg, err := NewOSConfigurator(logf, health, bus, policyclient.Get(), nil, interfaceName)
 	if err != nil {
 		logf("creating dns cleanup: %v", err)
 		return
@@ -613,12 +644,10 @@ func CleanUp(logf logger.Logf, netMon *netmon.Monitor, bus *eventbus.Bus, health
 	d := &tsdial.Dialer{Logf: logf}
 	d.SetNetMon(netMon)
 	d.SetBus(bus)
-	dns := NewManager(logf, oscfg, health, d, nil, nil, runtime.GOOS)
+	dns := NewManager(logf, oscfg, health, d, nil, nil, runtime.GOOS, bus)
 	if err := dns.Down(); err != nil {
 		logf("dns down: %v", err)
 	}
 }
 
-var (
-	metricDNSQueryErrorQueue = clientmetric.NewCounter("dns_query_local_error_queue")
-)
+var metricDNSQueryErrorQueue = clientmetric.NewCounter("dns_query_local_error_queue")

+ 3 - 2
net/dns/manager_darwin.go

@@ -13,14 +13,15 @@ import (
 	"tailscale.com/net/dns/resolvconffile"
 	"tailscale.com/net/tsaddr"
 	"tailscale.com/types/logger"
+	"tailscale.com/util/eventbus"
 	"tailscale.com/util/mak"
 	"tailscale.com/util/syspolicy/policyclient"
 )
 
 // NewOSConfigurator creates a new OS configurator.
 //
-// The health tracker and the knobs may be nil and are ignored on this platform.
-func NewOSConfigurator(logf logger.Logf, _ *health.Tracker, _ policyclient.Client, _ *controlknobs.Knobs, ifName string) (OSConfigurator, error) {
+// The health tracker, bus and the knobs may be nil and are ignored on this platform.
+func NewOSConfigurator(logf logger.Logf, _ *health.Tracker, _ *eventbus.Bus, _ policyclient.Client, _ *controlknobs.Knobs, ifName string) (OSConfigurator, error) {
 	return &darwinConfigurator{logf: logf, ifName: ifName}, nil
 }
 

+ 2 - 1
net/dns/manager_default.go

@@ -9,12 +9,13 @@ import (
 	"tailscale.com/control/controlknobs"
 	"tailscale.com/health"
 	"tailscale.com/types/logger"
+	"tailscale.com/util/eventbus"
 	"tailscale.com/util/syspolicy/policyclient"
 )
 
 // NewOSConfigurator creates a new OS configurator.
 //
 // The health tracker and the knobs may be nil and are ignored on this platform.
-func NewOSConfigurator(logger.Logf, *health.Tracker, policyclient.Client, *controlknobs.Knobs, string) (OSConfigurator, error) {
+func NewOSConfigurator(logger.Logf, *health.Tracker, *eventbus.Bus, policyclient.Client, *controlknobs.Knobs, string) (OSConfigurator, error) {
 	return NewNoopManager()
 }

+ 6 - 5
net/dns/manager_freebsd.go

@@ -10,16 +10,17 @@ import (
 	"tailscale.com/control/controlknobs"
 	"tailscale.com/health"
 	"tailscale.com/types/logger"
+	"tailscale.com/util/eventbus"
 	"tailscale.com/util/syspolicy/policyclient"
 )
 
 // NewOSConfigurator creates a new OS configurator.
 //
 // The health tracker may be nil; the knobs may be nil and are ignored on this platform.
-func NewOSConfigurator(logf logger.Logf, health *health.Tracker, _ policyclient.Client, _ *controlknobs.Knobs, _ string) (OSConfigurator, error) {
+func NewOSConfigurator(logf logger.Logf, health *health.Tracker, bus *eventbus.Bus, _ policyclient.Client, _ *controlknobs.Knobs, _ string) (OSConfigurator, error) {
 	bs, err := os.ReadFile("/etc/resolv.conf")
 	if os.IsNotExist(err) {
-		return newDirectManager(logf, health), nil
+		return newDirectManager(logf, health, bus), nil
 	}
 	if err != nil {
 		return nil, fmt.Errorf("reading /etc/resolv.conf: %w", err)
@@ -29,16 +30,16 @@ func NewOSConfigurator(logf logger.Logf, health *health.Tracker, _ policyclient.
 	case "resolvconf":
 		switch resolvconfStyle() {
 		case "":
-			return newDirectManager(logf, health), nil
+			return newDirectManager(logf, health, bus), nil
 		case "debian":
 			return newDebianResolvconfManager(logf)
 		case "openresolv":
 			return newOpenresolvManager(logf)
 		default:
 			logf("[unexpected] got unknown flavor of resolvconf %q, falling back to direct manager", resolvconfStyle())
-			return newDirectManager(logf, health), nil
+			return newDirectManager(logf, health, bus), nil
 		}
 	default:
-		return newDirectManager(logf, health), nil
+		return newDirectManager(logf, health, bus), nil
 	}
 }

+ 4 - 3
net/dns/manager_linux.go

@@ -21,6 +21,7 @@ import (
 	"tailscale.com/net/netaddr"
 	"tailscale.com/types/logger"
 	"tailscale.com/util/clientmetric"
+	"tailscale.com/util/eventbus"
 	"tailscale.com/util/syspolicy/policyclient"
 	"tailscale.com/version/distro"
 )
@@ -63,7 +64,7 @@ var (
 // NewOSConfigurator created a new OS configurator.
 //
 // The health tracker may be nil; the knobs may be nil and are ignored on this platform.
-func NewOSConfigurator(logf logger.Logf, health *health.Tracker, _ policyclient.Client, _ *controlknobs.Knobs, interfaceName string) (ret OSConfigurator, err error) {
+func NewOSConfigurator(logf logger.Logf, health *health.Tracker, bus *eventbus.Bus, _ policyclient.Client, _ *controlknobs.Knobs, interfaceName string) (ret OSConfigurator, err error) {
 	if !buildfeatures.HasDNS || distro.Get() == distro.JetKVM {
 		return NewNoopManager()
 	}
@@ -100,7 +101,7 @@ func NewOSConfigurator(logf logger.Logf, health *health.Tracker, _ policyclient.
 	logf("dns: using %q mode", mode)
 	switch mode {
 	case "direct":
-		return newDirectManagerOnFS(logf, health, env.fs), nil
+		return newDirectManagerOnFS(logf, health, bus, env.fs), nil
 	case "systemd-resolved":
 		if f, ok := optNewResolvedManager.GetOk(); ok {
 			return f(logf, health, interfaceName)
@@ -119,7 +120,7 @@ func NewOSConfigurator(logf logger.Logf, health *health.Tracker, _ policyclient.
 		logf("[unexpected] detected unknown DNS mode %q, using direct manager as last resort", mode)
 	}
 
-	return newDirectManagerOnFS(logf, health, env.fs), nil
+	return newDirectManagerOnFS(logf, health, bus, env.fs), nil
 }
 
 // newOSConfigEnv are the funcs newOSConfigurator needs, pulled out for testing.

+ 6 - 5
net/dns/manager_openbsd.go

@@ -11,6 +11,7 @@ import (
 	"tailscale.com/control/controlknobs"
 	"tailscale.com/health"
 	"tailscale.com/types/logger"
+	"tailscale.com/util/eventbus"
 	"tailscale.com/util/syspolicy/policyclient"
 )
 
@@ -25,8 +26,8 @@ func (kv kv) String() string {
 // NewOSConfigurator created a new OS configurator.
 //
 // The health tracker may be nil; the knobs may be nil and are ignored on this platform.
-func NewOSConfigurator(logf logger.Logf, health *health.Tracker, _ policyclient.Client, _ *controlknobs.Knobs, interfaceName string) (OSConfigurator, error) {
-	return newOSConfigurator(logf, health, interfaceName,
+func NewOSConfigurator(logf logger.Logf, health *health.Tracker, bus *eventbus.Bus, _ policyclient.Client, _ *controlknobs.Knobs, interfaceName string) (OSConfigurator, error) {
+	return newOSConfigurator(logf, health, bus, interfaceName,
 		newOSConfigEnv{
 			rcIsResolvd: rcIsResolvd,
 			fs:          directFS{},
@@ -39,7 +40,7 @@ type newOSConfigEnv struct {
 	rcIsResolvd func(resolvConfContents []byte) bool
 }
 
-func newOSConfigurator(logf logger.Logf, health *health.Tracker, interfaceName string, env newOSConfigEnv) (ret OSConfigurator, err error) {
+func newOSConfigurator(logf logger.Logf, health *health.Tracker, bus *eventbus.Bus, interfaceName string, env newOSConfigEnv) (ret OSConfigurator, err error) {
 	var debug []kv
 	dbg := func(k, v string) {
 		debug = append(debug, kv{k, v})
@@ -54,7 +55,7 @@ func newOSConfigurator(logf logger.Logf, health *health.Tracker, interfaceName s
 	bs, err := env.fs.ReadFile(resolvConf)
 	if os.IsNotExist(err) {
 		dbg("rc", "missing")
-		return newDirectManager(logf, health), nil
+		return newDirectManager(logf, health, bus), nil
 	}
 	if err != nil {
 		return nil, fmt.Errorf("reading /etc/resolv.conf: %w", err)
@@ -66,7 +67,7 @@ func newOSConfigurator(logf logger.Logf, health *health.Tracker, interfaceName s
 	}
 
 	dbg("resolvd", "missing")
-	return newDirectManager(logf, health), nil
+	return newDirectManager(logf, health, bus), nil
 }
 
 func rcIsResolvd(resolvConfContents []byte) bool {

+ 2 - 1
net/dns/manager_plan9.go

@@ -20,11 +20,12 @@ import (
 	"tailscale.com/control/controlknobs"
 	"tailscale.com/health"
 	"tailscale.com/types/logger"
+	"tailscale.com/util/eventbus"
 	"tailscale.com/util/set"
 	"tailscale.com/util/syspolicy/policyclient"
 )
 
-func NewOSConfigurator(logf logger.Logf, ht *health.Tracker, _ policyclient.Client, knobs *controlknobs.Knobs, interfaceName string) (OSConfigurator, error) {
+func NewOSConfigurator(logf logger.Logf, ht *health.Tracker, _ *eventbus.Bus, _ policyclient.Client, knobs *controlknobs.Knobs, interfaceName string) (OSConfigurator, error) {
 	return &plan9DNSManager{
 		logf:  logf,
 		ht:    ht,

+ 3 - 2
net/dns/manager_solaris.go

@@ -7,9 +7,10 @@ import (
 	"tailscale.com/control/controlknobs"
 	"tailscale.com/health"
 	"tailscale.com/types/logger"
+	"tailscale.com/util/eventbus"
 	"tailscale.com/util/syspolicy/policyclient"
 )
 
-func NewOSConfigurator(logf logger.Logf, health *health.Tracker, _ policyclient.Client, _ *controlknobs.Knobs, iface string) (OSConfigurator, error) {
-	return newDirectManager(logf, health), nil
+func NewOSConfigurator(logf logger.Logf, health *health.Tracker, bus *eventbus.Bus, _ policyclient.Client, _ *controlknobs.Knobs, iface string) (OSConfigurator, error) {
+	return newDirectManager(logf, health, bus), nil
 }

+ 2 - 2
net/dns/manager_tcp_test.go

@@ -93,7 +93,7 @@ func TestDNSOverTCP(t *testing.T) {
 	bus := eventbustest.NewBus(t)
 	dialer := tsdial.NewDialer(netmon.NewStatic())
 	dialer.SetBus(bus)
-	m := NewManager(t.Logf, &f, health.NewTracker(bus), dialer, nil, nil, "")
+	m := NewManager(t.Logf, &f, health.NewTracker(bus), dialer, nil, nil, "", bus)
 	m.resolver.TestOnlySetHook(f.SetResolver)
 	m.Set(Config{
 		Hosts: hosts(
@@ -181,7 +181,7 @@ func TestDNSOverTCP_TooLarge(t *testing.T) {
 	bus := eventbustest.NewBus(t)
 	dialer := tsdial.NewDialer(netmon.NewStatic())
 	dialer.SetBus(bus)
-	m := NewManager(log, &f, health.NewTracker(bus), dialer, nil, nil, "")
+	m := NewManager(log, &f, health.NewTracker(bus), dialer, nil, nil, "", bus)
 	m.resolver.TestOnlySetHook(f.SetResolver)
 	m.Set(Config{
 		Hosts:         hosts("andrew.ts.com.", "1.2.3.4"),

+ 40 - 2
net/dns/manager_test.go

@@ -6,9 +6,11 @@ package dns
 import (
 	"errors"
 	"net/netip"
+	"reflect"
 	"runtime"
 	"strings"
 	"testing"
+	"testing/synctest"
 
 	"github.com/google/go-cmp/cmp"
 	"github.com/google/go-cmp/cmp/cmpopts"
@@ -936,7 +938,7 @@ func TestManager(t *testing.T) {
 			bus := eventbustest.NewBus(t)
 			dialer := tsdial.NewDialer(netmon.NewStatic())
 			dialer.SetBus(bus)
-			m := NewManager(t.Logf, &f, health.NewTracker(bus), dialer, nil, knobs, goos)
+			m := NewManager(t.Logf, &f, health.NewTracker(bus), dialer, nil, knobs, goos, bus)
 			m.resolver.TestOnlySetHook(f.SetResolver)
 
 			if err := m.Set(test.in); err != nil {
@@ -1045,7 +1047,7 @@ func TestConfigRecompilation(t *testing.T) {
 	bus := eventbustest.NewBus(t)
 	dialer := tsdial.NewDialer(netmon.NewStatic())
 	dialer.SetBus(bus)
-	m := NewManager(t.Logf, f, health.NewTracker(bus), dialer, nil, nil, "darwin")
+	m := NewManager(t.Logf, f, health.NewTracker(bus), dialer, nil, nil, "darwin", bus)
 
 	var managerConfig *resolver.Config
 	m.resolver.TestOnlySetHook(func(cfg resolver.Config) {
@@ -1078,3 +1080,39 @@ func TestConfigRecompilation(t *testing.T) {
 		t.Fatalf("Want non nil managerConfig.  Got nil")
 	}
 }
+
+func TestTrampleRetrample(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		f := &fakeOSConfigurator{}
+		f.BaseConfig = OSConfig{
+			Nameservers: mustIPs("1.1.1.1"),
+		}
+
+		config := Config{
+			Routes:        upstreams("ts.net", "69.4.2.0", "foo.ts.net", ""),
+			SearchDomains: fqdns("foo.ts.net"),
+		}
+
+		bus := eventbustest.NewBus(t)
+		dialer := tsdial.NewDialer(netmon.NewStatic())
+		dialer.SetBus(bus)
+		m := NewManager(t.Logf, f, health.NewTracker(bus), dialer, nil, nil, "linux", bus)
+
+		// Initial set should succeed and store the config
+		if err := m.Set(config); err != nil {
+			t.Fatalf("Want nil error. Got non-nil")
+		}
+
+		// Reset the fake's OSConfig so we can tell whether the trample event re-applies it
+		f.OSConfig = OSConfig{}
+
+		inj := eventbustest.NewInjector(t, bus)
+		eventbustest.Inject(inj, TrampleDNS{})
+		synctest.Wait()
+
+		t.Logf("OSConfig: %+v", f.OSConfig)
+		if reflect.DeepEqual(f.OSConfig, OSConfig{}) {
+			t.Errorf("Expected config to be set, got empty config")
+		}
+	})
+}

+ 4 - 3
net/dns/manager_windows.go

@@ -29,6 +29,7 @@ import (
 	"tailscale.com/syncs"
 	"tailscale.com/types/logger"
 	"tailscale.com/util/dnsname"
+	"tailscale.com/util/eventbus"
 	"tailscale.com/util/syspolicy/pkey"
 	"tailscale.com/util/syspolicy/policyclient"
 	"tailscale.com/util/syspolicy/ptype"
@@ -57,8 +58,8 @@ type windowsManager struct {
 
 // NewOSConfigurator created a new OS configurator.
 //
-// The health tracker and the knobs may be nil.
-func NewOSConfigurator(logf logger.Logf, health *health.Tracker, polc policyclient.Client, knobs *controlknobs.Knobs, interfaceName string) (OSConfigurator, error) {
+// The health tracker, eventbus and the knobs may be nil.
+func NewOSConfigurator(logf logger.Logf, health *health.Tracker, bus *eventbus.Bus, polc policyclient.Client, knobs *controlknobs.Knobs, interfaceName string) (OSConfigurator, error) {
 	if polc == nil {
 		panic("nil policyclient.Client")
 	}
@@ -163,7 +164,7 @@ func setTailscaleHosts(logf logger.Logf, prevHostsFile []byte, hosts []*HostEntr
 		header = "# TailscaleHostsSectionStart"
 		footer = "# TailscaleHostsSectionEnd"
 	)
-	var comments = []string{
+	comments := []string{
 		"# This section contains MagicDNS entries for Tailscale.",
 		"# Do not edit this section manually.",
 	}

+ 2 - 2
net/dns/manager_windows_test.go

@@ -134,7 +134,7 @@ func TestManagerWindowsGPCopy(t *testing.T) {
 	}
 	defer delIfKey()
 
-	cfg, err := NewOSConfigurator(logf, nil, policyclient.NoPolicyClient{}, nil, fakeInterface.String())
+	cfg, err := NewOSConfigurator(logf, nil, nil, policyclient.NoPolicyClient{}, nil, fakeInterface.String())
 	if err != nil {
 		t.Fatalf("NewOSConfigurator: %v\n", err)
 	}
@@ -263,7 +263,7 @@ func runTest(t *testing.T, isLocal bool) {
 	}
 	defer delIfKey()
 
-	cfg, err := NewOSConfigurator(logf, nil, policyclient.NoPolicyClient{}, nil, fakeInterface.String())
+	cfg, err := NewOSConfigurator(logf, nil, nil, policyclient.NoPolicyClient{}, nil, fakeInterface.String())
 	if err != nil {
 		t.Fatalf("NewOSConfigurator: %v\n", err)
 	}

+ 1 - 1
net/dns/wsl_windows.go

@@ -76,7 +76,7 @@ func (wm *wslManager) SetDNS(cfg OSConfig) error {
 	}
 	managers := make(map[string]*directManager)
 	for _, distro := range distros {
-		managers[distro] = newDirectManagerOnFS(wm.logf, wm.health, wslFS{
+		managers[distro] = newDirectManagerOnFS(wm.logf, wm.health, nil, wslFS{
 			user:   "root",
 			distro: distro,
 		})

+ 1 - 1
wgengine/userspace.go

@@ -387,7 +387,7 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
 	conf.Dialer.SetTUNName(tunName)
 	conf.Dialer.SetNetMon(e.netMon)
 	conf.Dialer.SetBus(e.eventBus)
-	e.dns = dns.NewManager(logf, conf.DNS, e.health, conf.Dialer, fwdDNSLinkSelector{e, tunName}, conf.ControlKnobs, runtime.GOOS)
+	e.dns = dns.NewManager(logf, conf.DNS, e.health, conf.Dialer, fwdDNSLinkSelector{e, tunName}, conf.ControlKnobs, runtime.GOOS, e.eventBus)
 
 	// TODO: there's probably a better place for this
 	sockstats.SetNetMon(e.netMon)