direct.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. //go:build !android && !ios
  4. package dns
  5. import (
  6. "bytes"
  7. "context"
  8. "crypto/rand"
  9. "errors"
  10. "fmt"
  11. "io"
  12. "io/fs"
  13. "net/netip"
  14. "os"
  15. "os/exec"
  16. "path/filepath"
  17. "runtime"
  18. "slices"
  19. "strings"
  20. "sync"
  21. "time"
  22. "tailscale.com/feature"
  23. "tailscale.com/health"
  24. "tailscale.com/net/dns/resolvconffile"
  25. "tailscale.com/net/tsaddr"
  26. "tailscale.com/types/logger"
  27. "tailscale.com/util/dnsname"
  28. "tailscale.com/version/distro"
  29. )
  30. // writeResolvConf writes DNS configuration in resolv.conf format to the given writer.
  31. func writeResolvConf(w io.Writer, servers []netip.Addr, domains []dnsname.FQDN) error {
  32. c := &resolvconffile.Config{
  33. Nameservers: servers,
  34. SearchDomains: domains,
  35. }
  36. return c.Write(w)
  37. }
  38. func readResolv(r io.Reader) (OSConfig, error) {
  39. c, err := resolvconffile.Parse(r)
  40. if err != nil {
  41. return OSConfig{}, err
  42. }
  43. return OSConfig{
  44. Nameservers: c.Nameservers,
  45. SearchDomains: c.SearchDomains,
  46. }, nil
  47. }
  48. // resolvOwner returns the apparent owner of the resolv.conf
  49. // configuration in bs - one of "resolvconf", "systemd-resolved" or
  50. // "NetworkManager", or "" if no known owner was found.
  51. //
  52. //lint:ignore U1000 used in linux and freebsd code
  53. func resolvOwner(bs []byte) string {
  54. likely := ""
  55. b := bytes.NewBuffer(bs)
  56. for {
  57. line, err := b.ReadString('\n')
  58. if err != nil {
  59. return likely
  60. }
  61. line = strings.TrimSpace(line)
  62. if line == "" {
  63. continue
  64. }
  65. if line[0] != '#' {
  66. // First non-empty, non-comment line. Assume the owner
  67. // isn't hiding further down.
  68. return likely
  69. }
  70. if strings.Contains(line, "systemd-resolved") {
  71. likely = "systemd-resolved"
  72. } else if strings.Contains(line, "NetworkManager") {
  73. likely = "NetworkManager"
  74. } else if strings.Contains(line, "resolvconf") {
  75. likely = "resolvconf"
  76. }
  77. }
  78. }
  79. // isResolvedRunning reports whether systemd-resolved is running on the system,
  80. // even if it is not managing the system DNS settings.
  81. func isResolvedRunning() bool {
  82. if runtime.GOOS != "linux" {
  83. return false
  84. }
  85. // systemd-resolved is never installed without systemd.
  86. _, err := exec.LookPath("systemctl")
  87. if err != nil {
  88. return false
  89. }
  90. ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
  91. defer cancel()
  92. err = exec.CommandContext(ctx, "systemctl", "is-active", "systemd-resolved.service").Run()
  93. // is-active exits with code 3 if the service is not active.
  94. return err == nil
  95. }
  96. func restartResolved() error {
  97. ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
  98. defer cancel()
  99. return exec.CommandContext(ctx, "systemctl", "restart", "systemd-resolved.service").Run()
  100. }
// directManager is an OSConfigurator which replaces /etc/resolv.conf with a file
// generated from the given configuration, creating a backup of its old state.
//
// This way of configuring DNS is precarious, since it does not react
// to the disappearance of the Tailscale interface.
// The caller must call Close before program shutdown
// or as cleanup if the program terminates unexpectedly.
type directManager struct {
	// logf is the log function used for all diagnostics.
	logf logger.Logf
	// health receives the healthy/unhealthy state of the resolv.conf
	// trample warning.
	health *health.Tracker
	// fs is the file system abstraction through which every file
	// operation of the manager goes.
	fs wholeFileFS
	// renameBroken is set if fs.Rename to or from /etc/resolv.conf
	// fails. This can happen in some container runtimes, where
	// /etc/resolv.conf is bind-mounted from outside the container,
	// and therefore /etc and /etc/resolv.conf are different
	// filesystems as far as rename(2) is concerned.
	//
	// In those situations, we fall back to emulating rename with file
	// copies and truncations, which is not as good (opens up a race
	// where a reader can see an empty or partial /etc/resolv.conf),
	// but is better than having non-functioning DNS.
	renameBroken bool
	ctx          context.Context    // valid until Close
	ctxClose     context.CancelFunc // closes ctx
	// mu is held while reading or writing wantResolvConf and
	// lastWarnContents.
	mu             sync.Mutex
	wantResolvConf []byte // if non-nil, what we expect /etc/resolv.conf to contain
	//lint:ignore U1000 used in direct_linux.go
	lastWarnContents []byte // last resolv.conf contents that we warned about
}
  130. //lint:ignore U1000 used in manager_{freebsd,openbsd}.go
  131. func newDirectManager(logf logger.Logf, health *health.Tracker) *directManager {
  132. return newDirectManagerOnFS(logf, health, directFS{})
  133. }
  134. func newDirectManagerOnFS(logf logger.Logf, health *health.Tracker, fs wholeFileFS) *directManager {
  135. ctx, cancel := context.WithCancel(context.Background())
  136. m := &directManager{
  137. logf: logf,
  138. health: health,
  139. fs: fs,
  140. ctx: ctx,
  141. ctxClose: cancel,
  142. }
  143. go m.runFileWatcher()
  144. return m
  145. }
  146. func (m *directManager) readResolvFile(path string) (OSConfig, error) {
  147. b, err := m.fs.ReadFile(path)
  148. if err != nil {
  149. return OSConfig{}, err
  150. }
  151. return readResolv(bytes.NewReader(b))
  152. }
  153. // ownedByTailscale reports whether /etc/resolv.conf seems to be a
  154. // tailscale-managed file.
  155. func (m *directManager) ownedByTailscale() (bool, error) {
  156. isRegular, err := m.fs.Stat(resolvConf)
  157. if err != nil {
  158. if os.IsNotExist(err) {
  159. return false, nil
  160. }
  161. return false, err
  162. }
  163. if !isRegular {
  164. return false, nil
  165. }
  166. bs, err := m.fs.ReadFile(resolvConf)
  167. if err != nil {
  168. return false, err
  169. }
  170. if bytes.Contains(bs, []byte("generated by tailscale")) {
  171. return true, nil
  172. }
  173. return false, nil
  174. }
  175. // backupConfig creates or updates a backup of /etc/resolv.conf, if
  176. // resolv.conf does not currently contain a Tailscale-managed config.
  177. func (m *directManager) backupConfig() error {
  178. if _, err := m.fs.Stat(resolvConf); err != nil {
  179. if os.IsNotExist(err) {
  180. // No resolv.conf, nothing to back up. Also get rid of any
  181. // existing backup file, to avoid restoring something old.
  182. m.fs.Remove(backupConf)
  183. return nil
  184. }
  185. return err
  186. }
  187. owned, err := m.ownedByTailscale()
  188. if err != nil {
  189. return err
  190. }
  191. if owned {
  192. return nil
  193. }
  194. return m.rename(resolvConf, backupConf)
  195. }
  196. func (m *directManager) restoreBackup() (restored bool, err error) {
  197. if _, err := m.fs.Stat(backupConf); err != nil {
  198. if os.IsNotExist(err) {
  199. // No backup, nothing we can do.
  200. return false, nil
  201. }
  202. return false, err
  203. }
  204. owned, err := m.ownedByTailscale()
  205. if err != nil {
  206. return false, err
  207. }
  208. _, err = m.fs.Stat(resolvConf)
  209. if err != nil && !os.IsNotExist(err) {
  210. return false, err
  211. }
  212. resolvConfExists := !os.IsNotExist(err)
  213. if resolvConfExists && !owned {
  214. // There's already a non-tailscale config in place, get rid of
  215. // our backup.
  216. m.fs.Remove(backupConf)
  217. return false, nil
  218. }
  219. // We own resolv.conf, and a backup exists.
  220. if err := m.rename(backupConf, resolvConf); err != nil {
  221. return false, err
  222. }
  223. return true, nil
  224. }
  225. // rename tries to rename old to new using m.fs.Rename, and falls back
  226. // to hand-copying bytes and truncating old if that fails.
  227. //
  228. // This is a workaround to /etc/resolv.conf being a bind-mounted file
  229. // some container environments, which cannot be moved elsewhere in
  230. // /etc (because that would be a cross-filesystem move) or deleted
  231. // (because that would break the bind in surprising ways).
  232. func (m *directManager) rename(old, new string) error {
  233. if !m.renameBroken {
  234. err := m.fs.Rename(old, new)
  235. if err == nil {
  236. return nil
  237. }
  238. if runtime.GOOS == "linux" && distro.Get() == distro.Synology {
  239. // Fail fast. The fallback case below won't work anyway.
  240. return err
  241. }
  242. m.logf("rename of %q to %q failed (%v), falling back to copy+delete", old, new, err)
  243. m.renameBroken = true
  244. }
  245. bs, err := m.fs.ReadFile(old)
  246. if err != nil {
  247. return fmt.Errorf("reading %q to rename: %w", old, err)
  248. }
  249. if err := m.fs.WriteFile(new, bs, 0644); err != nil {
  250. return fmt.Errorf("writing to %q in rename of %q: %w", new, old, err)
  251. }
  252. // Explicitly set the permissions on the new file. This ensures that
  253. // if we have a umask set which prevents creating world-readable files,
  254. // the file will still have the correct permissions once it's renamed
  255. // into place. See #12609.
  256. if err := m.fs.Chmod(new, 0644); err != nil {
  257. return fmt.Errorf("chmod %q in rename of %q: %w", new, old, err)
  258. }
  259. if err := m.fs.Remove(old); err != nil {
  260. err2 := m.fs.Truncate(old)
  261. if err2 != nil {
  262. return fmt.Errorf("remove of %q failed (%w) and so did truncate: %v", old, err, err2)
  263. }
  264. }
  265. return nil
  266. }
  267. // setWant sets the expected contents of /etc/resolv.conf, if any.
  268. //
  269. // A value of nil means no particular value is expected.
  270. //
  271. // m takes ownership of want.
  272. func (m *directManager) setWant(want []byte) {
  273. m.mu.Lock()
  274. defer m.mu.Unlock()
  275. m.wantResolvConf = want
  276. }
  277. func (m *directManager) SetDNS(config OSConfig) (err error) {
  278. defer func() {
  279. if err != nil && errors.Is(err, fs.ErrPermission) && runtime.GOOS == "linux" &&
  280. distro.Get() == distro.Synology && os.Geteuid() != 0 {
  281. // On Synology (notably DSM7 where we don't run as root), ignore all
  282. // DNS configuration errors for now. We don't have permission.
  283. // See https://github.com/tailscale/tailscale/issues/4017
  284. m.logf("ignoring SetDNS permission error on Synology (Issue 4017); was: %v", err)
  285. err = nil
  286. }
  287. }()
  288. m.setWant(nil) // reset our expectations before any work
  289. var changed bool
  290. if config.IsZero() {
  291. changed, err = m.restoreBackup()
  292. if err != nil {
  293. return err
  294. }
  295. } else {
  296. changed = true
  297. if err := m.backupConfig(); err != nil {
  298. return err
  299. }
  300. buf := new(bytes.Buffer)
  301. writeResolvConf(buf, config.Nameservers, config.SearchDomains)
  302. if err := m.atomicWriteFile(m.fs, resolvConf, buf.Bytes(), 0644); err != nil {
  303. return err
  304. }
  305. // Now that we've successfully written to the file, lock it in.
  306. // If we see /etc/resolv.conf with different contents, we know somebody
  307. // else trampled on it.
  308. m.setWant(buf.Bytes())
  309. }
  310. // We might have taken over a configuration managed by resolved,
  311. // in which case it will notice this on restart and gracefully
  312. // start using our configuration. This shouldn't happen because we
  313. // try to manage DNS through resolved when it's around, but as a
  314. // best-effort fallback if we messed up the detection, try to
  315. // restart resolved to make the system configuration consistent.
  316. //
  317. // We take care to only kick systemd-resolved if we've made some
  318. // change to the system's DNS configuration, because this codepath
  319. // can end up running in cases where the user has manually
  320. // configured /etc/resolv.conf to point to systemd-resolved (but
  321. // it's not managed explicitly by systemd-resolved), *and* has
  322. // --accept-dns=false, meaning we pass an empty configuration to
  323. // the running DNS manager. In that very edge-case scenario, we
  324. // cause a disruptive DNS outage each time we reset an empty
  325. // OS configuration.
  326. if changed && isResolvedRunning() && !runningAsGUIDesktopUser() {
  327. t0 := time.Now()
  328. err := restartResolved()
  329. d := time.Since(t0).Round(time.Millisecond)
  330. if err != nil {
  331. m.logf("error restarting resolved after %v: %v", d, err)
  332. } else {
  333. m.logf("restarted resolved after %v", d)
  334. }
  335. }
  336. return nil
  337. }
  338. func (m *directManager) SupportsSplitDNS() bool {
  339. return false
  340. }
  341. func (m *directManager) GetBaseConfig() (OSConfig, error) {
  342. owned, err := m.ownedByTailscale()
  343. if err != nil {
  344. return OSConfig{}, err
  345. }
  346. fileToRead := resolvConf
  347. if owned {
  348. fileToRead = backupConf
  349. }
  350. oscfg, err := m.readResolvFile(fileToRead)
  351. if err != nil {
  352. return OSConfig{}, err
  353. }
  354. // On some systems, the backup configuration file is actually a
  355. // symbolic link to something owned by another DNS service (commonly,
  356. // resolved). Thus, it can be updated out from underneath us to contain
  357. // the Tailscale service IP, which results in an infinite loop of us
  358. // trying to send traffic to resolved, which sends back to us, and so
  359. // on. To solve this, drop the Tailscale service IP from the base
  360. // configuration; we do this in all situations since there's
  361. // essentially no world where we want to forward to ourselves.
  362. //
  363. // See: https://github.com/tailscale/tailscale/issues/7816
  364. var removed bool
  365. oscfg.Nameservers = slices.DeleteFunc(oscfg.Nameservers, func(ip netip.Addr) bool {
  366. if ip == tsaddr.TailscaleServiceIP() || ip == tsaddr.TailscaleServiceIPv6() {
  367. removed = true
  368. return true
  369. }
  370. return false
  371. })
  372. if removed {
  373. m.logf("[v1] dropped Tailscale IP from base config that was a symlink")
  374. }
  375. return oscfg, nil
  376. }
// HookWatchFile is a hook for watching file changes, for platforms that support it.
// The function is called with a context, a directory and filename to watch, and a
// callback to call when the file changes. It returns an error if the watch could
// not be set up.
//
// When no hook is set, directManager does not watch /etc/resolv.conf at all.
var HookWatchFile feature.Hook[func(ctx context.Context, dir, filename string, cb func()) error]
  381. func (m *directManager) runFileWatcher() {
  382. watchFile, ok := HookWatchFile.GetOk()
  383. if !ok {
  384. return
  385. }
  386. if err := watchFile(m.ctx, "/etc/", resolvConf, m.checkForFileTrample); err != nil {
  387. // This is all best effort for now, so surface warnings to users.
  388. m.logf("dns: inotify: %s", err)
  389. }
  390. }
// resolvTrampleWarnable is the health warning that checkForFileTrample
// sets unhealthy when /etc/resolv.conf no longer has the contents we
// wrote, and sets healthy again once the contents match.
var resolvTrampleWarnable = health.Register(&health.Warnable{
	Code:     "resolv-conf-overwritten",
	Severity: health.SeverityMedium,
	Title:    "DNS configuration issue",
	Text:     health.StaticMessage("System DNS config not ideal. /etc/resolv.conf overwritten. See https://tailscale.com/s/dns-fight"),
})
  397. // checkForFileTrample checks whether /etc/resolv.conf has been trampled
  398. // by another program on the system. (e.g. a DHCP client)
  399. func (m *directManager) checkForFileTrample() {
  400. m.mu.Lock()
  401. want := m.wantResolvConf
  402. lastWarn := m.lastWarnContents
  403. m.mu.Unlock()
  404. if want == nil {
  405. return
  406. }
  407. cur, err := m.fs.ReadFile(resolvConf)
  408. if err != nil {
  409. m.logf("trample: read error: %v", err)
  410. return
  411. }
  412. if bytes.Equal(cur, want) {
  413. m.health.SetHealthy(resolvTrampleWarnable)
  414. if lastWarn != nil {
  415. m.mu.Lock()
  416. m.lastWarnContents = nil
  417. m.mu.Unlock()
  418. m.logf("trample: resolv.conf again matches expected content")
  419. }
  420. return
  421. }
  422. if bytes.Equal(cur, lastWarn) {
  423. // We already logged about this, so not worth doing it again.
  424. return
  425. }
  426. m.mu.Lock()
  427. m.lastWarnContents = cur
  428. m.mu.Unlock()
  429. show := cur
  430. if len(show) > 1024 {
  431. show = show[:1024]
  432. }
  433. m.logf("trample: resolv.conf changed from what we expected. did some other program interfere? current contents: %q", show)
  434. m.health.SetUnhealthy(resolvTrampleWarnable, nil)
  435. }
  436. func (m *directManager) Close() error {
  437. m.ctxClose()
  438. // We used to keep a file for the tailscale config and symlinked
  439. // to it, but then we stopped because /etc/resolv.conf being a
  440. // symlink to surprising places breaks snaps and other sandboxing
  441. // things. Clean it up if it's still there.
  442. m.fs.Remove("/etc/resolv.tailscale.conf")
  443. if _, err := m.fs.Stat(backupConf); err != nil {
  444. if os.IsNotExist(err) {
  445. // No backup, nothing we can do.
  446. return nil
  447. }
  448. return err
  449. }
  450. owned, err := m.ownedByTailscale()
  451. if err != nil {
  452. return err
  453. }
  454. _, err = m.fs.Stat(resolvConf)
  455. if err != nil && !os.IsNotExist(err) {
  456. return err
  457. }
  458. resolvConfExists := !os.IsNotExist(err)
  459. if resolvConfExists && !owned {
  460. // There's already a non-tailscale config in place, get rid of
  461. // our backup.
  462. m.fs.Remove(backupConf)
  463. return nil
  464. }
  465. // We own resolv.conf, and a backup exists.
  466. if err := m.rename(backupConf, resolvConf); err != nil {
  467. return err
  468. }
  469. if isResolvedRunning() && !runningAsGUIDesktopUser() {
  470. m.logf("restarting systemd-resolved...")
  471. if err := restartResolved(); err != nil {
  472. m.logf("restart of systemd-resolved failed: %v", err)
  473. } else {
  474. m.logf("restarted systemd-resolved")
  475. }
  476. }
  477. return nil
  478. }
  479. func (m *directManager) atomicWriteFile(fs wholeFileFS, filename string, data []byte, perm os.FileMode) error {
  480. var randBytes [12]byte
  481. if _, err := rand.Read(randBytes[:]); err != nil {
  482. return fmt.Errorf("atomicWriteFile: %w", err)
  483. }
  484. tmpName := fmt.Sprintf("%s.%x.tmp", filename, randBytes[:])
  485. defer fs.Remove(tmpName)
  486. if err := fs.WriteFile(tmpName, data, perm); err != nil {
  487. return fmt.Errorf("atomicWriteFile: %w", err)
  488. }
  489. // Explicitly set the permissions on the temporary file before renaming
  490. // it. This ensures that if we have a umask set which prevents creating
  491. // world-readable files, the file will still have the correct
  492. // permissions once it's renamed into place. See #12609.
  493. if err := fs.Chmod(tmpName, perm); err != nil {
  494. return fmt.Errorf("atomicWriteFile: Chmod: %w", err)
  495. }
  496. return m.rename(tmpName, filename)
  497. }
// wholeFileFS is a high-level file system abstraction designed just for use
// by directManager, with the goal that it is easy to implement over wsl.exe.
//
// All name parameters are absolute paths.
type wholeFileFS interface {
	// Chmod sets the permission bits of the named file to mode.
	Chmod(name string, mode os.FileMode) error
	// ReadFile returns the entire contents of the named file.
	ReadFile(name string) ([]byte, error)
	// Remove deletes the named file.
	Remove(name string) error
	// Rename moves oldName to newName.
	Rename(oldName, newName string) error
	// Stat reports whether the named file is a regular file. Callers
	// test the returned error with os.IsNotExist to detect a missing
	// file.
	Stat(name string) (isRegular bool, err error)
	// Truncate empties the named file (directFS truncates it to
	// length zero).
	Truncate(name string) error
	// WriteFile writes contents to the named file, using perm as the
	// file's permissions.
	WriteFile(name string, contents []byte, perm os.FileMode) error
}
  511. // directFS is a wholeFileFS implemented directly on the OS.
  512. type directFS struct {
  513. // prefix is file path prefix.
  514. //
  515. // All name parameters are absolute paths so this is typically a
  516. // testing temporary directory like "/tmp".
  517. prefix string
  518. }
  519. func (fs directFS) path(name string) string { return filepath.Join(fs.prefix, name) }
  520. func (fs directFS) Stat(name string) (isRegular bool, err error) {
  521. fi, err := os.Stat(fs.path(name))
  522. if err != nil {
  523. return false, err
  524. }
  525. return fi.Mode().IsRegular(), nil
  526. }
  527. func (fs directFS) Chmod(name string, mode os.FileMode) error {
  528. return os.Chmod(fs.path(name), mode)
  529. }
  530. func (fs directFS) Rename(oldName, newName string) error {
  531. return os.Rename(fs.path(oldName), fs.path(newName))
  532. }
  533. func (fs directFS) Remove(name string) error { return os.Remove(fs.path(name)) }
  534. func (fs directFS) ReadFile(name string) ([]byte, error) {
  535. return os.ReadFile(fs.path(name))
  536. }
  537. func (fs directFS) Truncate(name string) error {
  538. return os.Truncate(fs.path(name), 0)
  539. }
  540. func (fs directFS) WriteFile(name string, contents []byte, perm os.FileMode) error {
  541. return os.WriteFile(fs.path(name), contents, perm)
  542. }
  543. // runningAsGUIDesktopUser reports whether it seems that this code is
  544. // being run as a regular user on a Linux desktop. This is a quick
  545. // hack to fix Issue 2672 where PolicyKit pops up a GUI dialog asking
  546. // to proceed we do a best effort attempt to restart
  547. // systemd-resolved.service. There's surely a better way.
  548. func runningAsGUIDesktopUser() bool {
  549. return os.Getuid() != 0 && os.Getenv("DISPLAY") != ""
  550. }