manager_linux.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. //go:build linux && !android
  4. package dns
  5. import (
  6. "bytes"
  7. "errors"
  8. "fmt"
  9. "os"
  10. "strings"
  11. "sync"
  12. "time"
  13. "tailscale.com/control/controlknobs"
  14. "tailscale.com/feature"
  15. "tailscale.com/feature/buildfeatures"
  16. "tailscale.com/health"
  17. "tailscale.com/net/netaddr"
  18. "tailscale.com/types/logger"
  19. "tailscale.com/util/clientmetric"
  20. "tailscale.com/util/eventbus"
  21. "tailscale.com/util/syspolicy/policyclient"
  22. "tailscale.com/version/distro"
  23. )
  24. type kv struct {
  25. k, v string
  26. }
  27. func (kv kv) String() string {
  28. return fmt.Sprintf("%s=%s", kv.k, kv.v)
  29. }
// publishOnce guards the one-time creation of the DNS mode gauge metric in
// NewOSConfigurator, so the gauge is registered and set at most once per
// process.
var publishOnce sync.Once

// reconfigTimeout is the time interval within which Manager.{Up,Down} should complete.
//
// This is particularly useful because certain conditions can cause indefinite hangs
// (such as improper dbus auth followed by contextless dbus.Object.Call).
// Such operations should be wrapped in a timeout context.
const reconfigTimeout = time.Second
// Set unless ts_omit_networkmanager
var (
	// optNewNMManager constructs the OSConfigurator used for the
	// "network-manager" DNS mode.
	optNewNMManager feature.Hook[func(ifName string) (OSConfigurator, error)]
	// optNMIsUsingResolved backs newOSConfigEnv.nmIsUsingResolved; dnsMode
	// treats a nil error as "NetworkManager is using systemd-resolved".
	optNMIsUsingResolved feature.Hook[func() error]
	// optNMVersionBetween backs newOSConfigEnv.nmVersionBetween, which dnsMode
	// uses to check NetworkManager's version against a version range.
	optNMVersionBetween feature.Hook[func(v1, v2 string) (bool, error)]
)

// Set unless ts_omit_resolved
var (
	// optNewResolvedManager constructs the OSConfigurator used for the
	// "systemd-resolved" DNS mode.
	optNewResolvedManager feature.Hook[func(logf logger.Logf, health *health.Tracker, interfaceName string) (OSConfigurator, error)]
)

// Set unless ts_omit_dbus
var (
	// optDBusPing backs newOSConfigEnv.dbusPing; a nil error is treated as
	// "the named D-Bus service answered".
	optDBusPing feature.Hook[func(name, objectPath string) error]
	// optDBusReadString backs newOSConfigEnv.dbusReadString, used to read a
	// string value (such as resolved's ResolvConfMode) over D-Bus.
	optDBusReadString feature.Hook[func(name, objectPath, iface, member string) (string, error)]
)
  52. // NewOSConfigurator created a new OS configurator.
  53. //
  54. // The health tracker may be nil; the knobs may be nil and are ignored on this platform.
  55. func NewOSConfigurator(logf logger.Logf, health *health.Tracker, bus *eventbus.Bus, _ policyclient.Client, _ *controlknobs.Knobs, interfaceName string) (ret OSConfigurator, err error) {
  56. if !buildfeatures.HasDNS || distro.Get() == distro.JetKVM {
  57. return NewNoopManager()
  58. }
  59. env := newOSConfigEnv{
  60. fs: directFS{},
  61. resolvconfStyle: resolvconfStyle,
  62. }
  63. if f, ok := optDBusPing.GetOk(); ok {
  64. env.dbusPing = f
  65. } else {
  66. env.dbusPing = func(_, _ string) error { return errors.ErrUnsupported }
  67. }
  68. if f, ok := optDBusReadString.GetOk(); ok {
  69. env.dbusReadString = f
  70. } else {
  71. env.dbusReadString = func(_, _, _, _ string) (string, error) { return "", errors.ErrUnsupported }
  72. }
  73. if f, ok := optNMIsUsingResolved.GetOk(); ok {
  74. env.nmIsUsingResolved = f
  75. } else {
  76. env.nmIsUsingResolved = func() error { return errors.ErrUnsupported }
  77. }
  78. env.nmVersionBetween, _ = optNMVersionBetween.GetOk() // GetOk to not panic if nil; unused if optNMIsUsingResolved returns an error
  79. mode, err := dnsMode(logf, health, env)
  80. if err != nil {
  81. return nil, err
  82. }
  83. publishOnce.Do(func() {
  84. sanitizedMode := strings.ReplaceAll(mode, "-", "_")
  85. m := clientmetric.NewGauge(fmt.Sprintf("dns_manager_linux_mode_%s", sanitizedMode))
  86. m.Set(1)
  87. })
  88. logf("dns: using %q mode", mode)
  89. switch mode {
  90. case "direct":
  91. return newDirectManagerOnFS(logf, health, bus, env.fs), nil
  92. case "systemd-resolved":
  93. if f, ok := optNewResolvedManager.GetOk(); ok {
  94. return f(logf, health, interfaceName)
  95. }
  96. return nil, fmt.Errorf("tailscaled was built without DNS %q support", mode)
  97. case "network-manager":
  98. if f, ok := optNewNMManager.GetOk(); ok {
  99. return f(interfaceName)
  100. }
  101. return nil, fmt.Errorf("tailscaled was built without DNS %q support", mode)
  102. case "debian-resolvconf":
  103. return newDebianResolvconfManager(logf)
  104. case "openresolv":
  105. return newOpenresolvManager(logf)
  106. default:
  107. logf("[unexpected] detected unknown DNS mode %q, using direct manager as last resort", mode)
  108. }
  109. return newDirectManagerOnFS(logf, health, bus, env.fs), nil
  110. }
// newOSConfigEnv are the funcs NewOSConfigurator needs, pulled out for testing.
type newOSConfigEnv struct {
	// fs is used to read whole files such as resolv.conf and
	// /etc/nsswitch.conf, and is handed to the direct manager.
	fs wholeFileFS
	// dbusPing pings a D-Bus service; the arguments are the bus name and
	// the object path. A nil error means the service answered.
	dbusPing func(string, string) error
	// dbusReadString reads a string value over D-Bus; the arguments are the
	// bus name, object path, interface and member.
	dbusReadString func(string, string, string, string) (string, error)
	// nmIsUsingResolved returns nil when NetworkManager is using
	// systemd-resolved; dnsMode treats any error as "no".
	nmIsUsingResolved func() error
	// nmVersionBetween reports whether NetworkManager's version lies between
	// v1 and v2 (presumably inclusive; confirm against the implementation).
	// NOTE(review): NewOSConfigurator leaves this nil when the
	// NetworkManager hook is not registered.
	nmVersionBetween func(v1, v2 string) (safe bool, err error)
	// resolvconfStyle reports the flavor of resolvconf in use; dnsMode
	// handles "", "debian" and "openresolv".
	resolvconfStyle func() string
}
  120. func dnsMode(logf logger.Logf, health *health.Tracker, env newOSConfigEnv) (ret string, err error) {
  121. var debug []kv
  122. dbg := func(k, v string) {
  123. debug = append(debug, kv{k, v})
  124. }
  125. defer func() {
  126. if ret != "" {
  127. dbg("ret", ret)
  128. }
  129. logf("dns: %v", debug)
  130. }()
  131. // In all cases that we detect systemd-resolved, try asking it what it
  132. // thinks the current resolv.conf mode is so we can add it to our logs.
  133. defer func() {
  134. if ret != "systemd-resolved" {
  135. return
  136. }
  137. // Try to ask systemd-resolved what it thinks the current
  138. // status of resolv.conf is. This is documented at:
  139. // https://www.freedesktop.org/software/systemd/man/org.freedesktop.resolve1.html
  140. mode, err := env.dbusReadString("org.freedesktop.resolve1", "/org/freedesktop/resolve1", "org.freedesktop.resolve1.Manager", "ResolvConfMode")
  141. if err != nil {
  142. logf("dns: ResolvConfMode error: %v", err)
  143. dbg("resolv-conf-mode", "error")
  144. } else {
  145. dbg("resolv-conf-mode", mode)
  146. }
  147. }()
  148. // Before we read /etc/resolv.conf (which might be in a broken
  149. // or symlink-dangling state), try to ping the D-Bus service
  150. // for systemd-resolved. If it's active on the machine, this
  151. // will make it start up and write the /etc/resolv.conf file
  152. // before it replies to the ping. (see how systemd's
  153. // src/resolve/resolved.c calls manager_write_resolv_conf
  154. // before the sd_event_loop starts)
  155. resolvedUp := env.dbusPing("org.freedesktop.resolve1", "/org/freedesktop/resolve1") == nil
  156. if resolvedUp {
  157. dbg("resolved-ping", "yes")
  158. }
  159. bs, err := env.fs.ReadFile(resolvConf)
  160. if os.IsNotExist(err) {
  161. dbg("rc", "missing")
  162. return "direct", nil
  163. }
  164. if err != nil {
  165. return "", fmt.Errorf("reading /etc/resolv.conf: %w", err)
  166. }
  167. switch resolvOwner(bs) {
  168. case "systemd-resolved":
  169. dbg("rc", "resolved")
  170. // Some systems, for reasons known only to them, have a
  171. // resolv.conf that has the word "systemd-resolved" in its
  172. // header, but doesn't actually point to resolved. We mustn't
  173. // try to program resolved in that case.
  174. // https://github.com/tailscale/tailscale/issues/2136
  175. if err := resolvedIsActuallyResolver(logf, env, dbg, bs); err != nil {
  176. logf("dns: resolvedIsActuallyResolver error: %v", err)
  177. dbg("resolved", "not-in-use")
  178. return "direct", nil
  179. }
  180. if err := env.dbusPing("org.freedesktop.NetworkManager", "/org/freedesktop/NetworkManager/DnsManager"); err != nil {
  181. dbg("nm", "no")
  182. return "systemd-resolved", nil
  183. }
  184. dbg("nm", "yes")
  185. if err := env.nmIsUsingResolved(); err != nil {
  186. dbg("nm-resolved", "no")
  187. return "systemd-resolved", nil
  188. }
  189. dbg("nm-resolved", "yes")
  190. // Version of NetworkManager before 1.26.6 programmed resolved
  191. // incorrectly, such that NM's settings would always take
  192. // precedence over other settings set by other resolved
  193. // clients.
  194. //
  195. // If we're dealing with such a version, we have to set our
  196. // DNS settings through NM to have them take.
  197. //
  198. // However, versions 1.26.6 later both fixed the resolved
  199. // programming issue _and_ started ignoring DNS settings for
  200. // "unmanaged" interfaces - meaning NM 1.26.6 and later
  201. // actively ignore DNS configuration we give it. So, for those
  202. // NM versions, we can and must use resolved directly.
  203. //
  204. // Even more fun, even-older versions of NM won't let us set
  205. // DNS settings if the interface isn't managed by NM, with a
  206. // hard failure on DBus requests. Empirically, NM 1.22 does
  207. // this. Based on the versions popular distros shipped, we
  208. // conservatively decree that only 1.26.0 through 1.26.5 are
  209. // "safe" to use for our purposes. This roughly matches
  210. // distros released in the latter half of 2020.
  211. //
  212. // In a perfect world, we'd avoid this by replacing
  213. // configuration out from under NM entirely (e.g. using
  214. // directManager to overwrite resolv.conf), but in a world
  215. // where resolved runs, we need to get correct configuration
  216. // into resolved regardless of what's in resolv.conf (because
  217. // resolved can also be queried over dbus, or via an NSS
  218. // module that bypasses /etc/resolv.conf). Given that we must
  219. // get correct configuration into resolved, we have no choice
  220. // but to use NM, and accept the loss of IPv6 configuration
  221. // that comes with it (see
  222. // https://github.com/tailscale/tailscale/issues/1699,
  223. // https://github.com/tailscale/tailscale/pull/1945)
  224. safe, err := env.nmVersionBetween("1.26.0", "1.26.5")
  225. if err != nil {
  226. // Failed to figure out NM's version, can't make a correct
  227. // decision.
  228. return "", fmt.Errorf("checking NetworkManager version: %v", err)
  229. }
  230. if safe {
  231. dbg("nm-safe", "yes")
  232. return "network-manager", nil
  233. }
  234. dbg("nm-safe", "no")
  235. return "systemd-resolved", nil
  236. case "resolvconf":
  237. dbg("rc", "resolvconf")
  238. style := env.resolvconfStyle()
  239. switch style {
  240. case "":
  241. dbg("resolvconf", "no")
  242. return "direct", nil
  243. case "debian":
  244. dbg("resolvconf", "debian")
  245. return "debian-resolvconf", nil
  246. case "openresolv":
  247. dbg("resolvconf", "openresolv")
  248. return "openresolv", nil
  249. default:
  250. // Shouldn't happen, that means we updated flavors of
  251. // resolvconf without updating here.
  252. dbg("resolvconf", style)
  253. logf("[unexpected] got unknown flavor of resolvconf %q, falling back to direct manager", env.resolvconfStyle())
  254. return "direct", nil
  255. }
  256. case "NetworkManager":
  257. dbg("rc", "nm")
  258. // Sometimes, NetworkManager owns the configuration but points
  259. // it at systemd-resolved.
  260. if err := resolvedIsActuallyResolver(logf, env, dbg, bs); err != nil {
  261. logf("dns: resolvedIsActuallyResolver error: %v", err)
  262. dbg("resolved", "not-in-use")
  263. // You'd think we would use newNMManager here. However, as
  264. // explained in
  265. // https://github.com/tailscale/tailscale/issues/1699 ,
  266. // using NetworkManager for DNS configuration carries with
  267. // it the cost of losing IPv6 configuration on the
  268. // Tailscale network interface. So, when we can avoid it,
  269. // we bypass NetworkManager by replacing resolv.conf
  270. // directly.
  271. //
  272. // If you ever try to put NMManager back here, keep in mind
  273. // that versions >=1.26.6 will ignore DNS configuration
  274. // anyway, so you still need a fallback path that uses
  275. // directManager.
  276. return "direct", nil
  277. }
  278. dbg("nm-resolved", "yes")
  279. // See large comment above for reasons we'd use NM rather than
  280. // resolved. systemd-resolved is actually in charge of DNS
  281. // configuration, but in some cases we might need to configure
  282. // it via NetworkManager. All the logic below is probing for
  283. // that case: is NetworkManager running? If so, is it one of
  284. // the versions that requires direct interaction with it?
  285. if err := env.dbusPing("org.freedesktop.NetworkManager", "/org/freedesktop/NetworkManager/DnsManager"); err != nil {
  286. dbg("nm", "no")
  287. return "systemd-resolved", nil
  288. }
  289. safe, err := env.nmVersionBetween("1.26.0", "1.26.5")
  290. if err != nil {
  291. // Failed to figure out NM's version, can't make a correct
  292. // decision.
  293. return "", fmt.Errorf("checking NetworkManager version: %v", err)
  294. }
  295. if safe {
  296. dbg("nm-safe", "yes")
  297. return "network-manager", nil
  298. }
  299. if err := env.nmIsUsingResolved(); err != nil {
  300. // If systemd-resolved is not running at all, then we don't have any
  301. // other choice: we take direct control of DNS.
  302. dbg("nm-resolved", "no")
  303. return "direct", nil
  304. }
  305. health.SetDNSManagerHealth(errors.New("systemd-resolved and NetworkManager are wired together incorrectly; MagicDNS will probably not work. For more info, see https://tailscale.com/s/resolved-nm"))
  306. dbg("nm-safe", "no")
  307. return "systemd-resolved", nil
  308. default:
  309. dbg("rc", "unknown")
  310. return "direct", nil
  311. }
  312. }
  313. // resolvedIsActuallyResolver reports whether the system is using
  314. // systemd-resolved as the resolver. There are two different ways to
  315. // use systemd-resolved:
  316. // - libnss_resolve, which requires adding `resolve` to the "hosts:"
  317. // line in /etc/nsswitch.conf
  318. // - setting the only nameserver configured in `resolv.conf` to
  319. // systemd-resolved IP (127.0.0.53)
  320. //
  321. // Returns an error if the configuration is something other than
  322. // exclusively systemd-resolved, or nil if the config is only
  323. // systemd-resolved.
  324. func resolvedIsActuallyResolver(logf logger.Logf, env newOSConfigEnv, dbg func(k, v string), bs []byte) error {
  325. if err := isLibnssResolveUsed(env); err == nil {
  326. dbg("resolved", "nss")
  327. return nil
  328. }
  329. cfg, err := readResolv(bytes.NewBuffer(bs))
  330. if err != nil {
  331. return err
  332. }
  333. // We've encountered at least one system where the line
  334. // "nameserver 127.0.0.53" appears twice, so we look exhaustively
  335. // through all of them and allow any number of repeated mentions
  336. // of the systemd-resolved stub IP.
  337. if len(cfg.Nameservers) == 0 {
  338. return errors.New("resolv.conf has no nameservers")
  339. }
  340. for _, ns := range cfg.Nameservers {
  341. if ns != netaddr.IPv4(127, 0, 0, 53) {
  342. return fmt.Errorf("resolv.conf doesn't point to systemd-resolved; points to %v", cfg.Nameservers)
  343. }
  344. }
  345. dbg("resolved", "file")
  346. return nil
  347. }
  348. // isLibnssResolveUsed reports whether libnss_resolve is used
  349. // for resolving names. Returns nil if it is, and an error otherwise.
  350. func isLibnssResolveUsed(env newOSConfigEnv) error {
  351. bs, err := env.fs.ReadFile("/etc/nsswitch.conf")
  352. if err != nil {
  353. return fmt.Errorf("reading /etc/resolv.conf: %w", err)
  354. }
  355. for _, line := range strings.Split(string(bs), "\n") {
  356. fields := strings.Fields(line)
  357. if len(fields) < 2 || fields[0] != "hosts:" {
  358. continue
  359. }
  360. for _, module := range fields[1:] {
  361. if module == "dns" {
  362. return fmt.Errorf("dns with a higher priority than libnss_resolve")
  363. }
  364. if module == "resolve" {
  365. return nil
  366. }
  367. }
  368. }
  369. return fmt.Errorf("libnss_resolve not used")
  370. }