main.go 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. //go:build linux
  4. // The containerboot binary is a wrapper for starting tailscaled in a container.
  5. // It handles reading the desired mode of operation out of environment
  6. // variables, bringing up and authenticating Tailscale, and any other
  7. // kubernetes-specific side jobs.
  8. //
  9. // As with most container things, configuration is passed through environment
  10. // variables. All configuration is optional.
  11. //
  12. // - TS_AUTHKEY: the authkey to use for login.
  13. // - TS_HOSTNAME: the hostname to request for the node.
  14. // - TS_ROUTES: subnet routes to advertise. Explicitly setting it to an empty
  15. // value will cause containerboot to stop acting as a subnet router for any
  16. // previously advertised routes. To accept routes, use TS_EXTRA_ARGS to pass
  17. // in --accept-routes.
  18. // - TS_DEST_IP: proxy all incoming Tailscale traffic to the given
  19. // destination defined by an IP address.
  20. // - TS_EXPERIMENTAL_DEST_DNS_NAME: proxy all incoming Tailscale traffic to the given
  21. // destination defined by a DNS name. The DNS name will be periodically resolved and firewall rules updated accordingly.
  22. // This is currently intended to be used by the Kubernetes operator (ExternalName Services).
  23. // This is an experimental env var and will likely change in the future.
  24. // - TS_TAILNET_TARGET_IP: proxy all incoming non-Tailscale traffic to the given
  25. // destination defined by an IP.
  26. // - TS_TAILNET_TARGET_FQDN: proxy all incoming non-Tailscale traffic to the given
  27. // destination defined by a MagicDNS name.
  28. // - TS_TAILSCALED_EXTRA_ARGS: extra arguments to 'tailscaled'.
  29. // - TS_EXTRA_ARGS: extra arguments to 'tailscale up'.
  30. // - TS_USERSPACE: run with userspace networking (the default)
  31. // instead of kernel networking.
  32. // - TS_STATE_DIR: the directory in which to store tailscaled
  33. // state. The data should persist across container
  34. // restarts.
  35. // - TS_ACCEPT_DNS: whether to use the tailnet's DNS configuration.
  36. // - TS_KUBE_SECRET: the name of the Kubernetes secret in which to
  37. // store tailscaled state.
  38. // - TS_SOCKS5_SERVER: the address on which to listen for SOCKS5
  39. // proxying into the tailnet.
  40. // - TS_OUTBOUND_HTTP_PROXY_LISTEN: the address on which to listen
  41. // for HTTP proxying into the tailnet.
  42. // - TS_SOCKET: the path where the tailscaled LocalAPI socket should
  43. // be created.
  44. // - TS_AUTH_ONCE: if true, only attempt to log in if not already
  45. // logged in. If false (the default, for backwards
  46. // compatibility), forcibly log in every time the
  47. // container starts.
  48. // - TS_SERVE_CONFIG: if specified, is the file path where the ipn.ServeConfig is located.
  49. // It will be applied once tailscaled is up and running. If the file contains
  50. // ${TS_CERT_DOMAIN}, it will be replaced with the value of the available FQDN.
  51. // It cannot be used in conjunction with TS_DEST_IP. The file is watched for changes,
  52. // and will be re-applied when it changes.
  53. // - TS_HEALTHCHECK_ADDR_PORT: if specified, an HTTP health endpoint will be
  54. // served at /healthz at the provided address, which should be in form [<address>]:<port>.
  55. // If not set, no health check will be run. If set to :<port>, addr will default to 0.0.0.0
  56. // The health endpoint will return 200 OK if this node has at least one tailnet IP address,
  57. // otherwise returns 503.
  58. // NB: the health criteria might change in the future.
  59. // - TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR: if specified, a path to a
  60. // directory that contains the tailscaled config file. The config file needs to be
  61. // named cap-<current-tailscaled-cap>.hujson. If this is set, TS_HOSTNAME,
  62. // TS_EXTRA_ARGS, TS_AUTHKEY,
  63. // TS_ROUTES, TS_ACCEPT_DNS env vars must not be set. If this is set,
  64. // containerboot only runs `tailscaled --config <path-to-this-configfile>`
  65. // and not `tailscale up` or `tailscale set`.
  66. // The config file contents are currently read once on container start.
  67. // NB: This env var is currently experimental and the logic will likely change!
  68. // - TS_EXPERIMENTAL_ENABLE_FORWARDING_OPTIMIZATIONS: set to true to
  69. // autoconfigure the default network interface for optimal performance for
  70. // Tailscale subnet router/exit node.
  71. // https://tailscale.com/kb/1320/performance-best-practices#linux-optimizations-for-subnet-routers-and-exit-nodes
  72. // NB: This env var is currently experimental and the logic will likely change!
  73. // - EXPERIMENTAL_ALLOW_PROXYING_CLUSTER_TRAFFIC_VIA_INGRESS: if set to true
  74. // and if this containerboot instance is an L7 ingress proxy (created by
  75. // the Kubernetes operator), set up rules to allow proxying cluster traffic,
  76. // received on the Pod IP of this node, to the ingress target in the cluster.
  77. // This, in conjunction with MagicDNS name resolution in cluster, can be
  78. // useful for cases where a cluster workload needs to access a target in
  79. // cluster using the same hostname (in this case, the MagicDNS name of the ingress proxy)
  80. // as a non-cluster workload on tailnet.
  81. // This is only meant to be configured by the Kubernetes operator.
  82. //
  83. // When running on Kubernetes, containerboot defaults to storing state in the
  84. // "tailscale" kube secret. To store state on local disk instead, set
  85. // TS_KUBE_SECRET="" and TS_STATE_DIR=/path/to/storage/dir. The state dir should
  86. // be persistent storage.
  87. //
  88. // Additionally, if TS_AUTHKEY is not set and the TS_KUBE_SECRET contains an
  89. // "authkey" field, that key is used as the tailscale authkey.
  90. package main
  91. import (
  92. "context"
  93. "errors"
  94. "fmt"
  95. "io/fs"
  96. "log"
  97. "math"
  98. "net"
  99. "net/netip"
  100. "os"
  101. "os/signal"
  102. "path/filepath"
  103. "slices"
  104. "strings"
  105. "sync"
  106. "sync/atomic"
  107. "syscall"
  108. "time"
  109. "golang.org/x/sys/unix"
  110. "tailscale.com/client/tailscale"
  111. "tailscale.com/ipn"
  112. kubeutils "tailscale.com/k8s-operator"
  113. "tailscale.com/tailcfg"
  114. "tailscale.com/types/logger"
  115. "tailscale.com/types/ptr"
  116. "tailscale.com/util/deephash"
  117. "tailscale.com/util/linuxfw"
  118. )
  119. func newNetfilterRunner(logf logger.Logf) (linuxfw.NetfilterRunner, error) {
  120. if defaultBool("TS_TEST_FAKE_NETFILTER", false) {
  121. return linuxfw.NewFakeIPTablesRunner(), nil
  122. }
  123. return linuxfw.New(logf, "")
  124. }
  125. func main() {
  126. log.SetPrefix("boot: ")
  127. tailscale.I_Acknowledge_This_API_Is_Unstable = true
  128. cfg, err := configFromEnv()
  129. if err != nil {
  130. log.Fatalf("invalid configuration: %v", err)
  131. }
  132. if !cfg.UserspaceMode {
  133. if err := ensureTunFile(cfg.Root); err != nil {
  134. log.Fatalf("Unable to create tuntap device file: %v", err)
  135. }
  136. if cfg.ProxyTargetIP != "" || cfg.ProxyTargetDNSName != "" || cfg.Routes != nil || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" {
  137. if err := ensureIPForwarding(cfg.Root, cfg.ProxyTargetIP, cfg.TailnetTargetIP, cfg.TailnetTargetFQDN, cfg.Routes); err != nil {
  138. log.Printf("Failed to enable IP forwarding: %v", err)
  139. log.Printf("To run tailscale as a proxy or router container, IP forwarding must be enabled.")
  140. if cfg.InKubernetes {
  141. log.Fatalf("You can either set the sysctls as a privileged initContainer, or run the tailscale container with privileged=true.")
  142. } else {
  143. log.Fatalf("You can fix this by running the container with privileged=true, or the equivalent in your container runtime that permits access to sysctls.")
  144. }
  145. }
  146. }
  147. }
  148. // Context is used for all setup stuff until we're in steady
  149. // state, so that if something is hanging we eventually time out
  150. // and crashloop the container.
  151. bootCtx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
  152. defer cancel()
  153. if cfg.InKubernetes {
  154. initKubeClient(cfg.Root)
  155. if err := cfg.setupKube(bootCtx); err != nil {
  156. log.Fatalf("error setting up for running on Kubernetes: %v", err)
  157. }
  158. }
  159. client, daemonProcess, err := startTailscaled(bootCtx, cfg)
  160. if err != nil {
  161. log.Fatalf("failed to bring up tailscale: %v", err)
  162. }
  163. killTailscaled := func() {
  164. if err := daemonProcess.Signal(unix.SIGTERM); err != nil {
  165. log.Fatalf("error shutting tailscaled down: %v", err)
  166. }
  167. }
  168. defer killTailscaled()
  169. if cfg.EnableForwardingOptimizations {
  170. if err := client.SetUDPGROForwarding(bootCtx); err != nil {
  171. log.Printf("[unexpected] error enabling UDP GRO forwarding: %v", err)
  172. }
  173. }
  174. w, err := client.WatchIPNBus(bootCtx, ipn.NotifyInitialNetMap|ipn.NotifyInitialPrefs|ipn.NotifyInitialState)
  175. if err != nil {
  176. log.Fatalf("failed to watch tailscaled for updates: %v", err)
  177. }
  178. // Now that we've started tailscaled, we can symlink the socket to the
  179. // default location if needed.
  180. const defaultTailscaledSocketPath = "/var/run/tailscale/tailscaled.sock"
  181. if cfg.Socket != "" && cfg.Socket != defaultTailscaledSocketPath {
  182. // If we were given a socket path, symlink it to the default location so
  183. // that the CLI can find it without any extra flags.
  184. // See #6849.
  185. dir := filepath.Dir(defaultTailscaledSocketPath)
  186. err := os.MkdirAll(dir, 0700)
  187. if err == nil {
  188. err = syscall.Symlink(cfg.Socket, defaultTailscaledSocketPath)
  189. }
  190. if err != nil {
  191. log.Printf("[warning] failed to symlink socket: %v\n\tTo interact with the Tailscale CLI please use `tailscale --socket=%q`", err, cfg.Socket)
  192. }
  193. }
  194. // Because we're still shelling out to `tailscale up` to get access to its
  195. // flag parser, we have to stop watching the IPN bus so that we can block on
  196. // the subcommand without stalling anything. Then once it's done, we resume
  197. // watching the bus.
  198. //
  199. // Depending on the requested mode of operation, this auth step happens at
  200. // different points in containerboot's lifecycle, hence the helper function.
  201. didLogin := false
  202. authTailscale := func() error {
  203. if didLogin {
  204. return nil
  205. }
  206. didLogin = true
  207. w.Close()
  208. if err := tailscaleUp(bootCtx, cfg); err != nil {
  209. return fmt.Errorf("failed to auth tailscale: %v", err)
  210. }
  211. w, err = client.WatchIPNBus(bootCtx, ipn.NotifyInitialNetMap|ipn.NotifyInitialState)
  212. if err != nil {
  213. return fmt.Errorf("rewatching tailscaled for updates after auth: %v", err)
  214. }
  215. return nil
  216. }
  217. if isTwoStepConfigAlwaysAuth(cfg) {
  218. if err := authTailscale(); err != nil {
  219. log.Fatalf("failed to auth tailscale: %v", err)
  220. }
  221. }
  222. authLoop:
  223. for {
  224. n, err := w.Next()
  225. if err != nil {
  226. log.Fatalf("failed to read from tailscaled: %v", err)
  227. }
  228. if n.State != nil {
  229. switch *n.State {
  230. case ipn.NeedsLogin:
  231. if isOneStepConfig(cfg) {
  232. // This could happen if this is the first time tailscaled was run for this
  233. // device and the auth key was not passed via the configfile.
  234. log.Fatalf("invalid state: tailscaled daemon started with a config file, but tailscale is not logged in: ensure you pass a valid auth key in the config file.")
  235. }
  236. if err := authTailscale(); err != nil {
  237. log.Fatalf("failed to auth tailscale: %v", err)
  238. }
  239. case ipn.NeedsMachineAuth:
  240. log.Printf("machine authorization required, please visit the admin panel")
  241. case ipn.Running:
  242. // Technically, all we want is to keep monitoring the bus for
  243. // netmap updates. However, in order to make the container crash
  244. // if tailscale doesn't initially come up, the watch has a
  245. // startup deadline on it. So, we have to break out of this
  246. // watch loop, cancel the watch, and watch again with no
  247. // deadline to continue monitoring for changes.
  248. break authLoop
  249. default:
  250. log.Printf("tailscaled in state %q, waiting", *n.State)
  251. }
  252. }
  253. }
  254. w.Close()
  255. ctx, cancel := contextWithExitSignalWatch()
  256. defer cancel()
  257. if isTwoStepConfigAuthOnce(cfg) {
  258. // Now that we are authenticated, we can set/reset any of the
  259. // settings that we need to.
  260. if err := tailscaleSet(ctx, cfg); err != nil {
  261. log.Fatalf("failed to auth tailscale: %v", err)
  262. }
  263. }
  264. if cfg.ServeConfigPath != "" {
  265. // Remove any serve config that may have been set by a previous run of
  266. // containerboot, but only if we're providing a new one.
  267. if err := client.SetServeConfig(ctx, new(ipn.ServeConfig)); err != nil {
  268. log.Fatalf("failed to unset serve config: %v", err)
  269. }
  270. }
  271. if hasKubeStateStore(cfg) && isTwoStepConfigAuthOnce(cfg) {
  272. // We were told to only auth once, so any secret-bound
  273. // authkey is no longer needed. We don't strictly need to
  274. // wipe it, but it's good hygiene.
  275. log.Printf("Deleting authkey from kube secret")
  276. if err := deleteAuthKey(ctx, cfg.KubeSecret); err != nil {
  277. log.Fatalf("deleting authkey from kube secret: %v", err)
  278. }
  279. }
  280. w, err = client.WatchIPNBus(ctx, ipn.NotifyInitialNetMap|ipn.NotifyInitialState)
  281. if err != nil {
  282. log.Fatalf("rewatching tailscaled for updates after auth: %v", err)
  283. }
  284. var (
  285. startupTasksDone = false
  286. currentIPs deephash.Sum // tailscale IPs assigned to device
  287. currentDeviceID deephash.Sum // device ID
  288. currentDeviceEndpoints deephash.Sum // device FQDN and IPs
  289. currentEgressIPs deephash.Sum
  290. addrs []netip.Prefix
  291. backendAddrs []net.IP
  292. certDomain = new(atomic.Pointer[string])
  293. certDomainChanged = make(chan bool, 1)
  294. h = &healthz{} // http server for the healthz endpoint
  295. healthzRunner = sync.OnceFunc(func() { runHealthz(cfg.HealthCheckAddrPort, h) })
  296. )
  297. if cfg.ServeConfigPath != "" {
  298. go watchServeConfigChanges(ctx, cfg.ServeConfigPath, certDomainChanged, certDomain, client)
  299. }
  300. var nfr linuxfw.NetfilterRunner
  301. if isL3Proxy(cfg) {
  302. nfr, err = newNetfilterRunner(log.Printf)
  303. if err != nil {
  304. log.Fatalf("error creating new netfilter runner: %v", err)
  305. }
  306. }
  307. // Setup for proxies that are configured to proxy to a target specified
  308. // by a DNS name (TS_EXPERIMENTAL_DEST_DNS_NAME).
  309. const defaultCheckPeriod = time.Minute * 10 // how often to check what IPs the DNS name resolves to
  310. var (
  311. tc = make(chan string, 1)
  312. failedResolveAttempts int
  313. t *time.Timer = time.AfterFunc(defaultCheckPeriod, func() {
  314. if cfg.ProxyTargetDNSName != "" {
  315. tc <- "recheck"
  316. }
  317. })
  318. )
  319. // egressSvcsErrorChan will get an error sent to it if this containerboot instance is configured to expose 1+
  320. // egress services in HA mode and errored.
  321. var egressSvcsErrorChan = make(chan error)
  322. defer t.Stop()
  323. // resetTimer resets timer for when to next attempt to resolve the DNS
  324. // name for the proxy configured with TS_EXPERIMENTAL_DEST_DNS_NAME. The
  325. // timer gets reset to 10 minutes from now unless the last resolution
  326. // attempt failed. If one or more consecutive previous resolution
  327. // attempts failed, the next resolution attempt will happen after the smallest
  328. // of (10 minutes, 2 ^ number-of-consecutive-failed-resolution-attempts
  329. // seconds) i.e 2s, 4s, 8s ... 10 minutes.
  330. resetTimer := func(lastResolveFailed bool) {
  331. if !lastResolveFailed {
  332. log.Printf("reconfigureTimer: next DNS resolution attempt in %s", defaultCheckPeriod)
  333. t.Reset(defaultCheckPeriod)
  334. failedResolveAttempts = 0
  335. return
  336. }
  337. minDelay := 2 // 2 seconds
  338. nextTick := time.Second * time.Duration(math.Pow(float64(minDelay), float64(failedResolveAttempts)))
  339. if nextTick > defaultCheckPeriod {
  340. nextTick = defaultCheckPeriod // cap at 10 minutes
  341. }
  342. log.Printf("reconfigureTimer: last DNS resolution attempt failed, next DNS resolution attempt in %v", nextTick)
  343. t.Reset(nextTick)
  344. failedResolveAttempts++
  345. }
  346. var egressSvcsNotify chan ipn.Notify
  347. notifyChan := make(chan ipn.Notify)
  348. errChan := make(chan error)
  349. go func() {
  350. for {
  351. n, err := w.Next()
  352. if err != nil {
  353. errChan <- err
  354. break
  355. } else {
  356. notifyChan <- n
  357. }
  358. }
  359. }()
  360. var wg sync.WaitGroup
  361. runLoop:
  362. for {
  363. select {
  364. case <-ctx.Done():
  365. // Although killTailscaled() is deferred earlier, if we
  366. // have started the reaper defined below, we need to
  367. // kill tailscaled and let reaper clean up child
  368. // processes.
  369. killTailscaled()
  370. break runLoop
  371. case err := <-errChan:
  372. log.Fatalf("failed to read from tailscaled: %v", err)
  373. case n := <-notifyChan:
  374. if n.State != nil && *n.State != ipn.Running {
  375. // Something's gone wrong and we've left the authenticated state.
  376. // Our container image never recovered gracefully from this, and the
  377. // control flow required to make it work now is hard. So, just crash
  378. // the container and rely on the container runtime to restart us,
  379. // whereupon we'll go through initial auth again.
  380. log.Fatalf("tailscaled left running state (now in state %q), exiting", *n.State)
  381. }
  382. if n.NetMap != nil {
  383. addrs = n.NetMap.SelfNode.Addresses().AsSlice()
  384. newCurrentIPs := deephash.Hash(&addrs)
  385. ipsHaveChanged := newCurrentIPs != currentIPs
  386. // Store device ID in a Kubernetes Secret before
  387. // setting up any routing rules. This ensures
  388. // that, for containerboot instances that are
  389. // Kubernetes operator proxies, the operator is
  390. // able to retrieve the device ID from the
  391. // Kubernetes Secret to clean up tailnet nodes
  392. // for proxies whose route setup continuously
  393. // fails.
  394. deviceID := n.NetMap.SelfNode.StableID()
  395. if hasKubeStateStore(cfg) && deephash.Update(&currentDeviceID, &deviceID) {
  396. if err := storeDeviceID(ctx, cfg.KubeSecret, n.NetMap.SelfNode.StableID()); err != nil {
  397. log.Fatalf("storing device ID in Kubernetes Secret: %v", err)
  398. }
  399. }
  400. if cfg.TailnetTargetFQDN != "" {
  401. var (
  402. egressAddrs []netip.Prefix
  403. newCurentEgressIPs deephash.Sum
  404. egressIPsHaveChanged bool
  405. node tailcfg.NodeView
  406. nodeFound bool
  407. )
  408. for _, n := range n.NetMap.Peers {
  409. if strings.EqualFold(n.Name(), cfg.TailnetTargetFQDN) {
  410. node = n
  411. nodeFound = true
  412. break
  413. }
  414. }
  415. if !nodeFound {
  416. log.Printf("Tailscale node %q not found; it either does not exist, or not reachable because of ACLs", cfg.TailnetTargetFQDN)
  417. break
  418. }
  419. egressAddrs = node.Addresses().AsSlice()
  420. newCurentEgressIPs = deephash.Hash(&egressAddrs)
  421. egressIPsHaveChanged = newCurentEgressIPs != currentEgressIPs
  422. // The firewall rules get (re-)installed:
  423. // - on startup
  424. // - when the tailnet IPs of the tailnet target have changed
  425. // - when the tailnet IPs of this node have changed
  426. if (egressIPsHaveChanged || ipsHaveChanged) && len(egressAddrs) != 0 {
  427. var rulesInstalled bool
  428. for _, egressAddr := range egressAddrs {
  429. ea := egressAddr.Addr()
  430. if ea.Is4() || (ea.Is6() && nfr.HasIPV6NAT()) {
  431. rulesInstalled = true
  432. log.Printf("Installing forwarding rules for destination %v", ea.String())
  433. if err := installEgressForwardingRule(ctx, ea.String(), addrs, nfr); err != nil {
  434. log.Fatalf("installing egress proxy rules for destination %s: %v", ea.String(), err)
  435. }
  436. }
  437. }
  438. if !rulesInstalled {
  439. log.Fatalf("no forwarding rules for egress addresses %v, host supports IPv6: %v", egressAddrs, nfr.HasIPV6NAT())
  440. }
  441. }
  442. currentEgressIPs = newCurentEgressIPs
  443. }
  444. if cfg.ProxyTargetIP != "" && len(addrs) != 0 && ipsHaveChanged {
  445. log.Printf("Installing proxy rules")
  446. if err := installIngressForwardingRule(ctx, cfg.ProxyTargetIP, addrs, nfr); err != nil {
  447. log.Fatalf("installing ingress proxy rules: %v", err)
  448. }
  449. }
  450. if cfg.ProxyTargetDNSName != "" && len(addrs) != 0 && ipsHaveChanged {
  451. newBackendAddrs, err := resolveDNS(ctx, cfg.ProxyTargetDNSName)
  452. if err != nil {
  453. log.Printf("[unexpected] error resolving DNS name %s: %v", cfg.ProxyTargetDNSName, err)
  454. resetTimer(true)
  455. continue
  456. }
  457. backendsHaveChanged := !(slices.EqualFunc(backendAddrs, newBackendAddrs, func(ip1 net.IP, ip2 net.IP) bool {
  458. return slices.ContainsFunc(newBackendAddrs, func(ip net.IP) bool { return ip.Equal(ip1) })
  459. }))
  460. if backendsHaveChanged {
  461. log.Printf("installing ingress proxy rules for backends %v", newBackendAddrs)
  462. if err := installIngressForwardingRuleForDNSTarget(ctx, newBackendAddrs, addrs, nfr); err != nil {
  463. log.Fatalf("error installing ingress proxy rules: %v", err)
  464. }
  465. }
  466. resetTimer(false)
  467. backendAddrs = newBackendAddrs
  468. }
  469. if cfg.ServeConfigPath != "" && len(n.NetMap.DNS.CertDomains) != 0 {
  470. cd := n.NetMap.DNS.CertDomains[0]
  471. prev := certDomain.Swap(ptr.To(cd))
  472. if prev == nil || *prev != cd {
  473. select {
  474. case certDomainChanged <- true:
  475. default:
  476. }
  477. }
  478. }
  479. if cfg.TailnetTargetIP != "" && ipsHaveChanged && len(addrs) != 0 {
  480. log.Printf("Installing forwarding rules for destination %v", cfg.TailnetTargetIP)
  481. if err := installEgressForwardingRule(ctx, cfg.TailnetTargetIP, addrs, nfr); err != nil {
  482. log.Fatalf("installing egress proxy rules: %v", err)
  483. }
  484. }
  485. // If this is a L7 cluster ingress proxy (set up
  486. // by Kubernetes operator) and proxying of
  487. // cluster traffic to the ingress target is
  488. // enabled, set up proxy rule each time the
  489. // tailnet IPs of this node change (including
  490. // the first time they become available).
  491. if cfg.AllowProxyingClusterTrafficViaIngress && cfg.ServeConfigPath != "" && ipsHaveChanged && len(addrs) != 0 {
  492. log.Printf("installing rules to forward traffic for %s to node's tailnet IP", cfg.PodIP)
  493. if err := installTSForwardingRuleForDestination(ctx, cfg.PodIP, addrs, nfr); err != nil {
  494. log.Fatalf("installing rules to forward traffic to node's tailnet IP: %v", err)
  495. }
  496. }
  497. currentIPs = newCurrentIPs
  498. // Only store device FQDN and IP addresses to
  499. // Kubernetes Secret when any required proxy
  500. // route setup has succeeded. IPs and FQDN are
  501. // read from the Secret by the Tailscale
  502. // Kubernetes operator and, for some proxy
  503. // types, such as Tailscale Ingress, advertized
  504. // on the Ingress status. Writing them to the
  505. // Secret only after the proxy routing has been
  506. // set up ensures that the operator does not
  507. // advertize endpoints of broken proxies.
  508. // TODO (irbekrm): instead of using the IP and FQDN, have some other mechanism for the proxy signal that it is 'Ready'.
  509. deviceEndpoints := []any{n.NetMap.SelfNode.Name(), n.NetMap.SelfNode.Addresses()}
  510. if hasKubeStateStore(cfg) && deephash.Update(&currentDeviceEndpoints, &deviceEndpoints) {
  511. if err := storeDeviceEndpoints(ctx, cfg.KubeSecret, n.NetMap.SelfNode.Name(), n.NetMap.SelfNode.Addresses().AsSlice()); err != nil {
  512. log.Fatalf("storing device IPs and FQDN in Kubernetes Secret: %v", err)
  513. }
  514. }
  515. if cfg.HealthCheckAddrPort != "" {
  516. h.Lock()
  517. h.hasAddrs = len(addrs) != 0
  518. h.Unlock()
  519. healthzRunner()
  520. }
  521. if egressSvcsNotify != nil {
  522. egressSvcsNotify <- n
  523. }
  524. }
  525. if !startupTasksDone {
  526. // For containerboot instances that act as TCP proxies (proxying traffic to an endpoint
  527. // passed via one of the env vars that containerboot reads) and store state in a
  528. // Kubernetes Secret, we consider startup tasks done at the point when device info has
  529. // been successfully stored to state Secret. For all other containerboot instances, if
  530. // we just get to this point the startup tasks can be considered done.
  531. if !isL3Proxy(cfg) || !hasKubeStateStore(cfg) || (currentDeviceEndpoints != deephash.Sum{} && currentDeviceID != deephash.Sum{}) {
  532. // This log message is used in tests to detect when all
  533. // post-auth configuration is done.
  534. log.Println("Startup complete, waiting for shutdown signal")
  535. startupTasksDone = true
  536. // Configure egress proxy. Egress proxy will set up firewall rules to proxy
  537. // traffic to tailnet targets configured in the provided configuration file. It
  538. // will then continuously monitor the config file and netmap updates and
  539. // reconfigure the firewall rules as needed. If any of its operations fail, it
  540. // will crash this node.
  541. if cfg.EgressSvcsCfgPath != "" {
  542. log.Printf("configuring egress proxy using configuration file at %s", cfg.EgressSvcsCfgPath)
  543. egressSvcsNotify = make(chan ipn.Notify)
  544. ep := egressProxy{
  545. cfgPath: cfg.EgressSvcsCfgPath,
  546. nfr: nfr,
  547. kc: kc,
  548. stateSecret: cfg.KubeSecret,
  549. netmapChan: egressSvcsNotify,
  550. podIPv4: cfg.PodIPv4,
  551. tailnetAddrs: addrs,
  552. }
  553. go func() {
  554. if err := ep.run(ctx, n); err != nil {
  555. egressSvcsErrorChan <- err
  556. }
  557. }()
  558. }
  559. // Wait on tailscaled process. It won't be cleaned up by default when the
  560. // container exits as it is not PID1. TODO (irbekrm): perhaps we can replace the
  561. // reaper by a running cmd.Wait in a goroutine immediately after starting
  562. // tailscaled?
  563. reaper := func() {
  564. defer wg.Done()
  565. for {
  566. var status unix.WaitStatus
  567. _, err := unix.Wait4(daemonProcess.Pid, &status, 0, nil)
  568. if errors.Is(err, unix.EINTR) {
  569. continue
  570. }
  571. if err != nil {
  572. log.Fatalf("Waiting for tailscaled to exit: %v", err)
  573. }
  574. log.Print("tailscaled exited")
  575. os.Exit(0)
  576. }
  577. }
  578. wg.Add(1)
  579. go reaper()
  580. }
  581. }
  582. case <-tc:
  583. newBackendAddrs, err := resolveDNS(ctx, cfg.ProxyTargetDNSName)
  584. if err != nil {
  585. log.Printf("[unexpected] error resolving DNS name %s: %v", cfg.ProxyTargetDNSName, err)
  586. resetTimer(true)
  587. continue
  588. }
  589. backendsHaveChanged := !(slices.EqualFunc(backendAddrs, newBackendAddrs, func(ip1 net.IP, ip2 net.IP) bool {
  590. return slices.ContainsFunc(newBackendAddrs, func(ip net.IP) bool { return ip.Equal(ip1) })
  591. }))
  592. if backendsHaveChanged && len(addrs) != 0 {
  593. log.Printf("Backend address change detected, installing proxy rules for backends %v", newBackendAddrs)
  594. if err := installIngressForwardingRuleForDNSTarget(ctx, newBackendAddrs, addrs, nfr); err != nil {
  595. log.Fatalf("installing ingress proxy rules for DNS target %s: %v", cfg.ProxyTargetDNSName, err)
  596. }
  597. }
  598. backendAddrs = newBackendAddrs
  599. resetTimer(false)
  600. case e := <-egressSvcsErrorChan:
  601. log.Fatalf("egress proxy failed: %v", e)
  602. }
  603. }
  604. wg.Wait()
  605. }
  606. // ensureTunFile checks that /dev/net/tun exists, creating it if
  607. // missing.
  608. func ensureTunFile(root string) error {
  609. // Verify that /dev/net/tun exists, in some container envs it
  610. // needs to be mknod-ed.
  611. if _, err := os.Stat(filepath.Join(root, "dev/net")); errors.Is(err, fs.ErrNotExist) {
  612. if err := os.MkdirAll(filepath.Join(root, "dev/net"), 0755); err != nil {
  613. return err
  614. }
  615. }
  616. if _, err := os.Stat(filepath.Join(root, "dev/net/tun")); errors.Is(err, fs.ErrNotExist) {
  617. dev := unix.Mkdev(10, 200) // tuntap major and minor
  618. if err := unix.Mknod(filepath.Join(root, "dev/net/tun"), 0600|unix.S_IFCHR, int(dev)); err != nil {
  619. return err
  620. }
  621. }
  622. return nil
  623. }
  624. func resolveDNS(ctx context.Context, name string) ([]net.IP, error) {
  625. // TODO (irbekrm): look at using recursive.Resolver instead to resolve
  626. // the DNS names as well as retrieve TTLs. It looks though that this
  627. // seems to return very short TTLs (shorter than on the actual records).
  628. ip4s, err := net.DefaultResolver.LookupIP(ctx, "ip4", name)
  629. if err != nil {
  630. if e, ok := err.(*net.DNSError); !(ok && e.IsNotFound) {
  631. return nil, fmt.Errorf("error looking up IPv4 addresses: %v", err)
  632. }
  633. }
  634. ip6s, err := net.DefaultResolver.LookupIP(ctx, "ip6", name)
  635. if err != nil {
  636. if e, ok := err.(*net.DNSError); !(ok && e.IsNotFound) {
  637. return nil, fmt.Errorf("error looking up IPv6 addresses: %v", err)
  638. }
  639. }
  640. if len(ip4s) == 0 && len(ip6s) == 0 {
  641. return nil, fmt.Errorf("no IPv4 or IPv6 addresses found for host: %s", name)
  642. }
  643. return append(ip4s, ip6s...), nil
  644. }
  645. // contextWithExitSignalWatch watches for SIGTERM/SIGINT signals. It returns a
  646. // context that gets cancelled when a signal is received and a cancel function
  647. // that can be called to free the resources when the watch should be stopped.
  648. func contextWithExitSignalWatch() (context.Context, func()) {
  649. closeChan := make(chan string)
  650. ctx, cancel := context.WithCancel(context.Background())
  651. signalChan := make(chan os.Signal, 1)
  652. signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM)
  653. go func() {
  654. select {
  655. case <-signalChan:
  656. cancel()
  657. case <-closeChan:
  658. return
  659. }
  660. }()
  661. f := func() {
  662. closeChan <- "goodbye"
  663. }
  664. return ctx, f
  665. }
  666. // tailscaledConfigFilePath returns the path to the tailscaled config file that
  667. // should be used for the current capability version. It is determined by the
  668. // TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR environment variable and looks for a
  669. // file named cap-<capability_version>.hujson in the directory. It searches for
  670. // the highest capability version that is less than or equal to the current
  671. // capability version.
  672. func tailscaledConfigFilePath() string {
  673. dir := os.Getenv("TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR")
  674. if dir == "" {
  675. return ""
  676. }
  677. fe, err := os.ReadDir(dir)
  678. if err != nil {
  679. log.Fatalf("error reading tailscaled config directory %q: %v", dir, err)
  680. }
  681. maxCompatVer := tailcfg.CapabilityVersion(-1)
  682. for _, e := range fe {
  683. // We don't check if type if file as in most cases this will
  684. // come from a mounted kube Secret, where the directory contents
  685. // will be various symlinks.
  686. if e.Type().IsDir() {
  687. continue
  688. }
  689. cv, err := kubeutils.CapVerFromFileName(e.Name())
  690. if err != nil {
  691. continue
  692. }
  693. if cv > maxCompatVer && cv <= tailcfg.CurrentCapabilityVersion {
  694. maxCompatVer = cv
  695. }
  696. }
  697. if maxCompatVer == -1 {
  698. log.Fatalf("no tailscaled config file found in %q for current capability version %d", dir, tailcfg.CurrentCapabilityVersion)
  699. }
  700. filePath := filepath.Join(dir, kubeutils.TailscaledConfigFileName(maxCompatVer))
  701. log.Printf("Using tailscaled config file %q to match current capability version %d", filePath, tailcfg.CurrentCapabilityVersion)
  702. return filePath
  703. }