userspace.go 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584
  1. // Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package wgengine
  5. import (
  6. "bufio"
  7. "bytes"
  8. crand "crypto/rand"
  9. "errors"
  10. "fmt"
  11. "io"
  12. "reflect"
  13. "runtime"
  14. "strings"
  15. "sync"
  16. "sync/atomic"
  17. "time"
  18. "go4.org/mem"
  19. "golang.zx2c4.com/wireguard/device"
  20. "golang.zx2c4.com/wireguard/tun"
  21. "inet.af/netaddr"
  22. "tailscale.com/control/controlclient"
  23. "tailscale.com/envknob"
  24. "tailscale.com/health"
  25. "tailscale.com/ipn/ipnstate"
  26. "tailscale.com/net/dns"
  27. "tailscale.com/net/dns/resolver"
  28. "tailscale.com/net/flowtrack"
  29. "tailscale.com/net/interfaces"
  30. "tailscale.com/net/packet"
  31. "tailscale.com/net/tsaddr"
  32. "tailscale.com/net/tsdial"
  33. "tailscale.com/net/tshttpproxy"
  34. "tailscale.com/net/tstun"
  35. "tailscale.com/tailcfg"
  36. "tailscale.com/tstime/mono"
  37. "tailscale.com/types/dnstype"
  38. "tailscale.com/types/ipproto"
  39. "tailscale.com/types/key"
  40. "tailscale.com/types/logger"
  41. "tailscale.com/types/netmap"
  42. "tailscale.com/util/clientmetric"
  43. "tailscale.com/util/deephash"
  44. "tailscale.com/version"
  45. "tailscale.com/wgengine/filter"
  46. "tailscale.com/wgengine/magicsock"
  47. "tailscale.com/wgengine/monitor"
  48. "tailscale.com/wgengine/router"
  49. "tailscale.com/wgengine/wgcfg"
  50. "tailscale.com/wgengine/wglog"
  51. )
  52. const magicDNSPort = 53
  53. var (
  54. magicDNSIP = tsaddr.TailscaleServiceIP()
  55. magicDNSIPv6 = tsaddr.TailscaleServiceIPv6()
  56. )
  57. // Lazy wireguard-go configuration parameters.
  58. const (
  59. // lazyPeerIdleThreshold is the idle duration after
  60. // which we remove a peer from the wireguard configuration.
  61. // (This includes peers that have never been idle, which
  62. // effectively have infinite idleness)
  63. lazyPeerIdleThreshold = 5 * time.Minute
  64. // packetSendTimeUpdateFrequency controls how often we record
  65. // the time that we wrote a packet to an IP address.
  66. packetSendTimeUpdateFrequency = 10 * time.Second
  67. // packetSendRecheckWireguardThreshold controls how long we can go
  68. // between packet sends to an IP before checking to see
  69. // whether this IP address needs to be added back to the
  70. // Wireguard peer oconfig.
  71. packetSendRecheckWireguardThreshold = 1 * time.Minute
  72. )
  73. // statusPollInterval is how often we ask wireguard-go for its engine
  74. // status (as long as there's activity). See docs on its use below.
  75. const statusPollInterval = 1 * time.Minute
  76. type userspaceEngine struct {
  77. logf logger.Logf
  78. wgLogger *wglog.Logger //a wireguard-go logging wrapper
  79. reqCh chan struct{}
  80. waitCh chan struct{} // chan is closed when first Close call completes; contrast with closing bool
  81. timeNow func() mono.Time
  82. tundev *tstun.Wrapper
  83. wgdev *device.Device
  84. router router.Router
  85. confListenPort uint16 // original conf.ListenPort
  86. dns *dns.Manager
  87. magicConn *magicsock.Conn
  88. linkMon *monitor.Mon
  89. linkMonOwned bool // whether we created linkMon (and thus need to close it)
  90. linkMonUnregister func() // unsubscribes from changes; used regardless of linkMonOwned
  91. birdClient BIRDClient // or nil
  92. testMaybeReconfigHook func() // for tests; if non-nil, fires if maybeReconfigWireguardLocked called
  93. // isLocalAddr reports the whether an IP is assigned to the local
  94. // tunnel interface. It's used to reflect local packets
  95. // incorrectly sent to us.
  96. isLocalAddr atomic.Value // of func(netaddr.IP)bool
  97. // isDNSIPOverTailscale reports the whether a DNS resolver's IP
  98. // is being routed over Tailscale.
  99. isDNSIPOverTailscale atomic.Value // of func(netaddr.IP)bool
  100. wgLock sync.Mutex // serializes all wgdev operations; see lock order comment below
  101. lastCfgFull wgcfg.Config
  102. lastNMinPeers int
  103. lastRouterSig deephash.Sum // of router.Config
  104. lastEngineSigFull deephash.Sum // of full wireguard config
  105. lastEngineSigTrim deephash.Sum // of trimmed wireguard config
  106. lastDNSConfig *dns.Config
  107. recvActivityAt map[key.NodePublic]mono.Time
  108. trimmedNodes map[key.NodePublic]bool // set of node keys of peers currently excluded from wireguard config
  109. sentActivityAt map[netaddr.IP]*mono.Time // value is accessed atomically
  110. destIPActivityFuncs map[netaddr.IP]func()
  111. statusBufioReader *bufio.Reader // reusable for UAPI
  112. lastStatusPollTime mono.Time // last time we polled the engine status
  113. lastIsSubnetRouter bool // was the node a primary subnet router in the last run.
  114. mu sync.Mutex // guards following; see lock order comment below
  115. netMap *netmap.NetworkMap // or nil
  116. closing bool // Close was called (even if we're still closing)
  117. statusCallback StatusCallback
  118. peerSequence []key.NodePublic
  119. endpoints []tailcfg.Endpoint
  120. pendOpen map[flowtrack.Tuple]*pendingOpenFlow // see pendopen.go
  121. networkMapCallbacks map[*someHandle]NetworkMapCallback
  122. tsIPByIPPort map[netaddr.IPPort]netaddr.IP // allows registration of IP:ports as belonging to a certain Tailscale IP for whois lookups
  123. pongCallback map[[8]byte]func(packet.TSMPPongReply) // for TSMP pong responses
  124. // Lock ordering: magicsock.Conn.mu, wgLock, then mu.
  125. }
  126. // InternalsGetter is implemented by Engines that can export their internals.
  127. type InternalsGetter interface {
  128. GetInternals() (_ *tstun.Wrapper, _ *magicsock.Conn, ok bool)
  129. }
  130. func (e *userspaceEngine) GetInternals() (_ *tstun.Wrapper, _ *magicsock.Conn, ok bool) {
  131. return e.tundev, e.magicConn, true
  132. }
  133. // ResolvingEngine is implemented by Engines that have DNS resolvers.
  134. type ResolvingEngine interface {
  135. GetResolver() (_ *resolver.Resolver, ok bool)
  136. }
  137. var (
  138. _ ResolvingEngine = (*userspaceEngine)(nil)
  139. _ ResolvingEngine = (*watchdogEngine)(nil)
  140. )
  141. func (e *userspaceEngine) GetResolver() (r *resolver.Resolver, ok bool) {
  142. return e.dns.Resolver(), true
  143. }
  144. // BIRDClient handles communication with the BIRD Internet Routing Daemon.
  145. type BIRDClient interface {
  146. EnableProtocol(proto string) error
  147. DisableProtocol(proto string) error
  148. Close() error
  149. }
  150. // Config is the engine configuration.
  151. type Config struct {
  152. // Tun is the device used by the Engine to exchange packets with
  153. // the OS.
  154. // If nil, a fake Device that does nothing is used.
  155. Tun tun.Device
  156. // IsTAP is whether Tun is actually a TAP (Layer 2) device that'll
  157. // require ethernet headers.
  158. IsTAP bool
  159. // Router interfaces the Engine to the OS network stack.
  160. // If nil, a fake Router that does nothing is used.
  161. Router router.Router
  162. // DNS interfaces the Engine to the OS DNS resolver configuration.
  163. // If nil, a fake OSConfigurator that does nothing is used.
  164. DNS dns.OSConfigurator
  165. // LinkMonitor optionally provides an existing link monitor to re-use.
  166. // If nil, a new link monitor is created.
  167. LinkMonitor *monitor.Mon
  168. // Dialer is the dialer to use for outbound connections.
  169. // If nil, a new Dialer is created
  170. Dialer *tsdial.Dialer
  171. // ListenPort is the port on which the engine will listen.
  172. // If zero, a port is automatically selected.
  173. ListenPort uint16
  174. // RespondToPing determines whether this engine should internally
  175. // reply to ICMP pings, without involving the OS.
  176. // Used in "fake" mode for development.
  177. RespondToPing bool
  178. // BIRDClient, if non-nil, will be used to configure BIRD whenever
  179. // this node is a primary subnet router.
  180. BIRDClient BIRDClient
  181. }
  182. func NewFakeUserspaceEngine(logf logger.Logf, listenPort uint16) (Engine, error) {
  183. logf("Starting userspace wireguard engine (with fake TUN device)")
  184. return NewUserspaceEngine(logf, Config{
  185. ListenPort: listenPort,
  186. RespondToPing: true,
  187. })
  188. }
  189. // NetstackRouterType is a gross cross-package init-time registration
  190. // from netstack to here, informing this package of netstack's router
  191. // type.
  192. var NetstackRouterType reflect.Type
  193. // IsNetstackRouter reports whether e is either fully netstack based
  194. // (without TUN) or is at least using netstack for routing.
  195. func IsNetstackRouter(e Engine) bool {
  196. switch e := e.(type) {
  197. case *userspaceEngine:
  198. if reflect.TypeOf(e.router) == NetstackRouterType {
  199. return true
  200. }
  201. case *watchdogEngine:
  202. return IsNetstackRouter(e.wrap)
  203. }
  204. return IsNetstack(e)
  205. }
  206. // IsNetstack reports whether e is a netstack-based TUN-free engine.
  207. func IsNetstack(e Engine) bool {
  208. ig, ok := e.(InternalsGetter)
  209. if !ok {
  210. return false
  211. }
  212. tw, _, ok := ig.GetInternals()
  213. if !ok {
  214. return false
  215. }
  216. name, err := tw.Name()
  217. return err == nil && name == "FakeTUN"
  218. }
  219. // NewUserspaceEngine creates the named tun device and returns a
  220. // Tailscale Engine running on it.
  221. func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error) {
  222. var closePool closeOnErrorPool
  223. defer closePool.closeAllIfError(&reterr)
  224. if conf.Tun == nil {
  225. logf("[v1] using fake (no-op) tun device")
  226. conf.Tun = tstun.NewFake()
  227. }
  228. if conf.Router == nil {
  229. logf("[v1] using fake (no-op) OS network configurator")
  230. conf.Router = router.NewFake(logf)
  231. }
  232. if conf.DNS == nil {
  233. logf("[v1] using fake (no-op) DNS configurator")
  234. d, err := dns.NewNoopManager()
  235. if err != nil {
  236. return nil, err
  237. }
  238. conf.DNS = d
  239. }
  240. if conf.Dialer == nil {
  241. conf.Dialer = new(tsdial.Dialer)
  242. }
  243. var tsTUNDev *tstun.Wrapper
  244. if conf.IsTAP {
  245. tsTUNDev = tstun.WrapTAP(logf, conf.Tun)
  246. } else {
  247. tsTUNDev = tstun.Wrap(logf, conf.Tun)
  248. }
  249. closePool.add(tsTUNDev)
  250. e := &userspaceEngine{
  251. timeNow: mono.Now,
  252. logf: logf,
  253. reqCh: make(chan struct{}, 1),
  254. waitCh: make(chan struct{}),
  255. tundev: tsTUNDev,
  256. router: conf.Router,
  257. confListenPort: conf.ListenPort,
  258. birdClient: conf.BIRDClient,
  259. }
  260. if e.birdClient != nil {
  261. // Disable the protocol at start time.
  262. if err := e.birdClient.DisableProtocol("tailscale"); err != nil {
  263. return nil, err
  264. }
  265. }
  266. e.isLocalAddr.Store(tsaddr.NewContainsIPFunc(nil))
  267. e.isDNSIPOverTailscale.Store(tsaddr.NewContainsIPFunc(nil))
  268. if conf.LinkMonitor != nil {
  269. e.linkMon = conf.LinkMonitor
  270. } else {
  271. mon, err := monitor.New(logf)
  272. if err != nil {
  273. return nil, err
  274. }
  275. closePool.add(mon)
  276. e.linkMon = mon
  277. e.linkMonOwned = true
  278. }
  279. tunName, _ := conf.Tun.Name()
  280. conf.Dialer.SetTUNName(tunName)
  281. conf.Dialer.SetLinkMonitor(e.linkMon)
  282. e.dns = dns.NewManager(logf, conf.DNS, e.linkMon, conf.Dialer, fwdDNSLinkSelector{e, tunName})
  283. logf("link state: %+v", e.linkMon.InterfaceState())
  284. unregisterMonWatch := e.linkMon.RegisterChangeCallback(func(changed bool, st *interfaces.State) {
  285. tshttpproxy.InvalidateCache()
  286. e.linkChange(changed, st)
  287. })
  288. closePool.addFunc(unregisterMonWatch)
  289. e.linkMonUnregister = unregisterMonWatch
  290. endpointsFn := func(endpoints []tailcfg.Endpoint) {
  291. e.mu.Lock()
  292. e.endpoints = append(e.endpoints[:0], endpoints...)
  293. e.mu.Unlock()
  294. e.RequestStatus()
  295. }
  296. magicsockOpts := magicsock.Options{
  297. Logf: logf,
  298. Port: conf.ListenPort,
  299. EndpointsFunc: endpointsFn,
  300. DERPActiveFunc: e.RequestStatus,
  301. IdleFunc: e.tundev.IdleDuration,
  302. NoteRecvActivity: e.noteRecvActivity,
  303. LinkMonitor: e.linkMon,
  304. }
  305. var err error
  306. e.magicConn, err = magicsock.NewConn(magicsockOpts)
  307. if err != nil {
  308. return nil, fmt.Errorf("wgengine: %v", err)
  309. }
  310. closePool.add(e.magicConn)
  311. e.magicConn.SetNetworkUp(e.linkMon.InterfaceState().AnyInterfaceUp())
  312. tsTUNDev.SetDiscoKey(e.magicConn.DiscoPublicKey())
  313. if conf.RespondToPing {
  314. e.tundev.PostFilterIn = echoRespondToAll
  315. }
  316. e.tundev.PreFilterOut = e.handleLocalPackets
  317. if envknob.BoolDefaultTrue("TS_DEBUG_CONNECT_FAILURES") {
  318. if e.tundev.PreFilterIn != nil {
  319. return nil, errors.New("unexpected PreFilterIn already set")
  320. }
  321. e.tundev.PreFilterIn = e.trackOpenPreFilterIn
  322. if e.tundev.PostFilterOut != nil {
  323. return nil, errors.New("unexpected PostFilterOut already set")
  324. }
  325. e.tundev.PostFilterOut = e.trackOpenPostFilterOut
  326. }
  327. e.wgLogger = wglog.NewLogger(logf)
  328. e.tundev.OnTSMPPongReceived = func(pong packet.TSMPPongReply) {
  329. e.mu.Lock()
  330. defer e.mu.Unlock()
  331. cb := e.pongCallback[pong.Data]
  332. e.logf("wgengine: got TSMP pong %02x, peerAPIPort=%v; cb=%v", pong.Data, pong.PeerAPIPort, cb != nil)
  333. if cb != nil {
  334. go cb(pong)
  335. }
  336. }
  337. // wgdev takes ownership of tundev, will close it when closed.
  338. e.logf("Creating wireguard device...")
  339. e.wgdev = wgcfg.NewDevice(e.tundev, e.magicConn.Bind(), e.wgLogger.DeviceLogger)
  340. closePool.addFunc(e.wgdev.Close)
  341. closePool.addFunc(func() {
  342. if err := e.magicConn.Close(); err != nil {
  343. e.logf("error closing magicconn: %v", err)
  344. }
  345. })
  346. go func() {
  347. up := false
  348. for event := range e.tundev.EventsUpDown() {
  349. if event&tun.EventUp != 0 && !up {
  350. e.logf("external route: up")
  351. e.RequestStatus()
  352. up = true
  353. }
  354. if event&tun.EventDown != 0 && up {
  355. e.logf("external route: down")
  356. e.RequestStatus()
  357. up = false
  358. }
  359. }
  360. }()
  361. e.logf("Bringing wireguard device up...")
  362. if err := e.wgdev.Up(); err != nil {
  363. return nil, fmt.Errorf("wgdev.Up: %w", err)
  364. }
  365. e.logf("Bringing router up...")
  366. if err := e.router.Up(); err != nil {
  367. return nil, fmt.Errorf("router.Up: %w", err)
  368. }
  369. // It's a little pointless to apply no-op settings here (they
  370. // should already be empty?), but it at least exercises the
  371. // router implementation early on.
  372. e.logf("Clearing router settings...")
  373. if err := e.router.Set(nil); err != nil {
  374. return nil, fmt.Errorf("router.Set(nil): %w", err)
  375. }
  376. e.logf("Starting link monitor...")
  377. e.linkMon.Start()
  378. go e.pollResolver()
  379. e.logf("Engine created.")
  380. return e, nil
  381. }
  382. // echoRespondToAll is an inbound post-filter responding to all echo requests.
  383. func echoRespondToAll(p *packet.Parsed, t *tstun.Wrapper) filter.Response {
  384. if p.IsEchoRequest() {
  385. header := p.ICMP4Header()
  386. header.ToResponse()
  387. outp := packet.Generate(&header, p.Payload())
  388. t.InjectOutbound(outp)
  389. // We already responded to it, but it's not an error.
  390. // Proceed with regular delivery. (Since this code is only
  391. // used in fake mode, regular delivery just means throwing
  392. // it away. If this ever gets run in non-fake mode, you'll
  393. // get double responses to pings, which is an indicator you
  394. // shouldn't be doing that I guess.)
  395. return filter.Accept
  396. }
  397. return filter.Accept
  398. }
  399. // handleLocalPackets inspects packets coming from the local network
  400. // stack, and intercepts any packets that should be handled by
  401. // tailscaled directly. Other packets are allowed to proceed into the
  402. // main ACL filter.
  403. func (e *userspaceEngine) handleLocalPackets(p *packet.Parsed, t *tstun.Wrapper) filter.Response {
  404. if verdict := e.handleDNS(p, t); verdict == filter.Drop {
  405. metricMagicDNSPacketIn.Add(1)
  406. // local DNS handled the packet.
  407. return filter.Drop
  408. }
  409. if runtime.GOOS == "darwin" || runtime.GOOS == "ios" {
  410. isLocalAddr, ok := e.isLocalAddr.Load().(func(netaddr.IP) bool)
  411. if !ok {
  412. e.logf("[unexpected] e.isLocalAddr was nil, can't check for loopback packet")
  413. } else if isLocalAddr(p.Dst.IP()) {
  414. // macOS NetworkExtension directs packets destined to the
  415. // tunnel's local IP address into the tunnel, instead of
  416. // looping back within the kernel network stack. We have to
  417. // notice that an outbound packet is actually destined for
  418. // ourselves, and loop it back into macOS.
  419. t.InjectInboundCopy(p.Buffer())
  420. metricReflectToOS.Add(1)
  421. return filter.Drop
  422. }
  423. }
  424. return filter.Accept
  425. }
  426. // handleDNS is an outbound pre-filter resolving Tailscale domains.
  427. func (e *userspaceEngine) handleDNS(p *packet.Parsed, t *tstun.Wrapper) filter.Response {
  428. if p.Dst.Port() == magicDNSPort && p.IPProto == ipproto.UDP {
  429. switch p.Dst.IP() {
  430. case magicDNSIP, magicDNSIPv6:
  431. err := e.dns.EnqueueRequest(append([]byte(nil), p.Payload()...), p.Src)
  432. if err != nil {
  433. e.logf("dns: enqueue: %v", err)
  434. }
  435. return filter.Drop
  436. }
  437. }
  438. return filter.Accept
  439. }
  440. // pollResolver reads responses from the DNS resolver and injects them inbound.
  441. func (e *userspaceEngine) pollResolver() {
  442. for {
  443. bs, to, err := e.dns.NextResponse()
  444. if err == resolver.ErrClosed {
  445. return
  446. }
  447. if err != nil {
  448. e.logf("dns: error: %v", err)
  449. continue
  450. }
  451. var buf []byte
  452. const offset = tstun.PacketStartOffset
  453. switch {
  454. case to.IP().Is4():
  455. h := packet.UDP4Header{
  456. IP4Header: packet.IP4Header{
  457. Src: magicDNSIP,
  458. Dst: to.IP(),
  459. },
  460. SrcPort: magicDNSPort,
  461. DstPort: to.Port(),
  462. }
  463. hlen := h.Len()
  464. // TODO(dmytro): avoid this allocation without importing tstun quirks into dns.
  465. buf = make([]byte, offset+hlen+len(bs))
  466. copy(buf[offset+hlen:], bs)
  467. h.Marshal(buf[offset:])
  468. case to.IP().Is6():
  469. h := packet.UDP6Header{
  470. IP6Header: packet.IP6Header{
  471. Src: magicDNSIPv6,
  472. Dst: to.IP(),
  473. },
  474. SrcPort: magicDNSPort,
  475. DstPort: to.Port(),
  476. }
  477. hlen := h.Len()
  478. // TODO(dmytro): avoid this allocation without importing tstun quirks into dns.
  479. buf = make([]byte, offset+hlen+len(bs))
  480. copy(buf[offset+hlen:], bs)
  481. h.Marshal(buf[offset:])
  482. }
  483. e.tundev.InjectInboundDirect(buf, offset)
  484. }
  485. }
  486. var debugTrimWireguard = envknob.OptBool("TS_DEBUG_TRIM_WIREGUARD")
  487. // forceFullWireguardConfig reports whether we should give wireguard
  488. // our full network map, even for inactive peers
  489. //
  490. // TODO(bradfitz): remove this after our 1.0 launch; we don't want to
  491. // enable wireguard config trimming quite yet because it just landed
  492. // and we haven't got enough time testing it.
  493. func forceFullWireguardConfig(numPeers int) bool {
  494. // Did the user explicitly enable trimmming via the environment variable knob?
  495. if b, ok := debugTrimWireguard.Get(); ok {
  496. return !b
  497. }
  498. if opt := controlclient.TrimWGConfig(); opt != "" {
  499. return !opt.EqualBool(true)
  500. }
  501. // On iOS with large networks, it's critical, so turn on trimming.
  502. // Otherwise we run out of memory from wireguard-go goroutine stacks+buffers.
  503. // This will be the default later for all platforms and network sizes.
  504. if numPeers > 50 && version.OS() == "iOS" {
  505. return false
  506. }
  507. return false
  508. }
  509. // isTrimmablePeer reports whether p is a peer that we can trim out of the
  510. // network map.
  511. //
  512. // For implementation simplificy, we can only trim peers that have
  513. // only non-subnet AllowedIPs (an IPv4 /32 or IPv6 /128), which is the
  514. // common case for most peers. Subnet router nodes will just always be
  515. // created in the wireguard-go config.
  516. func isTrimmablePeer(p *wgcfg.Peer, numPeers int) bool {
  517. if forceFullWireguardConfig(numPeers) {
  518. return false
  519. }
  520. // AllowedIPs must all be single IPs, not subnets.
  521. for _, aip := range p.AllowedIPs {
  522. if !aip.IsSingleIP() {
  523. return false
  524. }
  525. }
  526. return true
  527. }
  528. // noteRecvActivity is called by magicsock when a packet has been
  529. // received for the peer with node key nk. Magicsock calls this no
  530. // more than every 10 seconds for a given peer.
  531. func (e *userspaceEngine) noteRecvActivity(nk key.NodePublic) {
  532. e.wgLock.Lock()
  533. defer e.wgLock.Unlock()
  534. if _, ok := e.recvActivityAt[nk]; !ok {
  535. // Not a trimmable peer we care about tracking. (See isTrimmablePeer)
  536. if e.trimmedNodes[nk] {
  537. e.logf("wgengine: [unexpected] noteReceiveActivity called on idle node %v that's not in recvActivityAt", nk.ShortString())
  538. }
  539. return
  540. }
  541. now := e.timeNow()
  542. e.recvActivityAt[nk] = now
  543. // As long as there's activity, periodically poll the engine to get
  544. // stats for the far away side effect of
  545. // ipn/ipnlocal.LocalBackend.parseWgStatusLocked to log activity, for
  546. // use in various admin dashboards.
  547. // This particularly matters on platforms without a connected GUI, as
  548. // the GUIs generally poll this enough to cause that logging. But
  549. // tailscaled alone did not, hence this.
  550. if e.lastStatusPollTime.IsZero() || now.Sub(e.lastStatusPollTime) >= statusPollInterval {
  551. e.lastStatusPollTime = now
  552. go e.RequestStatus()
  553. }
  554. // If the last activity time jumped a bunch (say, at least
  555. // half the idle timeout) then see if we need to reprogram
  556. // Wireguard. This could probably be just
  557. // lazyPeerIdleThreshold without the divide by 2, but
  558. // maybeReconfigWireguardLocked is cheap enough to call every
  559. // couple minutes (just not on every packet).
  560. if e.trimmedNodes[nk] {
  561. e.logf("wgengine: idle peer %v now active, reconfiguring wireguard", nk.ShortString())
  562. e.maybeReconfigWireguardLocked(nil)
  563. }
  564. }
  565. // isActiveSinceLocked reports whether the peer identified by (nk, ip)
  566. // has had a packet sent to or received from it since t.
  567. //
  568. // e.wgLock must be held.
  569. func (e *userspaceEngine) isActiveSinceLocked(nk key.NodePublic, ip netaddr.IP, t mono.Time) bool {
  570. if e.recvActivityAt[nk].After(t) {
  571. return true
  572. }
  573. timePtr, ok := e.sentActivityAt[ip]
  574. if !ok {
  575. return false
  576. }
  577. return timePtr.LoadAtomic().After(t)
  578. }
  579. // discoChanged are the set of peers whose disco keys have changed, implying they've restarted.
  580. // If a peer is in this set and was previously in the live wireguard config,
  581. // it needs to be first removed and then re-added to flush out its wireguard session key.
  582. // If discoChanged is nil or empty, this extra removal step isn't done.
  583. //
  584. // e.wgLock must be held.
  585. func (e *userspaceEngine) maybeReconfigWireguardLocked(discoChanged map[key.NodePublic]bool) error {
  586. if hook := e.testMaybeReconfigHook; hook != nil {
  587. hook()
  588. return nil
  589. }
  590. full := e.lastCfgFull
  591. e.wgLogger.SetPeers(full.Peers)
  592. // Compute a minimal config to pass to wireguard-go
  593. // based on the full config. Prune off all the peers
  594. // and only add the active ones back.
  595. min := full
  596. min.Peers = make([]wgcfg.Peer, 0, e.lastNMinPeers)
  597. // We'll only keep a peer around if it's been active in
  598. // the past 5 minutes. That's more than WireGuard's key
  599. // rotation time anyway so it's no harm if we remove it
  600. // later if it's been inactive.
  601. activeCutoff := e.timeNow().Add(-lazyPeerIdleThreshold)
  602. // Not all peers can be trimmed from the network map (see
  603. // isTrimmablePeer). For those are are trimmable, keep track of
  604. // their NodeKey and Tailscale IPs. These are the ones we'll need
  605. // to install tracking hooks for to watch their send/receive
  606. // activity.
  607. trackNodes := make([]key.NodePublic, 0, len(full.Peers))
  608. trackIPs := make([]netaddr.IP, 0, len(full.Peers))
  609. trimmedNodes := map[key.NodePublic]bool{} // TODO: don't re-alloc this map each time
  610. needRemoveStep := false
  611. for i := range full.Peers {
  612. p := &full.Peers[i]
  613. nk := p.PublicKey
  614. if !isTrimmablePeer(p, len(full.Peers)) {
  615. min.Peers = append(min.Peers, *p)
  616. if discoChanged[nk] {
  617. needRemoveStep = true
  618. }
  619. continue
  620. }
  621. trackNodes = append(trackNodes, nk)
  622. recentlyActive := false
  623. for _, cidr := range p.AllowedIPs {
  624. trackIPs = append(trackIPs, cidr.IP())
  625. recentlyActive = recentlyActive || e.isActiveSinceLocked(nk, cidr.IP(), activeCutoff)
  626. }
  627. if recentlyActive {
  628. min.Peers = append(min.Peers, *p)
  629. if discoChanged[nk] {
  630. needRemoveStep = true
  631. }
  632. } else {
  633. trimmedNodes[nk] = true
  634. }
  635. }
  636. e.lastNMinPeers = len(min.Peers)
  637. if !deephash.Update(&e.lastEngineSigTrim, &min, trimmedNodes, trackNodes, trackIPs) {
  638. // No changes
  639. return nil
  640. }
  641. e.trimmedNodes = trimmedNodes
  642. e.updateActivityMapsLocked(trackNodes, trackIPs)
  643. if needRemoveStep {
  644. minner := min
  645. minner.Peers = nil
  646. numRemove := 0
  647. for _, p := range min.Peers {
  648. if discoChanged[p.PublicKey] {
  649. numRemove++
  650. continue
  651. }
  652. minner.Peers = append(minner.Peers, p)
  653. }
  654. if numRemove > 0 {
  655. e.logf("wgengine: Reconfig: removing session keys for %d peers", numRemove)
  656. if err := wgcfg.ReconfigDevice(e.wgdev, &minner, e.logf); err != nil {
  657. e.logf("wgdev.Reconfig: %v", err)
  658. return err
  659. }
  660. }
  661. }
  662. e.logf("wgengine: Reconfig: configuring userspace wireguard config (with %d/%d peers)", len(min.Peers), len(full.Peers))
  663. if err := wgcfg.ReconfigDevice(e.wgdev, &min, e.logf); err != nil {
  664. e.logf("wgdev.Reconfig: %v", err)
  665. return err
  666. }
  667. return nil
  668. }
  669. // updateActivityMapsLocked updates the data structures used for tracking the activity
  670. // of wireguard peers that we might add/remove dynamically from the real config
  671. // as given to wireguard-go.
  672. //
  673. // e.wgLock must be held.
  674. func (e *userspaceEngine) updateActivityMapsLocked(trackNodes []key.NodePublic, trackIPs []netaddr.IP) {
  675. // Generate the new map of which nodekeys we want to track
  676. // receive times for.
  677. mr := map[key.NodePublic]mono.Time{} // TODO: only recreate this if set of keys changed
  678. for _, nk := range trackNodes {
  679. // Preserve old times in the new map, but also
  680. // populate map entries for new trackNodes values with
  681. // time.Time{} zero values. (Only entries in this map
  682. // are tracked, so the Time zero values allow it to be
  683. // tracked later)
  684. mr[nk] = e.recvActivityAt[nk]
  685. }
  686. e.recvActivityAt = mr
  687. oldTime := e.sentActivityAt
  688. e.sentActivityAt = make(map[netaddr.IP]*mono.Time, len(oldTime))
  689. oldFunc := e.destIPActivityFuncs
  690. e.destIPActivityFuncs = make(map[netaddr.IP]func(), len(oldFunc))
  691. updateFn := func(timePtr *mono.Time) func() {
  692. return func() {
  693. now := e.timeNow()
  694. old := timePtr.LoadAtomic()
  695. // How long's it been since we last sent a packet?
  696. elapsed := now.Sub(old)
  697. if old == 0 {
  698. // For our first packet, old is 0, which has indeterminate meaning.
  699. // Set elapsed to a big number (four score and seven years).
  700. elapsed = 762642 * time.Hour
  701. }
  702. if elapsed >= packetSendTimeUpdateFrequency {
  703. timePtr.StoreAtomic(now)
  704. }
  705. // On a big jump, assume we might no longer be in the wireguard
  706. // config and go check.
  707. if elapsed >= packetSendRecheckWireguardThreshold {
  708. e.wgLock.Lock()
  709. defer e.wgLock.Unlock()
  710. e.maybeReconfigWireguardLocked(nil)
  711. }
  712. }
  713. }
  714. for _, ip := range trackIPs {
  715. timePtr := oldTime[ip]
  716. if timePtr == nil {
  717. timePtr = new(mono.Time)
  718. }
  719. e.sentActivityAt[ip] = timePtr
  720. fn := oldFunc[ip]
  721. if fn == nil {
  722. fn = updateFn(timePtr)
  723. }
  724. e.destIPActivityFuncs[ip] = fn
  725. }
  726. e.tundev.SetDestIPActivityFuncs(e.destIPActivityFuncs)
  727. }
  728. // hasOverlap checks if there is a IPPrefix which is common amongst the two
  729. // provided slices.
  730. func hasOverlap(aips, rips []netaddr.IPPrefix) bool {
  731. for _, aip := range aips {
  732. for _, rip := range rips {
  733. if aip == rip {
  734. return true
  735. }
  736. }
  737. }
  738. return false
  739. }
  740. func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config, dnsCfg *dns.Config, debug *tailcfg.Debug) error {
  741. if routerCfg == nil {
  742. panic("routerCfg must not be nil")
  743. }
  744. if dnsCfg == nil {
  745. panic("dnsCfg must not be nil")
  746. }
  747. e.isLocalAddr.Store(tsaddr.NewContainsIPFunc(routerCfg.LocalAddrs))
  748. e.wgLock.Lock()
  749. defer e.wgLock.Unlock()
  750. e.lastDNSConfig = dnsCfg
  751. peerSet := make(map[key.NodePublic]struct{}, len(cfg.Peers))
  752. e.mu.Lock()
  753. e.peerSequence = e.peerSequence[:0]
  754. for _, p := range cfg.Peers {
  755. e.peerSequence = append(e.peerSequence, p.PublicKey)
  756. peerSet[p.PublicKey] = struct{}{}
  757. }
  758. e.mu.Unlock()
  759. listenPort := e.confListenPort
  760. if debug != nil && debug.RandomizeClientPort {
  761. listenPort = 0
  762. }
  763. isSubnetRouter := false
  764. if e.birdClient != nil {
  765. isSubnetRouter = hasOverlap(e.netMap.SelfNode.PrimaryRoutes, e.netMap.Hostinfo.RoutableIPs)
  766. }
  767. isSubnetRouterChanged := isSubnetRouter != e.lastIsSubnetRouter
  768. engineChanged := deephash.Update(&e.lastEngineSigFull, cfg)
  769. routerChanged := deephash.Update(&e.lastRouterSig, routerCfg, dnsCfg)
  770. if !engineChanged && !routerChanged && listenPort == e.magicConn.LocalPort() && !isSubnetRouterChanged {
  771. return ErrNoChanges
  772. }
  773. // TODO(bradfitz,danderson): maybe delete this isDNSIPOverTailscale
  774. // field and delete the resolver.ForwardLinkSelector hook and
  775. // instead have ipnlocal populate a map of DNS IP => linkName and
  776. // put that in the *dns.Config instead, and plumb it down to the
  777. // dns.Manager. Maybe also with isLocalAddr above.
  778. e.isDNSIPOverTailscale.Store(tsaddr.NewContainsIPFunc(dnsIPsOverTailscale(dnsCfg, routerCfg)))
  779. // See if any peers have changed disco keys, which means they've restarted.
  780. // If so, we need to update the wireguard-go/device.Device in two phases:
  781. // once without the node which has restarted, to clear its wireguard session key,
  782. // and a second time with it.
  783. discoChanged := make(map[key.NodePublic]bool)
  784. {
  785. prevEP := make(map[key.NodePublic]key.DiscoPublic)
  786. for i := range e.lastCfgFull.Peers {
  787. if p := &e.lastCfgFull.Peers[i]; !p.DiscoKey.IsZero() {
  788. prevEP[p.PublicKey] = p.DiscoKey
  789. }
  790. }
  791. for i := range cfg.Peers {
  792. p := &cfg.Peers[i]
  793. if p.DiscoKey.IsZero() {
  794. continue
  795. }
  796. pub := p.PublicKey
  797. if old, ok := prevEP[pub]; ok && old != p.DiscoKey {
  798. discoChanged[pub] = true
  799. e.logf("wgengine: Reconfig: %s changed from %q to %q", pub.ShortString(), old, p.DiscoKey)
  800. }
  801. }
  802. }
  803. e.lastCfgFull = *cfg.Clone()
  804. // Tell magicsock about the new (or initial) private key
  805. // (which is needed by DERP) before wgdev gets it, as wgdev
  806. // will start trying to handshake, which we want to be able to
  807. // go over DERP.
  808. if err := e.magicConn.SetPrivateKey(cfg.PrivateKey); err != nil {
  809. e.logf("wgengine: Reconfig: SetPrivateKey: %v", err)
  810. }
  811. e.magicConn.UpdatePeers(peerSet)
  812. e.magicConn.SetPreferredPort(listenPort)
  813. if err := e.maybeReconfigWireguardLocked(discoChanged); err != nil {
  814. return err
  815. }
  816. if routerChanged {
  817. e.logf("wgengine: Reconfig: configuring router")
  818. err := e.router.Set(routerCfg)
  819. health.SetRouterHealth(err)
  820. if err != nil {
  821. return err
  822. }
  823. // Keep DNS configuration after router configuration, as some
  824. // DNS managers refuse to apply settings if the device has no
  825. // assigned address.
  826. e.logf("wgengine: Reconfig: configuring DNS")
  827. err = e.dns.Set(*dnsCfg)
  828. health.SetDNSHealth(err)
  829. if err != nil {
  830. return err
  831. }
  832. }
  833. if isSubnetRouterChanged && e.birdClient != nil {
  834. e.logf("wgengine: Reconfig: configuring BIRD")
  835. var err error
  836. if isSubnetRouter {
  837. err = e.birdClient.EnableProtocol("tailscale")
  838. } else {
  839. err = e.birdClient.DisableProtocol("tailscale")
  840. }
  841. if err != nil {
  842. // Log but don't fail here.
  843. e.logf("wgengine: error configuring BIRD: %v", err)
  844. } else {
  845. e.lastIsSubnetRouter = isSubnetRouter
  846. }
  847. }
  848. e.logf("[v1] wgengine: Reconfig done")
  849. return nil
  850. }
// GetFilter returns the packet filter reported by the engine's tun device
// (e.tundev).
func (e *userspaceEngine) GetFilter() *filter.Filter {
	return e.tundev.GetFilter()
}
// SetFilter hands filt to the engine's tun device (e.tundev) as its packet
// filter.
func (e *userspaceEngine) SetFilter(filt *filter.Filter) {
	e.tundev.SetFilter(filt)
}
  857. func (e *userspaceEngine) SetStatusCallback(cb StatusCallback) {
  858. e.mu.Lock()
  859. defer e.mu.Unlock()
  860. e.statusCallback = cb
  861. }
  862. func (e *userspaceEngine) getStatusCallback() StatusCallback {
  863. e.mu.Lock()
  864. defer e.mu.Unlock()
  865. return e.statusCallback
  866. }
// singleNewline is the trailing line terminator that getStatus trims from
// each line it reads.
var singleNewline = []byte{'\n'}

// ErrEngineClosing is returned by getStatus when the engine has been marked
// as closing and therefore has no status to report.
var ErrEngineClosing = errors.New("engine closing; no status")
  869. func (e *userspaceEngine) getStatus() (*Status, error) {
  870. // Grab derpConns before acquiring wgLock to not violate lock ordering;
  871. // the DERPs method acquires magicsock.Conn.mu.
  872. // (See comment in userspaceEngine's declaration.)
  873. derpConns := e.magicConn.DERPs()
  874. e.wgLock.Lock()
  875. defer e.wgLock.Unlock()
  876. e.mu.Lock()
  877. closing := e.closing
  878. e.mu.Unlock()
  879. if closing {
  880. return nil, ErrEngineClosing
  881. }
  882. if e.wgdev == nil {
  883. // RequestStatus was invoked before the wgengine has
  884. // finished initializing. This can happen when wgegine
  885. // provides a callback to magicsock for endpoint
  886. // updates that calls RequestStatus.
  887. return nil, nil
  888. }
  889. pr, pw := io.Pipe()
  890. defer pr.Close() // to unblock writes on error path returns
  891. errc := make(chan error, 1)
  892. go func() {
  893. defer pw.Close()
  894. // TODO(apenwarr): get rid of silly uapi stuff for in-process comms
  895. // FIXME: get notified of status changes instead of polling.
  896. err := e.wgdev.IpcGetOperation(pw)
  897. if err != nil {
  898. err = fmt.Errorf("IpcGetOperation: %w", err)
  899. }
  900. errc <- err
  901. }()
  902. pp := make(map[key.NodePublic]ipnstate.PeerStatusLite)
  903. var p ipnstate.PeerStatusLite
  904. var hst1, hst2, n int64
  905. br := e.statusBufioReader
  906. if br != nil {
  907. br.Reset(pr)
  908. } else {
  909. br = bufio.NewReaderSize(pr, 1<<10)
  910. e.statusBufioReader = br
  911. }
  912. for {
  913. line, err := br.ReadSlice('\n')
  914. if err == io.EOF {
  915. break
  916. }
  917. if err != nil {
  918. return nil, fmt.Errorf("reading from UAPI pipe: %w", err)
  919. }
  920. line = bytes.TrimSuffix(line, singleNewline)
  921. k := line
  922. var v mem.RO
  923. if i := bytes.IndexByte(line, '='); i != -1 {
  924. k = line[:i]
  925. v = mem.B(line[i+1:])
  926. }
  927. switch string(k) {
  928. case "public_key":
  929. pk, err := key.ParseNodePublicUntyped(v)
  930. if err != nil {
  931. return nil, fmt.Errorf("IpcGetOperation: invalid key in line %q", line)
  932. }
  933. if !p.NodeKey.IsZero() {
  934. pp[p.NodeKey] = p
  935. }
  936. p = ipnstate.PeerStatusLite{NodeKey: pk}
  937. case "rx_bytes":
  938. n, err = mem.ParseInt(v, 10, 64)
  939. p.RxBytes = n
  940. if err != nil {
  941. return nil, fmt.Errorf("IpcGetOperation: rx_bytes invalid: %#v", line)
  942. }
  943. case "tx_bytes":
  944. n, err = mem.ParseInt(v, 10, 64)
  945. p.TxBytes = n
  946. if err != nil {
  947. return nil, fmt.Errorf("IpcGetOperation: tx_bytes invalid: %#v", line)
  948. }
  949. case "last_handshake_time_sec":
  950. hst1, err = mem.ParseInt(v, 10, 64)
  951. if err != nil {
  952. return nil, fmt.Errorf("IpcGetOperation: hst1 invalid: %#v", line)
  953. }
  954. case "last_handshake_time_nsec":
  955. hst2, err = mem.ParseInt(v, 10, 64)
  956. if err != nil {
  957. return nil, fmt.Errorf("IpcGetOperation: hst2 invalid: %#v", line)
  958. }
  959. if hst1 != 0 || hst2 != 0 {
  960. p.LastHandshake = time.Unix(hst1, hst2)
  961. } // else leave at time.IsZero()
  962. }
  963. }
  964. if !p.NodeKey.IsZero() {
  965. pp[p.NodeKey] = p
  966. }
  967. if err := <-errc; err != nil {
  968. return nil, fmt.Errorf("IpcGetOperation: %v", err)
  969. }
  970. e.mu.Lock()
  971. defer e.mu.Unlock()
  972. // Do two passes, one to calculate size and the other to populate.
  973. // This code is sensitive to allocations.
  974. npeers := 0
  975. for _, pk := range e.peerSequence {
  976. if _, ok := pp[pk]; ok { // ignore idle ones not in wireguard-go's config
  977. npeers++
  978. }
  979. }
  980. peers := make([]ipnstate.PeerStatusLite, 0, npeers)
  981. for _, pk := range e.peerSequence {
  982. if p, ok := pp[pk]; ok { // ignore idle ones not in wireguard-go's config
  983. peers = append(peers, p)
  984. }
  985. }
  986. return &Status{
  987. LocalAddrs: append([]tailcfg.Endpoint(nil), e.endpoints...),
  988. Peers: peers,
  989. DERPs: derpConns,
  990. }, nil
  991. }
  992. func (e *userspaceEngine) RequestStatus() {
  993. // This is slightly tricky. e.getStatus() can theoretically get
  994. // blocked inside wireguard for a while, and RequestStatus() is
  995. // sometimes called from a goroutine, so we don't want a lot of
  996. // them hanging around. On the other hand, requesting multiple
  997. // status updates simultaneously is pointless anyway; they will
  998. // all say the same thing.
  999. // Enqueue at most one request. If one is in progress already, this
  1000. // adds one more to the queue. If one has been requested but not
  1001. // started, it is a no-op.
  1002. select {
  1003. case e.reqCh <- struct{}{}:
  1004. default:
  1005. }
  1006. // Dequeue at most one request. Another thread may have already
  1007. // dequeued the request we enqueued above, which is fine, since the
  1008. // information is guaranteed to be at least as recent as the current
  1009. // call to RequestStatus().
  1010. select {
  1011. case <-e.reqCh:
  1012. s, err := e.getStatus()
  1013. if s == nil && err == nil {
  1014. e.logf("[unexpected] RequestStatus: both s and err are nil")
  1015. return
  1016. }
  1017. if cb := e.getStatusCallback(); cb != nil {
  1018. cb(s, err)
  1019. }
  1020. default:
  1021. }
  1022. }
// Close shuts the engine down and releases the components it uses. Only the
// first call does any work: later calls see e.closing already set and return
// immediately. The final step closes e.waitCh, which unblocks Wait.
func (e *userspaceEngine) Close() {
	// Flip the closing flag under e.mu so that only one caller proceeds.
	e.mu.Lock()
	if e.closing {
		e.mu.Unlock()
		return
	}
	e.closing = true
	e.mu.Unlock()

	// Issue a set operation with an empty input; the returned error is
	// ignored.
	// NOTE(review): presumably this exists to flush or reset device state
	// before teardown — confirm intent.
	r := bufio.NewReader(strings.NewReader(""))
	e.wgdev.IpcSetOperation(r)
	e.magicConn.Close()
	e.linkMonUnregister()
	// Only close the link monitor when linkMonOwned is set.
	if e.linkMonOwned {
		e.linkMon.Close()
	}
	e.dns.Down()
	e.router.Close()
	e.wgdev.Close()
	e.tundev.Close()
	if e.birdClient != nil {
		// Best effort: the errors from these calls are not checked.
		e.birdClient.DisableProtocol("tailscale")
		e.birdClient.Close()
	}
	close(e.waitCh)
}
// Wait blocks until e.waitCh is closed, which Close does as its last step.
func (e *userspaceEngine) Wait() {
	<-e.waitCh
}
// GetLinkMonitor returns the link monitor the engine is using (e.linkMon).
func (e *userspaceEngine) GetLinkMonitor() *monitor.Mon {
	return e.linkMon
}
// LinkChange signals a network change event. It's currently
// (2021-03-03) only called on Android. On other platforms, linkMon
// generates link change events for us.
//
// The bool argument is ignored; the call just injects an event into the
// engine's link monitor.
func (e *userspaceEngine) LinkChange(_ bool) {
	e.linkMon.InjectEvent()
}
  1060. func (e *userspaceEngine) linkChange(changed bool, cur *interfaces.State) {
  1061. up := cur.AnyInterfaceUp()
  1062. if !up {
  1063. e.logf("LinkChange: all links down; pausing: %v", cur)
  1064. } else if changed {
  1065. e.logf("LinkChange: major, rebinding. New state: %v", cur)
  1066. } else {
  1067. e.logf("[v1] LinkChange: minor")
  1068. }
  1069. health.SetAnyInterfaceUp(up)
  1070. e.magicConn.SetNetworkUp(up)
  1071. if !up || changed {
  1072. if err := e.dns.FlushCaches(); err != nil {
  1073. e.logf("wgengine: dns flush failed after major link change: %v", err)
  1074. }
  1075. }
  1076. // Hacky workaround for Linux DNS issue 2458: on
  1077. // suspend/resume or whenever NetworkManager is started, it
  1078. // nukes all systemd-resolved configs. So reapply our DNS
  1079. // config on major link change.
  1080. if (runtime.GOOS == "linux" || runtime.GOOS == "android") && changed {
  1081. e.wgLock.Lock()
  1082. dnsCfg := e.lastDNSConfig
  1083. e.wgLock.Unlock()
  1084. if dnsCfg != nil {
  1085. if err := e.dns.Set(*dnsCfg); err != nil {
  1086. e.logf("wgengine: error setting DNS config after major link change: %v", err)
  1087. } else {
  1088. e.logf("wgengine: set DNS config again after major link change")
  1089. }
  1090. }
  1091. }
  1092. why := "link-change-minor"
  1093. if changed {
  1094. why = "link-change-major"
  1095. e.magicConn.Rebind()
  1096. }
  1097. e.magicConn.ReSTUN(why)
  1098. }
  1099. func (e *userspaceEngine) AddNetworkMapCallback(cb NetworkMapCallback) func() {
  1100. e.mu.Lock()
  1101. defer e.mu.Unlock()
  1102. if e.networkMapCallbacks == nil {
  1103. e.networkMapCallbacks = make(map[*someHandle]NetworkMapCallback)
  1104. }
  1105. h := new(someHandle)
  1106. e.networkMapCallbacks[h] = cb
  1107. return func() {
  1108. e.mu.Lock()
  1109. defer e.mu.Unlock()
  1110. delete(e.networkMapCallbacks, h)
  1111. }
  1112. }
// SetNetInfoCallback forwards cb to magicsock, which is where the callback
// is actually registered.
func (e *userspaceEngine) SetNetInfoCallback(cb NetInfoCallback) {
	e.magicConn.SetNetInfoCallback(cb)
}
// SetDERPMap forwards the DERP map dm to magicsock.
func (e *userspaceEngine) SetDERPMap(dm *tailcfg.DERPMap) {
	e.magicConn.SetDERPMap(dm)
}
  1119. func (e *userspaceEngine) SetNetworkMap(nm *netmap.NetworkMap) {
  1120. e.magicConn.SetNetworkMap(nm)
  1121. e.mu.Lock()
  1122. e.netMap = nm
  1123. callbacks := make([]NetworkMapCallback, 0, 4)
  1124. for _, fn := range e.networkMapCallbacks {
  1125. callbacks = append(callbacks, fn)
  1126. }
  1127. e.mu.Unlock()
  1128. for _, fn := range callbacks {
  1129. fn(nm)
  1130. }
  1131. }
// DiscoPublicKey returns the disco public key reported by magicsock.
func (e *userspaceEngine) DiscoPublicKey() key.DiscoPublic {
	return e.magicConn.DiscoPublicKey()
}
  1135. func (e *userspaceEngine) UpdateStatus(sb *ipnstate.StatusBuilder) {
  1136. st, err := e.getStatus()
  1137. if err != nil {
  1138. e.logf("wgengine: getStatus: %v", err)
  1139. return
  1140. }
  1141. for _, ps := range st.Peers {
  1142. sb.AddPeer(ps.NodeKey, &ipnstate.PeerStatus{
  1143. RxBytes: int64(ps.RxBytes),
  1144. TxBytes: int64(ps.TxBytes),
  1145. LastHandshake: ps.LastHandshake,
  1146. InEngine: true,
  1147. })
  1148. }
  1149. e.magicConn.UpdateStatus(sb)
  1150. }
  1151. func (e *userspaceEngine) Ping(ip netaddr.IP, useTSMP bool, cb func(*ipnstate.PingResult)) {
  1152. res := &ipnstate.PingResult{IP: ip.String()}
  1153. pip, ok := e.PeerForIP(ip)
  1154. if !ok {
  1155. e.logf("ping(%v): no matching peer", ip)
  1156. res.Err = "no matching peer"
  1157. cb(res)
  1158. return
  1159. }
  1160. if pip.IsSelf {
  1161. res.Err = fmt.Sprintf("%v is local Tailscale IP", ip)
  1162. res.IsLocalIP = true
  1163. cb(res)
  1164. return
  1165. }
  1166. peer := pip.Node
  1167. pingType := "disco"
  1168. if useTSMP {
  1169. pingType = "TSMP"
  1170. }
  1171. e.logf("ping(%v): sending %v ping to %v %v ...", ip, pingType, peer.Key.ShortString(), peer.ComputedName)
  1172. if useTSMP {
  1173. e.sendTSMPPing(ip, peer, res, cb)
  1174. } else {
  1175. e.magicConn.Ping(peer, res, cb)
  1176. }
  1177. }
  1178. func (e *userspaceEngine) mySelfIPMatchingFamily(dst netaddr.IP) (src netaddr.IP, err error) {
  1179. e.mu.Lock()
  1180. defer e.mu.Unlock()
  1181. if e.netMap == nil {
  1182. return netaddr.IP{}, errors.New("no netmap")
  1183. }
  1184. for _, a := range e.netMap.Addresses {
  1185. if a.IsSingleIP() && a.IP().BitLen() == dst.BitLen() {
  1186. return a.IP(), nil
  1187. }
  1188. }
  1189. if len(e.netMap.Addresses) == 0 {
  1190. return netaddr.IP{}, errors.New("no self address in netmap")
  1191. }
  1192. return netaddr.IP{}, errors.New("no self address in netmap matching address family")
  1193. }
  1194. func (e *userspaceEngine) sendTSMPPing(ip netaddr.IP, peer *tailcfg.Node, res *ipnstate.PingResult, cb func(*ipnstate.PingResult)) {
  1195. srcIP, err := e.mySelfIPMatchingFamily(ip)
  1196. if err != nil {
  1197. res.Err = err.Error()
  1198. cb(res)
  1199. return
  1200. }
  1201. var iph packet.Header
  1202. if srcIP.Is4() {
  1203. iph = packet.IP4Header{
  1204. IPProto: ipproto.TSMP,
  1205. Src: srcIP,
  1206. Dst: ip,
  1207. }
  1208. } else {
  1209. iph = packet.IP6Header{
  1210. IPProto: ipproto.TSMP,
  1211. Src: srcIP,
  1212. Dst: ip,
  1213. }
  1214. }
  1215. var data [8]byte
  1216. crand.Read(data[:])
  1217. expireTimer := time.AfterFunc(10*time.Second, func() {
  1218. e.setTSMPPongCallback(data, nil)
  1219. })
  1220. t0 := time.Now()
  1221. e.setTSMPPongCallback(data, func(pong packet.TSMPPongReply) {
  1222. expireTimer.Stop()
  1223. d := time.Since(t0)
  1224. res.LatencySeconds = d.Seconds()
  1225. res.NodeIP = ip.String()
  1226. res.NodeName = peer.ComputedName
  1227. res.PeerAPIPort = pong.PeerAPIPort
  1228. cb(res)
  1229. })
  1230. var tsmpPayload [9]byte
  1231. tsmpPayload[0] = byte(packet.TSMPTypePing)
  1232. copy(tsmpPayload[1:], data[:])
  1233. tsmpPing := packet.Generate(iph, tsmpPayload[:])
  1234. e.tundev.InjectOutbound(tsmpPing)
  1235. }
  1236. func (e *userspaceEngine) setTSMPPongCallback(data [8]byte, cb func(packet.TSMPPongReply)) {
  1237. e.mu.Lock()
  1238. defer e.mu.Unlock()
  1239. if e.pongCallback == nil {
  1240. e.pongCallback = map[[8]byte]func(packet.TSMPPongReply){}
  1241. }
  1242. if cb == nil {
  1243. delete(e.pongCallback, data)
  1244. } else {
  1245. e.pongCallback[data] = cb
  1246. }
  1247. }
  1248. func (e *userspaceEngine) RegisterIPPortIdentity(ipport netaddr.IPPort, tsIP netaddr.IP) {
  1249. e.mu.Lock()
  1250. defer e.mu.Unlock()
  1251. if e.tsIPByIPPort == nil {
  1252. e.tsIPByIPPort = make(map[netaddr.IPPort]netaddr.IP)
  1253. }
  1254. e.tsIPByIPPort[ipport] = tsIP
  1255. }
  1256. func (e *userspaceEngine) UnregisterIPPortIdentity(ipport netaddr.IPPort) {
  1257. e.mu.Lock()
  1258. defer e.mu.Unlock()
  1259. if e.tsIPByIPPort == nil {
  1260. return
  1261. }
  1262. delete(e.tsIPByIPPort, ipport)
  1263. }
// whoIsSleeps lists the delays WhoIsIPPort sleeps before each of its lookup
// attempts, in order. The first delay is zero, so the first attempt happens
// immediately.
var whoIsSleeps = [...]time.Duration{
	0,
	10 * time.Millisecond,
	20 * time.Millisecond,
	50 * time.Millisecond,
	100 * time.Millisecond,
}
  1271. func (e *userspaceEngine) WhoIsIPPort(ipport netaddr.IPPort) (tsIP netaddr.IP, ok bool) {
  1272. // We currently have a registration race,
  1273. // https://github.com/tailscale/tailscale/issues/1616,
  1274. // so loop a few times for now waiting for the registration
  1275. // to appear.
  1276. // TODO(bradfitz,namansood): remove this once #1616 is fixed.
  1277. for _, d := range whoIsSleeps {
  1278. time.Sleep(d)
  1279. e.mu.Lock()
  1280. tsIP, ok = e.tsIPByIPPort[ipport]
  1281. e.mu.Unlock()
  1282. if ok {
  1283. return tsIP, true
  1284. }
  1285. }
  1286. return tsIP, false
  1287. }
  1288. // PeerForIP returns the Node in the wireguard config
  1289. // that's responsible for handling the given IP address.
  1290. //
  1291. // If none is found in the wireguard config but one is found in
  1292. // the netmap, it's described in an error.
  1293. //
  1294. //
  1295. // peerForIP acquires both e.mu and e.wgLock, but neither at the same
  1296. // time.
  1297. func (e *userspaceEngine) PeerForIP(ip netaddr.IP) (ret PeerForIP, ok bool) {
  1298. e.mu.Lock()
  1299. nm := e.netMap
  1300. e.mu.Unlock()
  1301. if nm == nil {
  1302. return ret, false
  1303. }
  1304. // Check for exact matches before looking for subnet matches.
  1305. // TODO(bradfitz): add maps for these. on NetworkMap?
  1306. for _, p := range nm.Peers {
  1307. for _, a := range p.Addresses {
  1308. if a.IP() == ip && a.IsSingleIP() && tsaddr.IsTailscaleIP(ip) {
  1309. return PeerForIP{Node: p, Route: a}, true
  1310. }
  1311. }
  1312. }
  1313. for _, a := range nm.Addresses {
  1314. if a.IP() == ip && a.IsSingleIP() && tsaddr.IsTailscaleIP(ip) {
  1315. return PeerForIP{Node: nm.SelfNode, IsSelf: true, Route: a}, true
  1316. }
  1317. }
  1318. e.wgLock.Lock()
  1319. defer e.wgLock.Unlock()
  1320. // TODO(bradfitz): this is O(n peers). Add ART to netaddr?
  1321. var best netaddr.IPPrefix
  1322. var bestKey key.NodePublic
  1323. for _, p := range e.lastCfgFull.Peers {
  1324. for _, cidr := range p.AllowedIPs {
  1325. if !cidr.Contains(ip) {
  1326. continue
  1327. }
  1328. if best.IsZero() || cidr.Bits() > best.Bits() {
  1329. best = cidr
  1330. bestKey = p.PublicKey
  1331. }
  1332. }
  1333. }
  1334. // And another pass. Probably better than allocating a map per peerForIP
  1335. // call. But TODO(bradfitz): add a lookup map to netmap.NetworkMap.
  1336. if !bestKey.IsZero() {
  1337. for _, p := range nm.Peers {
  1338. if p.Key == bestKey {
  1339. return PeerForIP{Node: p, Route: best}, true
  1340. }
  1341. }
  1342. }
  1343. return ret, false
  1344. }
  1345. type closeOnErrorPool []func()
  1346. func (p *closeOnErrorPool) add(c io.Closer) { *p = append(*p, func() { c.Close() }) }
  1347. func (p *closeOnErrorPool) addFunc(fn func()) { *p = append(*p, fn) }
  1348. func (p closeOnErrorPool) closeAllIfError(errp *error) {
  1349. if *errp != nil {
  1350. for _, closeFn := range p {
  1351. closeFn()
  1352. }
  1353. }
  1354. }
  1355. // ipInPrefixes reports whether ip is in any of pp.
  1356. func ipInPrefixes(ip netaddr.IP, pp []netaddr.IPPrefix) bool {
  1357. for _, p := range pp {
  1358. if p.Contains(ip) {
  1359. return true
  1360. }
  1361. }
  1362. return false
  1363. }
  1364. // dnsIPsOverTailscale returns the IPPrefixes of DNS resolver IPs that are
  1365. // routed over Tailscale. The returned value does not contain duplicates is
  1366. // not necessarily sorted.
  1367. func dnsIPsOverTailscale(dnsCfg *dns.Config, routerCfg *router.Config) (ret []netaddr.IPPrefix) {
  1368. m := map[netaddr.IP]bool{}
  1369. add := func(resolvers []dnstype.Resolver) {
  1370. for _, r := range resolvers {
  1371. ip, err := netaddr.ParseIP(r.Addr)
  1372. if err != nil {
  1373. if ipp, err := netaddr.ParseIPPort(r.Addr); err == nil {
  1374. ip = ipp.IP()
  1375. } else {
  1376. continue
  1377. }
  1378. }
  1379. if ipInPrefixes(ip, routerCfg.Routes) && !ipInPrefixes(ip, routerCfg.LocalRoutes) {
  1380. m[ip] = true
  1381. }
  1382. }
  1383. }
  1384. add(dnsCfg.DefaultResolvers)
  1385. for _, resolvers := range dnsCfg.Routes {
  1386. add(resolvers)
  1387. }
  1388. ret = make([]netaddr.IPPrefix, 0, len(m))
  1389. for ip := range m {
  1390. ret = append(ret, netaddr.IPPrefixFrom(ip, ip.BitLen()))
  1391. }
  1392. return ret
  1393. }
  1394. // fwdDNSLinkSelector is userspaceEngine's resolver.ForwardLinkSelector, to pick
  1395. // which network interface to send DNS queries out of.
  1396. type fwdDNSLinkSelector struct {
  1397. ue *userspaceEngine
  1398. tunName string
  1399. }
  1400. func (ls fwdDNSLinkSelector) PickLink(ip netaddr.IP) (linkName string) {
  1401. if ls.ue.isDNSIPOverTailscale.Load().(func(netaddr.IP) bool)(ip) {
  1402. return ls.tunName
  1403. }
  1404. return ""
  1405. }
// Client metrics created by this file.
// NOTE(review): the places that update them are not visible in this part of
// the file; the per-metric notes below are inferred from the metric names
// and should be confirmed.
var (
	// presumably tracks packets received for MagicDNS — TODO confirm.
	metricMagicDNSPacketIn = clientmetric.NewGauge("magicdns_packet_in") // for 100.100.100.100
	// presumably tracks packets reflected back to the OS — TODO confirm.
	metricReflectToOS = clientmetric.NewGauge("packet_reflect_to_os")
)