userspace.go 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. package wgengine
  4. import (
  5. "bufio"
  6. "context"
  7. crand "crypto/rand"
  8. "errors"
  9. "fmt"
  10. "io"
  11. "math"
  12. "net/netip"
  13. "runtime"
  14. "slices"
  15. "strings"
  16. "sync"
  17. "time"
  18. "github.com/tailscale/wireguard-go/device"
  19. "github.com/tailscale/wireguard-go/tun"
  20. "tailscale.com/control/controlknobs"
  21. "tailscale.com/drive"
  22. "tailscale.com/envknob"
  23. "tailscale.com/health"
  24. "tailscale.com/ipn/ipnstate"
  25. "tailscale.com/net/dns"
  26. "tailscale.com/net/flowtrack"
  27. "tailscale.com/net/ipset"
  28. "tailscale.com/net/netmon"
  29. "tailscale.com/net/packet"
  30. "tailscale.com/net/sockstats"
  31. "tailscale.com/net/tsaddr"
  32. "tailscale.com/net/tsdial"
  33. "tailscale.com/net/tshttpproxy"
  34. "tailscale.com/net/tstun"
  35. "tailscale.com/syncs"
  36. "tailscale.com/tailcfg"
  37. "tailscale.com/tstime/mono"
  38. "tailscale.com/types/dnstype"
  39. "tailscale.com/types/ipproto"
  40. "tailscale.com/types/key"
  41. "tailscale.com/types/logger"
  42. "tailscale.com/types/netmap"
  43. "tailscale.com/types/views"
  44. "tailscale.com/util/clientmetric"
  45. "tailscale.com/util/deephash"
  46. "tailscale.com/util/mak"
  47. "tailscale.com/util/set"
  48. "tailscale.com/util/testenv"
  49. "tailscale.com/util/usermetric"
  50. "tailscale.com/version"
  51. "tailscale.com/wgengine/filter"
  52. "tailscale.com/wgengine/magicsock"
  53. "tailscale.com/wgengine/netlog"
  54. "tailscale.com/wgengine/netstack/gro"
  55. "tailscale.com/wgengine/router"
  56. "tailscale.com/wgengine/wgcfg"
  57. "tailscale.com/wgengine/wgint"
  58. "tailscale.com/wgengine/wglog"
  59. )
  60. // Lazy wireguard-go configuration parameters.
  61. const (
  62. // lazyPeerIdleThreshold is the idle duration after
  63. // which we remove a peer from the wireguard configuration.
  64. // (This includes peers that have never been idle, which
  65. // effectively have infinite idleness)
  66. lazyPeerIdleThreshold = 5 * time.Minute
  67. // packetSendTimeUpdateFrequency controls how often we record
  68. // the time that we wrote a packet to an IP address.
  69. packetSendTimeUpdateFrequency = 10 * time.Second
  70. // packetSendRecheckWireguardThreshold controls how long we can go
  71. // between packet sends to an IP before checking to see
  72. // whether this IP address needs to be added back to the
  73. // WireGuard peer oconfig.
  74. packetSendRecheckWireguardThreshold = 1 * time.Minute
  75. )
  76. // statusPollInterval is how often we ask wireguard-go for its engine
  77. // status (as long as there's activity). See docs on its use below.
  78. const statusPollInterval = 1 * time.Minute
  79. // networkLoggerUploadTimeout is the maximum timeout to wait when
  80. // shutting down the network logger as it uploads the last network log messages.
  81. const networkLoggerUploadTimeout = 5 * time.Second
// userspaceEngine is the Engine implementation that NewUserspaceEngine
// constructs and returns.
//
// Two mutexes protect its state: wgLock and mu. See the lock ordering
// note at the end of the struct.
type userspaceEngine struct {
	logf     logger.Logf
	wgLogger *wglog.Logger // a wireguard-go logging wrapper
	reqCh    chan struct{}
	waitCh   chan struct{} // chan is closed when first Close call completes; contrast with closing bool
	timeNow  func() mono.Time // set to mono.Now by NewUserspaceEngine
	tundev   *tstun.Wrapper
	wgdev    *device.Device
	router   router.Router
	confListenPort uint16 // original conf.ListenPort
	dns            *dns.Manager
	magicConn      *magicsock.Conn
	netMon         *netmon.Monitor
	health         *health.Tracker
	netMonOwned    bool // whether we created netMon (and thus need to close it)
	netMonUnregister func() // unsubscribes from changes; used regardless of netMonOwned
	birdClient       BIRDClient // or nil
	controlKnobs     *controlknobs.Knobs // or nil

	testMaybeReconfigHook func() // for tests; if non-nil, fires if maybeReconfigWireguardLocked called

	// isLocalAddr reports whether an IP is assigned to the local
	// tunnel interface. It's used to reflect local packets
	// incorrectly sent to us.
	isLocalAddr syncs.AtomicValue[func(netip.Addr) bool]

	// isDNSIPOverTailscale reports whether a DNS resolver's IP
	// is being routed over Tailscale.
	isDNSIPOverTailscale syncs.AtomicValue[func(netip.Addr) bool]

	wgLock sync.Mutex // serializes all wgdev operations; see lock order comment below
	lastCfgFull   wgcfg.Config
	lastNMinPeers int
	lastRouterSig deephash.Sum // of router.Config
	lastEngineSigFull deephash.Sum // of full wireguard config
	lastEngineSigTrim deephash.Sum // of trimmed wireguard config
	lastDNSConfig     *dns.Config
	lastIsSubnetRouter bool // was the node a primary subnet router in the last run.
	// recvActivityAt holds, per tracked peer, the time recorded by
	// noteRecvActivity; it is read and written with wgLock held.
	recvActivityAt map[key.NodePublic]mono.Time
	trimmedNodes   map[key.NodePublic]bool // set of node keys of peers currently excluded from wireguard config
	sentActivityAt map[netip.Addr]*mono.Time // value is accessed atomically
	destIPActivityFuncs map[netip.Addr]func()
	lastStatusPollTime  mono.Time // last time we polled the engine status
	reconfigureVPN      func() error // or nil

	mu sync.Mutex // guards following; see lock order comment below
	netMap  *netmap.NetworkMap // or nil
	closing bool // Close was called (even if we're still closing)
	statusCallback StatusCallback
	peerSequence   []key.NodePublic
	endpoints      []tailcfg.Endpoint
	pendOpen       map[flowtrack.Tuple]*pendingOpenFlow // see pendopen.go

	// pongCallback is the map of response handlers waiting for disco or TSMP
	// pong callbacks. The map key is a random slice of bytes.
	pongCallback map[[8]byte]func(packet.TSMPPongReply)

	// icmpEchoResponseCallback is the map of response handlers waiting for ICMP
	// echo responses. The map key is a random uint32 that is the little endian
	// value of the ICMP identifier and sequence number concatenated.
	icmpEchoResponseCallback map[uint32]func()

	// networkLogger logs statistics about network connections.
	networkLogger netlog.Logger

	// Lock ordering: magicsock.Conn.mu, wgLock, then mu.
}
// BIRDClient handles communication with the BIRD Internet Routing Daemon.
//
// NewUserspaceEngine calls DisableProtocol("tailscale") on a non-nil
// Config.BIRDClient while constructing the engine.
type BIRDClient interface {
	// EnableProtocol enables the BIRD protocol named proto.
	// NOTE(review): semantics inferred from the method name; confirm
	// against the implementation.
	EnableProtocol(proto string) error
	// DisableProtocol disables the BIRD protocol named proto.
	// NOTE(review): semantics inferred from the method name; confirm
	// against the implementation.
	DisableProtocol(proto string) error
	// Close releases the client.
	// NOTE(review): presumably closes the connection to BIRD; confirm.
	Close() error
}
// Config is the engine configuration consumed by NewUserspaceEngine.
type Config struct {
	// Tun is the device used by the Engine to exchange packets with
	// the OS.
	// If nil, a fake Device that does nothing is used.
	Tun tun.Device

	// IsTAP is whether Tun is actually a TAP (Layer 2) device that'll
	// require ethernet headers.
	IsTAP bool

	// Router interfaces the Engine to the OS network stack.
	// If nil, a fake Router that does nothing is used.
	Router router.Router

	// DNS interfaces the Engine to the OS DNS resolver configuration.
	// If nil, a fake OSConfigurator that does nothing is used.
	DNS dns.OSConfigurator

	// ReconfigureVPN provides an optional hook for platforms like Android to
	// know when it's time to reconfigure their VPN implementation. Such
	// platforms can only set their entire VPN configuration (routes, DNS, etc)
	// all at once and can't make piecemeal incremental changes, so this
	// provides a hook to "flush" a batch of Router and/or DNS changes.
	ReconfigureVPN func() error

	// NetMon optionally provides an existing network monitor to re-use.
	// If nil, a new network monitor is created.
	NetMon *netmon.Monitor

	// HealthTracker, if non-nil, is the health tracker to use.
	// NewUserspaceEngine panics when this is nil while running in a test.
	HealthTracker *health.Tracker

	// Metrics is the usermetrics registry to use.
	// It is mandatory; if not set, NewUserspaceEngine returns an error.
	Metrics *usermetric.Registry

	// Dialer is the dialer to use for outbound connections.
	// If nil, a new Dialer is created.
	Dialer *tsdial.Dialer

	// ControlKnobs is the set of control plane-provided knobs
	// to use.
	// If nil, defaults are used.
	ControlKnobs *controlknobs.Knobs

	// ListenPort is the port on which the engine will listen.
	// If zero, a port is automatically selected.
	ListenPort uint16

	// RespondToPing determines whether this engine should internally
	// reply to ICMP pings, without involving the OS.
	// Used in "fake" mode for development.
	RespondToPing bool

	// BIRDClient, if non-nil, will be used to configure BIRD whenever
	// this node is a primary subnet router.
	BIRDClient BIRDClient

	// SetSubsystem, if non-nil, is called for each new subsystem created, just before a successful return.
	SetSubsystem func(any)

	// DriveForLocal, if populated, will cause the engine to expose a Taildrive
	// listener at 100.100.100.100:8080.
	DriveForLocal drive.FileSystemForLocal
}
  198. // NewFakeUserspaceEngine returns a new userspace engine for testing.
  199. //
  200. // The opts may contain the following types:
  201. //
  202. // - int or uint16: to set the ListenPort.
  203. func NewFakeUserspaceEngine(logf logger.Logf, opts ...any) (Engine, error) {
  204. conf := Config{
  205. RespondToPing: true,
  206. }
  207. for _, o := range opts {
  208. switch v := o.(type) {
  209. case uint16:
  210. conf.ListenPort = v
  211. case int:
  212. if v < 0 || v > math.MaxUint16 {
  213. return nil, fmt.Errorf("invalid ListenPort: %d", v)
  214. }
  215. conf.ListenPort = uint16(v)
  216. case func(any):
  217. conf.SetSubsystem = v
  218. case *controlknobs.Knobs:
  219. conf.ControlKnobs = v
  220. case *health.Tracker:
  221. conf.HealthTracker = v
  222. case *usermetric.Registry:
  223. conf.Metrics = v
  224. default:
  225. return nil, fmt.Errorf("unknown option type %T", v)
  226. }
  227. }
  228. logf("Starting userspace WireGuard engine (with fake TUN device)")
  229. return NewUserspaceEngine(logf, conf)
  230. }
  231. // NewUserspaceEngine creates the named tun device and returns a
  232. // Tailscale Engine running on it.
  233. func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error) {
  234. var closePool closeOnErrorPool
  235. defer closePool.closeAllIfError(&reterr)
  236. if testenv.InTest() && conf.HealthTracker == nil {
  237. panic("NewUserspaceEngine called without HealthTracker (being strict in tests)")
  238. }
  239. if conf.Metrics == nil {
  240. return nil, errors.New("NewUserspaceEngine: opts.Metrics is required, please pass a *usermetric.Registry")
  241. }
  242. if conf.Tun == nil {
  243. logf("[v1] using fake (no-op) tun device")
  244. conf.Tun = tstun.NewFake()
  245. }
  246. if conf.Router == nil {
  247. logf("[v1] using fake (no-op) OS network configurator")
  248. conf.Router = router.NewFake(logf)
  249. }
  250. if conf.DNS == nil {
  251. logf("[v1] using fake (no-op) DNS configurator")
  252. d, err := dns.NewNoopManager()
  253. if err != nil {
  254. return nil, err
  255. }
  256. conf.DNS = d
  257. }
  258. if conf.Dialer == nil {
  259. conf.Dialer = &tsdial.Dialer{Logf: logf}
  260. }
  261. var tsTUNDev *tstun.Wrapper
  262. if conf.IsTAP {
  263. tsTUNDev = tstun.WrapTAP(logf, conf.Tun, conf.Metrics)
  264. } else {
  265. tsTUNDev = tstun.Wrap(logf, conf.Tun, conf.Metrics)
  266. }
  267. closePool.add(tsTUNDev)
  268. rtr := conf.Router
  269. if version.IsMobile() {
  270. // Android and iOS don't handle large numbers of routes well, so we
  271. // wrap the Router with one that consolidates routes down to the
  272. // smallest number possible.
  273. //
  274. // On Android, too many routes at VPN configuration time result in an
  275. // android.os.TransactionTooLargeException because Android's VPNBuilder
  276. // tries to send the entire set of routes to the VPNService as a single
  277. // Bundle, which is typically limited to 1 MB. The number of routes
  278. // that's too much seems to be very roughly around 4000.
  279. //
  280. // On iOS, the VPNExtension is limited to only 50 MB of memory, so
  281. // keeping the number of routes down helps with memory consumption.
  282. rtr = router.ConsolidatingRoutes(logf, rtr)
  283. }
  284. e := &userspaceEngine{
  285. timeNow: mono.Now,
  286. logf: logf,
  287. reqCh: make(chan struct{}, 1),
  288. waitCh: make(chan struct{}),
  289. tundev: tsTUNDev,
  290. router: rtr,
  291. confListenPort: conf.ListenPort,
  292. birdClient: conf.BIRDClient,
  293. controlKnobs: conf.ControlKnobs,
  294. reconfigureVPN: conf.ReconfigureVPN,
  295. health: conf.HealthTracker,
  296. }
  297. if e.birdClient != nil {
  298. // Disable the protocol at start time.
  299. if err := e.birdClient.DisableProtocol("tailscale"); err != nil {
  300. return nil, err
  301. }
  302. }
  303. e.isLocalAddr.Store(ipset.FalseContainsIPFunc())
  304. e.isDNSIPOverTailscale.Store(ipset.FalseContainsIPFunc())
  305. if conf.NetMon != nil {
  306. e.netMon = conf.NetMon
  307. } else {
  308. mon, err := netmon.New(logf)
  309. if err != nil {
  310. return nil, err
  311. }
  312. closePool.add(mon)
  313. e.netMon = mon
  314. e.netMonOwned = true
  315. }
  316. tunName, _ := conf.Tun.Name()
  317. conf.Dialer.SetTUNName(tunName)
  318. conf.Dialer.SetNetMon(e.netMon)
  319. e.dns = dns.NewManager(logf, conf.DNS, e.health, conf.Dialer, fwdDNSLinkSelector{e, tunName}, conf.ControlKnobs, runtime.GOOS)
  320. // TODO: there's probably a better place for this
  321. sockstats.SetNetMon(e.netMon)
  322. logf("link state: %+v", e.netMon.InterfaceState())
  323. unregisterMonWatch := e.netMon.RegisterChangeCallback(func(delta *netmon.ChangeDelta) {
  324. tshttpproxy.InvalidateCache()
  325. e.linkChange(delta)
  326. })
  327. closePool.addFunc(unregisterMonWatch)
  328. e.netMonUnregister = unregisterMonWatch
  329. endpointsFn := func(endpoints []tailcfg.Endpoint) {
  330. e.mu.Lock()
  331. e.endpoints = append(e.endpoints[:0], endpoints...)
  332. e.mu.Unlock()
  333. e.RequestStatus()
  334. }
  335. onPortUpdate := func(port uint16, network string) {
  336. e.logf("onPortUpdate(port=%v, network=%s)", port, network)
  337. if err := e.router.UpdateMagicsockPort(port, network); err != nil {
  338. e.logf("UpdateMagicsockPort(port=%v, network=%s) failed: %v", port, network, err)
  339. }
  340. }
  341. magicsockOpts := magicsock.Options{
  342. Logf: logf,
  343. Port: conf.ListenPort,
  344. EndpointsFunc: endpointsFn,
  345. DERPActiveFunc: e.RequestStatus,
  346. IdleFunc: e.tundev.IdleDuration,
  347. NoteRecvActivity: e.noteRecvActivity,
  348. NetMon: e.netMon,
  349. HealthTracker: e.health,
  350. Metrics: conf.Metrics,
  351. ControlKnobs: conf.ControlKnobs,
  352. OnPortUpdate: onPortUpdate,
  353. PeerByKeyFunc: e.PeerByKey,
  354. }
  355. var err error
  356. e.magicConn, err = magicsock.NewConn(magicsockOpts)
  357. if err != nil {
  358. return nil, fmt.Errorf("wgengine: %v", err)
  359. }
  360. closePool.add(e.magicConn)
  361. e.magicConn.SetNetworkUp(e.netMon.InterfaceState().AnyInterfaceUp())
  362. tsTUNDev.SetDiscoKey(e.magicConn.DiscoPublicKey())
  363. if conf.RespondToPing {
  364. e.tundev.PostFilterPacketInboundFromWireGuard = echoRespondToAll
  365. }
  366. e.tundev.PreFilterPacketOutboundToWireGuardEngineIntercept = e.handleLocalPackets
  367. if envknob.BoolDefaultTrue("TS_DEBUG_CONNECT_FAILURES") {
  368. if e.tundev.PreFilterPacketInboundFromWireGuard != nil {
  369. return nil, errors.New("unexpected PreFilterIn already set")
  370. }
  371. e.tundev.PreFilterPacketInboundFromWireGuard = e.trackOpenPreFilterIn
  372. if e.tundev.PostFilterPacketOutboundToWireGuard != nil {
  373. return nil, errors.New("unexpected PostFilterOut already set")
  374. }
  375. e.tundev.PostFilterPacketOutboundToWireGuard = e.trackOpenPostFilterOut
  376. }
  377. e.wgLogger = wglog.NewLogger(logf)
  378. e.tundev.OnTSMPPongReceived = func(pong packet.TSMPPongReply) {
  379. e.mu.Lock()
  380. defer e.mu.Unlock()
  381. cb := e.pongCallback[pong.Data]
  382. e.logf("wgengine: got TSMP pong %02x, peerAPIPort=%v; cb=%v", pong.Data, pong.PeerAPIPort, cb != nil)
  383. if cb != nil {
  384. go cb(pong)
  385. }
  386. }
  387. e.tundev.OnICMPEchoResponseReceived = func(p *packet.Parsed) bool {
  388. idSeq := p.EchoIDSeq()
  389. e.mu.Lock()
  390. defer e.mu.Unlock()
  391. cb := e.icmpEchoResponseCallback[idSeq]
  392. if cb == nil {
  393. // We didn't swallow it, so let it flow to the host.
  394. return false
  395. }
  396. e.logf("wgengine: got diagnostic ICMP response %02x", idSeq)
  397. go cb()
  398. return true
  399. }
  400. // wgdev takes ownership of tundev, will close it when closed.
  401. e.logf("Creating WireGuard device...")
  402. e.wgdev = wgcfg.NewDevice(e.tundev, e.magicConn.Bind(), e.wgLogger.DeviceLogger)
  403. closePool.addFunc(e.wgdev.Close)
  404. closePool.addFunc(func() {
  405. if err := e.magicConn.Close(); err != nil {
  406. e.logf("error closing magicconn: %v", err)
  407. }
  408. })
  409. go func() {
  410. up := false
  411. for event := range e.tundev.EventsUpDown() {
  412. if event&tun.EventUp != 0 && !up {
  413. e.logf("external route: up")
  414. e.RequestStatus()
  415. up = true
  416. }
  417. if event&tun.EventDown != 0 && up {
  418. e.logf("external route: down")
  419. e.RequestStatus()
  420. up = false
  421. }
  422. }
  423. }()
  424. go func() {
  425. select {
  426. case <-e.wgdev.Wait():
  427. e.mu.Lock()
  428. closing := e.closing
  429. e.mu.Unlock()
  430. if !closing {
  431. e.logf("Closing the engine because the WireGuard device has been closed...")
  432. e.Close()
  433. }
  434. case <-e.waitCh:
  435. // continue
  436. }
  437. }()
  438. e.logf("Bringing WireGuard device up...")
  439. if err := e.wgdev.Up(); err != nil {
  440. return nil, fmt.Errorf("wgdev.Up: %w", err)
  441. }
  442. e.logf("Bringing router up...")
  443. if err := e.router.Up(); err != nil {
  444. return nil, fmt.Errorf("router.Up: %w", err)
  445. }
  446. tsTUNDev.SetLinkFeaturesPostUp()
  447. // It's a little pointless to apply no-op settings here (they
  448. // should already be empty?), but it at least exercises the
  449. // router implementation early on.
  450. e.logf("Clearing router settings...")
  451. if err := e.router.Set(nil); err != nil {
  452. return nil, fmt.Errorf("router.Set(nil): %w", err)
  453. }
  454. e.logf("Starting network monitor...")
  455. e.netMon.Start()
  456. if conf.SetSubsystem != nil {
  457. conf.SetSubsystem(e.tundev)
  458. conf.SetSubsystem(e.magicConn)
  459. conf.SetSubsystem(e.dns)
  460. conf.SetSubsystem(conf.Router)
  461. conf.SetSubsystem(conf.Dialer)
  462. conf.SetSubsystem(e.netMon)
  463. if conf.DriveForLocal != nil {
  464. conf.SetSubsystem(conf.DriveForLocal)
  465. }
  466. }
  467. e.logf("Engine created.")
  468. return e, nil
  469. }
  470. // echoRespondToAll is an inbound post-filter responding to all echo requests.
  471. func echoRespondToAll(p *packet.Parsed, t *tstun.Wrapper, gro *gro.GRO) (filter.Response, *gro.GRO) {
  472. if p.IsEchoRequest() {
  473. header := p.ICMP4Header()
  474. header.ToResponse()
  475. outp := packet.Generate(&header, p.Payload())
  476. t.InjectOutbound(outp)
  477. // We already responded to it, but it's not an error.
  478. // Proceed with regular delivery. (Since this code is only
  479. // used in fake mode, regular delivery just means throwing
  480. // it away. If this ever gets run in non-fake mode, you'll
  481. // get double responses to pings, which is an indicator you
  482. // shouldn't be doing that I guess.)
  483. return filter.Accept, gro
  484. }
  485. return filter.Accept, gro
  486. }
  487. // handleLocalPackets inspects packets coming from the local network
  488. // stack, and intercepts any packets that should be handled by
  489. // tailscaled directly. Other packets are allowed to proceed into the
  490. // main ACL filter.
  491. func (e *userspaceEngine) handleLocalPackets(p *packet.Parsed, t *tstun.Wrapper) filter.Response {
  492. if runtime.GOOS == "darwin" || runtime.GOOS == "ios" {
  493. isLocalAddr, ok := e.isLocalAddr.LoadOk()
  494. if !ok {
  495. e.logf("[unexpected] e.isLocalAddr was nil, can't check for loopback packet")
  496. } else if isLocalAddr(p.Dst.Addr()) {
  497. // macOS NetworkExtension directs packets destined to the
  498. // tunnel's local IP address into the tunnel, instead of
  499. // looping back within the kernel network stack. We have to
  500. // notice that an outbound packet is actually destined for
  501. // ourselves, and loop it back into macOS.
  502. t.InjectInboundCopy(p.Buffer())
  503. metricReflectToOS.Add(1)
  504. return filter.Drop
  505. }
  506. }
  507. return filter.Accept
  508. }
  509. var debugTrimWireguard = envknob.RegisterOptBool("TS_DEBUG_TRIM_WIREGUARD")
  510. // forceFullWireguardConfig reports whether we should give wireguard our full
  511. // network map, even for inactive peers.
  512. //
  513. // TODO(bradfitz): remove this at some point. We had a TODO to do it before 1.0
  514. // but it's still there as of 1.30. Really we should not do this wireguard lazy
  515. // peer config at all and just fix wireguard-go to not have so much extra memory
  516. // usage per peer. That would simplify a lot of Tailscale code. OTOH, we have 50
  517. // MB of memory on iOS now instead of 15 MB, so the other option is to just give
  518. // up on lazy wireguard config and blow the memory and hope for the best on iOS.
  519. // That's sad too. Or we get rid of these knobs (lazy wireguard config has been
  520. // stable!) but I'm worried that a future regression would be easier to debug
  521. // with these knobs in place.
  522. func (e *userspaceEngine) forceFullWireguardConfig(numPeers int) bool {
  523. // Did the user explicitly enable trimming via the environment variable knob?
  524. if b, ok := debugTrimWireguard().Get(); ok {
  525. return !b
  526. }
  527. return e.controlKnobs != nil && e.controlKnobs.KeepFullWGConfig.Load()
  528. }
  529. // isTrimmablePeer reports whether p is a peer that we can trim out of the
  530. // network map.
  531. //
  532. // For implementation simplicity, we can only trim peers that have
  533. // only non-subnet AllowedIPs (an IPv4 /32 or IPv6 /128), which is the
  534. // common case for most peers. Subnet router nodes will just always be
  535. // created in the wireguard-go config.
  536. func (e *userspaceEngine) isTrimmablePeer(p *wgcfg.Peer, numPeers int) bool {
  537. if e.forceFullWireguardConfig(numPeers) {
  538. return false
  539. }
  540. // AllowedIPs must all be single IPs, not subnets.
  541. for _, aip := range p.AllowedIPs {
  542. if !aip.IsSingleIP() {
  543. return false
  544. }
  545. }
  546. return true
  547. }
  548. // noteRecvActivity is called by magicsock when a packet has been
  549. // received for the peer with node key nk. Magicsock calls this no
  550. // more than every 10 seconds for a given peer.
  551. func (e *userspaceEngine) noteRecvActivity(nk key.NodePublic) {
  552. e.wgLock.Lock()
  553. defer e.wgLock.Unlock()
  554. if _, ok := e.recvActivityAt[nk]; !ok {
  555. // Not a trimmable peer we care about tracking. (See isTrimmablePeer)
  556. if e.trimmedNodes[nk] {
  557. e.logf("wgengine: [unexpected] noteReceiveActivity called on idle node %v that's not in recvActivityAt", nk.ShortString())
  558. }
  559. return
  560. }
  561. now := e.timeNow()
  562. e.recvActivityAt[nk] = now
  563. // As long as there's activity, periodically poll the engine to get
  564. // stats for the far away side effect of
  565. // ipn/ipnlocal.LocalBackend.parseWgStatusLocked to log activity, for
  566. // use in various admin dashboards.
  567. // This particularly matters on platforms without a connected GUI, as
  568. // the GUIs generally poll this enough to cause that logging. But
  569. // tailscaled alone did not, hence this.
  570. if e.lastStatusPollTime.IsZero() || now.Sub(e.lastStatusPollTime) >= statusPollInterval {
  571. e.lastStatusPollTime = now
  572. go e.RequestStatus()
  573. }
  574. // If the last activity time jumped a bunch (say, at least
  575. // half the idle timeout) then see if we need to reprogram
  576. // WireGuard. This could probably be just
  577. // lazyPeerIdleThreshold without the divide by 2, but
  578. // maybeReconfigWireguardLocked is cheap enough to call every
  579. // couple minutes (just not on every packet).
  580. if e.trimmedNodes[nk] {
  581. e.logf("wgengine: idle peer %v now active, reconfiguring WireGuard", nk.ShortString())
  582. e.maybeReconfigWireguardLocked(nil)
  583. }
  584. }
  585. // isActiveSinceLocked reports whether the peer identified by (nk, ip)
  586. // has had a packet sent to or received from it since t.
  587. //
  588. // e.wgLock must be held.
  589. func (e *userspaceEngine) isActiveSinceLocked(nk key.NodePublic, ip netip.Addr, t mono.Time) bool {
  590. if e.recvActivityAt[nk].After(t) {
  591. return true
  592. }
  593. timePtr, ok := e.sentActivityAt[ip]
  594. if !ok {
  595. return false
  596. }
  597. return timePtr.LoadAtomic().After(t)
  598. }
  599. // discoChanged is the set of peers whose disco keys have changed, implying they've restarted.
  600. // If a peer is in this set and was previously in the live wireguard config,
  601. // it needs to be first removed and then re-added to flush out its wireguard session key.
  602. // If discoChanged is nil or empty, this extra removal step isn't done.
  603. //
  604. // e.wgLock must be held.
  605. func (e *userspaceEngine) maybeReconfigWireguardLocked(discoChanged map[key.NodePublic]bool) error {
  606. if hook := e.testMaybeReconfigHook; hook != nil {
  607. hook()
  608. return nil
  609. }
  610. full := e.lastCfgFull
  611. e.wgLogger.SetPeers(full.Peers)
  612. // Compute a minimal config to pass to wireguard-go
  613. // based on the full config. Prune off all the peers
  614. // and only add the active ones back.
  615. min := full
  616. min.Peers = make([]wgcfg.Peer, 0, e.lastNMinPeers)
  617. // We'll only keep a peer around if it's been active in
  618. // the past 5 minutes. That's more than WireGuard's key
  619. // rotation time anyway so it's no harm if we remove it
  620. // later if it's been inactive.
  621. activeCutoff := e.timeNow().Add(-lazyPeerIdleThreshold)
  622. // Not all peers can be trimmed from the network map (see
  623. // isTrimmablePeer). For those that are trimmable, keep track of
  624. // their NodeKey and Tailscale IPs. These are the ones we'll need
  625. // to install tracking hooks for to watch their send/receive
  626. // activity.
  627. trackNodes := make([]key.NodePublic, 0, len(full.Peers))
  628. trackIPs := make([]netip.Addr, 0, len(full.Peers))
  629. // Don't re-alloc the map; the Go compiler optimizes map clears as of
  630. // Go 1.11, so we can re-use the existing + allocated map.
  631. if e.trimmedNodes != nil {
  632. clear(e.trimmedNodes)
  633. } else {
  634. e.trimmedNodes = make(map[key.NodePublic]bool)
  635. }
  636. needRemoveStep := false
  637. for i := range full.Peers {
  638. p := &full.Peers[i]
  639. nk := p.PublicKey
  640. if !e.isTrimmablePeer(p, len(full.Peers)) {
  641. min.Peers = append(min.Peers, *p)
  642. if discoChanged[nk] {
  643. needRemoveStep = true
  644. }
  645. continue
  646. }
  647. trackNodes = append(trackNodes, nk)
  648. recentlyActive := false
  649. for _, cidr := range p.AllowedIPs {
  650. trackIPs = append(trackIPs, cidr.Addr())
  651. recentlyActive = recentlyActive || e.isActiveSinceLocked(nk, cidr.Addr(), activeCutoff)
  652. }
  653. if recentlyActive {
  654. min.Peers = append(min.Peers, *p)
  655. if discoChanged[nk] {
  656. needRemoveStep = true
  657. }
  658. } else {
  659. e.trimmedNodes[nk] = true
  660. }
  661. }
  662. e.lastNMinPeers = len(min.Peers)
  663. if changed := deephash.Update(&e.lastEngineSigTrim, &struct {
  664. WGConfig *wgcfg.Config
  665. TrimmedNodes map[key.NodePublic]bool
  666. TrackNodes []key.NodePublic
  667. TrackIPs []netip.Addr
  668. }{&min, e.trimmedNodes, trackNodes, trackIPs}); !changed {
  669. return nil
  670. }
  671. e.updateActivityMapsLocked(trackNodes, trackIPs)
  672. if needRemoveStep {
  673. minner := min
  674. minner.Peers = nil
  675. numRemove := 0
  676. for _, p := range min.Peers {
  677. if discoChanged[p.PublicKey] {
  678. numRemove++
  679. continue
  680. }
  681. minner.Peers = append(minner.Peers, p)
  682. }
  683. if numRemove > 0 {
  684. e.logf("wgengine: Reconfig: removing session keys for %d peers", numRemove)
  685. if err := wgcfg.ReconfigDevice(e.wgdev, &minner, e.logf); err != nil {
  686. e.logf("wgdev.Reconfig: %v", err)
  687. return err
  688. }
  689. }
  690. }
  691. e.logf("wgengine: Reconfig: configuring userspace WireGuard config (with %d/%d peers)", len(min.Peers), len(full.Peers))
  692. if err := wgcfg.ReconfigDevice(e.wgdev, &min, e.logf); err != nil {
  693. e.logf("wgdev.Reconfig: %v", err)
  694. return err
  695. }
  696. return nil
  697. }
  698. // updateActivityMapsLocked updates the data structures used for tracking the activity
  699. // of wireguard peers that we might add/remove dynamically from the real config
  700. // as given to wireguard-go.
  701. //
  702. // e.wgLock must be held.
  703. func (e *userspaceEngine) updateActivityMapsLocked(trackNodes []key.NodePublic, trackIPs []netip.Addr) {
  704. // Generate the new map of which nodekeys we want to track
  705. // receive times for.
  706. mr := map[key.NodePublic]mono.Time{} // TODO: only recreate this if set of keys changed
  707. for _, nk := range trackNodes {
  708. // Preserve old times in the new map, but also
  709. // populate map entries for new trackNodes values with
  710. // time.Time{} zero values. (Only entries in this map
  711. // are tracked, so the Time zero values allow it to be
  712. // tracked later)
  713. mr[nk] = e.recvActivityAt[nk]
  714. }
  715. e.recvActivityAt = mr
  716. oldTime := e.sentActivityAt
  717. e.sentActivityAt = make(map[netip.Addr]*mono.Time, len(oldTime))
  718. oldFunc := e.destIPActivityFuncs
  719. e.destIPActivityFuncs = make(map[netip.Addr]func(), len(oldFunc))
  720. updateFn := func(timePtr *mono.Time) func() {
  721. return func() {
  722. now := e.timeNow()
  723. old := timePtr.LoadAtomic()
  724. // How long's it been since we last sent a packet?
  725. elapsed := now.Sub(old)
  726. if old == 0 {
  727. // For our first packet, old is 0, which has indeterminate meaning.
  728. // Set elapsed to a big number (four score and seven years).
  729. elapsed = 762642 * time.Hour
  730. }
  731. if elapsed >= packetSendTimeUpdateFrequency {
  732. timePtr.StoreAtomic(now)
  733. }
  734. // On a big jump, assume we might no longer be in the wireguard
  735. // config and go check.
  736. if elapsed >= packetSendRecheckWireguardThreshold {
  737. e.wgLock.Lock()
  738. defer e.wgLock.Unlock()
  739. e.maybeReconfigWireguardLocked(nil)
  740. }
  741. }
  742. }
  743. for _, ip := range trackIPs {
  744. timePtr := oldTime[ip]
  745. if timePtr == nil {
  746. timePtr = new(mono.Time)
  747. }
  748. e.sentActivityAt[ip] = timePtr
  749. fn := oldFunc[ip]
  750. if fn == nil {
  751. fn = updateFn(timePtr)
  752. }
  753. e.destIPActivityFuncs[ip] = fn
  754. }
  755. e.tundev.SetDestIPActivityFuncs(e.destIPActivityFuncs)
  756. }
  757. // hasOverlap checks if there is a IPPrefix which is common amongst the two
  758. // provided slices.
  759. func hasOverlap(aips, rips views.Slice[netip.Prefix]) bool {
  760. for _, aip := range aips.All() {
  761. if views.SliceContains(rips, aip) {
  762. return true
  763. }
  764. }
  765. return false
  766. }
// Reconfig applies a new WireGuard, router, and DNS configuration to the
// engine.
//
// It panics if routerCfg or dnsCfg is nil. If none of the engine config,
// the router/DNS config, the listen port, the subnet-router state, or the
// peer MTU setting changed, it returns ErrNoChanges before reconfiguring
// magicsock, WireGuard, the network logger, the router, DNS, or BIRD.
//
// It holds e.wgLock for the duration of the call and takes e.mu briefly
// while rebuilding e.peerSequence and reading e.netMap.
func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config, dnsCfg *dns.Config) error {
	if routerCfg == nil {
		panic("routerCfg must not be nil")
	}
	if dnsCfg == nil {
		panic("dnsCfg must not be nil")
	}

	e.isLocalAddr.Store(ipset.NewContainsIPFunc(views.SliceOf(routerCfg.LocalAddrs)))

	e.wgLock.Lock()
	defer e.wgLock.Unlock()
	e.tundev.SetWGConfig(cfg)
	e.lastDNSConfig = dnsCfg

	// Record the peers of the new config, both in order (e.peerSequence)
	// and as a set to hand to magicsock below.
	peerSet := make(set.Set[key.NodePublic], len(cfg.Peers))
	e.mu.Lock()
	e.peerSequence = e.peerSequence[:0]
	for _, p := range cfg.Peers {
		e.peerSequence = append(e.peerSequence, p.PublicKey)
		peerSet.Add(p.PublicKey)
	}
	nm := e.netMap
	e.mu.Unlock()

	// The configured listen port is replaced by 0 when the
	// RandomizeClientPort control knob is set.
	listenPort := e.confListenPort
	if e.controlKnobs != nil && e.controlKnobs.RandomizeClientPort.Load() {
		listenPort = 0
	}

	peerMTUEnable := e.magicConn.ShouldPMTUD()

	// Subnet-router state is only computed when a BIRD client is
	// configured and a valid self node is known; otherwise it stays false.
	isSubnetRouter := false
	if e.birdClient != nil && nm != nil && nm.SelfNode.Valid() {
		isSubnetRouter = hasOverlap(nm.SelfNode.PrimaryRoutes(), nm.SelfNode.Hostinfo().RoutableIPs())
		e.logf("[v1] Reconfig: hasOverlap(%v, %v) = %v; isSubnetRouter=%v lastIsSubnetRouter=%v",
			nm.SelfNode.PrimaryRoutes(), nm.SelfNode.Hostinfo().RoutableIPs(),
			isSubnetRouter, isSubnetRouter, e.lastIsSubnetRouter)
	}
	isSubnetRouterChanged := isSubnetRouter != e.lastIsSubnetRouter

	// Detect what changed relative to the previous call. Note that
	// deephash.Update is called for both signatures unconditionally.
	engineChanged := deephash.Update(&e.lastEngineSigFull, cfg)
	routerChanged := deephash.Update(&e.lastRouterSig, &struct {
		RouterConfig *router.Config
		DNSConfig    *dns.Config
	}{routerCfg, dnsCfg})
	listenPortChanged := listenPort != e.magicConn.LocalPort()
	peerMTUChanged := peerMTUEnable != e.magicConn.PeerMTUEnabled()
	if !engineChanged && !routerChanged && !listenPortChanged && !isSubnetRouterChanged && !peerMTUChanged {
		return ErrNoChanges
	}

	// Work out the network logger transitions: whether its IDs changed
	// between two valid values, and whether it should be running at all
	// after this call.
	newLogIDs := cfg.NetworkLogging
	oldLogIDs := e.lastCfgFull.NetworkLogging
	netLogIDsNowValid := !newLogIDs.NodeID.IsZero() && !newLogIDs.DomainID.IsZero()
	netLogIDsWasValid := !oldLogIDs.NodeID.IsZero() && !oldLogIDs.DomainID.IsZero()
	netLogIDsChanged := netLogIDsNowValid && netLogIDsWasValid && newLogIDs != oldLogIDs
	netLogRunning := netLogIDsNowValid && !routerCfg.Equal(&router.Config{})
	if envknob.NoLogsNoSupport() {
		netLogRunning = false
	}

	// TODO(bradfitz,danderson): maybe delete this isDNSIPOverTailscale
	// field and delete the resolver.ForwardLinkSelector hook and
	// instead have ipnlocal populate a map of DNS IP => linkName and
	// put that in the *dns.Config instead, and plumb it down to the
	// dns.Manager. Maybe also with isLocalAddr above.
	e.isDNSIPOverTailscale.Store(ipset.NewContainsIPFunc(views.SliceOf(dnsIPsOverTailscale(dnsCfg, routerCfg))))

	// See if any peers have changed disco keys, which means they've restarted.
	// If so, we need to update the wireguard-go/device.Device in two phases:
	// once without the node which has restarted, to clear its wireguard session key,
	// and a second time with it.
	discoChanged := make(map[key.NodePublic]bool)
	{
		// prevEP maps each peer of the previous config to its disco key;
		// peers with a zero disco key are left out.
		prevEP := make(map[key.NodePublic]key.DiscoPublic)
		for i := range e.lastCfgFull.Peers {
			if p := &e.lastCfgFull.Peers[i]; !p.DiscoKey.IsZero() {
				prevEP[p.PublicKey] = p.DiscoKey
			}
		}
		for i := range cfg.Peers {
			p := &cfg.Peers[i]
			if p.DiscoKey.IsZero() {
				continue
			}
			pub := p.PublicKey
			if old, ok := prevEP[pub]; ok && old != p.DiscoKey {
				discoChanged[pub] = true
				e.logf("wgengine: Reconfig: %s changed from %q to %q", pub.ShortString(), old, p.DiscoKey)
			}
		}
	}

	// Keep our own copy of the config; it is what
	// maybeReconfigWireguardLocked trims from.
	e.lastCfgFull = *cfg.Clone()

	// Tell magicsock about the new (or initial) private key
	// (which is needed by DERP) before wgdev gets it, as wgdev
	// will start trying to handshake, which we want to be able to
	// go over DERP.
	if err := e.magicConn.SetPrivateKey(cfg.PrivateKey); err != nil {
		e.logf("wgengine: Reconfig: SetPrivateKey: %v", err)
	}
	e.magicConn.UpdatePeers(peerSet)
	e.magicConn.SetPreferredPort(listenPort)
	e.magicConn.UpdatePMTUD()

	if err := e.maybeReconfigWireguardLocked(discoChanged); err != nil {
		return err
	}

	// Shutdown the network logger because the IDs changed.
	// Let it be started back up by subsequent logic.
	if netLogIDsChanged && e.networkLogger.Running() {
		e.logf("wgengine: Reconfig: shutting down network logger")
		ctx, cancel := context.WithTimeout(context.Background(), networkLoggerUploadTimeout)
		defer cancel()
		if err := e.networkLogger.Shutdown(ctx); err != nil {
			e.logf("wgengine: Reconfig: error shutting down network logger: %v", err)
		}
	}

	// Startup the network logger.
	// Do this before configuring the router so that we capture initial packets.
	if netLogRunning && !e.networkLogger.Running() {
		nid := cfg.NetworkLogging.NodeID
		tid := cfg.NetworkLogging.DomainID
		logExitFlowEnabled := cfg.NetworkLogging.LogExitFlowEnabled
		e.logf("wgengine: Reconfig: starting up network logger (node:%s tailnet:%s)", nid.Public(), tid.Public())
		if err := e.networkLogger.Startup(cfg.NodeID, nid, tid, e.tundev, e.magicConn, e.netMon, e.health, logExitFlowEnabled); err != nil {
			e.logf("wgengine: Reconfig: error starting up network logger: %v", err)
		}
		e.networkLogger.ReconfigRoutes(routerCfg)
	}

	if routerChanged {
		e.logf("wgengine: Reconfig: configuring router")
		e.networkLogger.ReconfigRoutes(routerCfg)
		err := e.router.Set(routerCfg)
		e.health.SetRouterHealth(err)
		if err != nil {
			return err
		}
		// Keep DNS configuration after router configuration, as some
		// DNS managers refuse to apply settings if the device has no
		// assigned address.
		e.logf("wgengine: Reconfig: configuring DNS")
		err = e.dns.Set(*dnsCfg)
		e.health.SetDNSHealth(err)
		if err != nil {
			return err
		}
		if err := e.reconfigureVPNIfNecessary(); err != nil {
			return err
		}
	}

	// Shutdown the network logger.
	// Do this after configuring the router so that we capture final packets.
	// This attempts to flush out any log messages and may block.
	if !netLogRunning && e.networkLogger.Running() {
		e.logf("wgengine: Reconfig: shutting down network logger")
		ctx, cancel := context.WithTimeout(context.Background(), networkLoggerUploadTimeout)
		defer cancel()
		if err := e.networkLogger.Shutdown(ctx); err != nil {
			e.logf("wgengine: Reconfig: error shutting down network logger: %v", err)
		}
	}

	if isSubnetRouterChanged && e.birdClient != nil {
		e.logf("wgengine: Reconfig: configuring BIRD")
		var err error
		if isSubnetRouter {
			err = e.birdClient.EnableProtocol("tailscale")
		} else {
			err = e.birdClient.DisableProtocol("tailscale")
		}
		if err != nil {
			// Log but don't fail here.
			e.logf("wgengine: error configuring BIRD: %v", err)
		} else {
			// Only remember the new state once BIRD accepted it, so a
			// failed attempt is seen as a change again on the next call.
			e.lastIsSubnetRouter = isSubnetRouter
		}
	}

	e.logf("[v1] wgengine: Reconfig done")
	return nil
}
// GetFilter returns the packet filter reported by e.tundev.
func (e *userspaceEngine) GetFilter() *filter.Filter {
	return e.tundev.GetFilter()
}
// SetFilter passes filt to e.tundev as its packet filter.
func (e *userspaceEngine) SetFilter(filt *filter.Filter) {
	e.tundev.SetFilter(filt)
}
// GetJailedFilter returns the jailed packet filter reported by e.tundev.
func (e *userspaceEngine) GetJailedFilter() *filter.Filter {
	return e.tundev.GetJailedFilter()
}
// SetJailedFilter passes filt to e.tundev as its jailed packet filter.
func (e *userspaceEngine) SetJailedFilter(filt *filter.Filter) {
	e.tundev.SetJailedFilter(filt)
}
  948. func (e *userspaceEngine) SetStatusCallback(cb StatusCallback) {
  949. e.mu.Lock()
  950. defer e.mu.Unlock()
  951. e.statusCallback = cb
  952. }
  953. func (e *userspaceEngine) getStatusCallback() StatusCallback {
  954. e.mu.Lock()
  955. defer e.mu.Unlock()
  956. return e.statusCallback
  957. }
// ErrEngineClosing is the error getStatus returns, in place of a status,
// once the engine has been marked as closing.
var ErrEngineClosing = errors.New("engine closing; no status")
  959. func (e *userspaceEngine) PeerByKey(pubKey key.NodePublic) (_ wgint.Peer, ok bool) {
  960. e.wgLock.Lock()
  961. dev := e.wgdev
  962. e.wgLock.Unlock()
  963. if dev == nil {
  964. return wgint.Peer{}, false
  965. }
  966. peer := dev.LookupPeer(pubKey.Raw32())
  967. if peer == nil {
  968. return wgint.Peer{}, false
  969. }
  970. return wgint.PeerOf(peer), true
  971. }
  972. func (e *userspaceEngine) getPeerStatusLite(pk key.NodePublic) (status ipnstate.PeerStatusLite, ok bool) {
  973. peer, ok := e.PeerByKey(pk)
  974. if !ok {
  975. return status, false
  976. }
  977. status.NodeKey = pk
  978. status.RxBytes = int64(peer.RxBytes())
  979. status.TxBytes = int64(peer.TxBytes())
  980. status.LastHandshake = peer.LastHandshake()
  981. return status, true
  982. }
  983. func (e *userspaceEngine) getStatus() (*Status, error) {
  984. // Grab derpConns before acquiring wgLock to not violate lock ordering;
  985. // the DERPs method acquires magicsock.Conn.mu.
  986. // (See comment in userspaceEngine's declaration.)
  987. derpConns := e.magicConn.DERPs()
  988. e.mu.Lock()
  989. closing := e.closing
  990. peerKeys := slices.Clone(e.peerSequence)
  991. localAddrs := slices.Clone(e.endpoints)
  992. e.mu.Unlock()
  993. if closing {
  994. return nil, ErrEngineClosing
  995. }
  996. peers := make([]ipnstate.PeerStatusLite, 0, len(peerKeys))
  997. for _, key := range peerKeys {
  998. if status, ok := e.getPeerStatusLite(key); ok {
  999. peers = append(peers, status)
  1000. }
  1001. }
  1002. return &Status{
  1003. AsOf: time.Now(),
  1004. LocalAddrs: localAddrs,
  1005. Peers: peers,
  1006. DERPs: derpConns,
  1007. }, nil
  1008. }
  1009. func (e *userspaceEngine) RequestStatus() {
  1010. // This is slightly tricky. e.getStatus() can theoretically get
  1011. // blocked inside wireguard for a while, and RequestStatus() is
  1012. // sometimes called from a goroutine, so we don't want a lot of
  1013. // them hanging around. On the other hand, requesting multiple
  1014. // status updates simultaneously is pointless anyway; they will
  1015. // all say the same thing.
  1016. // Enqueue at most one request. If one is in progress already, this
  1017. // adds one more to the queue. If one has been requested but not
  1018. // started, it is a no-op.
  1019. select {
  1020. case e.reqCh <- struct{}{}:
  1021. default:
  1022. }
  1023. // Dequeue at most one request. Another thread may have already
  1024. // dequeued the request we enqueued above, which is fine, since the
  1025. // information is guaranteed to be at least as recent as the current
  1026. // call to RequestStatus().
  1027. select {
  1028. case <-e.reqCh:
  1029. s, err := e.getStatus()
  1030. if s == nil && err == nil {
  1031. e.logf("[unexpected] RequestStatus: both s and err are nil")
  1032. return
  1033. }
  1034. if cb := e.getStatusCallback(); cb != nil {
  1035. cb(s, err)
  1036. }
  1037. default:
  1038. }
  1039. }
// Close shuts the engine down. The first call marks the engine as closing
// and tears down its components in order; later calls return immediately.
//
// e.waitCh (see Done) is closed once the components have been torn down,
// before the final network logger shutdown, which is bounded by
// networkLoggerUploadTimeout.
func (e *userspaceEngine) Close() {
	// Make Close idempotent: only the first caller proceeds.
	e.mu.Lock()
	if e.closing {
		e.mu.Unlock()
		return
	}
	e.closing = true
	e.mu.Unlock()

	// Feed wireguard-go an empty IPC set operation before tearing things
	// down. NOTE(review): presumably this resets the device config; the
	// returned error is ignored — confirm intent.
	r := bufio.NewReader(strings.NewReader(""))
	e.wgdev.IpcSetOperation(r)
	e.magicConn.Close()
	e.netMonUnregister()
	// Only close the network monitor if the engine owns it.
	if e.netMonOwned {
		e.netMon.Close()
	}
	e.dns.Down()
	e.router.Close()
	e.wgdev.Close()
	e.tundev.Close()
	if e.birdClient != nil {
		e.birdClient.DisableProtocol("tailscale")
		e.birdClient.Close()
	}
	// Signal Done() waiters.
	close(e.waitCh)

	ctx, cancel := context.WithTimeout(context.Background(), networkLoggerUploadTimeout)
	defer cancel()
	if err := e.networkLogger.Shutdown(ctx); err != nil {
		e.logf("wgengine: Close: error shutting down network logger: %v", err)
	}
}
// Done returns e.waitCh, the channel that Close closes once the engine's
// components have been torn down.
func (e *userspaceEngine) Done() <-chan struct{} {
	return e.waitCh
}
// linkChange reacts to the network link change described by delta.
//
// It reports whether any interface is up to the health tracker and to
// magicsock, flushes DNS caches when all links are down or the change is
// major, re-applies the last DNS config on major changes on selected
// platforms, rebinds magicsock on major changes, and always triggers a
// re-STUN.
func (e *userspaceEngine) linkChange(delta *netmon.ChangeDelta) {
	changed := delta.Major // TODO(bradfitz): ask more specific questions?
	cur := delta.New
	up := cur.AnyInterfaceUp()
	if !up {
		e.logf("LinkChange: all links down; pausing: %v", cur)
	} else if changed {
		e.logf("LinkChange: major, rebinding. New state: %v", cur)
	} else {
		e.logf("[v1] LinkChange: minor")
	}

	e.health.SetAnyInterfaceUp(up)
	e.magicConn.SetNetworkUp(up)
	if !up || changed {
		if err := e.dns.FlushCaches(); err != nil {
			e.logf("wgengine: dns flush failed after major link change: %v", err)
		}
	}

	// Hacky workaround for Unix DNS issue 2458: on
	// suspend/resume or whenever NetworkManager is started, it
	// nukes all systemd-resolved configs. So reapply our DNS
	// config on major link change.
	// TODO: explain why this is necessary not just on Linux but also android
	// and Apple platforms.
	if changed {
		switch runtime.GOOS {
		case "linux", "android", "ios", "darwin", "openbsd":
			// Read the last DNS config under wgLock, but apply it without
			// holding the lock.
			e.wgLock.Lock()
			dnsCfg := e.lastDNSConfig
			e.wgLock.Unlock()
			if dnsCfg != nil {
				if err := e.dns.Set(*dnsCfg); err != nil {
					e.logf("wgengine: error setting DNS config after major link change: %v", err)
				} else if err := e.reconfigureVPNIfNecessary(); err != nil {
					e.logf("wgengine: error reconfiguring VPN after major link change: %v", err)
				} else {
					e.logf("wgengine: set DNS config again after major link change")
				}
			}
		}
	}

	// Count the change, rebind on major changes, and re-STUN either way
	// with a reason string that says which kind of change this was.
	why := "link-change-minor"
	if changed {
		why = "link-change-major"
		metricNumMajorChanges.Add(1)
		e.magicConn.Rebind()
	} else {
		metricNumMinorChanges.Add(1)
	}
	e.magicConn.ReSTUN(why)
}
// SetNetworkMap passes nm to magicsock and then records it as e.netMap
// under e.mu.
func (e *userspaceEngine) SetNetworkMap(nm *netmap.NetworkMap) {
	e.magicConn.SetNetworkMap(nm)
	e.mu.Lock()
	e.netMap = nm
	e.mu.Unlock()
}
  1130. func (e *userspaceEngine) UpdateStatus(sb *ipnstate.StatusBuilder) {
  1131. st, err := e.getStatus()
  1132. if err != nil {
  1133. e.logf("wgengine: getStatus: %v", err)
  1134. return
  1135. }
  1136. if sb.WantPeers {
  1137. for _, ps := range st.Peers {
  1138. sb.AddPeer(ps.NodeKey, &ipnstate.PeerStatus{
  1139. RxBytes: int64(ps.RxBytes),
  1140. TxBytes: int64(ps.TxBytes),
  1141. LastHandshake: ps.LastHandshake,
  1142. InEngine: true,
  1143. })
  1144. }
  1145. }
  1146. e.magicConn.UpdateStatus(sb)
  1147. }
  1148. func (e *userspaceEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, size int, cb func(*ipnstate.PingResult)) {
  1149. res := &ipnstate.PingResult{IP: ip.String()}
  1150. pip, ok := e.PeerForIP(ip)
  1151. if !ok {
  1152. e.logf("ping(%v): no matching peer", ip)
  1153. res.Err = "no matching peer"
  1154. cb(res)
  1155. return
  1156. }
  1157. if pip.IsSelf {
  1158. res.Err = fmt.Sprintf("%v is local Tailscale IP", ip)
  1159. res.IsLocalIP = true
  1160. cb(res)
  1161. return
  1162. }
  1163. peer := pip.Node
  1164. e.logf("ping(%v): sending %v ping to %v %v ...", ip, pingType, peer.Key().ShortString(), peer.ComputedName())
  1165. switch pingType {
  1166. case "disco":
  1167. e.magicConn.Ping(peer, res, size, cb)
  1168. case "TSMP":
  1169. e.sendTSMPPing(ip, peer, res, cb)
  1170. case "ICMP":
  1171. e.sendICMPEchoRequest(ip, peer, res, cb)
  1172. }
  1173. }
  1174. func (e *userspaceEngine) mySelfIPMatchingFamily(dst netip.Addr) (src netip.Addr, err error) {
  1175. var zero netip.Addr
  1176. e.mu.Lock()
  1177. defer e.mu.Unlock()
  1178. if e.netMap == nil {
  1179. return zero, errors.New("no netmap")
  1180. }
  1181. addrs := e.netMap.GetAddresses()
  1182. if addrs.Len() == 0 {
  1183. return zero, errors.New("no self address in netmap")
  1184. }
  1185. for _, p := range addrs.All() {
  1186. if p.IsSingleIP() && p.Addr().BitLen() == dst.BitLen() {
  1187. return p.Addr(), nil
  1188. }
  1189. }
  1190. return zero, errors.New("no self address in netmap matching address family")
  1191. }
  1192. func (e *userspaceEngine) sendICMPEchoRequest(destIP netip.Addr, peer tailcfg.NodeView, res *ipnstate.PingResult, cb func(*ipnstate.PingResult)) {
  1193. srcIP, err := e.mySelfIPMatchingFamily(destIP)
  1194. if err != nil {
  1195. res.Err = err.Error()
  1196. cb(res)
  1197. return
  1198. }
  1199. var icmph packet.Header
  1200. if srcIP.Is4() {
  1201. icmph = packet.ICMP4Header{
  1202. IP4Header: packet.IP4Header{
  1203. IPProto: ipproto.ICMPv4,
  1204. Src: srcIP,
  1205. Dst: destIP,
  1206. },
  1207. Type: packet.ICMP4EchoRequest,
  1208. Code: packet.ICMP4NoCode,
  1209. }
  1210. } else {
  1211. icmph = packet.ICMP6Header{
  1212. IP6Header: packet.IP6Header{
  1213. IPProto: ipproto.ICMPv6,
  1214. Src: srcIP,
  1215. Dst: destIP,
  1216. },
  1217. Type: packet.ICMP6EchoRequest,
  1218. Code: packet.ICMP6NoCode,
  1219. }
  1220. }
  1221. idSeq, payload := packet.ICMPEchoPayload(nil)
  1222. expireTimer := time.AfterFunc(10*time.Second, func() {
  1223. e.setICMPEchoResponseCallback(idSeq, nil)
  1224. })
  1225. t0 := time.Now()
  1226. e.setICMPEchoResponseCallback(idSeq, func() {
  1227. expireTimer.Stop()
  1228. d := time.Since(t0)
  1229. res.LatencySeconds = d.Seconds()
  1230. res.NodeIP = destIP.String()
  1231. res.NodeName = peer.ComputedName()
  1232. cb(res)
  1233. })
  1234. icmpPing := packet.Generate(icmph, payload)
  1235. e.tundev.InjectOutbound(icmpPing)
  1236. }
  1237. func (e *userspaceEngine) sendTSMPPing(ip netip.Addr, peer tailcfg.NodeView, res *ipnstate.PingResult, cb func(*ipnstate.PingResult)) {
  1238. srcIP, err := e.mySelfIPMatchingFamily(ip)
  1239. if err != nil {
  1240. res.Err = err.Error()
  1241. cb(res)
  1242. return
  1243. }
  1244. var iph packet.Header
  1245. if srcIP.Is4() {
  1246. iph = packet.IP4Header{
  1247. IPProto: ipproto.TSMP,
  1248. Src: srcIP,
  1249. Dst: ip,
  1250. }
  1251. } else {
  1252. iph = packet.IP6Header{
  1253. IPProto: ipproto.TSMP,
  1254. Src: srcIP,
  1255. Dst: ip,
  1256. }
  1257. }
  1258. var data [8]byte
  1259. crand.Read(data[:])
  1260. expireTimer := time.AfterFunc(10*time.Second, func() {
  1261. e.setTSMPPongCallback(data, nil)
  1262. })
  1263. t0 := time.Now()
  1264. e.setTSMPPongCallback(data, func(pong packet.TSMPPongReply) {
  1265. expireTimer.Stop()
  1266. d := time.Since(t0)
  1267. res.LatencySeconds = d.Seconds()
  1268. res.NodeIP = ip.String()
  1269. res.NodeName = peer.ComputedName()
  1270. res.PeerAPIPort = pong.PeerAPIPort
  1271. cb(res)
  1272. })
  1273. var tsmpPayload [9]byte
  1274. tsmpPayload[0] = byte(packet.TSMPTypePing)
  1275. copy(tsmpPayload[1:], data[:])
  1276. tsmpPing := packet.Generate(iph, tsmpPayload[:])
  1277. e.tundev.InjectOutbound(tsmpPing)
  1278. }
  1279. func (e *userspaceEngine) setTSMPPongCallback(data [8]byte, cb func(packet.TSMPPongReply)) {
  1280. e.mu.Lock()
  1281. defer e.mu.Unlock()
  1282. if e.pongCallback == nil {
  1283. e.pongCallback = map[[8]byte]func(packet.TSMPPongReply){}
  1284. }
  1285. if cb == nil {
  1286. delete(e.pongCallback, data)
  1287. } else {
  1288. e.pongCallback[data] = cb
  1289. }
  1290. }
  1291. func (e *userspaceEngine) setICMPEchoResponseCallback(idSeq uint32, cb func()) {
  1292. e.mu.Lock()
  1293. defer e.mu.Unlock()
  1294. if cb == nil {
  1295. delete(e.icmpEchoResponseCallback, idSeq)
  1296. } else {
  1297. mak.Set(&e.icmpEchoResponseCallback, idSeq, cb)
  1298. }
  1299. }
  1300. // PeerForIP returns the Node in the wireguard config
  1301. // that's responsible for handling the given IP address.
  1302. //
  1303. // If none is found in the wireguard config but one is found in
  1304. // the netmap, it's described in an error.
  1305. //
  1306. // peerForIP acquires both e.mu and e.wgLock, but neither at the same
  1307. // time.
  1308. func (e *userspaceEngine) PeerForIP(ip netip.Addr) (ret PeerForIP, ok bool) {
  1309. e.mu.Lock()
  1310. nm := e.netMap
  1311. e.mu.Unlock()
  1312. if nm == nil {
  1313. return ret, false
  1314. }
  1315. // Check for exact matches before looking for subnet matches.
  1316. // TODO(bradfitz): add maps for these. on NetworkMap?
  1317. for _, p := range nm.Peers {
  1318. for i := range p.Addresses().Len() {
  1319. a := p.Addresses().At(i)
  1320. if a.Addr() == ip && a.IsSingleIP() && tsaddr.IsTailscaleIP(ip) {
  1321. return PeerForIP{Node: p, Route: a}, true
  1322. }
  1323. }
  1324. }
  1325. addrs := nm.GetAddresses()
  1326. for i := range addrs.Len() {
  1327. if a := addrs.At(i); a.Addr() == ip && a.IsSingleIP() && tsaddr.IsTailscaleIP(ip) {
  1328. return PeerForIP{Node: nm.SelfNode, IsSelf: true, Route: a}, true
  1329. }
  1330. }
  1331. e.wgLock.Lock()
  1332. defer e.wgLock.Unlock()
  1333. // TODO(bradfitz): this is O(n peers). Add ART to netaddr?
  1334. var best netip.Prefix
  1335. var bestKey key.NodePublic
  1336. for _, p := range e.lastCfgFull.Peers {
  1337. for _, cidr := range p.AllowedIPs {
  1338. if !cidr.Contains(ip) {
  1339. continue
  1340. }
  1341. if !best.IsValid() || cidr.Bits() > best.Bits() {
  1342. best = cidr
  1343. bestKey = p.PublicKey
  1344. }
  1345. }
  1346. }
  1347. // And another pass. Probably better than allocating a map per peerForIP
  1348. // call. But TODO(bradfitz): add a lookup map to netmap.NetworkMap.
  1349. if !bestKey.IsZero() {
  1350. for _, p := range nm.Peers {
  1351. if p.Key() == bestKey {
  1352. return PeerForIP{Node: p, Route: best}, true
  1353. }
  1354. }
  1355. }
  1356. return ret, false
  1357. }
  1358. type closeOnErrorPool []func()
  1359. func (p *closeOnErrorPool) add(c io.Closer) { *p = append(*p, func() { c.Close() }) }
  1360. func (p *closeOnErrorPool) addFunc(fn func()) { *p = append(*p, fn) }
  1361. func (p closeOnErrorPool) closeAllIfError(errp *error) {
  1362. if *errp != nil {
  1363. for _, closeFn := range p {
  1364. closeFn()
  1365. }
  1366. }
  1367. }
  1368. // ipInPrefixes reports whether ip is in any of pp.
  1369. func ipInPrefixes(ip netip.Addr, pp []netip.Prefix) bool {
  1370. for _, p := range pp {
  1371. if p.Contains(ip) {
  1372. return true
  1373. }
  1374. }
  1375. return false
  1376. }
  1377. // dnsIPsOverTailscale returns the IPPrefixes of DNS resolver IPs that are
  1378. // routed over Tailscale. The returned value does not contain duplicates is
  1379. // not necessarily sorted.
  1380. func dnsIPsOverTailscale(dnsCfg *dns.Config, routerCfg *router.Config) (ret []netip.Prefix) {
  1381. m := map[netip.Addr]bool{}
  1382. add := func(resolvers []*dnstype.Resolver) {
  1383. for _, r := range resolvers {
  1384. ip, err := netip.ParseAddr(r.Addr)
  1385. if err != nil {
  1386. if ipp, err := netip.ParseAddrPort(r.Addr); err == nil {
  1387. ip = ipp.Addr()
  1388. } else {
  1389. continue
  1390. }
  1391. }
  1392. if ipInPrefixes(ip, routerCfg.Routes) && !ipInPrefixes(ip, routerCfg.LocalRoutes) {
  1393. m[ip] = true
  1394. }
  1395. }
  1396. }
  1397. add(dnsCfg.DefaultResolvers)
  1398. for _, resolvers := range dnsCfg.Routes {
  1399. add(resolvers)
  1400. }
  1401. ret = make([]netip.Prefix, 0, len(m))
  1402. for ip := range m {
  1403. ret = append(ret, netip.PrefixFrom(ip, ip.BitLen()))
  1404. }
  1405. return ret
  1406. }
  1407. // fwdDNSLinkSelector is userspaceEngine's resolver.ForwardLinkSelector, to pick
  1408. // which network interface to send DNS queries out of.
  1409. type fwdDNSLinkSelector struct {
  1410. ue *userspaceEngine
  1411. tunName string
  1412. }
  1413. func (ls fwdDNSLinkSelector) PickLink(ip netip.Addr) (linkName string) {
  1414. if ls.ue.isDNSIPOverTailscale.Load()(ip) {
  1415. return ls.tunName
  1416. }
  1417. return ""
  1418. }
// Client metric counters maintained by the engine.
var (
	// metricReflectToOS is the "packet_reflect_to_os" counter.
	// NOTE(review): it is incremented outside this section; presumably it
	// counts packets reflected back to the OS — confirm at the call site.
	metricReflectToOS = clientmetric.NewCounter("packet_reflect_to_os")
	// metricNumMajorChanges counts the link changes that linkChange
	// handled as major.
	metricNumMajorChanges = clientmetric.NewCounter("wgengine_major_changes")
	// metricNumMinorChanges counts the link changes that linkChange
	// handled as not major.
	metricNumMinorChanges = clientmetric.NewCounter("wgengine_minor_changes")
)
// InstallCaptureHook installs cb as the packet capture callback on both
// e.tundev and e.magicConn.
func (e *userspaceEngine) InstallCaptureHook(cb packet.CaptureCallback) {
	e.tundev.InstallCaptureHook(cb)
	e.magicConn.InstallCaptureHook(cb)
}
  1428. func (e *userspaceEngine) reconfigureVPNIfNecessary() error {
  1429. if e.reconfigureVPN == nil {
  1430. return nil
  1431. }
  1432. return e.reconfigureVPN()
  1433. }