auto.go 17 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721
  1. // Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package controlclient
  5. import (
  6. "context"
  7. "fmt"
  8. "sync"
  9. "time"
  10. "tailscale.com/health"
  11. "tailscale.com/logtail/backoff"
  12. "tailscale.com/tailcfg"
  13. "tailscale.com/types/empty"
  14. "tailscale.com/types/key"
  15. "tailscale.com/types/logger"
  16. "tailscale.com/types/netmap"
  17. "tailscale.com/types/persist"
  18. "tailscale.com/types/structs"
  19. )
// LoginGoal describes the authentication outcome that the auth routine
// should work toward: either being logged in (optionally with an OAuth
// token, login flags, or an auth URL that is waiting to be visited) or
// being logged out.
type LoginGoal struct {
	_               structs.Incomparable
	wantLoggedIn    bool                 // true if we *want* to be logged in
	token           *tailcfg.Oauth2Token // oauth token to use when logging in
	flags           LoginFlags           // flags to use when logging in
	url             string               // auth url that needs to be visited
	loggedOutResult chan<- error         // if non-nil, receives the result of a logout attempt (see sendLogoutError)
}
  28. func (g *LoginGoal) sendLogoutError(err error) {
  29. if g.loggedOutResult == nil {
  30. return
  31. }
  32. select {
  33. case g.loggedOutResult <- err:
  34. default:
  35. }
  36. }
// Auto connects to a tailcontrol server for a node.
// It's a concrete implementation of the Client interface.
//
// An Auto runs two goroutines once Start is called: authRoutine, which
// works toward the current loginGoal, and mapRoutine, which polls for
// network maps while logged in.
type Auto struct {
	direct  *Direct          // our interface to the server APIs
	timeNow func() time.Time // time source; NewNoStart defaults it to time.Now
	logf    logger.Logf      // logger; NewNoStart defaults it to a no-op
	// expiry is the Expiry of the most recently received netmap, or nil
	// if none has been received. Although declared above mu, it is read
	// and written with mu held in this file.
	expiry *time.Time
	// closed is set by Shutdown. Although declared above mu, it is read
	// and written with mu held in this file.
	closed                bool
	newMapCh              chan struct{} // readable when we must restart a map request
	unregisterHealthWatch func()        // returned by health.RegisterWatcher; called by Shutdown

	mu sync.Mutex // mutex guards the following fields

	statusFunc      func(Status)    // called to update Client status
	paused          bool            // whether we should stop making HTTP requests
	unpauseWaiters  []chan struct{} // channels closed by SetPaused(false)
	loggedIn        bool            // true if currently logged in
	loginGoal       *LoginGoal      // non-nil if some login activity is desired
	synced          bool            // true if our netmap is up-to-date
	hostinfo        *tailcfg.Hostinfo
	inPollNetMap    bool // true if currently running a PollNetMap
	inLiteMapUpdate bool // true if a lite (non-streaming) map request is outstanding
	inSendStatus    int  // number of sendStatus calls currently in progress
	state           State

	authCtx    context.Context // context used for auth requests
	mapCtx     context.Context // context used for netmap requests
	authCancel func()          // cancel the auth context
	mapCancel  func()          // cancel the netmap context
	quit       chan struct{}   // when closed, goroutines should all exit
	authDone   chan struct{}   // when closed, auth goroutine is done
	mapDone    chan struct{}   // when closed, map goroutine is done
}
  67. // New creates and starts a new Auto.
  68. func New(opts Options) (*Auto, error) {
  69. c, err := NewNoStart(opts)
  70. if c != nil {
  71. c.Start()
  72. }
  73. return c, err
  74. }
  75. // NewNoStart creates a new Auto, but without calling Start on it.
  76. func NewNoStart(opts Options) (*Auto, error) {
  77. direct, err := NewDirect(opts)
  78. if err != nil {
  79. return nil, err
  80. }
  81. if opts.Logf == nil {
  82. opts.Logf = func(fmt string, args ...interface{}) {}
  83. }
  84. if opts.TimeNow == nil {
  85. opts.TimeNow = time.Now
  86. }
  87. c := &Auto{
  88. direct: direct,
  89. timeNow: opts.TimeNow,
  90. logf: opts.Logf,
  91. newMapCh: make(chan struct{}, 1),
  92. quit: make(chan struct{}),
  93. authDone: make(chan struct{}),
  94. mapDone: make(chan struct{}),
  95. }
  96. c.authCtx, c.authCancel = context.WithCancel(context.Background())
  97. c.mapCtx, c.mapCancel = context.WithCancel(context.Background())
  98. c.unregisterHealthWatch = health.RegisterWatcher(c.onHealthChange)
  99. return c, nil
  100. }
  101. func (c *Auto) onHealthChange(sys health.Subsystem, err error) {
  102. if sys == health.SysOverall {
  103. return
  104. }
  105. c.logf("controlclient: restarting map request for %q health change to new state: %v", sys, err)
  106. c.cancelMapSafely()
  107. }
  108. // SetPaused controls whether HTTP activity should be paused.
  109. //
  110. // The client can be paused and unpaused repeatedly, unlike Start and Shutdown, which can only be used once.
  111. func (c *Auto) SetPaused(paused bool) {
  112. c.mu.Lock()
  113. defer c.mu.Unlock()
  114. if paused == c.paused {
  115. return
  116. }
  117. c.logf("setPaused(%v)", paused)
  118. c.paused = paused
  119. if paused {
  120. // Only cancel the map routine. (The auth routine isn't expensive
  121. // so it's fine to keep it running.)
  122. c.cancelMapLocked()
  123. } else {
  124. for _, ch := range c.unpauseWaiters {
  125. close(ch)
  126. }
  127. c.unpauseWaiters = nil
  128. }
  129. }
  130. // Start starts the client's goroutines.
  131. //
  132. // It should only be called for clients created by NewNoStart.
  133. func (c *Auto) Start() {
  134. go c.authRoutine()
  135. go c.mapRoutine()
  136. }
  137. // sendNewMapRequest either sends a new OmitPeers, non-streaming map request
  138. // (to just send Hostinfo/Netinfo/Endpoints info, while keeping an existing
  139. // streaming response open), or start a new streaming one if necessary.
  140. //
  141. // It should be called whenever there's something new to tell the server.
  142. func (c *Auto) sendNewMapRequest() {
  143. c.mu.Lock()
  144. // If we're not already streaming a netmap, or if we're already stuck
  145. // in a lite update, then tear down everything and start a new stream
  146. // (which starts by sending a new map request)
  147. if !c.inPollNetMap || c.inLiteMapUpdate || !c.loggedIn {
  148. c.mu.Unlock()
  149. c.cancelMapSafely()
  150. return
  151. }
  152. // Otherwise, send a lite update that doesn't keep a
  153. // long-running stream response.
  154. defer c.mu.Unlock()
  155. c.inLiteMapUpdate = true
  156. ctx, cancel := context.WithTimeout(c.mapCtx, 10*time.Second)
  157. go func() {
  158. defer cancel()
  159. t0 := time.Now()
  160. err := c.direct.SendLiteMapUpdate(ctx)
  161. d := time.Since(t0).Round(time.Millisecond)
  162. c.mu.Lock()
  163. c.inLiteMapUpdate = false
  164. c.mu.Unlock()
  165. if err == nil {
  166. c.logf("[v1] successful lite map update in %v", d)
  167. return
  168. }
  169. if ctx.Err() == nil {
  170. c.logf("lite map update after %v: %v", d, err)
  171. }
  172. // Fall back to restarting the long-polling map
  173. // request (the old heavy way) if the lite update
  174. // failed for any reason.
  175. c.cancelMapSafely()
  176. }()
  177. }
  178. func (c *Auto) cancelAuth() {
  179. c.mu.Lock()
  180. if c.authCancel != nil {
  181. c.authCancel()
  182. }
  183. if !c.closed {
  184. c.authCtx, c.authCancel = context.WithCancel(context.Background())
  185. }
  186. c.mu.Unlock()
  187. }
  188. func (c *Auto) cancelMapLocked() {
  189. if c.mapCancel != nil {
  190. c.mapCancel()
  191. }
  192. if !c.closed {
  193. c.mapCtx, c.mapCancel = context.WithCancel(context.Background())
  194. }
  195. }
  196. func (c *Auto) cancelMapUnsafely() {
  197. c.mu.Lock()
  198. c.cancelMapLocked()
  199. c.mu.Unlock()
  200. }
  201. func (c *Auto) cancelMapSafely() {
  202. c.mu.Lock()
  203. defer c.mu.Unlock()
  204. c.logf("[v1] cancelMapSafely: synced=%v", c.synced)
  205. if c.inPollNetMap {
  206. // received at least one netmap since the last
  207. // interruption. That means the server has already
  208. // fully processed our last request, which might
  209. // include UpdateEndpoints(). Interrupt it and try
  210. // again.
  211. c.cancelMapLocked()
  212. } else {
  213. // !synced means we either haven't done a netmap
  214. // request yet, or it hasn't answered yet. So the
  215. // server is in an undefined state. If we send
  216. // another netmap request too soon, it might race
  217. // with the last one, and if we're very unlucky,
  218. // the new request will be applied before the old one,
  219. // and the wrong endpoints will get registered. We
  220. // have to tell the client to abort politely, only
  221. // after it receives a response to its existing netmap
  222. // request.
  223. select {
  224. case c.newMapCh <- struct{}{}:
  225. c.logf("[v1] cancelMapSafely: wrote to channel")
  226. default:
  227. // if channel write failed, then there was already
  228. // an outstanding newMapCh request. One is enough,
  229. // since it'll always use the latest endpoints.
  230. c.logf("[v1] cancelMapSafely: channel was full")
  231. }
  232. }
  233. }
// authRoutine is the goroutine (launched by Start) that works toward the
// current loginGoal.
//
// Each loop iteration snapshots loginGoal and authCtx under c.mu, then:
//   - with no goal, blocks until the auth context is cancelled (Login,
//     StartLogout and Logout all set a goal and then call cancelAuth);
//   - with a logout goal, calls TryLogout and reports the result;
//   - with a login goal, calls TryLogin (or WaitLoginURL when the goal
//     already carries a URL) and either records a new URL to visit or
//     marks the client as authenticated.
//
// Failures are retried after a backoff. The routine returns once c.quit
// is observed closed at the top of an iteration, and closes c.authDone on
// exit.
func (c *Auto) authRoutine() {
	defer close(c.authDone)
	bo := backoff.NewBackoff("authRoutine", c.logf, 30*time.Second)

	for {
		// Snapshot the goal and context for this iteration.
		c.mu.Lock()
		goal := c.loginGoal
		ctx := c.authCtx
		if goal != nil {
			c.logf("authRoutine: %s; wantLoggedIn=%v", c.state, goal.wantLoggedIn)
		} else {
			c.logf("authRoutine: %s; goal=nil paused=%v", c.state, c.paused)
		}
		c.mu.Unlock()

		// Non-blocking check for shutdown.
		select {
		case <-c.quit:
			c.logf("[v1] authRoutine: quit")
			return
		default:
		}

		// report logs err and, unless this iteration's context has been
		// cancelled, forwards it to the status callback.
		report := func(err error, msg string) {
			c.logf("[v1] %s: %v", msg, err)
			// don't send status updates for context errors,
			// since context cancelation is always on purpose.
			if ctx.Err() == nil {
				c.sendStatus("authRoutine-report", err, "", nil)
			}
		}

		if goal == nil {
			// Wait for user to Login or Logout.
			<-ctx.Done()
			c.logf("[v1] authRoutine: context done.")
			continue
		}

		if !goal.wantLoggedIn {
			err := c.direct.TryLogout(ctx)
			// Hand the result to a waiting Logout call, if any. The send
			// is non-blocking, so results of later retries may be dropped.
			goal.sendLogoutError(err)
			if err != nil {
				report(err, "TryLogout")
				bo.BackOff(ctx, err)
				continue
			}

			// success
			c.mu.Lock()
			c.loggedIn = false
			c.loginGoal = nil
			c.state = StateNotAuthenticated
			c.synced = false
			c.mu.Unlock()

			c.sendStatus("authRoutine-wantout", nil, "", nil)
			bo.BackOff(ctx, nil)
		} else { // ie. goal.wantLoggedIn
			c.mu.Lock()
			if goal.url != "" {
				c.state = StateURLVisitRequired
			} else {
				c.state = StateAuthenticating
			}
			c.mu.Unlock()

			// f names the call that was made, for error reporting.
			var url string
			var err error
			var f string
			if goal.url != "" {
				url, err = c.direct.WaitLoginURL(ctx, goal.url)
				f = "WaitLoginURL"
			} else {
				url, err = c.direct.TryLogin(ctx, goal.token, goal.flags)
				f = "TryLogin"
			}
			if err != nil {
				report(err, f)
				bo.BackOff(ctx, err)
				continue
			}
			if url != "" {
				// The server returned a URL to visit. If we were already
				// waiting on one, that's unexpected; report it but still
				// switch to the new URL.
				if goal.url != "" {
					err = fmt.Errorf("[unexpected] server required a new URL?")
					report(err, "WaitLoginURL")
				}

				c.mu.Lock()
				c.loginGoal = &LoginGoal{
					wantLoggedIn: true,
					flags:        LoginDefault,
					url:          url,
				}
				c.state = StateURLVisitRequired
				c.synced = false
				c.mu.Unlock()

				c.sendStatus("authRoutine-url", err, url, nil)
				bo.BackOff(ctx, err)
				continue
			}

			// success
			c.mu.Lock()
			c.loggedIn = true
			c.loginGoal = nil
			c.state = StateAuthenticated
			c.mu.Unlock()

			c.sendStatus("authRoutine-success", nil, "", nil)
			// Now that we're logged in, get the map routine going.
			c.cancelMapSafely()
			bo.BackOff(ctx, nil)
		}
	}
}
  337. // Expiry returns the credential expiration time, or the zero time if
  338. // the expiration time isn't known. Used in tests only.
  339. func (c *Auto) Expiry() *time.Time {
  340. c.mu.Lock()
  341. defer c.mu.Unlock()
  342. return c.expiry
  343. }
  344. // Direct returns the underlying direct client object. Used in tests
  345. // only.
  346. func (c *Auto) Direct() *Direct {
  347. return c.direct
  348. }
  349. // unpausedChanLocked returns a new channel that is closed when the
  350. // current Auto pause is unpaused.
  351. //
  352. // c.mu must be held
  353. func (c *Auto) unpausedChanLocked() <-chan struct{} {
  354. unpaused := make(chan struct{})
  355. c.unpauseWaiters = append(c.unpauseWaiters, unpaused)
  356. return unpaused
  357. }
// mapRoutine is the goroutine (launched by Start) that keeps a netmap
// poll running while the client is logged in and not paused.
//
// Each loop iteration:
//   - while paused, waits for SetPaused(false) or shutdown;
//   - while logged out, idles until the map context is cancelled or a
//     restart is requested on newMapCh;
//   - otherwise runs c.direct.PollNetMap, handing each received netmap to
//     the status callback, until the poll returns.
//
// Poll errors are reported and retried after a backoff. The routine
// returns when c.quit is observed closed, and closes c.mapDone on exit.
func (c *Auto) mapRoutine() {
	defer close(c.mapDone)
	bo := backoff.NewBackoff("mapRoutine", c.logf, 30*time.Second)

	for {
		c.mu.Lock()
		if c.paused {
			// Register for the unpause notification before dropping the
			// lock so that a concurrent SetPaused(false) can't be missed.
			unpaused := c.unpausedChanLocked()
			c.mu.Unlock()
			c.logf("mapRoutine: awaiting unpause")
			select {
			case <-unpaused:
				c.logf("mapRoutine: unpaused")
			case <-c.quit:
				c.logf("mapRoutine: quit")
				return
			}
			continue
		}
		c.logf("mapRoutine: %s", c.state)
		// Snapshot login state and context for this iteration.
		loggedIn := c.loggedIn
		ctx := c.mapCtx
		c.mu.Unlock()

		// Non-blocking check for shutdown.
		select {
		case <-c.quit:
			c.logf("mapRoutine: quit")
			return
		default:
		}

		// report logs err and, unless this iteration's context has been
		// cancelled, forwards it (wrapped with msg) to the status callback.
		report := func(err error, msg string) {
			c.logf("[v1] %s: %v", msg, err)
			err = fmt.Errorf("%s: %w", msg, err)
			// don't send status updates for context errors,
			// since context cancelation is always on purpose.
			if ctx.Err() == nil {
				c.sendStatus("mapRoutine1", err, "", nil)
			}
		}

		if !loggedIn {
			// Wait for something interesting to happen
			c.mu.Lock()
			c.synced = false
			// c.state is set by authRoutine()
			c.mu.Unlock()

			select {
			case <-ctx.Done():
				c.logf("mapRoutine: context done.")
			case <-c.newMapCh:
				c.logf("mapRoutine: new map needed while idle.")
			}
		} else {
			// Be sure this is false when we're not inside
			// PollNetMap, so that cancelMapSafely() can notify
			// us correctly.
			c.mu.Lock()
			c.inPollNetMap = false
			c.mu.Unlock()
			health.SetInPollNetMap(false)

			// The callback below runs once per netmap received.
			err := c.direct.PollNetMap(ctx, -1, func(nm *netmap.NetworkMap) {
				health.SetInPollNetMap(true)

				c.mu.Lock()

				// A restart was queued by cancelMapSafely while we were
				// waiting for this response; honor it now.
				select {
				case <-c.newMapCh:
					c.logf("[v1] mapRoutine: new map request during PollNetMap. canceling.")
					c.cancelMapLocked()

					// Don't emit this netmap; we're
					// about to request a fresh one.
					c.mu.Unlock()
					return
				default:
				}

				c.synced = true
				c.inPollNetMap = true
				if c.loggedIn {
					c.state = StateSynchronized
				}
				// Copy the expiry so c.expiry doesn't point into nm.
				exp := nm.Expiry
				c.expiry = &exp

				stillAuthed := c.loggedIn
				state := c.state

				c.mu.Unlock()

				c.logf("[v1] mapRoutine: netmap received: %s", state)
				if stillAuthed {
					c.sendStatus("mapRoutine-got-netmap", nil, "", nm)
				}
			})

			// The poll has ended; reset the streaming-related state.
			health.SetInPollNetMap(false)
			c.mu.Lock()
			c.synced = false
			c.inPollNetMap = false
			if c.state == StateSynchronized {
				c.state = StateAuthenticated
			}
			paused := c.paused
			c.mu.Unlock()

			// If we were paused meanwhile, skip error reporting and
			// backoff; the top of the loop will wait for the unpause.
			if paused {
				c.logf("mapRoutine: paused")
				continue
			}

			if err != nil {
				report(err, "PollNetMap")
				bo.BackOff(ctx, err)
				continue
			}
			bo.BackOff(ctx, nil)
		}
	}
}
  465. func (c *Auto) AuthCantContinue() bool {
  466. if c == nil {
  467. return true
  468. }
  469. c.mu.Lock()
  470. defer c.mu.Unlock()
  471. return !c.loggedIn && (c.loginGoal == nil || c.loginGoal.url != "")
  472. }
  473. // SetStatusFunc sets fn as the callback to run on any status change.
  474. func (c *Auto) SetStatusFunc(fn func(Status)) {
  475. c.mu.Lock()
  476. c.statusFunc = fn
  477. c.mu.Unlock()
  478. }
  479. func (c *Auto) SetHostinfo(hi *tailcfg.Hostinfo) {
  480. if hi == nil {
  481. panic("nil Hostinfo")
  482. }
  483. if !c.direct.SetHostinfo(hi) {
  484. // No changes. Don't log.
  485. return
  486. }
  487. // Send new Hostinfo to server
  488. c.sendNewMapRequest()
  489. }
  490. func (c *Auto) SetNetInfo(ni *tailcfg.NetInfo) {
  491. if ni == nil {
  492. panic("nil NetInfo")
  493. }
  494. if !c.direct.SetNetInfo(ni) {
  495. return
  496. }
  497. c.logf("NetInfo: %v", ni)
  498. // Send new Hostinfo (which includes NetInfo) to server
  499. c.sendNewMapRequest()
  500. }
  501. func (c *Auto) sendStatus(who string, err error, url string, nm *netmap.NetworkMap) {
  502. c.mu.Lock()
  503. state := c.state
  504. loggedIn := c.loggedIn
  505. synced := c.synced
  506. statusFunc := c.statusFunc
  507. hi := c.hostinfo
  508. c.inSendStatus++
  509. c.mu.Unlock()
  510. c.logf("[v1] sendStatus: %s: %v", who, state)
  511. var p *persist.Persist
  512. var loginFin, logoutFin *empty.Message
  513. if state == StateAuthenticated {
  514. loginFin = new(empty.Message)
  515. }
  516. if state == StateNotAuthenticated {
  517. logoutFin = new(empty.Message)
  518. }
  519. if nm != nil && loggedIn && synced {
  520. pp := c.direct.GetPersist()
  521. p = &pp
  522. } else {
  523. // don't send netmap status, as it's misleading when we're
  524. // not logged in.
  525. nm = nil
  526. }
  527. new := Status{
  528. LoginFinished: loginFin,
  529. LogoutFinished: logoutFin,
  530. URL: url,
  531. Persist: p,
  532. NetMap: nm,
  533. Hostinfo: hi,
  534. State: state,
  535. Err: err,
  536. }
  537. if statusFunc != nil {
  538. statusFunc(new)
  539. }
  540. c.mu.Lock()
  541. c.inSendStatus--
  542. c.mu.Unlock()
  543. }
  544. func (c *Auto) Login(t *tailcfg.Oauth2Token, flags LoginFlags) {
  545. c.logf("client.Login(%v, %v)", t != nil, flags)
  546. c.mu.Lock()
  547. c.loginGoal = &LoginGoal{
  548. wantLoggedIn: true,
  549. token: t,
  550. flags: flags,
  551. }
  552. c.mu.Unlock()
  553. c.cancelAuth()
  554. }
  555. func (c *Auto) StartLogout() {
  556. c.logf("client.StartLogout()")
  557. c.mu.Lock()
  558. c.loginGoal = &LoginGoal{
  559. wantLoggedIn: false,
  560. }
  561. c.mu.Unlock()
  562. c.cancelAuth()
  563. }
  564. func (c *Auto) Logout(ctx context.Context) error {
  565. c.logf("client.Logout()")
  566. errc := make(chan error, 1)
  567. c.mu.Lock()
  568. c.loginGoal = &LoginGoal{
  569. wantLoggedIn: false,
  570. loggedOutResult: errc,
  571. }
  572. c.mu.Unlock()
  573. c.cancelAuth()
  574. timer := time.NewTimer(10 * time.Second)
  575. defer timer.Stop()
  576. select {
  577. case err := <-errc:
  578. return err
  579. case <-ctx.Done():
  580. return ctx.Err()
  581. case <-timer.C:
  582. return context.DeadlineExceeded
  583. }
  584. }
  585. // UpdateEndpoints sets the client's discovered endpoints and sends
  586. // them to the control server if they've changed.
  587. //
  588. // It does not retain the provided slice.
  589. //
  590. // The localPort field is unused except for integration tests in
  591. // another repo.
  592. func (c *Auto) UpdateEndpoints(localPort uint16, endpoints []tailcfg.Endpoint) {
  593. changed := c.direct.SetEndpoints(localPort, endpoints)
  594. if changed {
  595. c.sendNewMapRequest()
  596. }
  597. }
  598. func (c *Auto) Shutdown() {
  599. c.logf("client.Shutdown()")
  600. c.mu.Lock()
  601. inSendStatus := c.inSendStatus
  602. closed := c.closed
  603. if !closed {
  604. c.closed = true
  605. c.statusFunc = nil
  606. }
  607. c.mu.Unlock()
  608. c.logf("client.Shutdown: inSendStatus=%v", inSendStatus)
  609. if !closed {
  610. c.unregisterHealthWatch()
  611. close(c.quit)
  612. c.cancelAuth()
  613. <-c.authDone
  614. c.cancelMapUnsafely()
  615. <-c.mapDone
  616. c.logf("Client.Shutdown done.")
  617. }
  618. }
  619. // NodePublicKey returns the node public key currently in use. This is
  620. // used exclusively in tests.
  621. func (c *Auto) TestOnlyNodePublicKey() key.NodePublic {
  622. priv := c.direct.GetPersist()
  623. return priv.PrivateNodeKey.Public()
  624. }
  625. func (c *Auto) TestOnlySetAuthKey(authkey string) {
  626. c.direct.mu.Lock()
  627. defer c.direct.mu.Unlock()
  628. c.direct.authKey = authkey
  629. }
  630. func (c *Auto) TestOnlyTimeNow() time.Time {
  631. return c.timeNow()
  632. }
  633. // SetDNS sends the SetDNSRequest request to the control plane server,
  634. // requesting a DNS record be created or updated.
  635. func (c *Auto) SetDNS(ctx context.Context, req *tailcfg.SetDNSRequest) error {
  636. return c.direct.SetDNS(ctx, req)
  637. }