monitor.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. // Package monitor provides facilities for monitoring network
  4. // interface and route changes. It primarily exists to know when
  5. // portable devices move between different networks.
  6. package monitor
  7. import (
  8. "encoding/json"
  9. "errors"
  10. "net/netip"
  11. "runtime"
  12. "sync"
  13. "time"
  14. "tailscale.com/net/interfaces"
  15. "tailscale.com/types/logger"
  16. "tailscale.com/util/set"
  17. )
  18. // pollWallTimeInterval is how often we check the time to check
  19. // for big jumps in wall (non-monotonic) time as a backup mechanism
  20. // to get notified of a sleeping device waking back up.
  21. // Usually there are also minor network change events on wake that let
  22. // us check the wall time sooner than this.
  23. const pollWallTimeInterval = 15 * time.Second
// message represents a message returned from an osMon.
type message interface {
	// ignore reports whether we should ignore this message, i.e. whether
	// it should not by itself trigger a network change event.
	ignore() bool
}
// osMon is the interface that each operating system-specific
// implementation of the link monitor must implement.
type osMon interface {
	// Close shuts down the OS-level monitor. It is called by Mon.Close.
	Close() error

	// Receive returns a new network interface change message. It
	// should block until there's either something to return, or
	// until the osMon is closed. After a Close, the returned
	// error is ignored.
	Receive() (message, error)

	// IsInterestingInterface reports whether the provided interface should
	// be considered for network change events.
	IsInterestingInterface(iface string) bool
}
// Mon represents a monitoring instance.
//
// A Mon is created with New, activated with Start and shut down with Close.
type Mon struct {
	logf   logger.Logf
	om     osMon         // nil means not supported on this platform
	change chan struct{} // buffered (size 1) wake-up signal consumed by debounce
	stop   chan struct{} // closed by Close

	mu         sync.Mutex                           // guards all following fields
	cbs        set.HandleSet[interfaces.ChangeFunc] // change callbacks, run by debounce
	ruleDelCB  set.HandleSet[RuleDeleteCallback]    // rule-deletion callbacks, run by notifyRuleDeleted
	ifState    *interfaces.State                    // latest interface snapshot; replaced by debounce on change
	gwValid    bool                                 // whether gw and gwSelfIP are valid
	gw         netip.Addr                           // our gateway's IP
	gwSelfIP   netip.Addr                           // our own IP address (that corresponds to gw)
	started    bool                                 // set by Start
	closed     bool                                 // set by Close
	goroutines sync.WaitGroup                       // tracks the pump and debounce goroutines
	wallTimer  *time.Timer                          // nil until Started; re-armed AfterFunc per tick
	lastWall   time.Time                            // wall time observed by the most recent time-jump check
	timeJumped bool                                 // whether we need to send a changed=true after a big time jump
}
  62. // New instantiates and starts a monitoring instance.
  63. // The returned monitor is inactive until it's started by the Start method.
  64. // Use RegisterChangeCallback to get notified of network changes.
  65. func New(logf logger.Logf) (*Mon, error) {
  66. logf = logger.WithPrefix(logf, "monitor: ")
  67. m := &Mon{
  68. logf: logf,
  69. change: make(chan struct{}, 1),
  70. stop: make(chan struct{}),
  71. lastWall: wallTime(),
  72. }
  73. st, err := m.interfaceStateUncached()
  74. if err != nil {
  75. return nil, err
  76. }
  77. m.ifState = st
  78. m.om, err = newOSMon(logf, m)
  79. if err != nil {
  80. return nil, err
  81. }
  82. if m.om == nil {
  83. return nil, errors.New("newOSMon returned nil, nil")
  84. }
  85. return m, nil
  86. }
  87. // InterfaceState returns the latest snapshot of the machine's network
  88. // interfaces.
  89. //
  90. // The returned value is owned by Mon; it must not be modified.
  91. func (m *Mon) InterfaceState() *interfaces.State {
  92. m.mu.Lock()
  93. defer m.mu.Unlock()
  94. return m.ifState
  95. }
  96. func (m *Mon) interfaceStateUncached() (*interfaces.State, error) {
  97. return interfaces.GetState()
  98. }
  99. // GatewayAndSelfIP returns the current network's default gateway, and
  100. // the machine's default IP for that gateway.
  101. //
  102. // It's the same as interfaces.LikelyHomeRouterIP, but it caches the
  103. // result until the monitor detects a network change.
  104. func (m *Mon) GatewayAndSelfIP() (gw, myIP netip.Addr, ok bool) {
  105. m.mu.Lock()
  106. defer m.mu.Unlock()
  107. if m.gwValid {
  108. return m.gw, m.gwSelfIP, true
  109. }
  110. gw, myIP, ok = interfaces.LikelyHomeRouterIP()
  111. if ok {
  112. m.gw, m.gwSelfIP, m.gwValid = gw, myIP, true
  113. }
  114. return gw, myIP, ok
  115. }
  116. // RegisterChangeCallback adds callback to the set of parties to be
  117. // notified (in their own goroutine) when the network state changes.
  118. // To remove this callback, call unregister (or close the monitor).
  119. func (m *Mon) RegisterChangeCallback(callback interfaces.ChangeFunc) (unregister func()) {
  120. m.mu.Lock()
  121. defer m.mu.Unlock()
  122. handle := m.cbs.Add(callback)
  123. return func() {
  124. m.mu.Lock()
  125. defer m.mu.Unlock()
  126. delete(m.cbs, handle)
  127. }
  128. }
// RuleDeleteCallback is a callback when a Linux IP policy routing
// rule is deleted. The table is the table number (52, 253, 354) and
// priority is the priority order number (for Tailscale rules
// currently: 5210, 5230, 5250, 5270).
//
// NOTE(review): table is a uint8, so the listed value 354 cannot be
// represented; confirm which table numbers are actually delivered.
type RuleDeleteCallback func(table uint8, priority uint32)
  134. // RegisterRuleDeleteCallback adds callback to the set of parties to be
  135. // notified (in their own goroutine) when a Linux ip rule is deleted.
  136. // To remove this callback, call unregister (or close the monitor).
  137. func (m *Mon) RegisterRuleDeleteCallback(callback RuleDeleteCallback) (unregister func()) {
  138. m.mu.Lock()
  139. defer m.mu.Unlock()
  140. handle := m.ruleDelCB.Add(callback)
  141. return func() {
  142. m.mu.Lock()
  143. defer m.mu.Unlock()
  144. delete(m.ruleDelCB, handle)
  145. }
  146. }
  147. // Start starts the monitor.
  148. // A monitor can only be started & closed once.
  149. func (m *Mon) Start() {
  150. m.mu.Lock()
  151. defer m.mu.Unlock()
  152. if m.started || m.closed {
  153. return
  154. }
  155. m.started = true
  156. if shouldMonitorTimeJump {
  157. m.wallTimer = time.AfterFunc(pollWallTimeInterval, m.pollWallTime)
  158. }
  159. if m.om == nil {
  160. return
  161. }
  162. m.goroutines.Add(2)
  163. go m.pump()
  164. go m.debounce()
  165. }
  166. // Close closes the monitor.
  167. func (m *Mon) Close() error {
  168. m.mu.Lock()
  169. if m.closed {
  170. m.mu.Unlock()
  171. return nil
  172. }
  173. m.closed = true
  174. close(m.stop)
  175. if m.wallTimer != nil {
  176. m.wallTimer.Stop()
  177. }
  178. var err error
  179. if m.om != nil {
  180. err = m.om.Close()
  181. }
  182. started := m.started
  183. m.mu.Unlock()
  184. if started {
  185. m.goroutines.Wait()
  186. }
  187. return err
  188. }
  189. // InjectEvent forces the monitor to pretend there was a network
  190. // change and re-check the state of the network. Any registered
  191. // ChangeFunc callbacks will be called within the event coalescing
  192. // period (under a fraction of a second).
  193. func (m *Mon) InjectEvent() {
  194. select {
  195. case m.change <- struct{}{}:
  196. default:
  197. // Another change signal is already
  198. // buffered. Debounce will wake up soon
  199. // enough.
  200. }
  201. }
  202. func (m *Mon) stopped() bool {
  203. select {
  204. case <-m.stop:
  205. return true
  206. default:
  207. return false
  208. }
  209. }
  210. // pump continuously retrieves messages from the connection, notifying
  211. // the change channel of changes, and stopping when a stop is issued.
  212. func (m *Mon) pump() {
  213. defer m.goroutines.Done()
  214. for !m.stopped() {
  215. msg, err := m.om.Receive()
  216. if err != nil {
  217. if m.stopped() {
  218. return
  219. }
  220. // Keep retrying while we're not closed.
  221. m.logf("error from link monitor: %v", err)
  222. time.Sleep(time.Second)
  223. continue
  224. }
  225. if rdm, ok := msg.(ipRuleDeletedMessage); ok {
  226. m.notifyRuleDeleted(rdm)
  227. continue
  228. }
  229. if msg.ignore() {
  230. continue
  231. }
  232. m.InjectEvent()
  233. }
  234. }
  235. func (m *Mon) notifyRuleDeleted(rdm ipRuleDeletedMessage) {
  236. m.mu.Lock()
  237. defer m.mu.Unlock()
  238. for _, cb := range m.ruleDelCB {
  239. go cb(rdm.table, rdm.priority)
  240. }
  241. }
  242. // isInterestingInterface reports whether the provided interface should be
  243. // considered when checking for network state changes.
  244. // The ips parameter should be the IPs of the provided interface.
  245. func (m *Mon) isInterestingInterface(i interfaces.Interface, ips []netip.Prefix) bool {
  246. return m.om.IsInterestingInterface(i.Name) && interfaces.UseInterestingInterfaces(i, ips)
  247. }
  248. // debounce calls the callback function with a delay between events
  249. // and exits when a stop is issued.
  250. func (m *Mon) debounce() {
  251. defer m.goroutines.Done()
  252. for {
  253. select {
  254. case <-m.stop:
  255. return
  256. case <-m.change:
  257. }
  258. if curState, err := m.interfaceStateUncached(); err != nil {
  259. m.logf("interfaces.State: %v", err)
  260. } else {
  261. m.mu.Lock()
  262. oldState := m.ifState
  263. changed := !curState.EqualFiltered(oldState, m.isInterestingInterface, interfaces.UseInterestingIPs)
  264. if changed {
  265. m.gwValid = false
  266. m.ifState = curState
  267. if s1, s2 := oldState.String(), curState.String(); s1 == s2 {
  268. m.logf("[unexpected] network state changed, but stringification didn't: %v", s1)
  269. m.logf("[unexpected] old: %s", jsonSummary(oldState))
  270. m.logf("[unexpected] new: %s", jsonSummary(curState))
  271. }
  272. }
  273. // See if we have a queued or new time jump signal.
  274. if shouldMonitorTimeJump && m.checkWallTimeAdvanceLocked() {
  275. m.resetTimeJumpedLocked()
  276. if !changed {
  277. // Only log if it wasn't an interesting change.
  278. m.logf("time jumped (probably wake from sleep); synthesizing major change event")
  279. changed = true
  280. }
  281. }
  282. for _, cb := range m.cbs {
  283. go cb(changed, m.ifState)
  284. }
  285. m.mu.Unlock()
  286. }
  287. select {
  288. case <-m.stop:
  289. return
  290. case <-time.After(250 * time.Millisecond):
  291. }
  292. }
  293. }
  294. func jsonSummary(x any) any {
  295. j, err := json.Marshal(x)
  296. if err != nil {
  297. return err
  298. }
  299. return j
  300. }
  301. func wallTime() time.Time {
  302. // From time package's docs: "The canonical way to strip a
  303. // monotonic clock reading is to use t = t.Round(0)."
  304. return time.Now().Round(0)
  305. }
  306. func (m *Mon) pollWallTime() {
  307. m.mu.Lock()
  308. defer m.mu.Unlock()
  309. if m.closed {
  310. return
  311. }
  312. if m.checkWallTimeAdvanceLocked() {
  313. m.InjectEvent()
  314. }
  315. m.wallTimer.Reset(pollWallTimeInterval)
  316. }
  317. // shouldMonitorTimeJump is whether we keep a regular periodic timer running in
  318. // the background watching for jumps in wall time.
  319. //
  320. // We don't do this on mobile platforms for battery reasons, and because these
  321. // platforms don't really sleep in the same way.
  322. const shouldMonitorTimeJump = runtime.GOOS != "android" && runtime.GOOS != "ios"
  323. // checkWallTimeAdvanceLocked reports whether wall time jumped more than 150% of
  324. // pollWallTimeInterval, indicating we probably just came out of sleep. Once a
  325. // time jump is detected it must be reset by calling resetTimeJumpedLocked.
  326. func (m *Mon) checkWallTimeAdvanceLocked() bool {
  327. if !shouldMonitorTimeJump {
  328. panic("unreachable") // if callers are correct
  329. }
  330. now := wallTime()
  331. if now.Sub(m.lastWall) > pollWallTimeInterval*3/2 {
  332. m.timeJumped = true // it is reset by debounce.
  333. }
  334. m.lastWall = now
  335. return m.timeJumped
  336. }
// resetTimeJumpedLocked consumes the signal set by checkWallTimeAdvanceLocked
// by clearing m.timeJumped.
//
// m.mu must be held.
func (m *Mon) resetTimeJumpedLocked() {
	m.timeJumped = false
}
  341. type ipRuleDeletedMessage struct {
  342. table uint8
  343. priority uint32
  344. }
  345. func (ipRuleDeletedMessage) ignore() bool { return true }