2
0

pendopen.go 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. package wgengine
  4. import (
  5. "fmt"
  6. "net/netip"
  7. "runtime"
  8. "strings"
  9. "sync"
  10. "time"
  11. "github.com/gaissmai/bart"
  12. "tailscale.com/net/flowtrack"
  13. "tailscale.com/net/packet"
  14. "tailscale.com/net/tstun"
  15. "tailscale.com/types/ipproto"
  16. "tailscale.com/util/mak"
  17. "tailscale.com/wgengine/filter"
  18. )
  19. const tcpTimeoutBeforeDebug = 5 * time.Second
  20. type pendingOpenFlow struct {
  21. timer *time.Timer // until giving up on the flow
  22. // guarded by userspaceEngine.mu:
  23. // problem is non-zero if we got a MaybeBroken (non-terminal)
  24. // TSMP "reject" header.
  25. problem packet.TailscaleRejectReason
  26. }
  27. func (e *userspaceEngine) removeFlow(f flowtrack.Tuple) (removed bool) {
  28. e.mu.Lock()
  29. defer e.mu.Unlock()
  30. of, ok := e.pendOpen[f]
  31. if !ok {
  32. // Not a tracked flow (likely already removed)
  33. return false
  34. }
  35. of.timer.Stop()
  36. delete(e.pendOpen, f)
  37. return true
  38. }
  39. func (e *userspaceEngine) noteFlowProblemFromPeer(f flowtrack.Tuple, problem packet.TailscaleRejectReason) {
  40. e.mu.Lock()
  41. defer e.mu.Unlock()
  42. of, ok := e.pendOpen[f]
  43. if !ok {
  44. // Not a tracked flow (likely already removed)
  45. return
  46. }
  47. of.problem = problem
  48. }
  49. func (e *userspaceEngine) trackOpenPreFilterIn(pp *packet.Parsed, t *tstun.Wrapper) (res filter.Response) {
  50. res = filter.Accept // always
  51. if pp.IPProto == ipproto.TSMP {
  52. res = filter.DropSilently
  53. rh, ok := pp.AsTailscaleRejectedHeader()
  54. if !ok {
  55. return
  56. }
  57. if rh.MaybeBroken {
  58. e.noteFlowProblemFromPeer(rh.Flow(), rh.Reason)
  59. } else if f := rh.Flow(); e.removeFlow(f) {
  60. e.logf("open-conn-track: flow %v %v > %v rejected due to %v", rh.Proto, rh.Src, rh.Dst, rh.Reason)
  61. }
  62. return
  63. }
  64. if pp.IPVersion == 0 ||
  65. pp.IPProto != ipproto.TCP ||
  66. pp.TCPFlags&(packet.TCPSyn|packet.TCPRst) == 0 {
  67. return
  68. }
  69. // Either a SYN or a RST came back. Remove it in either case.
  70. f := flowtrack.MakeTuple(pp.IPProto, pp.Dst, pp.Src) // src/dst reversed
  71. removed := e.removeFlow(f)
  72. if removed && pp.TCPFlags&packet.TCPRst != 0 {
  73. e.logf("open-conn-track: flow TCP %v got RST by peer", f)
  74. }
  75. return
  76. }
  77. var (
  78. appleIPRange = netip.MustParsePrefix("17.0.0.0/8")
  79. canonicalIPs = sync.OnceValue(func() (checkIPFunc func(netip.Addr) bool) {
  80. // https://bgp.he.net/AS41231#_prefixes
  81. t := &bart.Table[bool]{}
  82. for _, s := range strings.Fields(`
  83. 91.189.89.0/24
  84. 91.189.91.0/24
  85. 91.189.92.0/24
  86. 91.189.93.0/24
  87. 91.189.94.0/24
  88. 91.189.95.0/24
  89. 162.213.32.0/24
  90. 162.213.34.0/24
  91. 162.213.35.0/24
  92. 185.125.188.0/23
  93. 185.125.190.0/24
  94. 194.169.254.0/24`) {
  95. t.Insert(netip.MustParsePrefix(s), true)
  96. }
  97. return func(ip netip.Addr) bool {
  98. v, _ := t.Lookup(ip)
  99. return v
  100. }
  101. })
  102. )
  103. // isOSNetworkProbe reports whether the target is likely a network
  104. // connectivity probe target from e.g. iOS or Ubuntu network-manager.
  105. //
  106. // iOS likes to probe Apple IPs on all interfaces to check for connectivity.
  107. // Don't start timers tracking those. They won't succeed anyway. Avoids log
  108. // spam like:
  109. func (e *userspaceEngine) isOSNetworkProbe(dst netip.AddrPort) bool {
  110. // iOS had log spam like:
  111. // open-conn-track: timeout opening (100.115.73.60:52501 => 17.125.252.5:443); no associated peer node
  112. if runtime.GOOS == "ios" && dst.Port() == 443 && appleIPRange.Contains(dst.Addr()) {
  113. if _, ok := e.PeerForIP(dst.Addr()); !ok {
  114. return true
  115. }
  116. }
  117. // NetworkManager; https://github.com/tailscale/tailscale/issues/13687
  118. // open-conn-track: timeout opening (TCP 100.96.229.119:42798 => 185.125.190.49:80); no associated peer node
  119. if runtime.GOOS == "linux" && dst.Port() == 80 && canonicalIPs()(dst.Addr()) {
  120. if _, ok := e.PeerForIP(dst.Addr()); !ok {
  121. return true
  122. }
  123. }
  124. return false
  125. }
  126. func (e *userspaceEngine) trackOpenPostFilterOut(pp *packet.Parsed, t *tstun.Wrapper) (res filter.Response) {
  127. res = filter.Accept // always
  128. if pp.IPVersion == 0 ||
  129. pp.IPProto != ipproto.TCP ||
  130. pp.TCPFlags&packet.TCPAck != 0 ||
  131. pp.TCPFlags&packet.TCPSyn == 0 {
  132. return
  133. }
  134. if e.isOSNetworkProbe(pp.Dst) {
  135. return
  136. }
  137. flow := flowtrack.MakeTuple(pp.IPProto, pp.Src, pp.Dst)
  138. e.mu.Lock()
  139. defer e.mu.Unlock()
  140. if _, dup := e.pendOpen[flow]; dup {
  141. // Duplicates are expected when the OS retransmits. Ignore.
  142. return
  143. }
  144. timer := time.AfterFunc(tcpTimeoutBeforeDebug, func() {
  145. e.onOpenTimeout(flow)
  146. })
  147. mak.Set(&e.pendOpen, flow, &pendingOpenFlow{timer: timer})
  148. return filter.Accept
  149. }
  150. func (e *userspaceEngine) onOpenTimeout(flow flowtrack.Tuple) {
  151. e.mu.Lock()
  152. of, ok := e.pendOpen[flow]
  153. if !ok {
  154. // Not a tracked flow, or already handled & deleted.
  155. e.mu.Unlock()
  156. return
  157. }
  158. delete(e.pendOpen, flow)
  159. problem := of.problem
  160. e.mu.Unlock()
  161. if !problem.IsZero() {
  162. e.logf("open-conn-track: timeout opening %v; peer reported problem: %v", flow, problem)
  163. }
  164. // Diagnose why it might've timed out.
  165. pip, ok := e.PeerForIP(flow.DstAddr())
  166. if !ok {
  167. e.logf("open-conn-track: timeout opening %v; no associated peer node", flow)
  168. return
  169. }
  170. n := pip.Node
  171. if !n.IsWireGuardOnly() {
  172. if n.DiscoKey().IsZero() {
  173. e.logf("open-conn-track: timeout opening %v; peer node %v running pre-0.100", flow, n.Key().ShortString())
  174. return
  175. }
  176. if n.HomeDERP() == 0 {
  177. e.logf("open-conn-track: timeout opening %v; peer node %v not connected to any DERP relay", flow, n.Key().ShortString())
  178. return
  179. }
  180. }
  181. ps, found := e.getPeerStatusLite(n.Key())
  182. if !found {
  183. onlyZeroRoute := true // whether peerForIP returned n only because its /0 route matched
  184. for _, r := range n.AllowedIPs().All() {
  185. if r.Bits() != 0 && r.Contains(flow.DstAddr()) {
  186. onlyZeroRoute = false
  187. break
  188. }
  189. }
  190. if onlyZeroRoute {
  191. // This node was returned by peerForIP because
  192. // its exit node /0 route(s) matched, but this
  193. // might not be the exit node that's currently
  194. // selected. Rather than log misleading
  195. // errors, just don't log at all for now.
  196. // TODO(bradfitz): update this code to be
  197. // exit-node-aware and make peerForIP return
  198. // the node of the currently selected exit
  199. // node.
  200. return
  201. }
  202. e.logf("open-conn-track: timeout opening %v; target node %v in netmap but unknown to WireGuard", flow, n.Key().ShortString())
  203. return
  204. }
  205. // TODO(bradfitz): figure out what PeerStatus.LastHandshake
  206. // is. It appears to be the last time we sent a handshake,
  207. // which isn't what I expected. I thought it was when a
  208. // handshake completed, which is what I want.
  209. _ = ps.LastHandshake
  210. online := "?"
  211. if n.IsWireGuardOnly() {
  212. online = "wg"
  213. } else {
  214. if v, ok := n.Online().GetOk(); ok {
  215. if v {
  216. online = "yes"
  217. } else {
  218. online = "no"
  219. }
  220. }
  221. if lastSeen, ok := n.LastSeen().GetOk(); ok && online != "yes" {
  222. online += fmt.Sprintf(", lastseen=%v", durFmt(lastSeen))
  223. }
  224. }
  225. e.logf("open-conn-track: timeout opening %v to node %v; online=%v, lastRecv=%v",
  226. flow, n.Key().ShortString(),
  227. online,
  228. e.magicConn.LastRecvActivityOfNodeKey(n.Key()))
  229. }
  230. func durFmt(t time.Time) string {
  231. if t.IsZero() {
  232. return "never"
  233. }
  234. d := time.Since(t).Round(time.Second)
  235. if d < 10*time.Minute {
  236. // node.LastSeen times are rounded very coarsely, and
  237. // we compare times from different clocks (server vs
  238. // local), so negative is common when close. Format as
  239. // "recent" if negative or actually recent.
  240. return "recent"
  241. }
  242. return d.String()
  243. }