pendopen.go 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. //go:build !ts_omit_debug
  4. package wgengine
  5. import (
  6. "fmt"
  7. "net/netip"
  8. "runtime"
  9. "strings"
  10. "sync"
  11. "time"
  12. "github.com/gaissmai/bart"
  13. "tailscale.com/net/flowtrack"
  14. "tailscale.com/net/packet"
  15. "tailscale.com/net/tstun"
  16. "tailscale.com/types/ipproto"
  17. "tailscale.com/util/mak"
  18. "tailscale.com/wgengine/filter"
  19. )
  20. type flowtrackTuple = flowtrack.Tuple
  21. const tcpTimeoutBeforeDebug = 5 * time.Second
  22. type pendingOpenFlow struct {
  23. timer *time.Timer // until giving up on the flow
  24. // guarded by userspaceEngine.mu:
  25. // problem is non-zero if we got a MaybeBroken (non-terminal)
  26. // TSMP "reject" header.
  27. problem packet.TailscaleRejectReason
  28. }
  29. func (e *userspaceEngine) removeFlow(f flowtrack.Tuple) (removed bool) {
  30. e.mu.Lock()
  31. defer e.mu.Unlock()
  32. of, ok := e.pendOpen[f]
  33. if !ok {
  34. // Not a tracked flow (likely already removed)
  35. return false
  36. }
  37. of.timer.Stop()
  38. delete(e.pendOpen, f)
  39. return true
  40. }
  41. func (e *userspaceEngine) noteFlowProblemFromPeer(f flowtrack.Tuple, problem packet.TailscaleRejectReason) {
  42. e.mu.Lock()
  43. defer e.mu.Unlock()
  44. of, ok := e.pendOpen[f]
  45. if !ok {
  46. // Not a tracked flow (likely already removed)
  47. return
  48. }
  49. of.problem = problem
  50. }
  51. func tsRejectFlow(rh packet.TailscaleRejectedHeader) flowtrack.Tuple {
  52. return flowtrack.MakeTuple(rh.Proto, rh.Src, rh.Dst)
  53. }
  54. func (e *userspaceEngine) trackOpenPreFilterIn(pp *packet.Parsed, t *tstun.Wrapper) (res filter.Response) {
  55. res = filter.Accept // always
  56. if pp.IPProto == ipproto.TSMP {
  57. res = filter.DropSilently
  58. rh, ok := pp.AsTailscaleRejectedHeader()
  59. if !ok {
  60. return
  61. }
  62. if rh.MaybeBroken {
  63. e.noteFlowProblemFromPeer(tsRejectFlow(rh), rh.Reason)
  64. } else if f := tsRejectFlow(rh); e.removeFlow(f) {
  65. e.logf("open-conn-track: flow %v %v > %v rejected due to %v", rh.Proto, rh.Src, rh.Dst, rh.Reason)
  66. }
  67. return
  68. }
  69. if pp.IPVersion == 0 ||
  70. pp.IPProto != ipproto.TCP ||
  71. pp.TCPFlags&(packet.TCPSyn|packet.TCPRst) == 0 {
  72. return
  73. }
  74. // Either a SYN or a RST came back. Remove it in either case.
  75. f := flowtrack.MakeTuple(pp.IPProto, pp.Dst, pp.Src) // src/dst reversed
  76. removed := e.removeFlow(f)
  77. if removed && pp.TCPFlags&packet.TCPRst != 0 {
  78. e.logf("open-conn-track: flow TCP %v got RST by peer", f)
  79. }
  80. return
  81. }
  82. var (
  83. appleIPRange = netip.MustParsePrefix("17.0.0.0/8")
  84. canonicalIPs = sync.OnceValue(func() (checkIPFunc func(netip.Addr) bool) {
  85. // https://bgp.he.net/AS41231#_prefixes
  86. t := &bart.Table[bool]{}
  87. for _, s := range strings.Fields(`
  88. 91.189.89.0/24
  89. 91.189.91.0/24
  90. 91.189.92.0/24
  91. 91.189.93.0/24
  92. 91.189.94.0/24
  93. 91.189.95.0/24
  94. 162.213.32.0/24
  95. 162.213.34.0/24
  96. 162.213.35.0/24
  97. 185.125.188.0/23
  98. 185.125.190.0/24
  99. 194.169.254.0/24`) {
  100. t.Insert(netip.MustParsePrefix(s), true)
  101. }
  102. return func(ip netip.Addr) bool {
  103. v, _ := t.Lookup(ip)
  104. return v
  105. }
  106. })
  107. )
  108. // isOSNetworkProbe reports whether the target is likely a network
  109. // connectivity probe target from e.g. iOS or Ubuntu network-manager.
  110. //
  111. // iOS likes to probe Apple IPs on all interfaces to check for connectivity.
  112. // Don't start timers tracking those. They won't succeed anyway. Avoids log
  113. // spam like:
  114. func (e *userspaceEngine) isOSNetworkProbe(dst netip.AddrPort) bool {
  115. // iOS had log spam like:
  116. // open-conn-track: timeout opening (100.115.73.60:52501 => 17.125.252.5:443); no associated peer node
  117. if runtime.GOOS == "ios" && dst.Port() == 443 && appleIPRange.Contains(dst.Addr()) {
  118. if _, ok := e.PeerForIP(dst.Addr()); !ok {
  119. return true
  120. }
  121. }
  122. // NetworkManager; https://github.com/tailscale/tailscale/issues/13687
  123. // open-conn-track: timeout opening (TCP 100.96.229.119:42798 => 185.125.190.49:80); no associated peer node
  124. if runtime.GOOS == "linux" && dst.Port() == 80 && canonicalIPs()(dst.Addr()) {
  125. if _, ok := e.PeerForIP(dst.Addr()); !ok {
  126. return true
  127. }
  128. }
  129. return false
  130. }
  131. func (e *userspaceEngine) trackOpenPostFilterOut(pp *packet.Parsed, t *tstun.Wrapper) (res filter.Response) {
  132. res = filter.Accept // always
  133. if pp.IPVersion == 0 ||
  134. pp.IPProto != ipproto.TCP ||
  135. pp.TCPFlags&packet.TCPAck != 0 ||
  136. pp.TCPFlags&packet.TCPSyn == 0 {
  137. return
  138. }
  139. if e.isOSNetworkProbe(pp.Dst) {
  140. return
  141. }
  142. flow := flowtrack.MakeTuple(pp.IPProto, pp.Src, pp.Dst)
  143. e.mu.Lock()
  144. defer e.mu.Unlock()
  145. if _, dup := e.pendOpen[flow]; dup {
  146. // Duplicates are expected when the OS retransmits. Ignore.
  147. return
  148. }
  149. timer := time.AfterFunc(tcpTimeoutBeforeDebug, func() {
  150. e.onOpenTimeout(flow)
  151. })
  152. mak.Set(&e.pendOpen, flow, &pendingOpenFlow{timer: timer})
  153. return filter.Accept
  154. }
  155. func (e *userspaceEngine) onOpenTimeout(flow flowtrack.Tuple) {
  156. e.mu.Lock()
  157. of, ok := e.pendOpen[flow]
  158. if !ok {
  159. // Not a tracked flow, or already handled & deleted.
  160. e.mu.Unlock()
  161. return
  162. }
  163. delete(e.pendOpen, flow)
  164. problem := of.problem
  165. e.mu.Unlock()
  166. if !problem.IsZero() {
  167. e.logf("open-conn-track: timeout opening %v; peer reported problem: %v", flow, problem)
  168. }
  169. // Diagnose why it might've timed out.
  170. pip, ok := e.PeerForIP(flow.DstAddr())
  171. if !ok {
  172. e.logf("open-conn-track: timeout opening %v; no associated peer node", flow)
  173. return
  174. }
  175. n := pip.Node
  176. if !n.IsWireGuardOnly() {
  177. if n.DiscoKey().IsZero() {
  178. e.logf("open-conn-track: timeout opening %v; peer node %v running pre-0.100", flow, n.Key().ShortString())
  179. return
  180. }
  181. if n.HomeDERP() == 0 {
  182. e.logf("open-conn-track: timeout opening %v; peer node %v not connected to any DERP relay", flow, n.Key().ShortString())
  183. return
  184. }
  185. }
  186. ps, found := e.getPeerStatusLite(n.Key())
  187. if !found {
  188. onlyZeroRoute := true // whether peerForIP returned n only because its /0 route matched
  189. for _, r := range n.AllowedIPs().All() {
  190. if r.Bits() != 0 && r.Contains(flow.DstAddr()) {
  191. onlyZeroRoute = false
  192. break
  193. }
  194. }
  195. if onlyZeroRoute {
  196. // This node was returned by peerForIP because
  197. // its exit node /0 route(s) matched, but this
  198. // might not be the exit node that's currently
  199. // selected. Rather than log misleading
  200. // errors, just don't log at all for now.
  201. // TODO(bradfitz): update this code to be
  202. // exit-node-aware and make peerForIP return
  203. // the node of the currently selected exit
  204. // node.
  205. return
  206. }
  207. e.logf("open-conn-track: timeout opening %v; target node %v in netmap but unknown to WireGuard", flow, n.Key().ShortString())
  208. return
  209. }
  210. // TODO(bradfitz): figure out what PeerStatus.LastHandshake
  211. // is. It appears to be the last time we sent a handshake,
  212. // which isn't what I expected. I thought it was when a
  213. // handshake completed, which is what I want.
  214. _ = ps.LastHandshake
  215. online := "?"
  216. if n.IsWireGuardOnly() {
  217. online = "wg"
  218. } else {
  219. if v, ok := n.Online().GetOk(); ok {
  220. if v {
  221. online = "yes"
  222. } else {
  223. online = "no"
  224. }
  225. }
  226. if lastSeen, ok := n.LastSeen().GetOk(); ok && online != "yes" {
  227. online += fmt.Sprintf(", lastseen=%v", durFmt(lastSeen))
  228. }
  229. }
  230. e.logf("open-conn-track: timeout opening %v to node %v; online=%v, lastRecv=%v",
  231. flow, n.Key().ShortString(),
  232. online,
  233. e.magicConn.LastRecvActivityOfNodeKey(n.Key()))
  234. }
  235. func durFmt(t time.Time) string {
  236. if t.IsZero() {
  237. return "never"
  238. }
  239. d := time.Since(t).Round(time.Second)
  240. if d < 10*time.Minute {
  241. // node.LastSeen times are rounded very coarsely, and
  242. // we compare times from different clocks (server vs
  243. // local), so negative is common when close. Format as
  244. // "recent" if negative or actually recent.
  245. return "recent"
  246. }
  247. return d.String()
  248. }