pendopen.go 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. // Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package wgengine
  5. import (
  6. "fmt"
  7. "runtime"
  8. "time"
  9. "tailscale.com/ipn/ipnstate"
  10. "tailscale.com/net/flowtrack"
  11. "tailscale.com/net/packet"
  12. "tailscale.com/net/tsaddr"
  13. "tailscale.com/net/tstun"
  14. "tailscale.com/types/ipproto"
  15. "tailscale.com/wgengine/filter"
  16. )
  17. const tcpTimeoutBeforeDebug = 5 * time.Second
  18. type pendingOpenFlow struct {
  19. timer *time.Timer // until giving up on the flow
  20. // guarded by userspaceEngine.mu:
  21. // problem is non-zero if we got a MaybeBroken (non-terminal)
  22. // TSMP "reject" header.
  23. problem packet.TailscaleRejectReason
  24. }
  25. func (e *userspaceEngine) removeFlow(f flowtrack.Tuple) (removed bool) {
  26. e.mu.Lock()
  27. defer e.mu.Unlock()
  28. of, ok := e.pendOpen[f]
  29. if !ok {
  30. // Not a tracked flow (likely already removed)
  31. return false
  32. }
  33. of.timer.Stop()
  34. delete(e.pendOpen, f)
  35. return true
  36. }
  37. func (e *userspaceEngine) noteFlowProblemFromPeer(f flowtrack.Tuple, problem packet.TailscaleRejectReason) {
  38. e.mu.Lock()
  39. defer e.mu.Unlock()
  40. of, ok := e.pendOpen[f]
  41. if !ok {
  42. // Not a tracked flow (likely already removed)
  43. return
  44. }
  45. of.problem = problem
  46. }
  47. func (e *userspaceEngine) trackOpenPreFilterIn(pp *packet.Parsed, t *tstun.Wrapper) (res filter.Response) {
  48. res = filter.Accept // always
  49. if pp.IPProto == ipproto.TSMP {
  50. res = filter.DropSilently
  51. rh, ok := pp.AsTailscaleRejectedHeader()
  52. if !ok {
  53. return
  54. }
  55. if rh.MaybeBroken {
  56. e.noteFlowProblemFromPeer(rh.Flow(), rh.Reason)
  57. } else if f := rh.Flow(); e.removeFlow(f) {
  58. e.logf("open-conn-track: flow %v %v > %v rejected due to %v", rh.Proto, rh.Src, rh.Dst, rh.Reason)
  59. }
  60. return
  61. }
  62. if pp.IPVersion == 0 ||
  63. pp.IPProto != ipproto.TCP ||
  64. pp.TCPFlags&(packet.TCPSyn|packet.TCPRst) == 0 {
  65. return
  66. }
  67. // Either a SYN or a RST came back. Remove it in either case.
  68. f := flowtrack.Tuple{Proto: pp.IPProto, Dst: pp.Src, Src: pp.Dst} // src/dst reversed
  69. removed := e.removeFlow(f)
  70. if removed && pp.TCPFlags&packet.TCPRst != 0 {
  71. e.logf("open-conn-track: flow TCP %v got RST by peer", f)
  72. }
  73. return
  74. }
  75. func (e *userspaceEngine) trackOpenPostFilterOut(pp *packet.Parsed, t *tstun.Wrapper) (res filter.Response) {
  76. res = filter.Accept // always
  77. if pp.IPVersion == 0 ||
  78. pp.IPProto != ipproto.TCP ||
  79. pp.TCPFlags&packet.TCPAck != 0 ||
  80. pp.TCPFlags&packet.TCPSyn == 0 {
  81. return
  82. }
  83. flow := flowtrack.Tuple{Proto: pp.IPProto, Src: pp.Src, Dst: pp.Dst}
  84. // iOS likes to probe Apple IPs on all interfaces to check for connectivity.
  85. // Don't start timers tracking those. They won't succeed anyway. Avoids log spam
  86. // like:
  87. // open-conn-track: timeout opening (100.115.73.60:52501 => 17.125.252.5:443); no associated peer node
  88. if runtime.GOOS == "ios" && flow.Dst.Port() == 443 && !tsaddr.IsTailscaleIP(flow.Dst.IP()) {
  89. if _, ok := e.PeerForIP(flow.Dst.IP()); !ok {
  90. return
  91. }
  92. }
  93. timer := time.AfterFunc(tcpTimeoutBeforeDebug, func() {
  94. e.onOpenTimeout(flow)
  95. })
  96. e.mu.Lock()
  97. defer e.mu.Unlock()
  98. if e.pendOpen == nil {
  99. e.pendOpen = make(map[flowtrack.Tuple]*pendingOpenFlow)
  100. }
  101. if _, dup := e.pendOpen[flow]; dup {
  102. // Duplicates are expected when the OS retransmits. Ignore.
  103. return
  104. }
  105. e.pendOpen[flow] = &pendingOpenFlow{timer: timer}
  106. return filter.Accept
  107. }
  108. func (e *userspaceEngine) onOpenTimeout(flow flowtrack.Tuple) {
  109. e.mu.Lock()
  110. of, ok := e.pendOpen[flow]
  111. if !ok {
  112. // Not a tracked flow, or already handled & deleted.
  113. e.mu.Unlock()
  114. return
  115. }
  116. delete(e.pendOpen, flow)
  117. problem := of.problem
  118. e.mu.Unlock()
  119. if !problem.IsZero() {
  120. e.logf("open-conn-track: timeout opening %v; peer reported problem: %v", flow, problem)
  121. }
  122. // Diagnose why it might've timed out.
  123. pip, ok := e.PeerForIP(flow.Dst.IP())
  124. if !ok {
  125. e.logf("open-conn-track: timeout opening %v; no associated peer node", flow)
  126. return
  127. }
  128. n := pip.Node
  129. if n.DiscoKey.IsZero() {
  130. e.logf("open-conn-track: timeout opening %v; peer node %v running pre-0.100", flow, n.Key.ShortString())
  131. return
  132. }
  133. if n.DERP == "" {
  134. e.logf("open-conn-track: timeout opening %v; peer node %v not connected to any DERP relay", flow, n.Key.ShortString())
  135. return
  136. }
  137. var ps *ipnstate.PeerStatusLite
  138. if st, err := e.getStatus(); err == nil {
  139. for _, v := range st.Peers {
  140. if v.NodeKey == n.Key {
  141. v := v // copy
  142. ps = &v
  143. }
  144. }
  145. } else {
  146. e.logf("open-conn-track: timeout opening %v to node %v; failed to get engine status: %v", flow, n.Key.ShortString(), err)
  147. return
  148. }
  149. if ps == nil {
  150. onlyZeroRoute := true // whether peerForIP returned n only because its /0 route matched
  151. for _, r := range n.AllowedIPs {
  152. if r.Bits() != 0 && r.Contains(flow.Dst.IP()) {
  153. onlyZeroRoute = false
  154. break
  155. }
  156. }
  157. if onlyZeroRoute {
  158. // This node was returned by peerForIP because
  159. // its exit node /0 route(s) matched, but this
  160. // might not be the exit node that's currently
  161. // selected. Rather than log misleading
  162. // errors, just don't log at all for now.
  163. // TODO(bradfitz): update this code to be
  164. // exit-node-aware and make peerForIP return
  165. // the node of the currently selected exit
  166. // node.
  167. return
  168. }
  169. e.logf("open-conn-track: timeout opening %v; target node %v in netmap but unknown to wireguard", flow, n.Key.ShortString())
  170. return
  171. }
  172. // TODO(bradfitz): figure out what PeerStatus.LastHandshake
  173. // is. It appears to be the last time we sent a handshake,
  174. // which isn't what I expected. I thought it was when a
  175. // handshake completed, which is what I want.
  176. _ = ps.LastHandshake
  177. online := "?"
  178. if n.Online != nil {
  179. if *n.Online {
  180. online = "yes"
  181. } else {
  182. online = "no"
  183. }
  184. }
  185. if n.LastSeen != nil && online != "yes" {
  186. online += fmt.Sprintf(", lastseen=%v", durFmt(*n.LastSeen))
  187. }
  188. e.logf("open-conn-track: timeout opening %v to node %v; online=%v, lastRecv=%v",
  189. flow, n.Key.ShortString(),
  190. online,
  191. e.magicConn.LastRecvActivityOfNodeKey(n.Key))
  192. }
  193. func durFmt(t time.Time) string {
  194. if t.IsZero() {
  195. return "never"
  196. }
  197. d := time.Since(t).Round(time.Second)
  198. if d < 10*time.Minute {
  199. // node.LastSeen times are rounded very coarsely, and
  200. // we compare times from different clocks (server vs
  201. // local), so negative is common when close. Format as
  202. // "recent" if negative or actually recent.
  203. return "recent"
  204. }
  205. return d.String()
  206. }