pendopen.go 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. // Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package wgengine
  5. import (
  6. "fmt"
  7. "os"
  8. "runtime"
  9. "strconv"
  10. "time"
  11. "tailscale.com/ipn/ipnstate"
  12. "tailscale.com/net/flowtrack"
  13. "tailscale.com/net/packet"
  14. "tailscale.com/net/tsaddr"
  15. "tailscale.com/net/tstun"
  16. "tailscale.com/types/ipproto"
  17. "tailscale.com/wgengine/filter"
  18. )
  19. const tcpTimeoutBeforeDebug = 5 * time.Second
  20. // debugConnectFailures reports whether the local node should track
  21. // outgoing TCP connections and log which ones fail and why.
  22. func debugConnectFailures() bool {
  23. s := os.Getenv("TS_DEBUG_CONNECT_FAILURES")
  24. if s == "" {
  25. return true
  26. }
  27. v, _ := strconv.ParseBool(s)
  28. return v
  29. }
  30. type pendingOpenFlow struct {
  31. timer *time.Timer // until giving up on the flow
  32. // guarded by userspaceEngine.mu:
  33. // problem is non-zero if we got a MaybeBroken (non-terminal)
  34. // TSMP "reject" header.
  35. problem packet.TailscaleRejectReason
  36. }
  37. func (e *userspaceEngine) removeFlow(f flowtrack.Tuple) (removed bool) {
  38. e.mu.Lock()
  39. defer e.mu.Unlock()
  40. of, ok := e.pendOpen[f]
  41. if !ok {
  42. // Not a tracked flow (likely already removed)
  43. return false
  44. }
  45. of.timer.Stop()
  46. delete(e.pendOpen, f)
  47. return true
  48. }
  49. func (e *userspaceEngine) noteFlowProblemFromPeer(f flowtrack.Tuple, problem packet.TailscaleRejectReason) {
  50. e.mu.Lock()
  51. defer e.mu.Unlock()
  52. of, ok := e.pendOpen[f]
  53. if !ok {
  54. // Not a tracked flow (likely already removed)
  55. return
  56. }
  57. of.problem = problem
  58. }
  59. func (e *userspaceEngine) trackOpenPreFilterIn(pp *packet.Parsed, t *tstun.Wrapper) (res filter.Response) {
  60. res = filter.Accept // always
  61. if pp.IPProto == ipproto.TSMP {
  62. res = filter.DropSilently
  63. rh, ok := pp.AsTailscaleRejectedHeader()
  64. if !ok {
  65. return
  66. }
  67. if rh.MaybeBroken {
  68. e.noteFlowProblemFromPeer(rh.Flow(), rh.Reason)
  69. } else if f := rh.Flow(); e.removeFlow(f) {
  70. e.logf("open-conn-track: flow %v %v > %v rejected due to %v", rh.Proto, rh.Src, rh.Dst, rh.Reason)
  71. }
  72. return
  73. }
  74. if pp.IPVersion == 0 ||
  75. pp.IPProto != ipproto.TCP ||
  76. pp.TCPFlags&(packet.TCPSyn|packet.TCPRst) == 0 {
  77. return
  78. }
  79. // Either a SYN or a RST came back. Remove it in either case.
  80. f := flowtrack.Tuple{Proto: pp.IPProto, Dst: pp.Src, Src: pp.Dst} // src/dst reversed
  81. removed := e.removeFlow(f)
  82. if removed && pp.TCPFlags&packet.TCPRst != 0 {
  83. e.logf("open-conn-track: flow TCP %v got RST by peer", f)
  84. }
  85. return
  86. }
  87. func (e *userspaceEngine) trackOpenPostFilterOut(pp *packet.Parsed, t *tstun.Wrapper) (res filter.Response) {
  88. res = filter.Accept // always
  89. if pp.IPVersion == 0 ||
  90. pp.IPProto != ipproto.TCP ||
  91. pp.TCPFlags&packet.TCPAck != 0 ||
  92. pp.TCPFlags&packet.TCPSyn == 0 {
  93. return
  94. }
  95. flow := flowtrack.Tuple{Proto: pp.IPProto, Src: pp.Src, Dst: pp.Dst}
  96. // iOS likes to probe Apple IPs on all interfaces to check for connectivity.
  97. // Don't start timers tracking those. They won't succeed anyway. Avoids log spam
  98. // like:
  99. // open-conn-track: timeout opening (100.115.73.60:52501 => 17.125.252.5:443); no associated peer node
  100. if runtime.GOOS == "ios" && flow.Dst.Port() == 443 && !tsaddr.IsTailscaleIP(flow.Dst.IP()) {
  101. if _, _, err := e.peerForIP(flow.Dst.IP()); err != nil {
  102. return
  103. }
  104. }
  105. timer := time.AfterFunc(tcpTimeoutBeforeDebug, func() {
  106. e.onOpenTimeout(flow)
  107. })
  108. e.mu.Lock()
  109. defer e.mu.Unlock()
  110. if e.pendOpen == nil {
  111. e.pendOpen = make(map[flowtrack.Tuple]*pendingOpenFlow)
  112. }
  113. if _, dup := e.pendOpen[flow]; dup {
  114. // Duplicates are expected when the OS retransmits. Ignore.
  115. return
  116. }
  117. e.pendOpen[flow] = &pendingOpenFlow{timer: timer}
  118. return filter.Accept
  119. }
  120. func (e *userspaceEngine) onOpenTimeout(flow flowtrack.Tuple) {
  121. e.mu.Lock()
  122. of, ok := e.pendOpen[flow]
  123. if !ok {
  124. // Not a tracked flow, or already handled & deleted.
  125. e.mu.Unlock()
  126. return
  127. }
  128. delete(e.pendOpen, flow)
  129. problem := of.problem
  130. e.mu.Unlock()
  131. if !problem.IsZero() {
  132. e.logf("open-conn-track: timeout opening %v; peer reported problem: %v", flow, problem)
  133. }
  134. // Diagnose why it might've timed out.
  135. n, _, err := e.peerForIP(flow.Dst.IP())
  136. if err != nil {
  137. e.logf("open-conn-track: timeout opening %v; peerForIP: %v", flow, err)
  138. return
  139. }
  140. if n == nil {
  141. e.logf("open-conn-track: timeout opening %v; no associated peer node", flow)
  142. return
  143. }
  144. if n.DiscoKey.IsZero() {
  145. e.logf("open-conn-track: timeout opening %v; peer node %v running pre-0.100", flow, n.Key.ShortString())
  146. return
  147. }
  148. if n.DERP == "" {
  149. e.logf("open-conn-track: timeout opening %v; peer node %v not connected to any DERP relay", flow, n.Key.ShortString())
  150. return
  151. }
  152. var ps *ipnstate.PeerStatusLite
  153. if st, err := e.getStatus(); err == nil {
  154. for _, v := range st.Peers {
  155. if v.NodeKey == n.Key {
  156. v := v // copy
  157. ps = &v
  158. }
  159. }
  160. } else {
  161. e.logf("open-conn-track: timeout opening %v to node %v; failed to get engine status: %v", flow, n.Key.ShortString(), err)
  162. return
  163. }
  164. if ps == nil {
  165. onlyZeroRoute := true // whether peerForIP returned n only because its /0 route matched
  166. for _, r := range n.AllowedIPs {
  167. if r.Bits() != 0 && r.Contains(flow.Dst.IP()) {
  168. onlyZeroRoute = false
  169. break
  170. }
  171. }
  172. if onlyZeroRoute {
  173. // This node was returned by peerForIP because
  174. // its exit node /0 route(s) matched, but this
  175. // might not be the exit node that's currently
  176. // selected. Rather than log misleading
  177. // errors, just don't log at all for now.
  178. // TODO(bradfitz): update this code to be
  179. // exit-node-aware and make peerForIP return
  180. // the node of the currently selected exit
  181. // node.
  182. return
  183. }
  184. e.logf("open-conn-track: timeout opening %v; target node %v in netmap but unknown to wireguard", flow, n.Key.ShortString())
  185. return
  186. }
  187. // TODO(bradfitz): figure out what PeerStatus.LastHandshake
  188. // is. It appears to be the last time we sent a handshake,
  189. // which isn't what I expected. I thought it was when a
  190. // handshake completed, which is what I want.
  191. _ = ps.LastHandshake
  192. online := "?"
  193. if n.Online != nil {
  194. if *n.Online {
  195. online = "yes"
  196. } else {
  197. online = "no"
  198. }
  199. }
  200. if n.LastSeen != nil && online != "yes" {
  201. online += fmt.Sprintf(", lastseen=%v", durFmt(*n.LastSeen))
  202. }
  203. e.logf("open-conn-track: timeout opening %v to node %v; online=%v, lastRecv=%v",
  204. flow, n.Key.ShortString(),
  205. online,
  206. e.magicConn.LastRecvActivityOfNodeKey(n.Key))
  207. }
  208. func durFmt(t time.Time) string {
  209. if t.IsZero() {
  210. return "never"
  211. }
  212. d := time.Since(t).Round(time.Second)
  213. if d < 10*time.Minute {
  214. // node.LastSeen times are rounded very coarsely, and
  215. // we compare times from different clocks (server vs
  216. // local), so negative is common when close. Format as
  217. // "recent" if negative or actually recent.
  218. return "recent"
  219. }
  220. return d.String()
  221. }