|
|
@@ -34,12 +34,12 @@ import (
|
|
|
|
|
|
"go4.org/mem"
|
|
|
"golang.org/x/sync/errgroup"
|
|
|
- "golang.org/x/time/rate"
|
|
|
"tailscale.com/client/tailscale"
|
|
|
"tailscale.com/disco"
|
|
|
"tailscale.com/envknob"
|
|
|
"tailscale.com/metrics"
|
|
|
"tailscale.com/syncs"
|
|
|
+ "tailscale.com/tstime/rate"
|
|
|
"tailscale.com/types/key"
|
|
|
"tailscale.com/types/logger"
|
|
|
"tailscale.com/version"
|
|
|
@@ -122,7 +122,8 @@ type Server struct {
|
|
|
_ align64
|
|
|
packetsForwardedOut expvar.Int
|
|
|
packetsForwardedIn expvar.Int
|
|
|
- peerGoneFrames expvar.Int // number of peer gone frames sent
|
|
|
+ peerGoneDisconnectedFrames expvar.Int // number of peer disconnected frames sent
|
|
|
+ peerGoneNotHereFrames expvar.Int // number of peer not here frames sent
|
|
|
gotPing expvar.Int // number of ping frames from client
|
|
|
sentPong expvar.Int // number of pong frames enqueued to client
|
|
|
accepts expvar.Int
|
|
|
@@ -324,7 +325,8 @@ func NewServer(privateKey key.NodePrivate, logf logger.Logf) *Server {
|
|
|
s.packetsDroppedReasonCounters = []*expvar.Int{
|
|
|
s.packetsDroppedReason.Get("unknown_dest"),
|
|
|
s.packetsDroppedReason.Get("unknown_dest_on_fwd"),
|
|
|
- s.packetsDroppedReason.Get("gone"),
|
|
|
+ s.packetsDroppedReason.Get("gone_disconnected"),
|
|
|
+ s.packetsDroppedReason.Get("gone_not_here"),
|
|
|
s.packetsDroppedReason.Get("queue_head"),
|
|
|
s.packetsDroppedReason.Get("queue_tail"),
|
|
|
s.packetsDroppedReason.Get("write_error"),
|
|
|
@@ -615,13 +617,26 @@ func (s *Server) notePeerGoneFromRegionLocked(key key.NodePublic) {
|
|
|
}
|
|
|
set.ForeachClient(func(peer *sclient) {
|
|
|
if peer.connNum == connNum {
|
|
|
- go peer.requestPeerGoneWrite(key)
|
|
|
+ go peer.requestPeerGoneWrite(key, PeerGoneReasonDisconnected)
|
|
|
}
|
|
|
})
|
|
|
}
|
|
|
delete(s.sentTo, key)
|
|
|
}
|
|
|
|
|
|
+// requestPeerGoneWriteLimited sends a request to write a "peer gone"
|
|
|
+// frame, but only in reply to a disco packet, and only if we haven't
|
|
|
+// sent one recently.
|
|
|
+func (c *sclient) requestPeerGoneWriteLimited(peer key.NodePublic, contents []byte, reason PeerGoneReasonType) {
|
|
|
+ if disco.LooksLikeDiscoWrapper(contents) != true {
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ if c.peerGoneLim.Allow() {
|
|
|
+ go c.requestPeerGoneWrite(peer, reason)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
func (s *Server) addWatcher(c *sclient) {
|
|
|
if !c.canMesh {
|
|
|
panic("invariant: addWatcher called without permissions")
|
|
|
@@ -686,8 +701,9 @@ func (s *Server) accept(ctx context.Context, nc Conn, brw *bufio.ReadWriter, rem
|
|
|
sendQueue: make(chan pkt, perClientSendQueueDepth),
|
|
|
discoSendQueue: make(chan pkt, perClientSendQueueDepth),
|
|
|
sendPongCh: make(chan [8]byte, 1),
|
|
|
- peerGone: make(chan key.NodePublic),
|
|
|
+ peerGone: make(chan peerGoneMsg),
|
|
|
canMesh: clientInfo.MeshKey != "" && clientInfo.MeshKey == s.meshKey,
|
|
|
+ peerGoneLim: rate.NewLimiter(rate.Every(time.Second), 3),
|
|
|
}
|
|
|
|
|
|
if c.canMesh {
|
|
|
@@ -887,6 +903,8 @@ func (c *sclient) handleFrameForwardPacket(ft frameType, fl uint32) error {
|
|
|
reason := dropReasonUnknownDestOnFwd
|
|
|
if dstLen > 1 {
|
|
|
reason = dropReasonDupClient
|
|
|
+ } else {
|
|
|
+ c.requestPeerGoneWriteLimited(dstKey, contents, PeerGoneReasonNotHere)
|
|
|
}
|
|
|
s.recordDrop(contents, srcKey, dstKey, reason)
|
|
|
return nil
|
|
|
@@ -952,6 +970,8 @@ func (c *sclient) handleFrameSendPacket(ft frameType, fl uint32) error {
|
|
|
reason := dropReasonUnknownDest
|
|
|
if dstLen > 1 {
|
|
|
reason = dropReasonDupClient
|
|
|
+ } else {
|
|
|
+ c.requestPeerGoneWriteLimited(dstKey, contents, PeerGoneReasonNotHere)
|
|
|
}
|
|
|
s.recordDrop(contents, c.key, dstKey, reason)
|
|
|
c.debug("SendPacket for %s, dropping with reason=%s", dstKey.ShortString(), reason)
|
|
|
@@ -981,7 +1001,7 @@ type dropReason int
|
|
|
const (
|
|
|
dropReasonUnknownDest dropReason = iota // unknown destination pubkey
|
|
|
dropReasonUnknownDestOnFwd // unknown destination pubkey on a derp-forwarded packet
|
|
|
- dropReasonGone // destination tailscaled disconnected before we could send
|
|
|
+ dropReasonGoneDisconnected // destination tailscaled disconnected before we could send
|
|
|
dropReasonQueueHead // destination queue is full, dropped packet at queue head
|
|
|
dropReasonQueueTail // destination queue is full, dropped packet at queue tail
|
|
|
dropReasonWriteError // OS write() failed
|
|
|
@@ -1023,7 +1043,7 @@ func (c *sclient) sendPkt(dst *sclient, p pkt) error {
|
|
|
for attempt := 0; attempt < 3; attempt++ {
|
|
|
select {
|
|
|
case <-dst.done:
|
|
|
- s.recordDrop(p.bs, c.key, dstKey, dropReasonGone)
|
|
|
+ s.recordDrop(p.bs, c.key, dstKey, dropReasonGoneDisconnected)
|
|
|
dst.debug("sendPkt attempt %d dropped, dst gone", attempt)
|
|
|
return nil
|
|
|
default:
|
|
|
@@ -1052,11 +1072,14 @@ func (c *sclient) sendPkt(dst *sclient, p pkt) error {
|
|
|
}
|
|
|
|
|
|
// requestPeerGoneWrite sends a request to write a "peer gone" frame
|
|
|
-// that the provided peer has disconnected. It blocks until either the
|
|
|
+// with an explanation of why it is gone. It blocks until either the
|
|
|
// write request is scheduled, or the client has closed.
|
|
|
-func (c *sclient) requestPeerGoneWrite(peer key.NodePublic) {
|
|
|
+func (c *sclient) requestPeerGoneWrite(peer key.NodePublic, reason PeerGoneReasonType) {
|
|
|
select {
|
|
|
- case c.peerGone <- peer:
|
|
|
+ case c.peerGone <- peerGoneMsg{
|
|
|
+ peer: peer,
|
|
|
+ reason: reason,
|
|
|
+ }:
|
|
|
case <-c.done:
|
|
|
}
|
|
|
}
|
|
|
@@ -1270,25 +1293,20 @@ type sclient struct {
|
|
|
key key.NodePublic
|
|
|
info clientInfo
|
|
|
logf logger.Logf
|
|
|
- done <-chan struct{} // closed when connection closes
|
|
|
- remoteAddr string // usually ip:port from net.Conn.RemoteAddr().String()
|
|
|
- remoteIPPort netip.AddrPort // zero if remoteAddr is not ip:port.
|
|
|
- sendQueue chan pkt // packets queued to this client; never closed
|
|
|
- discoSendQueue chan pkt // important packets queued to this client; never closed
|
|
|
- sendPongCh chan [8]byte // pong replies to send to the client; never closed
|
|
|
- peerGone chan key.NodePublic // write request that a previous sender has disconnected (not used by mesh peers)
|
|
|
- meshUpdate chan struct{} // write request to write peerStateChange
|
|
|
- canMesh bool // clientInfo had correct mesh token for inter-region routing
|
|
|
- isDup atomic.Bool // whether more than 1 sclient for key is connected
|
|
|
- isDisabled atomic.Bool // whether sends to this peer are disabled due to active/active dups
|
|
|
+ done <-chan struct{} // closed when connection closes
|
|
|
+ remoteAddr string // usually ip:port from net.Conn.RemoteAddr().String()
|
|
|
+ remoteIPPort netip.AddrPort // zero if remoteAddr is not ip:port.
|
|
|
+ sendQueue chan pkt // packets queued to this client; never closed
|
|
|
+ discoSendQueue chan pkt // important packets queued to this client; never closed
|
|
|
+ sendPongCh chan [8]byte // pong replies to send to the client; never closed
|
|
|
+ peerGone chan peerGoneMsg // write request that a peer is not at this server (not used by mesh peers)
|
|
|
+ meshUpdate chan struct{} // write request to write peerStateChange
|
|
|
+ canMesh bool // clientInfo had correct mesh token for inter-region routing
|
|
|
+ isDup atomic.Bool // whether more than 1 sclient for key is connected
|
|
|
+ isDisabled atomic.Bool // whether sends to this peer are disabled due to active/active dups
|
|
|
|
|
|
debugLogging bool
|
|
|
|
|
|
- // replaceLimiter controls how quickly two connections with
|
|
|
- // the same client key can kick each other off the server by
|
|
|
- // taking over ownership of a key.
|
|
|
- replaceLimiter *rate.Limiter
|
|
|
-
|
|
|
// Owned by run, not thread-safe.
|
|
|
br *bufio.Reader
|
|
|
connectedAt time.Time
|
|
|
@@ -1304,6 +1322,11 @@ type sclient struct {
|
|
|
// the client for them to update their map of who's connected
|
|
|
// to this node.
|
|
|
peerStateChange []peerConnState
|
|
|
+
|
|
|
+ // peerGoneLimiter limits how often the server will inform a
|
|
|
+ // client that it's trying to establish a direct connection
|
|
|
+ // through us with a peer we have no record of.
|
|
|
+ peerGoneLim *rate.Limiter
|
|
|
}
|
|
|
|
|
|
// peerConnState represents whether a peer is connected to the server
|
|
|
@@ -1327,6 +1350,12 @@ type pkt struct {
|
|
|
bs []byte
|
|
|
}
|
|
|
|
|
|
+// peerGoneMsg is a request to write a peerGone frame to an sclient
|
|
|
+type peerGoneMsg struct {
|
|
|
+ peer key.NodePublic
|
|
|
+ reason PeerGoneReasonType
|
|
|
+}
|
|
|
+
|
|
|
func (c *sclient) setPreferred(v bool) {
|
|
|
if c.preferred == v {
|
|
|
return
|
|
|
@@ -1381,9 +1410,9 @@ func (c *sclient) sendLoop(ctx context.Context) error {
|
|
|
for {
|
|
|
select {
|
|
|
case pkt := <-c.sendQueue:
|
|
|
- c.s.recordDrop(pkt.bs, pkt.src, c.key, dropReasonGone)
|
|
|
+ c.s.recordDrop(pkt.bs, pkt.src, c.key, dropReasonGoneDisconnected)
|
|
|
case pkt := <-c.discoSendQueue:
|
|
|
- c.s.recordDrop(pkt.bs, pkt.src, c.key, dropReasonGone)
|
|
|
+ c.s.recordDrop(pkt.bs, pkt.src, c.key, dropReasonGoneDisconnected)
|
|
|
default:
|
|
|
return
|
|
|
}
|
|
|
@@ -1404,8 +1433,8 @@ func (c *sclient) sendLoop(ctx context.Context) error {
|
|
|
select {
|
|
|
case <-ctx.Done():
|
|
|
return nil
|
|
|
- case peer := <-c.peerGone:
|
|
|
- werr = c.sendPeerGone(peer)
|
|
|
+ case msg := <-c.peerGone:
|
|
|
+ werr = c.sendPeerGone(msg.peer, msg.reason)
|
|
|
continue
|
|
|
case <-c.meshUpdate:
|
|
|
werr = c.sendMeshUpdates()
|
|
|
@@ -1436,8 +1465,8 @@ func (c *sclient) sendLoop(ctx context.Context) error {
|
|
|
select {
|
|
|
case <-ctx.Done():
|
|
|
return nil
|
|
|
- case peer := <-c.peerGone:
|
|
|
- werr = c.sendPeerGone(peer)
|
|
|
+ case msg := <-c.peerGone:
|
|
|
+ werr = c.sendPeerGone(msg.peer, msg.reason)
|
|
|
case <-c.meshUpdate:
|
|
|
werr = c.sendMeshUpdates()
|
|
|
continue
|
|
|
@@ -1478,13 +1507,22 @@ func (c *sclient) sendPong(data [8]byte) error {
|
|
|
}
|
|
|
|
|
|
// sendPeerGone sends a peerGone frame, without flushing.
|
|
|
-func (c *sclient) sendPeerGone(peer key.NodePublic) error {
|
|
|
- c.s.peerGoneFrames.Add(1)
|
|
|
+func (c *sclient) sendPeerGone(peer key.NodePublic, reason PeerGoneReasonType) error {
|
|
|
+ switch reason {
|
|
|
+ case PeerGoneReasonDisconnected:
|
|
|
+ c.s.peerGoneDisconnectedFrames.Add(1)
|
|
|
+ case PeerGoneReasonNotHere:
|
|
|
+ c.s.peerGoneNotHereFrames.Add(1)
|
|
|
+ }
|
|
|
c.setWriteDeadline()
|
|
|
- if err := writeFrameHeader(c.bw.bw(), framePeerGone, keyLen); err != nil {
|
|
|
+ data := make([]byte, 0, keyLen+1)
|
|
|
+ data = peer.AppendTo(data)
|
|
|
+ data = append(data, byte(reason))
|
|
|
+ if err := writeFrameHeader(c.bw.bw(), framePeerGone, uint32(len(data))); err != nil {
|
|
|
return err
|
|
|
}
|
|
|
- _, err := c.bw.Write(peer.AppendTo(nil))
|
|
|
+
|
|
|
+ _, err := c.bw.Write(data)
|
|
|
return err
|
|
|
}
|
|
|
|
|
|
@@ -1515,7 +1553,7 @@ func (c *sclient) sendMeshUpdates() error {
|
|
|
if pcs.present {
|
|
|
err = c.sendPeerPresent(pcs.peer)
|
|
|
} else {
|
|
|
- err = c.sendPeerGone(pcs.peer)
|
|
|
+ err = c.sendPeerGone(pcs.peer, PeerGoneReasonDisconnected)
|
|
|
}
|
|
|
if err != nil {
|
|
|
// Shouldn't happen, though, as we're writing
|
|
|
@@ -1756,7 +1794,8 @@ func (s *Server) ExpVar() expvar.Var {
|
|
|
m.Set("home_moves_out", &s.homeMovesOut)
|
|
|
m.Set("got_ping", &s.gotPing)
|
|
|
m.Set("sent_pong", &s.sentPong)
|
|
|
- m.Set("peer_gone_frames", &s.peerGoneFrames)
|
|
|
+ m.Set("peer_gone_disconnected_frames", &s.peerGoneDisconnectedFrames)
|
|
|
+ m.Set("peer_gone_not_here_frames", &s.peerGoneNotHereFrames)
|
|
|
m.Set("packets_forwarded_out", &s.packetsForwardedOut)
|
|
|
m.Set("packets_forwarded_in", &s.packetsForwardedIn)
|
|
|
m.Set("multiforwarder_created", &s.multiForwarderCreated)
|