Browse Source

tsnet: fix panic on race between listener.Close and incoming packet

I saw this panic while writing a new test for #14715:

    panic: send on closed channel

    goroutine 826 [running]:
    tailscale.com/tsnet.(*listener).handle(0x1400031a500, {0x1035fbb00, 0x14000b82300})
            /Users/bradfitz/src/tailscale.com/tsnet/tsnet.go:1317 +0xac
    tailscale.com/wgengine/netstack.(*Impl).acceptTCP(0x14000204700, 0x14000882100)
            /Users/bradfitz/src/tailscale.com/wgengine/netstack/netstack.go:1320 +0x6dc
    created by gvisor.dev/gvisor/pkg/tcpip/transport/tcp.(*Forwarder).HandlePacket in goroutine 807
            /Users/bradfitz/go/pkg/mod/gvisor.dev/[email protected]/pkg/tcpip/transport/tcp/forwarder.go:98 +0x32c
    FAIL    tailscale.com/tsnet     0.927s

Updates #14715

Change-Id: I9924e0a6c2b801d46ee44eb8eeea0da2f9ea17c4
Signed-off-by: Brad Fitzpatrick <[email protected]>
Brad Fitzpatrick 1 year ago
parent
commit
7f3c1932b5
2 changed files with 33 additions and 11 deletions
  1. 14 11
      tsnet/tsnet.go
  2. 19 0
      tsnet/tsnet_test.go

+ 14 - 11
tsnet/tsnet.go

@@ -1180,7 +1180,8 @@ func (s *Server) listen(network, addr string, lnOn listenOn) (net.Listener, erro
 		keys: keys,
 		keys: keys,
 		addr: addr,
 		addr: addr,
 
 
-		conn: make(chan net.Conn),
+		closedc: make(chan struct{}),
+		conn:    make(chan net.Conn),
 	}
 	}
 	s.mu.Lock()
 	s.mu.Lock()
 	for _, key := range keys {
 	for _, key := range keys {
@@ -1243,11 +1244,12 @@ type listenKey struct {
 }
 }
 
 
 type listener struct {
 type listener struct {
-	s      *Server
-	keys   []listenKey
-	addr   string
-	conn   chan net.Conn
-	closed bool // guarded by s.mu
+	s       *Server
+	keys    []listenKey
+	addr    string
+	conn    chan net.Conn // unbuffered, never closed
+	closedc chan struct{} // closed on [listener.Close]
+	closed  bool          // guarded by s.mu
 }
 }
 
 
 func (ln *listener) Accept() (net.Conn, error) {
 func (ln *listener) Accept() (net.Conn, error) {
@@ -1277,21 +1279,22 @@ func (ln *listener) closeLocked() error {
 			delete(ln.s.listeners, key)
 			delete(ln.s.listeners, key)
 		}
 		}
 	}
 	}
-	close(ln.conn)
+	close(ln.closedc)
 	ln.closed = true
 	ln.closed = true
 	return nil
 	return nil
 }
 }
 
 
 func (ln *listener) handle(c net.Conn) {
 func (ln *listener) handle(c net.Conn) {
-	t := time.NewTimer(time.Second)
-	defer t.Stop()
 	select {
 	select {
 	case ln.conn <- c:
 	case ln.conn <- c:
-	case <-t.C:
+		return
+	case <-ln.closedc:
+	case <-ln.s.shutdownCtx.Done():
+	case <-time.After(time.Second):
 		// TODO(bradfitz): this isn't ideal. Think about how
 		// TODO(bradfitz): this isn't ideal. Think about how
 		// we how we want to do pushback.
 		// we how we want to do pushback.
-		c.Close()
 	}
 	}
+	c.Close()
 }
 }
 
 
 // Server returns the tsnet Server associated with the listener.
 // Server returns the tsnet Server associated with the listener.

+ 19 - 0
tsnet/tsnet_test.go

@@ -494,6 +494,25 @@ func TestListenerCleanup(t *testing.T) {
 	if err := ln.Close(); !errors.Is(err, net.ErrClosed) {
 	if err := ln.Close(); !errors.Is(err, net.ErrClosed) {
 		t.Fatalf("second ln.Close error: %v, want net.ErrClosed", err)
 		t.Fatalf("second ln.Close error: %v, want net.ErrClosed", err)
 	}
 	}
+
+	// Verify that handling a connection from gVisor (from a packet arriving)
+	// after a listener closed doesn't panic (previously: sending on a closed
+	// channel) or hang.
+	c := &closeTrackConn{}
+	ln.(*listener).handle(c)
+	if !c.closed {
+		t.Errorf("c.closed = false, want true")
+	}
+}
+
+type closeTrackConn struct {
+	net.Conn
+	closed bool
+}
+
+func (wc *closeTrackConn) Close() error {
+	wc.closed = true
+	return nil
 }
 }
 
 
 // tests https://github.com/tailscale/tailscale/issues/6973 -- that we can start a tsnet server,
 // tests https://github.com/tailscale/tailscale/issues/6973 -- that we can start a tsnet server,