Просмотр исходного кода

wgengine/magicsock: rebind/restun if a syscall.EPERM error is returned (#11711)

We have seen in macOS client logs that the "operation not permitted", a
syscall.EPERM error, is being returned when traffic is attempted to be
sent. This may be caused by security software on the client.

This change will perform a rebind and restun if we receive a
syscall.EPERM error on clients running darwin. Rebinds will only be
called if we haven't performed one specifically for an EPERM error in
the past 5 seconds.

Updates #11710

Signed-off-by: Charlotte Brandhorst-Satzkorn <[email protected]>
Charlotte Brandhorst-Satzkorn 1 год назад
Родитель
Сommit
449f46c207
2 измененных файлов с 91 добавлено и 0 удалено
  1. 36 0
      wgengine/magicsock/magicsock.go
  2. 55 0
      wgengine/magicsock/magicsock_test.go

+ 36 - 0
wgengine/magicsock/magicsock.go

@@ -18,6 +18,7 @@ import (
 	"strings"
 	"sync"
 	"sync/atomic"
+	"syscall"
 	"time"
 
 	"github.com/tailscale/wireguard-go/conn"
@@ -305,6 +306,10 @@ type Conn struct {
 	// getPeerByKey optionally specifies a function to look up a peer's
 	// wireguard state by its public key. If nil, it's not used.
 	getPeerByKey func(key.NodePublic) (_ wgint.Peer, ok bool)
+
+	// lastEPERMRebind tracks the last time a rebind was performed
+	// after experiencing a syscall.EPERM.
+	lastEPERMRebind syncs.AtomicValue[time.Time]
 }
 
 // SetDebugLoggingEnabled controls whether spammy debug logging is enabled.
@@ -1047,6 +1052,8 @@ func (c *Conn) sendUDPBatch(addr netip.AddrPort, buffs [][]byte) (sent bool, err
 		if errors.As(err, &errGSO) {
 			c.logf("magicsock: %s", errGSO.Error())
 			err = errGSO.RetryErr
+		} else {
+			_ = c.maybeRebindOnError(runtime.GOOS, err)
 		}
 	}
 	return err == nil, err
@@ -1061,6 +1068,7 @@ func (c *Conn) sendUDP(ipp netip.AddrPort, b []byte) (sent bool, err error) {
 	sent, err = c.sendUDPStd(ipp, b)
 	if err != nil {
 		metricSendUDPError.Add(1)
+		_ = c.maybeRebindOnError(runtime.GOOS, err)
 	} else {
 		if sent {
 			metricSendUDP.Add(1)
@@ -1069,6 +1077,34 @@ func (c *Conn) sendUDP(ipp netip.AddrPort, b []byte) (sent bool, err error) {
 	return
 }
 
+// maybeRebindOnError performs a rebind and restun if the error is defined and
+// any conditionals are met.
+func (c *Conn) maybeRebindOnError(os string, err error) bool {
+	switch err {
+	case syscall.EPERM:
+		why := "operation-not-permitted-rebind"
+		switch os {
+		// We currently will only rebind and restun on a syscall.EPERM if it is experienced
+		// on a client running darwin.
+		// TODO(charlotte, raggi): expand os options if required.
+		case "darwin":
+			// TODO(charlotte): implement a backoff, so we don't end up in a rebind loop for persistent
+			// EPERMs.
+			if c.lastEPERMRebind.Load().Before(time.Now().Add(-5 * time.Second)) {
+				c.logf("magicsock: performing %q", why)
+				c.lastEPERMRebind.Store(time.Now())
+				c.Rebind()
+				go c.ReSTUN(why)
+				return true
+			}
+		default:
+			c.logf("magicsock: not performing %q", why)
+			return false
+		}
+	}
+	return false
+}
+
 // sendUDP sends UDP packet b to addr.
 // See sendAddr's docs on the return value meanings.
 func (c *Conn) sendUDPStd(addr netip.AddrPort, b []byte) (sent bool, err error) {

+ 55 - 0
wgengine/magicsock/magicsock_test.go

@@ -23,6 +23,7 @@ import (
 	"strings"
 	"sync"
 	"sync/atomic"
+	"syscall"
 	"testing"
 	"time"
 	"unsafe"
@@ -3134,3 +3135,57 @@ func TestMaybeSetNearestDERP(t *testing.T) {
 		})
 	}
 }
+
+func TestMaybeRebindOnError(t *testing.T) {
+	tstest.PanicOnLog()
+	tstest.ResourceCheck(t)
+
+	t.Run("darwin should rebind", func(t *testing.T) {
+		conn, err := NewConn(Options{
+			EndpointsFunc: func(eps []tailcfg.Endpoint) {},
+			Logf:          t.Logf,
+		})
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer conn.Close()
+
+		rebound := conn.maybeRebindOnError("darwin", syscall.EPERM)
+		if !rebound {
+			t.Errorf("darwin should rebind on syscall.EPERM")
+		}
+	})
+
+	t.Run("linux should not rebind", func(t *testing.T) {
+		conn, err := NewConn(Options{
+			EndpointsFunc: func(eps []tailcfg.Endpoint) {},
+			Logf:          t.Logf,
+		})
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer conn.Close()
+
+		rebound := conn.maybeRebindOnError("linux", syscall.EPERM)
+		if rebound {
+			t.Errorf("linux should not rebind on syscall.EPERM")
+		}
+	})
+
+	t.Run("should not rebind if recently rebind recently performed", func(t *testing.T) {
+		conn, err := NewConn(Options{
+			EndpointsFunc: func(eps []tailcfg.Endpoint) {},
+			Logf:          t.Logf,
+		})
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer conn.Close()
+
+		conn.lastEPERMRebind.Store(time.Now().Add(-1 * time.Second))
+		rebound := conn.maybeRebindOnError("darwin", syscall.EPERM)
+		if rebound {
+			t.Errorf("darwin should not rebind on syscall.EPERM within 5 seconds of last")
+		}
+	})
+}