Browse Source

disco,net/tstun,wgengine/magicsock: probe peer MTU

Automatically probe the path MTU to a peer when peer MTU is enabled, but do not
use the MTU information for anything yet.

Updates #311

Signed-off-by: Val <[email protected]>
Val 2 years ago
parent
commit
893bdd729c

+ 1 - 1
disco/disco.go

@@ -261,7 +261,7 @@ func parsePong(ver uint8, p []byte) (m *Pong, err error) {
 func MessageSummary(m Message) string {
 	switch m := m.(type) {
 	case *Ping:
-		return fmt.Sprintf("ping tx=%x", m.TxID[:6])
+		return fmt.Sprintf("ping tx=%x padding=%v", m.TxID[:6], m.Padding)
 	case *Pong:
 		return fmt.Sprintf("pong tx=%x", m.TxID[:6])
 	case *CallMeMaybe:

+ 23 - 10
net/tstun/mtu.go

@@ -79,14 +79,16 @@ const (
 	safeTUNMTU TUNMTU = 1280
 )
 
-// MaxProbedWireMTU is the largest MTU we will test for path MTU
-// discovery.
-var MaxProbedWireMTU WireMTU = 9000
-
-func init() {
-	if MaxProbedWireMTU > WireMTU(maxTUNMTU) {
-		MaxProbedWireMTU = WireMTU(maxTUNMTU)
-	}
+// WireMTUsToProbe is a list of the on-the-wire MTUs we want to probe. Each time
+// magicsock discovery begins, it will send a set of pings, one of each size
+// listed below.
+var WireMTUsToProbe = []WireMTU{
+	WireMTU(safeTUNMTU),      // Tailscale over Tailscale :)
+	TUNToWireMTU(safeTUNMTU), // Smallest MTU allowed for IPv6, current default
+	1400,                     // Most common MTU minus a few bytes for tunnels
+	1500,                     // Most common MTU
+	8000,                     // Should fit inside all jumbo frame sizes
+	9000,                     // Most jumbo frames are this size or larger
 }
 
 // wgHeaderLen is the length of all the headers Wireguard adds to a packet
@@ -125,7 +127,7 @@ func WireToTUNMTU(w WireMTU) TUNMTU {
 // MTU. It is also the path MTU that we default to if we have no
 // information about the path to a peer.
 //
-// 1. If set, the value of TS_DEBUG_MTU clamped to a maximum of MaxTunMTU
+// 1. If set, the value of TS_DEBUG_MTU clamped to a maximum of MaxTUNMTU
 // 2. If TS_DEBUG_ENABLE_PMTUD is set, the maximum size MTU we probe, minus wg overhead
 // 3. If TS_DEBUG_ENABLE_PMTUD is not set, the Safe MTU
 func DefaultTUNMTU() TUNMTU {
@@ -135,12 +137,23 @@ func DefaultTUNMTU() TUNMTU {
 
 	debugPMTUD, _ := envknob.LookupBool("TS_DEBUG_ENABLE_PMTUD")
 	if debugPMTUD {
-		return WireToTUNMTU(MaxProbedWireMTU)
+		// TODO: While we are just probing MTU but not generating PTB,
+		// this has to continue to return the safe MTU. When we add the
+		// code to generate PTB, this will be:
+		//
+		// return WireToTUNMTU(maxProbedWireMTU)
+		return safeTUNMTU
 	}
 
 	return safeTUNMTU
 }
 
+// SafeWireMTU returns the wire MTU that is safe to use if we have no
+// information about the path MTU to this peer.
+func SafeWireMTU() WireMTU {
+	return TUNToWireMTU(safeTUNMTU)
+}
+
 // DefaultWireMTU returns the default TUN MTU, adjusted for wireguard
 // overhead.
 func DefaultWireMTU() WireMTU {

+ 8 - 5
net/tstun/mtu_test.go

@@ -39,15 +39,18 @@ func TestDefaultTunMTU(t *testing.T) {
 		t.Errorf("default TUN MTU = %d, want %d, clamping failed", DefaultTUNMTU(), maxTUNMTU)
 	}
 
-	// If PMTUD is enabled, the MTU should default to the largest probed
-	// MTU, but only if the user hasn't requested a specific MTU.
+	// If PMTUD is enabled, the MTU should default to the safe MTU, but only
+	// if the user hasn't requested a specific MTU.
+	//
+	// TODO: When PMTUD is generating PTB responses, this will become the
+	// largest MTU we probe.
 	os.Setenv("TS_DEBUG_MTU", "")
 	os.Setenv("TS_DEBUG_ENABLE_PMTUD", "true")
-	if DefaultTUNMTU() != WireToTUNMTU(MaxProbedWireMTU) {
-		t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), WireToTUNMTU(MaxProbedWireMTU))
+	if DefaultTUNMTU() != safeTUNMTU {
+		t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), safeTUNMTU)
 	}
 	// TS_DEBUG_MTU should take precedence over TS_DEBUG_ENABLE_PMTUD.
-	mtu = WireToTUNMTU(MaxProbedWireMTU - 1)
+	mtu = WireToTUNMTU(MaxPacketSize - 1)
 	os.Setenv("TS_DEBUG_MTU", strconv.Itoa(int(mtu)))
 	if DefaultTUNMTU() != mtu {
 		t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), mtu)

+ 49 - 19
wgengine/magicsock/endpoint.go

@@ -35,6 +35,16 @@ import (
 	"tailscale.com/util/ringbuffer"
 )
 
+var mtuProbePingSizesV4 []int
+var mtuProbePingSizesV6 []int
+
+func init() {
+	for _, m := range tstun.WireMTUsToProbe {
+		mtuProbePingSizesV4 = append(mtuProbePingSizesV4, pktLenToPingSize(m, false))
+		mtuProbePingSizesV6 = append(mtuProbePingSizesV6, pktLenToPingSize(m, true))
+	}
+}
+
 // endpoint is a wireguard/conn.Endpoint. In wireguard-go and kernel WireGuard
 // there is only one endpoint for a peer, but in Tailscale we distribute a
 // number of possible endpoints for a peer which would include the all the
@@ -374,7 +384,7 @@ func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr, der
 
 	udpAddr = de.bestAddr.AddrPort
 	pathMTU := de.bestAddr.wireMTU
-	requestedMTU := pingSizeToPktLen(size, udpAddr.Addr())
+	requestedMTU := pingSizeToPktLen(size, udpAddr.Addr().Is6())
 	mtuOk := requestedMTU <= pathMTU
 
 	if udpAddr.IsValid() && mtuOk {
@@ -666,21 +676,41 @@ func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpo
 		st.lastPing = now
 	}
 
-	txid := stun.NewTxID()
-	de.sentPing[txid] = sentPing{
-		to:      ep,
-		at:      now,
-		timer:   time.AfterFunc(pingTimeoutDuration, func() { de.discoPingTimeout(txid) }),
-		purpose: purpose,
-		res:     res,
-		cb:      cb,
+	// If we are doing a discovery ping or a CLI ping with no specified size
+	// to a non DERP address, then probe the MTU. Otherwise just send the
+	// one specified ping.
+
+	// Default to sending a single ping of the specified size
+	sizes := []int{size}
+	if de.c.PeerMTUEnabled() {
+		isDerp := ep.Addr() == tailcfg.DerpMagicIPAddr
+		if !isDerp && ((purpose == pingDiscovery) || (purpose == pingCLI && size == 0)) {
+			de.c.dlogf("[v1] magicsock: starting MTU probe")
+			sizes = mtuProbePingSizesV4
+			if ep.Addr().Is6() {
+				sizes = mtuProbePingSizesV6
+			}
+		}
 	}
 
 	logLevel := discoLog
 	if purpose == pingHeartbeat {
 		logLevel = discoVerboseLog
 	}
-	go de.sendDiscoPing(ep, epDisco.key, txid, size, logLevel)
+	for _, s := range sizes {
+		txid := stun.NewTxID()
+		de.sentPing[txid] = sentPing{
+			to:      ep,
+			at:      now,
+			timer:   time.AfterFunc(pingTimeoutDuration, func() { de.discoPingTimeout(txid) }),
+			purpose: purpose,
+			res:     res,
+			cb:      cb,
+			size:    s,
+		}
+		go de.sendDiscoPing(ep, epDisco.key, txid, s, logLevel)
+	}
+
 }
 
 // sendDiscoPingsLocked starts pinging all of ep's endpoints.
@@ -982,13 +1012,13 @@ func (de *endpoint) noteConnectivityChange() {
 // pingSizeToPktLen calculates the minimum path MTU that would permit
 // a disco ping message of length size to reach its target at
 // addr. size is the length of the entire disco message including
-// disco headers. If size is zero, assume it is the default MTU.
-func pingSizeToPktLen(size int, addr netip.Addr) tstun.WireMTU {
+// disco headers. If size is zero, assume it is the safe wire MTU.
+func pingSizeToPktLen(size int, is6 bool) tstun.WireMTU {
 	if size == 0 {
-		return tstun.DefaultWireMTU()
+		return tstun.SafeWireMTU()
 	}
 	headerLen := ipv4.HeaderLen
-	if addr.Is6() {
+	if is6 {
 		headerLen = ipv6.HeaderLen
 	}
 	headerLen += 8 // UDP header length
@@ -999,12 +1029,12 @@ func pingSizeToPktLen(size int, addr netip.Addr) tstun.WireMTU {
 // create a disco ping message whose on-the-wire length is exactly mtu
 // bytes long. If mtu is zero or less than the minimum ping size, then
 // no MTU probe is desired and return zero for an unpadded ping.
-func pktLenToPingSize(mtu tstun.WireMTU, addr netip.Addr) int {
+func pktLenToPingSize(mtu tstun.WireMTU, is6 bool) int {
 	if mtu == 0 {
 		return 0
 	}
 	headerLen := ipv4.HeaderLen
-	if addr.Is6() {
+	if is6 {
 		headerLen = ipv6.HeaderLen
 	}
 	headerLen += 8 // UDP header length
@@ -1053,7 +1083,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
 	}
 
 	if sp.purpose != pingHeartbeat {
-		de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v)  got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pingSizeToPktLen(sp.size, sp.to.Addr()), m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
+		de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v)  got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pingSizeToPktLen(sp.size, sp.to.Addr().Is6()), m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
 			if sp.to != src {
 				fmt.Fprintf(bw, " ping.to=%v", sp.to)
 			}
@@ -1071,9 +1101,9 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
 	// Promote this pong response to our current best address if it's lower latency.
 	// TODO(bradfitz): decide how latency vs. preference order affects decision
 	if !isDerp {
-		thisPong := addrQuality{sp.to, latency, pingSizeToPktLen(sp.size, sp.to.Addr())}
+		thisPong := addrQuality{sp.to, latency, tstun.WireMTU(pingSizeToPktLen(sp.size, sp.to.Addr().Is6()))}
 		if betterAddr(thisPong, de.bestAddr) {
-			de.c.logf("magicsock: disco: node %v %v now using %v mtu %v", de.publicKey.ShortString(), de.discoShort(), sp.to, thisPong.wireMTU)
+			de.c.logf("magicsock: disco: node %v %v now using %v mtu=%v tx=%x", de.publicKey.ShortString(), de.discoShort(), sp.to, thisPong.wireMTU, m.TxID[:6])
 			de.debugUpdates.Add(EndpointChange{
 				When: time.Now(),
 				What: "handlePingLocked-bestAddr-update",

+ 1 - 1
wgengine/magicsock/magicsock.go

@@ -1567,7 +1567,7 @@ func (c *Conn) handlePingLocked(dm *disco.Ping, src netip.AddrPort, di *discoInf
 		if numNodes > 1 {
 			pingNodeSrcStr = "[one-of-multi]"
 		}
-		c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v)  got ping tx=%x", c.discoShort, di.discoShort, pingNodeSrcStr, src, dm.TxID[:6])
+		c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v)  got ping tx=%x padding=%v", c.discoShort, di.discoShort, pingNodeSrcStr, src, dm.TxID[:6], dm.Padding)
 	}
 
 	ipDst := src

+ 4 - 4
wgengine/magicsock/magicsock_test.go

@@ -2936,7 +2936,7 @@ func TestAddrForPingSizeLocked(t *testing.T) {
 		},
 		{
 			desc:            "ping_size_too_big_for_trusted_UDP_addr_should_start_discovery_and_send_to_DERP",
-			size:            pktLenToPingSize(1501, validUdpAddr.Addr()),
+			size:            pktLenToPingSize(1501, validUdpAddr.Addr().Is6()),
 			mtu:             1500,
 			bestAddr:        true,
 			bestAddrTrusted: true,
@@ -2945,7 +2945,7 @@ func TestAddrForPingSizeLocked(t *testing.T) {
 		},
 		{
 			desc:            "ping_size_too_big_for_untrusted_UDP_addr_should_start_discovery_and_send_to_DERP",
-			size:            pktLenToPingSize(1501, validUdpAddr.Addr()),
+			size:            pktLenToPingSize(1501, validUdpAddr.Addr().Is6()),
 			mtu:             1500,
 			bestAddr:        true,
 			bestAddrTrusted: false,
@@ -2954,7 +2954,7 @@ func TestAddrForPingSizeLocked(t *testing.T) {
 		},
 		{
 			desc:            "ping_size_small_enough_for_trusted_UDP_addr_should_send_to_UDP_and_not_DERP",
-			size:            pktLenToPingSize(1500, validUdpAddr.Addr()),
+			size:            pktLenToPingSize(1500, validUdpAddr.Addr().Is6()),
 			mtu:             1500,
 			bestAddr:        true,
 			bestAddrTrusted: true,
@@ -2963,7 +2963,7 @@ func TestAddrForPingSizeLocked(t *testing.T) {
 		},
 		{
 			desc:            "ping_size_small_enough_for_untrusted_UDP_addr_should_send_to_UDP_and_DERP",
-			size:            pktLenToPingSize(1500, validUdpAddr.Addr()),
+			size:            pktLenToPingSize(1500, validUdpAddr.Addr().Is6()),
 			mtu:             1500,
 			bestAddr:        true,
 			bestAddrTrusted: false,

+ 5 - 0
wgengine/magicsock/peermtu.go

@@ -5,6 +5,8 @@
 
 package magicsock
 
+import "tailscale.com/net/tstun"
+
 // Peer path MTU routines shared by platforms that implement it.
 
 // DontFragSetting returns true if at least one of the underlying sockets of
@@ -102,6 +104,9 @@ func (c *Conn) UpdatePMTUD() {
 		_ = c.setDontFragment("udp6", false)
 		newStatus = false
 	}
+	if debugPMTUD() {
+		c.logf("magicsock: peermtu: peer MTU probes are %v", tstun.WireMTUsToProbe)
+	}
 	c.peerMTUEnabled.Store(newStatus)
 	c.resetEndpointStates()
 }