Browse Source

prober: add a DERP bandwidth probe

Updates tailscale/corp#17912

Signed-off-by: Anton Tolchanov <[email protected]>
Anton Tolchanov 2 years ago
parent
commit
f12d2557f9
3 changed files with 388 additions and 100 deletions
  1. 20 6
      cmd/derpprobe/derpprobe.go
  2. 254 86
      prober/derp.go
  3. 114 8
      prober/derp_test.go

+ 20 - 6
cmd/derpprobe/derpprobe.go

@@ -19,18 +19,31 @@ import (
 )
 
 var (
-	derpMapURL = flag.String("derp-map", "https://login.tailscale.com/derpmap/default", "URL to DERP map (https:// or file://)")
-	listen     = flag.String("listen", ":8030", "HTTP listen address")
-	probeOnce  = flag.Bool("once", false, "probe once and print results, then exit; ignores the listen flag")
-	spread     = flag.Bool("spread", true, "whether to spread probing over time")
-	interval   = flag.Duration("interval", 15*time.Second, "probe interval")
+	derpMapURL   = flag.String("derp-map", "https://login.tailscale.com/derpmap/default", "URL to DERP map (https:// or file://)")
+	listen       = flag.String("listen", ":8030", "HTTP listen address")
+	probeOnce    = flag.Bool("once", false, "probe once and print results, then exit; ignores the listen flag")
+	spread       = flag.Bool("spread", true, "whether to spread probing over time")
+	interval     = flag.Duration("interval", 15*time.Second, "probe interval")
+	meshInterval = flag.Duration("mesh-interval", 15*time.Second, "mesh probe interval")
+	stunInterval = flag.Duration("stun-interval", 15*time.Second, "STUN probe interval")
+	tlsInterval  = flag.Duration("tls-interval", 15*time.Second, "TLS probe interval")
+	bwInterval   = flag.Duration("bw-interval", 0, "bandwidth probe interval (0 = no bandwidth probing)")
+	bwSize       = flag.Int64("bw-probe-size-bytes", 1_000_000, "bandwidth probe size")
 )
 
 func main() {
 	flag.Parse()
 
 	p := prober.New().WithSpread(*spread).WithOnce(*probeOnce).WithMetricNamespace("derpprobe")
-	dp, err := prober.DERP(p, *derpMapURL, *interval, *interval, *interval)
+	opts := []prober.DERPOpt{
+		prober.WithMeshProbing(*meshInterval),
+		prober.WithSTUNProbing(*stunInterval),
+		prober.WithTLSProbing(*tlsInterval),
+	}
+	if *bwInterval > 0 {
+		opts = append(opts, prober.WithBandwidthProbing(*bwInterval, *bwSize))
+	}
+	dp, err := prober.DERP(p, *derpMapURL, opts...)
 	if err != nil {
 		log.Fatal(err)
 	}
@@ -53,6 +66,7 @@ func main() {
 	mux := http.NewServeMux()
 	tsweb.Debugger(mux)
 	mux.HandleFunc("/", http.HandlerFunc(serveFunc(p)))
+	log.Printf("Listening on %s", *listen)
 	log.Fatal(http.ListenAndServe(*listen, mux))
 }
 

+ 254 - 86
prober/derp.go

@@ -5,12 +5,14 @@ package prober
 
 import (
 	"bytes"
+	"cmp"
 	"context"
 	crand "crypto/rand"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"log"
+	"maps"
 	"net"
 	"net/http"
 	"strconv"
@@ -21,6 +23,7 @@ import (
 	"tailscale.com/derp"
 	"tailscale.com/derp/derphttp"
 	"tailscale.com/net/stun"
+	"tailscale.com/syncs"
 	"tailscale.com/tailcfg"
 	"tailscale.com/types/key"
 	"tailscale.com/types/logger"
@@ -35,10 +38,15 @@ type derpProber struct {
 	meshInterval time.Duration
 	tlsInterval  time.Duration
 
+	// Optional bandwidth probing.
+	bwInterval  time.Duration
+	bwProbeSize int64
+
 	// Probe functions that can be overridden for testing.
 	tlsProbeFn  func(string) ProbeFunc
 	udpProbeFn  func(string, int) ProbeFunc
 	meshProbeFn func(string, string) ProbeFunc
+	bwProbeFn   func(string, string, int64) ProbeFunc
 
 	sync.Mutex
 	lastDERPMap   *tailcfg.DERPMap
@@ -47,20 +55,57 @@ type derpProber struct {
 	probes        map[string]*Probe
 }
 
+type DERPOpt func(*derpProber)
+
+// WithBandwidthProbing enables bandwidth probing. When enabled, a payload of
+// `size` bytes will be regularly transferred through each DERP server, and each
+// pair of DERP servers in every region.
+func WithBandwidthProbing(interval time.Duration, size int64) DERPOpt {
+	return func(d *derpProber) {
+		d.bwInterval = interval
+		d.bwProbeSize = size
+	}
+}
+
+// WithMeshProbing enables mesh probing. When enabled, a small message will be
+// transferred through each DERP server and each pair of DERP servers.
+func WithMeshProbing(interval time.Duration) DERPOpt {
+	return func(d *derpProber) {
+		d.meshInterval = interval
+	}
+}
+
+// WithSTUNProbing enables STUN/UDP probing, with a STUN request being sent
+// to each DERP server every `interval`.
+func WithSTUNProbing(interval time.Duration) DERPOpt {
+	return func(d *derpProber) {
+		d.udpInterval = interval
+	}
+}
+
+// WithTLSProbing enables TLS probing that will check TLS certificate on port
+// 443 of each DERP server every `interval`.
+func WithTLSProbing(interval time.Duration) DERPOpt {
+	return func(d *derpProber) {
+		d.tlsInterval = interval
+	}
+}
+
 // DERP creates a new derpProber.
-func DERP(p *Prober, derpMapURL string, udpInterval, meshInterval, tlsInterval time.Duration) (*derpProber, error) {
+func DERP(p *Prober, derpMapURL string, opts ...DERPOpt) (*derpProber, error) {
 	d := &derpProber{
-		p:            p,
-		derpMapURL:   derpMapURL,
-		udpInterval:  udpInterval,
-		meshInterval: meshInterval,
-		tlsInterval:  tlsInterval,
-		tlsProbeFn:   TLS,
-		nodes:        make(map[string]*tailcfg.DERPNode),
-		probes:       make(map[string]*Probe),
+		p:          p,
+		derpMapURL: derpMapURL,
+		tlsProbeFn: TLS,
+		nodes:      make(map[string]*tailcfg.DERPNode),
+		probes:     make(map[string]*Probe),
+	}
+	for _, o := range opts {
+		o(d)
 	}
 	d.udpProbeFn = d.ProbeUDP
 	d.meshProbeFn = d.probeMesh
+	d.bwProbeFn = d.probeBandwidth
 	return d, nil
 }
 
@@ -84,42 +129,59 @@ func (d *derpProber) ProbeMap(ctx context.Context) error {
 				"hostname":  server.HostName,
 			}
 
-			n := fmt.Sprintf("derp/%s/%s/tls", region.RegionCode, server.Name)
-			wantProbes[n] = true
-			if d.probes[n] == nil {
-				log.Printf("adding DERP TLS probe for %s (%s)", server.Name, region.RegionName)
-
-				derpPort := 443
-				if server.DERPPort != 0 {
-					derpPort = server.DERPPort
+			if d.tlsInterval > 0 {
+				n := fmt.Sprintf("derp/%s/%s/tls", region.RegionCode, server.Name)
+				wantProbes[n] = true
+				if d.probes[n] == nil {
+					log.Printf("adding DERP TLS probe for %s (%s) every %v", server.Name, region.RegionName, d.tlsInterval)
+					derpPort := cmp.Or(server.DERPPort, 443)
+					d.probes[n] = d.p.Run(n, d.tlsInterval, labels, d.tlsProbeFn(fmt.Sprintf("%s:%d", server.HostName, derpPort)))
 				}
-
-				d.probes[n] = d.p.Run(n, d.tlsInterval, labels, d.tlsProbeFn(fmt.Sprintf("%s:%d", server.HostName, derpPort)))
 			}
 
-			for idx, ipStr := range []string{server.IPv6, server.IPv4} {
-				n = fmt.Sprintf("derp/%s/%s/udp", region.RegionCode, server.Name)
-				if idx == 0 {
-					n = n + "6"
-				}
-
-				if ipStr == "" || server.STUNPort == -1 {
-					continue
-				}
-
-				wantProbes[n] = true
-				if d.probes[n] == nil {
-					log.Printf("adding DERP UDP probe for %s (%s)", server.Name, n)
-					d.probes[n] = d.p.Run(n, d.udpInterval, labels, d.udpProbeFn(ipStr, server.STUNPort))
+			if d.udpInterval > 0 {
+				for idx, ipStr := range []string{server.IPv6, server.IPv4} {
+					n := fmt.Sprintf("derp/%s/%s/udp", region.RegionCode, server.Name)
+					if idx == 0 {
+						n += "6"
+					}
+
+					if ipStr == "" || server.STUNPort == -1 {
+						continue
+					}
+
+					wantProbes[n] = true
+					if d.probes[n] == nil {
+						log.Printf("adding DERP UDP probe for %s (%s) every %v", server.Name, n, d.udpInterval)
+						d.probes[n] = d.p.Run(n, d.udpInterval, labels, d.udpProbeFn(ipStr, server.STUNPort))
+					}
 				}
 			}
 
 			for _, to := range region.Nodes {
-				n = fmt.Sprintf("derp/%s/%s/%s/mesh", region.RegionCode, server.Name, to.Name)
-				wantProbes[n] = true
-				if d.probes[n] == nil {
-					log.Printf("adding DERP mesh probe for %s->%s (%s)", server.Name, to.Name, region.RegionName)
-					d.probes[n] = d.p.Run(n, d.meshInterval, labels, d.meshProbeFn(server.HostName, to.HostName))
+				if d.meshInterval > 0 {
+					n := fmt.Sprintf("derp/%s/%s/%s/mesh", region.RegionCode, server.Name, to.Name)
+					wantProbes[n] = true
+					if d.probes[n] == nil {
+						log.Printf("adding DERP mesh probe for %s->%s (%s) every %v", server.Name, to.Name, region.RegionName, d.meshInterval)
+						d.probes[n] = d.p.Run(n, d.meshInterval, labels, d.meshProbeFn(server.Name, to.Name))
+					}
+				}
+
+				if d.bwInterval > 0 && d.bwProbeSize > 0 {
+					bwLabels := maps.Clone(labels)
+					bwLabels["probe_size_bytes"] = fmt.Sprintf("%d", d.bwProbeSize)
+					if server.Name == to.Name {
+						bwLabels["derp_path"] = "single"
+					} else {
+						bwLabels["derp_path"] = "mesh"
+					}
+					n := fmt.Sprintf("derp/%s/%s/%s/bw", region.RegionCode, server.Name, to.Name)
+					wantProbes[n] = true
+					if d.probes[n] == nil {
+						log.Printf("adding DERP bandwidth probe for %s->%s (%s) %v bytes every %v", server.Name, to.Name, region.RegionName, d.bwProbeSize, d.bwInterval)
+						d.probes[n] = d.p.Run(n, d.bwInterval, bwLabels, d.bwProbeFn(server.Name, to.Name, d.bwProbeSize))
+					}
 				}
 			}
 		}
@@ -136,26 +198,52 @@ func (d *derpProber) ProbeMap(ctx context.Context) error {
 	return nil
 }
 
+// probeMesh returns a probe func that sends a test packet through a pair of DERP
+// servers (or just one server, if 'from' and 'to' are the same). 'from' and 'to'
+// are expected to be names (DERPNode.Name) of two DERP servers in the same region.
 func (d *derpProber) probeMesh(from, to string) ProbeFunc {
 	return func(ctx context.Context) error {
-		d.Lock()
-		dm := d.lastDERPMap
-		fromN, ok := d.nodes[from]
-		if !ok {
-			d.Unlock()
-			return fmt.Errorf("could not find derp node %s", from)
-		}
-		toN, ok := d.nodes[to]
-		if !ok {
-			d.Unlock()
-			return fmt.Errorf("could not find derp node %s", to)
+		fromN, toN, err := d.getNodePair(from, to)
+		if err != nil {
+			return err
 		}
-		d.Unlock()
 
+		dm := d.lastDERPMap
 		return derpProbeNodePair(ctx, dm, fromN, toN)
 	}
 }
 
+// probeBandwidth returns a probe func that sends a payload of a given size
+// through a pair of DERP servers (or just one server, if 'from' and 'to' are
+// the same). 'from' and 'to' are expected to be names (DERPNode.Name) of two
+// DERP servers in the same region.
+func (d *derpProber) probeBandwidth(from, to string, size int64) ProbeFunc {
+	return func(ctx context.Context) error {
+		fromN, toN, err := d.getNodePair(from, to)
+		if err != nil {
+			return err
+		}
+		return derpProbeBandwidth(ctx, d.lastDERPMap, fromN, toN, size)
+	}
+}
+
+// getNodePair returns DERPNode objects for two DERP servers based on their
+// short names.
+func (d *derpProber) getNodePair(n1, n2 string) (ret1, ret2 *tailcfg.DERPNode, _ error) {
+	d.Lock()
+	defer d.Unlock()
+	ret1, ok := d.nodes[n1]
+	if !ok {
+		return nil, nil, fmt.Errorf("could not find derp node %s", n1)
+	}
+	ret2, ok = d.nodes[n2]
+	if !ok {
+		return nil, nil, fmt.Errorf("could not find derp node %s", n2)
+	}
+	return ret1, ret2, nil
+}
+
+// updateMap refreshes the locally-cached DERP map.
 func (d *derpProber) updateMap(ctx context.Context) error {
 	req, err := http.NewRequestWithContext(ctx, "GET", d.derpMapURL, nil)
 	if err != nil {
@@ -189,13 +277,13 @@ func (d *derpProber) updateMap(ctx context.Context) error {
 	d.nodes = make(map[string]*tailcfg.DERPNode)
 	for _, reg := range d.lastDERPMap.Regions {
 		for _, n := range reg.Nodes {
-			if existing, ok := d.nodes[n.HostName]; ok {
+			if existing, ok := d.nodes[n.Name]; ok {
 				return fmt.Errorf("derpmap has duplicate nodes: %+v and %+v", existing, n)
 			}
 			// Allow the prober to monitor nodes marked as
 			// STUN only in the default map
 			n.STUNOnly = false
-			d.nodes[n.HostName] = n
+			d.nodes[n.Name] = n
 		}
 	}
 	return nil
@@ -257,13 +345,47 @@ func derpProbeUDP(ctx context.Context, ipStr string, port int) error {
 	return nil
 }
 
+// derpProbeBandwidth sends a payload of a given size between two local
+// DERP clients connected to two DERP servers.
+func derpProbeBandwidth(ctx context.Context, dm *tailcfg.DERPMap, from, to *tailcfg.DERPNode, size int64) (err error) {
+	// This probe uses clients with isProber=false to avoid spamming the derper logs with every packet
+	// sent by the bandwidth probe.
+	fromc, err := newConn(ctx, dm, from, false)
+	if err != nil {
+		return err
+	}
+	defer fromc.Close()
+	toc, err := newConn(ctx, dm, to, false)
+	if err != nil {
+		return err
+	}
+	defer toc.Close()
+
+	// Wait a bit for the 'from' node to learn that the 'to' client exists
+	// on the other node in the region, in the case where the two nodes
+	// are different.
+	if from.Name != to.Name {
+		time.Sleep(100 * time.Millisecond) // pretty arbitrary
+	}
+
+	if err := runDerpProbeNodePair(ctx, from, to, fromc, toc, size); err != nil {
+		// Record pubkeys on failed probes to aid investigation.
+		return fmt.Errorf("%s -> %s: %w",
+			fromc.SelfPublicKey().ShortString(),
+			toc.SelfPublicKey().ShortString(), err)
+	}
+	return nil
+}
+
+// derpProbeNodePair sends a small packet between two local DERP clients
+// connected to two DERP servers.
 func derpProbeNodePair(ctx context.Context, dm *tailcfg.DERPMap, from, to *tailcfg.DERPNode) (err error) {
-	fromc, err := newConn(ctx, dm, from)
+	fromc, err := newConn(ctx, dm, from, true)
 	if err != nil {
 		return err
 	}
 	defer fromc.Close()
-	toc, err := newConn(ctx, dm, to)
+	toc, err := newConn(ctx, dm, to, true)
 	if err != nil {
 		return err
 	}
@@ -276,71 +398,117 @@ func derpProbeNodePair(ctx context.Context, dm *tailcfg.DERPMap, from, to *tailc
 		time.Sleep(100 * time.Millisecond) // pretty arbitrary
 	}
 
-	if err := runDerpProbeNodePair(ctx, from, to, fromc, toc); err != nil {
+	const meshProbePacketSize = 8
+	if err := runDerpProbeNodePair(ctx, from, to, fromc, toc, meshProbePacketSize); err != nil {
 		// Record pubkeys on failed probes to aid investigation.
 		return fmt.Errorf("%s -> %s: %w",
 			fromc.SelfPublicKey().ShortString(),
 			toc.SelfPublicKey().ShortString(), err)
 	}
-	return err
+	return nil
+}
+
+// probePackets stores a pregenerated slice of probe packets keyed by their total size.
+var probePackets syncs.Map[int64, [][]byte]
+
+// packetsForSize returns a slice of packet payloads with a given total size.
+func packetsForSize(size int64) [][]byte {
+	// For a small payload, create a unique random packet.
+	if size <= derp.MaxPacketSize {
+		pkt := make([]byte, size)
+		crand.Read(pkt)
+		return [][]byte{pkt}
+	}
+
+	// For a large payload, create a bunch of packets once and re-use them
+	// across probes.
+	pkts, _ := probePackets.LoadOrInit(size, func() [][]byte {
+		const packetSize = derp.MaxPacketSize
+		var pkts [][]byte
+		for remaining := size; remaining > 0; remaining -= packetSize {
+			pkt := make([]byte, min(remaining, packetSize))
+			crand.Read(pkt)
+			pkts = append(pkts, pkt)
+		}
+		return pkts
+	})
+	return pkts
 }
 
-func runDerpProbeNodePair(ctx context.Context, from, to *tailcfg.DERPNode, fromc, toc *derphttp.Client) error {
-	// Make a random packet
-	pkt := make([]byte, 8)
-	crand.Read(pkt)
+// runDerpProbeNodePair takes two DERP clients (fromc and toc) connected to two
+// DERP servers (from and to) and sends a test payload of a given size from one
+// to another.
+func runDerpProbeNodePair(ctx context.Context, from, to *tailcfg.DERPNode, fromc, toc *derphttp.Client, size int64) error {
+	// To avoid derper dropping enqueued packets, limit the number of packets in flight.
+	// The value here is slightly smaller than perClientSendQueueDepth in derp_server.go
+	inFlight := syncs.NewSemaphore(30)
+
+	pkts := packetsForSize(size)
 
-	// Send the random packet.
+	// Send the packets.
 	sendc := make(chan error, 1)
 	go func() {
-		sendc <- fromc.Send(toc.SelfPublicKey(), pkt)
-	}()
-	select {
-	case <-ctx.Done():
-		return fmt.Errorf("timeout sending via %q: %w", from.Name, ctx.Err())
-	case err := <-sendc:
-		if err != nil {
-			return fmt.Errorf("error sending via %q: %w", from.Name, err)
+		for idx, pkt := range pkts {
+			inFlight.AcquireContext(ctx)
+			if err := fromc.Send(toc.SelfPublicKey(), pkt); err != nil {
+				sendc <- fmt.Errorf("sending packet %d: %w", idx, err)
+				return
+			}
 		}
-	}
+	}()
 
-	// Receive the random packet.
-	recvc := make(chan any, 1) // either derp.ReceivedPacket or error
+	// Receive the packets.
+	recvc := make(chan error, 1)
 	go func() {
+		defer close(recvc) // to break out of 'select' below.
+		idx := 0
 		for {
 			m, err := toc.Recv()
 			if err != nil {
-				recvc <- err
+				recvc <- fmt.Errorf("after %d data packets: %w", idx, err)
 				return
 			}
 			switch v := m.(type) {
 			case derp.ReceivedPacket:
-				recvc <- v
+				inFlight.Release()
+				if v.Source != fromc.SelfPublicKey() {
+					recvc <- fmt.Errorf("got data packet %d from unexpected source, %v", idx, v.Source)
+					return
+				}
+				if got, want := v.Data, pkts[idx]; !bytes.Equal(got, want) {
+					recvc <- fmt.Errorf("unexpected data packet %d (out of %d)", idx, len(pkts))
+					return
+				}
+				idx += 1
+				if idx == len(pkts) {
+					return
+				}
+
+			case derp.KeepAliveMessage:
+				// Silently ignore.
 			default:
 				log.Printf("%v: ignoring Recv frame type %T", to.Name, v)
 				// Loop.
 			}
 		}
 	}()
+
 	select {
 	case <-ctx.Done():
-		return fmt.Errorf("timeout receiving from %q: %w", to.Name, ctx.Err())
-	case v := <-recvc:
-		if err, ok := v.(error); ok {
-			return fmt.Errorf("error receiving from %q: %w", to.Name, err)
-		}
-		p := v.(derp.ReceivedPacket)
-		if p.Source != fromc.SelfPublicKey() {
-			return fmt.Errorf("got data packet from unexpected source, %v", p.Source)
+		return fmt.Errorf("timeout: %w", ctx.Err())
+	case err := <-sendc:
+		if err != nil {
+			return fmt.Errorf("error sending via %q: %w", from.Name, err)
 		}
-		if !bytes.Equal(p.Data, pkt) {
-			return fmt.Errorf("unexpected data packet %q", p.Data)
+	case err := <-recvc:
+		if err != nil {
+			return fmt.Errorf("error receiving from %q: %w", to.Name, err)
 		}
 	}
 	return nil
 }
 
-func newConn(ctx context.Context, dm *tailcfg.DERPMap, n *tailcfg.DERPNode) (*derphttp.Client, error) {
+func newConn(ctx context.Context, dm *tailcfg.DERPMap, n *tailcfg.DERPNode, isProber bool) (*derphttp.Client, error) {
 	// To avoid spamming the log with regular connection messages.
 	l := logger.Filtered(log.Printf, func(s string) bool {
 		return !strings.Contains(s, "derphttp.Client.Connect: connecting to")
@@ -355,7 +523,7 @@ func newConn(ctx context.Context, dm *tailcfg.DERPMap, n *tailcfg.DERPNode) (*de
 			Nodes:      []*tailcfg.DERPNode{n},
 		}
 	})
-	dc.IsProber = true
+	dc.IsProber = isProber
 	err := dc.Connect(ctx)
 	if err != nil {
 		return nil, err

+ 114 - 8
prober/derp_test.go

@@ -5,12 +5,19 @@ package prober
 
 import (
 	"context"
+	"crypto/sha256"
+	"crypto/tls"
 	"encoding/json"
+	"net"
 	"net/http"
 	"net/http/httptest"
 	"testing"
+	"time"
 
+	"tailscale.com/derp"
+	"tailscale.com/derp/derphttp"
 	"tailscale.com/tailcfg"
+	"tailscale.com/types/key"
 )
 
 func TestDerpProber(t *testing.T) {
@@ -50,18 +57,21 @@ func TestDerpProber(t *testing.T) {
 	clk := newFakeTime()
 	p := newForTest(clk.Now, clk.NewTicker)
 	dp := &derpProber{
-		p:           p,
-		derpMapURL:  srv.URL,
-		tlsProbeFn:  func(_ string) ProbeFunc { return func(context.Context) error { return nil } },
-		udpProbeFn:  func(_ string, _ int) ProbeFunc { return func(context.Context) error { return nil } },
-		meshProbeFn: func(_, _ string) ProbeFunc { return func(context.Context) error { return nil } },
-		nodes:       make(map[string]*tailcfg.DERPNode),
-		probes:      make(map[string]*Probe),
+		p:            p,
+		derpMapURL:   srv.URL,
+		tlsInterval:  time.Second,
+		tlsProbeFn:   func(_ string) ProbeFunc { return func(context.Context) error { return nil } },
+		udpInterval:  time.Second,
+		udpProbeFn:   func(_ string, _ int) ProbeFunc { return func(context.Context) error { return nil } },
+		meshInterval: time.Second,
+		meshProbeFn:  func(_, _ string) ProbeFunc { return func(context.Context) error { return nil } },
+		nodes:        make(map[string]*tailcfg.DERPNode),
+		probes:       make(map[string]*Probe),
 	}
 	if err := dp.ProbeMap(context.Background()); err != nil {
 		t.Errorf("unexpected ProbeMap() error: %s", err)
 	}
-	if len(dp.nodes) != 2 || dp.nodes["derpn1.tailscale.test"] == nil || dp.nodes["derpn2.tailscale.test"] == nil {
+	if len(dp.nodes) != 2 || dp.nodes["n1"] == nil || dp.nodes["n2"] == nil {
 		t.Errorf("unexpected nodes: %+v", dp.nodes)
 	}
 	// Probes expected for two nodes:
@@ -103,3 +113,99 @@ func TestDerpProber(t *testing.T) {
 		t.Errorf("unexpected probes: %+v", dp.probes)
 	}
 }
+
+func TestRunDerpProbeNodePair(t *testing.T) {
+	// os.Setenv("DERP_DEBUG_LOGS", "true")
+	serverPrivateKey := key.NewNode()
+	s := derp.NewServer(serverPrivateKey, t.Logf)
+	defer s.Close()
+
+	httpsrv := &http.Server{
+		TLSNextProto: make(map[string]func(*http.Server, *tls.Conn, http.Handler)),
+		Handler:      derphttp.Handler(s),
+	}
+	ln, err := net.Listen("tcp4", "localhost:0")
+	if err != nil {
+		t.Fatal(err)
+	}
+	serverURL := "http://" + ln.Addr().String()
+	t.Logf("server URL: %s", serverURL)
+
+	go func() {
+		if err := httpsrv.Serve(ln); err != nil {
+			if err == http.ErrServerClosed {
+				return
+			}
+			panic(err)
+		}
+	}()
+	newClient := func() *derphttp.Client {
+		c, err := derphttp.NewClient(key.NewNode(), serverURL, t.Logf)
+		if err != nil {
+			t.Fatalf("NewClient: %v", err)
+		}
+		m, err := c.Recv()
+		if err != nil {
+			t.Fatalf("Recv: %v", err)
+		}
+		switch m.(type) {
+		case derp.ServerInfoMessage:
+		default:
+			t.Fatalf("unexpected first message type %T", m)
+		}
+		return c
+	}
+
+	c1 := newClient()
+	defer c1.Close()
+	c2 := newClient()
+	defer c2.Close()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
+	defer cancel()
+
+	err = runDerpProbeNodePair(ctx, &tailcfg.DERPNode{Name: "c1"}, &tailcfg.DERPNode{Name: "c2"}, c1, c2, 100_000_000)
+	if err != nil {
+		t.Error(err)
+	}
+}
+
+func Test_packetsForSize(t *testing.T) {
+	tests := []struct {
+		name        string
+		size        int
+		wantPackets int
+		wantUnique  bool
+	}{
+		{"small_unqiue", 8, 1, true},
+		{"8k_unique", 8192, 1, true},
+		{"full_size_packet", derp.MaxPacketSize, 1, true},
+		{"larger_than_one", derp.MaxPacketSize + 1, 2, false},
+		{"large", 500000, 8, false},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			hashes := make(map[string]int)
+			for i := 0; i < 5; i++ {
+				pkts := packetsForSize(int64(tt.size))
+				if len(pkts) != tt.wantPackets {
+					t.Errorf("packetsForSize(%d) got %d packets, want %d", tt.size, len(pkts), tt.wantPackets)
+				}
+				var total int
+				hash := sha256.New()
+				for _, p := range pkts {
+					hash.Write(p)
+					total += len(p)
+				}
+				hashes[string(hash.Sum(nil))]++
+				if total != tt.size {
+					t.Errorf("packetsForSize(%d) returned %d bytes total", tt.size, total)
+				}
+			}
+			unique := len(hashes) > 1
+			if unique != tt.wantUnique {
+				t.Errorf("packetsForSize(%d) is unique=%v (returned %d different answers); want unique=%v", tt.size, unique, len(hashes), unique)
+			}
+		})
+	}
+}