Преглед изворни кода

netcheck, controlclient, magicsock: add more metrics

Updates #3307

Change-Id: Ibb33425764a75bde49230632f1b472f923551126
Signed-off-by: Brad Fitzpatrick <[email protected]>
Brad Fitzpatrick пре 4 година
родитељ
комит
24ea365d48

+ 1 - 0
cmd/tailscale/depaware.txt

@@ -72,6 +72,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
         tailscale.com/types/persist                                  from tailscale.com/ipn
         tailscale.com/types/preftype                                 from tailscale.com/cmd/tailscale/cli+
         tailscale.com/types/structs                                  from tailscale.com/ipn+
+        tailscale.com/util/clientmetric                              from tailscale.com/net/netcheck
         tailscale.com/util/dnsname                                   from tailscale.com/cmd/tailscale/cli+
    W    tailscale.com/util/endian                                    from tailscale.com/net/netns
         tailscale.com/util/groupmember                               from tailscale.com/cmd/tailscale/cli

+ 43 - 1
control/controlclient/direct.go

@@ -46,6 +46,7 @@ import (
 	"tailscale.com/types/netmap"
 	"tailscale.com/types/opt"
 	"tailscale.com/types/persist"
+	"tailscale.com/util/clientmetric"
 	"tailscale.com/util/systemd"
 	"tailscale.com/wgengine/monitor"
 )
@@ -558,6 +559,15 @@ const pollTimeout = 120 * time.Second
 
 // cb nil means to omit peers.
 func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, cb func(*netmap.NetworkMap)) error {
+	metricMapRequests.Add(1)
+	metricMapRequestsActive.Add(1)
+	defer metricMapRequestsActive.Add(-1)
+	if maxPolls == -1 {
+		metricMapRequestsPoll.Add(1)
+	} else {
+		metricMapRequestsLite.Add(1)
+	}
+
 	c.mu.Lock()
 	persist := c.persist
 	serverURL := c.serverURL
@@ -747,11 +757,14 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, cb func(*netm
 			return err
 		}
 
+		metricMapResponseMessages.Add(1)
+
 		if allowStream {
 			health.GotStreamedMapResponse()
 		}
 
 		if pr := resp.PingRequest; pr != nil && c.isUniquePingRequest(pr) {
+			metricMapResponsePings.Add(1)
 			go answerPing(c.logf, c.httpc, pr)
 		}
 
@@ -768,9 +781,15 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, cb func(*netm
 			return ctx.Err()
 		}
 		if resp.KeepAlive {
+			metricMapResponseKeepAlives.Add(1)
 			continue
 		}
 
+		metricMapResponseMap.Add(1)
+		if i > 0 {
+			metricMapResponseMapDelta.Add(1)
+		}
+
 		hasDebug := resp.Debug != nil
 		// being conservative here, if Debug not present set to False
 		controlknobs.SetDisableUPnP(hasDebug && resp.Debug.DisableUPnP.EqualBool(true))
@@ -1181,7 +1200,13 @@ func sleepAsRequested(ctx context.Context, logf logger.Logf, timeoutReset chan<-
 
 // SetDNS sends the SetDNSRequest request to the control plane server,
 // requesting a DNS record be created or updated.
-func (c *Direct) SetDNS(ctx context.Context, req *tailcfg.SetDNSRequest) error {
+func (c *Direct) SetDNS(ctx context.Context, req *tailcfg.SetDNSRequest) (err error) {
+	metricSetDNS.Add(1)
+	defer func() {
+		if err != nil {
+			metricSetDNSError.Add(1)
+		}
+	}()
 	c.mu.Lock()
 	serverKey := c.serverKey
 	c.mu.Unlock()
@@ -1281,3 +1306,20 @@ func postPingResult(now time.Time, logf logger.Logf, c *http.Client, pr *tailcfg
 	}
 	return nil
 }
+
+var (
+	metricMapRequestsActive = clientmetric.NewGauge("controlclient_map_requests_active")
+
+	metricMapRequests     = clientmetric.NewCounter("controlclient_map_requests")
+	metricMapRequestsLite = clientmetric.NewCounter("controlclient_map_requests_lite")
+	metricMapRequestsPoll = clientmetric.NewCounter("controlclient_map_requests_poll")
+
+	metricMapResponseMessages   = clientmetric.NewCounter("controlclient_map_response_message") // any message type
+	metricMapResponsePings      = clientmetric.NewCounter("controlclient_map_response_ping")
+	metricMapResponseKeepAlives = clientmetric.NewCounter("controlclient_map_response_keepalive")
+	metricMapResponseMap        = clientmetric.NewCounter("controlclient_map_response_map")       // any non-keepalive map response
+	metricMapResponseMapDelta   = clientmetric.NewCounter("controlclient_map_response_map_delta") // 2nd+ non-keepalive map response
+
+	metricSetDNS      = clientmetric.NewCounter("controlclient_setdns")
+	metricSetDNSError = clientmetric.NewCounter("controlclient_setdns_error")
+)

+ 30 - 1
net/netcheck/netcheck.go

@@ -34,6 +34,7 @@ import (
 	"tailscale.com/tailcfg"
 	"tailscale.com/types/logger"
 	"tailscale.com/types/opt"
+	"tailscale.com/util/clientmetric"
 )
 
 // Debugging and experimentation tweakables.
@@ -232,6 +233,12 @@ func (c *Client) MakeNextReportFull() {
 func (c *Client) ReceiveSTUNPacket(pkt []byte, src netaddr.IPPort) {
 	c.vlogf("received STUN packet from %s", src)
 
+	if src.IP().Is4() {
+		metricSTUNRecv4.Add(1)
+	} else if src.IP().Is6() {
+		metricSTUNRecv6.Add(1)
+	}
+
 	c.mu.Lock()
 	if c.handleHairSTUNLocked(pkt, src) {
 		c.mu.Unlock()
@@ -737,7 +744,13 @@ func (c *Client) udpBindAddr() string {
 // GetReport gets a report.
 //
 // It may not be called concurrently with itself.
-func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, error) {
+func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report, reterr error) {
+	defer func() {
+		if reterr != nil {
+			metricNumGetReportError.Add(1)
+		}
+	}()
+	metricNumGetReport.Add(1)
 	// Mask user context with ours that we guarantee to cancel so
 	// we can depend on it being closed in goroutines later.
 	// (User ctx might be context.Background, etc)
@@ -769,6 +782,7 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, e
 		last = nil // causes makeProbePlan below to do a full (initial) plan
 		c.nextFull = false
 		c.lastFull = now
+		metricNumGetReportFull.Add(1)
 	}
 	rs.incremental = last != nil
 	c.mu.Unlock()
@@ -983,6 +997,7 @@ func (c *Client) runHTTPOnlyChecks(ctx context.Context, last *Report, rs *report
 }
 
 func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegion) (time.Duration, netaddr.IP, error) {
+	metricHTTPSend.Add(1)
 	var result httpstat.Result
 	ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), overallProbeTimeout)
 	defer cancel()
@@ -1217,6 +1232,7 @@ func (rs *reportState) runProbe(ctx context.Context, dm *tailcfg.DERPMap, probe
 
 	switch probe.proto {
 	case probeIPv4:
+		metricSTUNSend4.Add(1)
 		n, err := rs.pc4.WriteTo(req, addr)
 		if n == len(req) && err == nil {
 			rs.mu.Lock()
@@ -1224,6 +1240,7 @@ func (rs *reportState) runProbe(ctx context.Context, dm *tailcfg.DERPMap, probe
 			rs.mu.Unlock()
 		}
 	case probeIPv6:
+		metricSTUNSend6.Add(1)
 		n, err := rs.pc6.WriteTo(req, addr)
 		if n == len(req) && err == nil {
 			rs.mu.Lock()
@@ -1322,3 +1339,15 @@ func conciseOptBool(b opt.Bool, trueVal string) string {
 	}
 	return ""
 }
+
+var (
+	metricNumGetReport      = clientmetric.NewCounter("netcheck_report")
+	metricNumGetReportFull  = clientmetric.NewCounter("netcheck_report_full")
+	metricNumGetReportError = clientmetric.NewCounter("netcheck_report_error")
+
+	metricSTUNSend4 = clientmetric.NewCounter("netcheck_stun_send_ipv4")
+	metricSTUNSend6 = clientmetric.NewCounter("netcheck_stun_send_ipv6")
+	metricSTUNRecv4 = clientmetric.NewCounter("netcheck_stun_recv_ipv4")
+	metricSTUNRecv6 = clientmetric.NewCounter("netcheck_stun_recv_ipv6")
+	metricHTTPSend  = clientmetric.NewCounter("netcheck_https_measure")
+)

+ 12 - 0
wgengine/magicsock/magicsock.go

@@ -967,6 +967,9 @@ func (c *Conn) setNearestDERP(derpNum int) (wantDERP bool) {
 		// No change.
 		return true
 	}
+	if c.myDerp != 0 && derpNum != 0 {
+		metricDERPHomeChange.Add(1)
+	}
 	c.myDerp = derpNum
 	health.SetMagicSockDERPHome(derpNum)
 
@@ -1616,6 +1619,9 @@ func (c *Conn) runDerpWriter(ctx context.Context, dc *derphttp.Client, ch <-chan
 			err := dc.Send(wr.pubKey, wr.b)
 			if err != nil {
 				c.logf("magicsock: derp.Send(%v): %v", wr.addr, err)
+				metricSendDERPError.Add(1)
+			} else {
+				metricSendDERP.Add(1)
 			}
 		}
 	}
@@ -4054,6 +4060,8 @@ var (
 	metricSendDERPErrorQueue  = clientmetric.NewCounter("magicsock_send_derp_error_queue")
 	metricSendUDP             = clientmetric.NewCounter("magicsock_send_udp")
 	metricSendUDPError        = clientmetric.NewCounter("magicsock_send_udp_error")
+	metricSendDERP            = clientmetric.NewCounter("magicsock_send_derp")
+	metricSendDERPError       = clientmetric.NewCounter("magicsock_send_derp_error")
 
 	// Data packets (non-disco)
 	metricSendData            = clientmetric.NewCounter("magicsock_send_data")
@@ -4079,4 +4087,8 @@ var (
 	metricRecvDiscoCallMeMaybe         = clientmetric.NewCounter("magicsock_disco_recv_callmemaybe")
 	metricRecvDiscoCallMeMaybeBadNode  = clientmetric.NewCounter("magicsock_disco_recv_callmemaybe_bad_node")
 	metricRecvDiscoCallMeMaybeBadDisco = clientmetric.NewCounter("magicsock_disco_recv_callmemaybe_bad_disco")
+
+	// metricDERPHomeChange is how many times our DERP home region ID has
+	// changed from non-zero to a different non-zero.
+	metricDERPHomeChange = clientmetric.NewCounter("derp_home_change")
 )