Selaa lähdekoodia

prober: only record latency for successful probes

This will make it easier to track probe latency on a dashboard.

Updates https://github.com/tailscale/corp/issues/9916

Signed-off-by: Anton Tolchanov <[email protected]>
Anton Tolchanov 3 vuotta sitten
vanhempi
sitoutus
7083246409
2 muutettua tiedostoa jossa 25 lisäystä ja 20 poistoa
  1. 19 12
      prober/prober.go
  2. 6 8
      prober/prober_test.go

+ 19 - 12
prober/prober.go

@@ -161,11 +161,12 @@ type Probe struct {
 	tick         ticker
 	labels       map[string]string
 
-	mu      sync.Mutex
-	start   time.Time // last time doProbe started
-	end     time.Time // last time doProbe returned
-	result  bool      // whether the last doProbe call succeeded
-	lastErr error
+	mu        sync.Mutex
+	start     time.Time     // last time doProbe started
+	end       time.Time     // last time doProbe returned
+	latency   time.Duration // last successful probe latency
+	succeeded bool          // whether the last doProbe call succeeded
+	lastErr   error
 }
 
 // Close shuts down the Probe and unregisters it from its Prober.
@@ -254,8 +255,13 @@ func (p *Probe) recordEnd(start time.Time, err error) {
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	p.end = end
-	p.result = err == nil
+	p.succeeded = err == nil
 	p.lastErr = err
+	if p.succeeded {
+		p.latency = end.Sub(p.start)
+	} else {
+		p.latency = 0
+	}
 }
 
 type varExporter struct {
@@ -289,13 +295,13 @@ func (v varExporter) probeInfo() map[string]ProbeInfo {
 			Labels: probe.labels,
 			Start:  probe.start,
 			End:    probe.end,
-			Result: probe.result,
+			Result: probe.succeeded,
 		}
 		if probe.lastErr != nil {
 			inf.Error = probe.lastErr.Error()
 		}
-		if probe.end.After(probe.start) {
-			inf.Latency = probe.end.Sub(probe.start).String()
+		if probe.latency > 0 {
+			inf.Latency = probe.latency.String()
 		}
 		out[probe.name] = inf
 		probe.mu.Unlock()
@@ -358,9 +364,10 @@ func (v varExporter) WritePrometheus(w io.Writer, prefix string) {
 		}
 		if !probe.end.IsZero() {
 			fmt.Fprintf(w, "%s_end_secs{%s} %d\n", prefix, labels, probe.end.Unix())
-			// Start is always present if end is.
-			fmt.Fprintf(w, "%s_latency_millis{%s} %d\n", prefix, labels, probe.end.Sub(probe.start).Milliseconds())
-			if probe.result {
+			if probe.latency > 0 {
+				fmt.Fprintf(w, "%s_latency_millis{%s} %d\n", prefix, labels, probe.latency.Milliseconds())
+			}
+			if probe.succeeded {
 				fmt.Fprintf(w, "%s_result{%s} 1\n", prefix, labels)
 			} else {
 				fmt.Fprintf(w, "%s_result{%s} 0\n", prefix, labels)

+ 6 - 8
prober/prober_test.go

@@ -237,12 +237,11 @@ func TestExpvar(t *testing.T) {
 	}
 
 	check("probe", ProbeInfo{
-		Labels:  map[string]string{"label": "value"},
-		Start:   epoch,
-		End:     epoch.Add(aFewMillis),
-		Latency: aFewMillis.String(),
-		Result:  false,
-		Error:   "failing, as instructed by test",
+		Labels: map[string]string{"label": "value"},
+		Start:  epoch,
+		End:    epoch.Add(aFewMillis),
+		Result: false,
+		Error:  "failing, as instructed by test",
 	})
 
 	succeed.Store(true)
@@ -280,9 +279,8 @@ func TestPrometheus(t *testing.T) {
 probe_interval_secs{name="testprobe",label="value"} %f
 probe_start_secs{name="testprobe",label="value"} %d
 probe_end_secs{name="testprobe",label="value"} %d
-probe_latency_millis{name="testprobe",label="value"} %d
 probe_result{name="testprobe",label="value"} 0
-`, probeInterval.Seconds(), epoch.Unix(), epoch.Add(aFewMillis).Unix(), aFewMillis.Milliseconds()))
+`, probeInterval.Seconds(), epoch.Unix(), epoch.Add(aFewMillis).Unix()))
 		if diff := cmp.Diff(strings.TrimSpace(b.String()), want); diff != "" {
 			return fmt.Errorf("wrong probe stats (-got+want):\n%s", diff)
 		}