prober_test.go 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. package prober
  4. import (
  5. "context"
  6. "encoding/json"
  7. "errors"
  8. "fmt"
  9. "io"
  10. "net/http"
  11. "net/http/httptest"
  12. "net/url"
  13. "regexp"
  14. "strings"
  15. "sync"
  16. "sync/atomic"
  17. "testing"
  18. "time"
  19. "github.com/google/go-cmp/cmp"
  20. "github.com/google/go-cmp/cmp/cmpopts"
  21. "github.com/prometheus/client_golang/prometheus/testutil"
  22. "tailscale.com/tstest"
  23. "tailscale.com/tsweb"
  24. )
  // Probe scheduling intervals used throughout the tests.
  const (
  	// probeInterval is 8s so expvars that are integer numbers of seconds change.
  	probeInterval        = 8 * time.Second
  	halfProbeInterval    = probeInterval / 2
  	quarterProbeInterval = probeInterval / 4
  )

  // Real-time (not fake-clock) durations.
  const (
  	// convergenceTimeout bounds the tstest.WaitFor polling done by the tests.
  	convergenceTimeout = time.Second
  	// convergenceSleep is presumably the matching polling granularity; it is
  	// not referenced in the visible part of this file.
  	convergenceSleep = time.Millisecond
  	// aFewMillis is the amount the fake clock is advanced inside a probe
  	// function to simulate probe latency.
  	aFewMillis = 20 * time.Millisecond
  )

  // epoch is the instant at which every fakeTime clock starts.
  var epoch = time.Unix(0, 0)
  34. func TestProberTiming(t *testing.T) {
  35. clk := newFakeTime()
  36. p := newForTest(clk.Now, clk.NewTicker)
  37. invoked := make(chan struct{}, 1)
  38. notCalled := func() {
  39. t.Helper()
  40. select {
  41. case <-invoked:
  42. t.Fatal("probe was invoked earlier than expected")
  43. default:
  44. }
  45. }
  46. called := func() {
  47. t.Helper()
  48. select {
  49. case <-invoked:
  50. case <-time.After(2 * time.Second):
  51. t.Fatal("probe wasn't invoked as expected")
  52. }
  53. }
  54. p.Run("test-probe", probeInterval, nil, FuncProbe(func(context.Context) error {
  55. invoked <- struct{}{}
  56. return nil
  57. }))
  58. waitActiveProbes(t, p, clk, 1)
  59. called()
  60. notCalled()
  61. clk.Advance(probeInterval + halfProbeInterval)
  62. called()
  63. notCalled()
  64. clk.Advance(quarterProbeInterval)
  65. notCalled()
  66. clk.Advance(probeInterval)
  67. called()
  68. notCalled()
  69. }
  70. func TestProberTimingSpread(t *testing.T) {
  71. clk := newFakeTime()
  72. p := newForTest(clk.Now, clk.NewTicker).WithSpread(true)
  73. invoked := make(chan struct{}, 1)
  74. notCalled := func() {
  75. t.Helper()
  76. select {
  77. case <-invoked:
  78. t.Fatal("probe was invoked earlier than expected")
  79. default:
  80. }
  81. }
  82. called := func() {
  83. t.Helper()
  84. select {
  85. case <-invoked:
  86. case <-time.After(2 * time.Second):
  87. t.Fatal("probe wasn't invoked as expected")
  88. }
  89. }
  90. probe := p.Run("test-spread-probe", probeInterval, nil, FuncProbe(func(context.Context) error {
  91. invoked <- struct{}{}
  92. return nil
  93. }))
  94. waitActiveProbes(t, p, clk, 1)
  95. notCalled()
  96. // Name of the probe (test-spread-probe) has been chosen to ensure that
  97. // the initial delay is smaller than half of the probe interval.
  98. clk.Advance(halfProbeInterval)
  99. called()
  100. notCalled()
  101. // We need to wait until the main (non-initial) ticker in Probe.loop is
  102. // waiting, or we could race and advance the test clock between when
  103. // the initial delay ticker completes and before the ticker for the
  104. // main loop is created. In this race, we'd first advance the test
  105. // clock, then the ticker would be registered, and the test would fail
  106. // because that ticker would never be fired.
  107. err := tstest.WaitFor(convergenceTimeout, func() error {
  108. clk.Lock()
  109. defer clk.Unlock()
  110. for _, tick := range clk.tickers {
  111. tick.Lock()
  112. stopped, interval := tick.stopped, tick.interval
  113. tick.Unlock()
  114. if stopped {
  115. continue
  116. }
  117. // Test for the main loop, not the initialDelay
  118. if interval == probe.interval {
  119. return nil
  120. }
  121. }
  122. return fmt.Errorf("no ticker with interval %d found", probe.interval)
  123. })
  124. if err != nil {
  125. t.Fatal(err)
  126. }
  127. clk.Advance(quarterProbeInterval)
  128. notCalled()
  129. clk.Advance(probeInterval)
  130. called()
  131. notCalled()
  132. }
  133. func TestProberTimeout(t *testing.T) {
  134. clk := newFakeTime()
  135. p := newForTest(clk.Now, clk.NewTicker)
  136. var done sync.WaitGroup
  137. done.Add(1)
  138. pfunc := FuncProbe(func(ctx context.Context) error {
  139. defer done.Done()
  140. select {
  141. case <-ctx.Done():
  142. return ctx.Err()
  143. }
  144. })
  145. pfunc.Timeout = time.Microsecond
  146. probe := p.Run("foo", 30*time.Second, nil, pfunc)
  147. waitActiveProbes(t, p, clk, 1)
  148. done.Wait()
  149. probe.mu.Lock()
  150. info := probe.probeInfoLocked()
  151. probe.mu.Unlock()
  152. wantInfo := ProbeInfo{
  153. Name: "foo",
  154. Interval: 30 * time.Second,
  155. Labels: map[string]string{"class": "", "name": "foo"},
  156. Status: ProbeStatusFailed,
  157. Error: "context deadline exceeded",
  158. RecentResults: []bool{false},
  159. RecentLatencies: nil,
  160. }
  161. if diff := cmp.Diff(wantInfo, info, cmpopts.IgnoreFields(ProbeInfo{}, "Start", "End", "Latency")); diff != "" {
  162. t.Fatalf("unexpected ProbeInfo (-want +got):\n%s", diff)
  163. }
  164. if got := info.Latency; got > time.Second {
  165. t.Errorf("info.Latency = %v, want at most 1s", got)
  166. }
  167. }
  168. func TestProberConcurrency(t *testing.T) {
  169. clk := newFakeTime()
  170. p := newForTest(clk.Now, clk.NewTicker)
  171. var ran atomic.Int64
  172. stopProbe := make(chan struct{})
  173. pfunc := FuncProbe(func(ctx context.Context) error {
  174. ran.Add(1)
  175. <-stopProbe
  176. return nil
  177. })
  178. pfunc.Timeout = time.Hour
  179. pfunc.Concurrency = 3
  180. p.Run("foo", time.Second, nil, pfunc)
  181. waitActiveProbes(t, p, clk, 1)
  182. for range 50 {
  183. clk.Advance(time.Second)
  184. }
  185. if err := tstest.WaitFor(convergenceTimeout, func() error {
  186. if got, want := ran.Load(), int64(3); got != want {
  187. return fmt.Errorf("expected %d probes to run concurrently, got %d", want, got)
  188. }
  189. return nil
  190. }); err != nil {
  191. t.Fatal(err)
  192. }
  193. close(stopProbe)
  194. }
  195. func TestProberRun(t *testing.T) {
  196. clk := newFakeTime()
  197. p := newForTest(clk.Now, clk.NewTicker)
  198. var (
  199. mu sync.Mutex
  200. cnt int
  201. )
  202. const startingProbes = 100
  203. var probes []*Probe
  204. for i := range startingProbes {
  205. probes = append(probes, p.Run(fmt.Sprintf("probe%d", i), probeInterval, nil, FuncProbe(func(context.Context) error {
  206. mu.Lock()
  207. defer mu.Unlock()
  208. cnt++
  209. return nil
  210. })))
  211. }
  212. checkCnt := func(want int) {
  213. t.Helper()
  214. err := tstest.WaitFor(convergenceTimeout, func() error {
  215. mu.Lock()
  216. defer mu.Unlock()
  217. if cnt == want {
  218. cnt = 0
  219. return nil
  220. }
  221. return fmt.Errorf("wrong number of probe counter increments, got %d want %d", cnt, want)
  222. })
  223. if err != nil {
  224. t.Fatal(err)
  225. }
  226. }
  227. waitActiveProbes(t, p, clk, startingProbes)
  228. checkCnt(startingProbes)
  229. clk.Advance(probeInterval + halfProbeInterval)
  230. checkCnt(startingProbes)
  231. if c, err := testutil.GatherAndCount(p.metrics, "prober_result"); c != startingProbes || err != nil {
  232. t.Fatalf("expected %d prober_result metrics; got %d (error %s)", startingProbes, c, err)
  233. }
  234. keep := startingProbes / 2
  235. for i := keep; i < startingProbes; i++ {
  236. probes[i].Close()
  237. }
  238. waitActiveProbes(t, p, clk, keep)
  239. clk.Advance(probeInterval)
  240. checkCnt(keep)
  241. if c, err := testutil.GatherAndCount(p.metrics, "prober_result"); c != keep || err != nil {
  242. t.Fatalf("expected %d prober_result metrics; got %d (error %s)", keep, c, err)
  243. }
  244. }
  245. func TestPrometheus(t *testing.T) {
  246. clk := newFakeTime()
  247. p := newForTest(clk.Now, clk.NewTicker).WithMetricNamespace("probe")
  248. var succeed atomic.Bool
  249. p.Run("testprobe", probeInterval, map[string]string{"label": "value"}, FuncProbe(func(context.Context) error {
  250. clk.Advance(aFewMillis)
  251. if succeed.Load() {
  252. return nil
  253. }
  254. return errors.New("failing, as instructed by test")
  255. }))
  256. waitActiveProbes(t, p, clk, 1)
  257. err := tstest.WaitFor(convergenceTimeout, func() error {
  258. want := fmt.Sprintf(`
  259. # HELP probe_interval_secs Probe interval in seconds
  260. # TYPE probe_interval_secs gauge
  261. probe_interval_secs{class="",label="value",name="testprobe"} %f
  262. # HELP probe_start_secs Latest probe start time (seconds since epoch)
  263. # TYPE probe_start_secs gauge
  264. probe_start_secs{class="",label="value",name="testprobe"} %d
  265. # HELP probe_end_secs Latest probe end time (seconds since epoch)
  266. # TYPE probe_end_secs gauge
  267. probe_end_secs{class="",label="value",name="testprobe"} %d
  268. # HELP probe_result Latest probe result (1 = success, 0 = failure)
  269. # TYPE probe_result gauge
  270. probe_result{class="",label="value",name="testprobe"} 0
  271. `, probeInterval.Seconds(), epoch.Unix(), epoch.Add(aFewMillis).Unix())
  272. return testutil.GatherAndCompare(p.metrics, strings.NewReader(want),
  273. "probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_result")
  274. })
  275. if err != nil {
  276. t.Fatal(err)
  277. }
  278. succeed.Store(true)
  279. clk.Advance(probeInterval + halfProbeInterval)
  280. err = tstest.WaitFor(convergenceTimeout, func() error {
  281. start := epoch.Add(probeInterval + halfProbeInterval)
  282. end := start.Add(aFewMillis)
  283. want := fmt.Sprintf(`
  284. # HELP probe_interval_secs Probe interval in seconds
  285. # TYPE probe_interval_secs gauge
  286. probe_interval_secs{class="",label="value",name="testprobe"} %f
  287. # HELP probe_start_secs Latest probe start time (seconds since epoch)
  288. # TYPE probe_start_secs gauge
  289. probe_start_secs{class="",label="value",name="testprobe"} %d
  290. # HELP probe_end_secs Latest probe end time (seconds since epoch)
  291. # TYPE probe_end_secs gauge
  292. probe_end_secs{class="",label="value",name="testprobe"} %d
  293. # HELP probe_latency_millis Latest probe latency (ms)
  294. # TYPE probe_latency_millis gauge
  295. probe_latency_millis{class="",label="value",name="testprobe"} %d
  296. # HELP probe_result Latest probe result (1 = success, 0 = failure)
  297. # TYPE probe_result gauge
  298. probe_result{class="",label="value",name="testprobe"} 1
  299. `, probeInterval.Seconds(), start.Unix(), end.Unix(), aFewMillis.Milliseconds())
  300. return testutil.GatherAndCompare(p.metrics, strings.NewReader(want),
  301. "probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_latency_millis", "probe_result")
  302. })
  303. if err != nil {
  304. t.Fatal(err)
  305. }
  306. }
  307. func TestOnceMode(t *testing.T) {
  308. clk := newFakeTime()
  309. p := newForTest(clk.Now, clk.NewTicker).WithOnce(true)
  310. p.Run("probe1", probeInterval, nil, FuncProbe(func(context.Context) error { return nil }))
  311. p.Run("probe2", probeInterval, nil, FuncProbe(func(context.Context) error { return fmt.Errorf("error2") }))
  312. p.Run("probe3", probeInterval, nil, FuncProbe(func(context.Context) error {
  313. p.Run("probe4", probeInterval, nil, FuncProbe(func(context.Context) error {
  314. return fmt.Errorf("error4")
  315. }))
  316. return nil
  317. }))
  318. p.Wait()
  319. wantCount := 4
  320. for _, metric := range []string{"prober_result", "prober_end_secs"} {
  321. if c, err := testutil.GatherAndCount(p.metrics, metric); c != wantCount || err != nil {
  322. t.Fatalf("expected %d %s metrics; got %d (error %s)", wantCount, metric, c, err)
  323. }
  324. }
  325. }
  326. func TestProberProbeInfo(t *testing.T) {
  327. clk := newFakeTime()
  328. p := newForTest(clk.Now, clk.NewTicker).WithOnce(true)
  329. p.Run("probe1", probeInterval, nil, FuncProbe(func(context.Context) error {
  330. clk.Advance(500 * time.Millisecond)
  331. return nil
  332. }))
  333. p.Run("probe2", probeInterval, nil, FuncProbe(func(context.Context) error { return fmt.Errorf("error2") }))
  334. p.Wait()
  335. info := p.ProbeInfo()
  336. wantInfo := map[string]ProbeInfo{
  337. "probe1": {
  338. Name: "probe1",
  339. Interval: probeInterval,
  340. Labels: map[string]string{"class": "", "name": "probe1"},
  341. Latency: 500 * time.Millisecond,
  342. Status: ProbeStatusSucceeded,
  343. RecentResults: []bool{true},
  344. RecentLatencies: []time.Duration{500 * time.Millisecond},
  345. },
  346. "probe2": {
  347. Name: "probe2",
  348. Interval: probeInterval,
  349. Labels: map[string]string{"class": "", "name": "probe2"},
  350. Status: ProbeStatusFailed,
  351. Error: "error2",
  352. RecentResults: []bool{false},
  353. RecentLatencies: nil, // no latency for failed probes
  354. },
  355. }
  356. if diff := cmp.Diff(wantInfo, info, cmpopts.IgnoreFields(ProbeInfo{}, "Start", "End")); diff != "" {
  357. t.Fatalf("unexpected ProbeInfo (-want +got):\n%s", diff)
  358. }
  359. }
  360. func TestProbeInfoRecent(t *testing.T) {
  361. type probeResult struct {
  362. latency time.Duration
  363. err error
  364. }
  365. tests := []struct {
  366. name string
  367. results []probeResult
  368. wantProbeInfo ProbeInfo
  369. wantRecentSuccessRatio float64
  370. wantRecentMedianLatency time.Duration
  371. }{
  372. {
  373. name: "no_runs",
  374. wantProbeInfo: ProbeInfo{Status: ProbeStatusUnknown},
  375. wantRecentSuccessRatio: 0,
  376. wantRecentMedianLatency: 0,
  377. },
  378. {
  379. name: "single_success",
  380. results: []probeResult{{latency: 100 * time.Millisecond, err: nil}},
  381. wantProbeInfo: ProbeInfo{
  382. Latency: 100 * time.Millisecond,
  383. Status: ProbeStatusSucceeded,
  384. RecentResults: []bool{true},
  385. RecentLatencies: []time.Duration{100 * time.Millisecond},
  386. },
  387. wantRecentSuccessRatio: 1,
  388. wantRecentMedianLatency: 100 * time.Millisecond,
  389. },
  390. {
  391. name: "single_failure",
  392. results: []probeResult{{latency: 100 * time.Millisecond, err: errors.New("error123")}},
  393. wantProbeInfo: ProbeInfo{
  394. Status: ProbeStatusFailed,
  395. RecentResults: []bool{false},
  396. RecentLatencies: nil,
  397. Error: "error123",
  398. },
  399. wantRecentSuccessRatio: 0,
  400. wantRecentMedianLatency: 0,
  401. },
  402. {
  403. name: "recent_mix",
  404. results: []probeResult{
  405. {latency: 10 * time.Millisecond, err: errors.New("error1")},
  406. {latency: 20 * time.Millisecond, err: nil},
  407. {latency: 30 * time.Millisecond, err: nil},
  408. {latency: 40 * time.Millisecond, err: errors.New("error4")},
  409. {latency: 50 * time.Millisecond, err: nil},
  410. {latency: 60 * time.Millisecond, err: nil},
  411. {latency: 70 * time.Millisecond, err: errors.New("error7")},
  412. {latency: 80 * time.Millisecond, err: nil},
  413. },
  414. wantProbeInfo: ProbeInfo{
  415. Status: ProbeStatusSucceeded,
  416. Latency: 80 * time.Millisecond,
  417. RecentResults: []bool{false, true, true, false, true, true, false, true},
  418. RecentLatencies: []time.Duration{
  419. 20 * time.Millisecond,
  420. 30 * time.Millisecond,
  421. 50 * time.Millisecond,
  422. 60 * time.Millisecond,
  423. 80 * time.Millisecond,
  424. },
  425. },
  426. wantRecentSuccessRatio: 0.625,
  427. wantRecentMedianLatency: 50 * time.Millisecond,
  428. },
  429. {
  430. name: "only_last_10",
  431. results: []probeResult{
  432. {latency: 10 * time.Millisecond, err: errors.New("old_error")},
  433. {latency: 20 * time.Millisecond, err: nil},
  434. {latency: 30 * time.Millisecond, err: nil},
  435. {latency: 40 * time.Millisecond, err: nil},
  436. {latency: 50 * time.Millisecond, err: nil},
  437. {latency: 60 * time.Millisecond, err: nil},
  438. {latency: 70 * time.Millisecond, err: nil},
  439. {latency: 80 * time.Millisecond, err: nil},
  440. {latency: 90 * time.Millisecond, err: nil},
  441. {latency: 100 * time.Millisecond, err: nil},
  442. {latency: 110 * time.Millisecond, err: nil},
  443. },
  444. wantProbeInfo: ProbeInfo{
  445. Status: ProbeStatusSucceeded,
  446. Latency: 110 * time.Millisecond,
  447. RecentResults: []bool{true, true, true, true, true, true, true, true, true, true},
  448. RecentLatencies: []time.Duration{
  449. 20 * time.Millisecond,
  450. 30 * time.Millisecond,
  451. 40 * time.Millisecond,
  452. 50 * time.Millisecond,
  453. 60 * time.Millisecond,
  454. 70 * time.Millisecond,
  455. 80 * time.Millisecond,
  456. 90 * time.Millisecond,
  457. 100 * time.Millisecond,
  458. 110 * time.Millisecond,
  459. },
  460. },
  461. wantRecentSuccessRatio: 1,
  462. wantRecentMedianLatency: 70 * time.Millisecond,
  463. },
  464. }
  465. clk := newFakeTime()
  466. p := newForTest(clk.Now, clk.NewTicker).WithOnce(true)
  467. for _, tt := range tests {
  468. t.Run(tt.name, func(t *testing.T) {
  469. probe := newProbe(p, "", probeInterval, nil, FuncProbe(func(context.Context) error { return nil }))
  470. for _, r := range tt.results {
  471. probe.recordStart()
  472. clk.Advance(r.latency)
  473. probe.recordEndLocked(r.err)
  474. }
  475. probe.mu.Lock()
  476. info := probe.probeInfoLocked()
  477. probe.mu.Unlock()
  478. if diff := cmp.Diff(tt.wantProbeInfo, info, cmpopts.IgnoreFields(ProbeInfo{}, "Start", "End", "Interval")); diff != "" {
  479. t.Fatalf("unexpected ProbeInfo (-want +got):\n%s", diff)
  480. }
  481. if got := info.RecentSuccessRatio(); got != tt.wantRecentSuccessRatio {
  482. t.Errorf("recentSuccessRatio() = %v, want %v", got, tt.wantRecentSuccessRatio)
  483. }
  484. if got := info.RecentMedianLatency(); got != tt.wantRecentMedianLatency {
  485. t.Errorf("recentMedianLatency() = %v, want %v", got, tt.wantRecentMedianLatency)
  486. }
  487. })
  488. }
  489. }
  490. func TestProberRunHandler(t *testing.T) {
  491. clk := newFakeTime()
  492. tests := []struct {
  493. name string
  494. probeFunc func(context.Context) error
  495. wantResponseCode int
  496. wantJSONResponse RunHandlerResponse
  497. wantPlaintextResponse *regexp.Regexp
  498. }{
  499. {
  500. name: "success",
  501. probeFunc: func(context.Context) error { return nil },
  502. wantResponseCode: 200,
  503. wantJSONResponse: RunHandlerResponse{
  504. ProbeInfo: ProbeInfo{
  505. Name: "success",
  506. Interval: probeInterval,
  507. Status: ProbeStatusSucceeded,
  508. RecentResults: []bool{true, true},
  509. },
  510. PreviousSuccessRatio: 1,
  511. },
  512. wantPlaintextResponse: regexp.MustCompile("(?s)Probe succeeded .*Last 2 probes.*success rate 100%"),
  513. },
  514. {
  515. name: "failure",
  516. probeFunc: func(context.Context) error { return fmt.Errorf("error123") },
  517. wantResponseCode: 424,
  518. wantJSONResponse: RunHandlerResponse{
  519. ProbeInfo: ProbeInfo{
  520. Name: "failure",
  521. Interval: probeInterval,
  522. Status: ProbeStatusFailed,
  523. Error: "error123",
  524. RecentResults: []bool{false, false},
  525. },
  526. },
  527. wantPlaintextResponse: regexp.MustCompile("(?s)Probe failed: .*Last 2 probes.*success rate 0%"),
  528. },
  529. }
  530. for _, tt := range tests {
  531. for _, reqJSON := range []bool{true, false} {
  532. t.Run(fmt.Sprintf("%s_json-%v", tt.name, reqJSON), func(t *testing.T) {
  533. p := newForTest(clk.Now, clk.NewTicker).WithOnce(true)
  534. probe := p.Run(tt.name, probeInterval, nil, FuncProbe(tt.probeFunc))
  535. defer probe.Close()
  536. <-probe.stopped // wait for the first run.
  537. mux := http.NewServeMux()
  538. server := httptest.NewServer(mux)
  539. defer server.Close()
  540. mux.Handle("/prober/run/", tsweb.StdHandler(tsweb.ReturnHandlerFunc(p.RunHandler), tsweb.HandlerOptions{}))
  541. req, err := http.NewRequest("GET", server.URL+"/prober/run/?name="+tt.name, nil)
  542. if err != nil {
  543. t.Fatalf("failed to create request: %v", err)
  544. }
  545. if reqJSON {
  546. req.Header.Set("Accept", "application/json")
  547. }
  548. resp, err := http.DefaultClient.Do(req)
  549. if err != nil {
  550. t.Fatalf("failed to make request: %v", err)
  551. }
  552. defer resp.Body.Close()
  553. if resp.StatusCode != tt.wantResponseCode {
  554. t.Errorf("unexpected response code: got %d, want %d", resp.StatusCode, tt.wantResponseCode)
  555. }
  556. if reqJSON {
  557. if resp.Header.Get("Content-Type") != "application/json" {
  558. t.Errorf("unexpected content type: got %q, want application/json", resp.Header.Get("Content-Type"))
  559. }
  560. var gotJSON RunHandlerResponse
  561. body, err := io.ReadAll(resp.Body)
  562. if err != nil {
  563. t.Fatalf("failed to read response body: %v", err)
  564. }
  565. if err := json.Unmarshal(body, &gotJSON); err != nil {
  566. t.Fatalf("failed to unmarshal JSON response: %v; body: %s", err, body)
  567. }
  568. if diff := cmp.Diff(tt.wantJSONResponse, gotJSON, cmpopts.IgnoreFields(ProbeInfo{}, "Start", "End", "Labels", "RecentLatencies")); diff != "" {
  569. t.Errorf("unexpected JSON response (-want +got):\n%s", diff)
  570. }
  571. } else {
  572. body, _ := io.ReadAll(resp.Body)
  573. if !tt.wantPlaintextResponse.MatchString(string(body)) {
  574. t.Errorf("unexpected response body: got %q, want to match %q", body, tt.wantPlaintextResponse)
  575. }
  576. }
  577. })
  578. }
  579. }
  580. }
  581. func TestRunAllHandler(t *testing.T) {
  582. clk := newFakeTime()
  583. tests := []struct {
  584. name string
  585. probeFunc []func(context.Context) error
  586. wantResponseCode int
  587. wantJSONResponse RunHandlerAllResponse
  588. wantPlaintextResponse string
  589. }{
  590. {
  591. name: "successProbe",
  592. probeFunc: []func(context.Context) error{func(context.Context) error { return nil }, func(context.Context) error { return nil }},
  593. wantResponseCode: http.StatusOK,
  594. wantJSONResponse: RunHandlerAllResponse{
  595. Results: map[string]RunHandlerResponse{
  596. "successProbe-0": {
  597. ProbeInfo: ProbeInfo{
  598. Name: "successProbe-0",
  599. Interval: probeInterval,
  600. Status: ProbeStatusSucceeded,
  601. RecentResults: []bool{true, true},
  602. },
  603. PreviousSuccessRatio: 1,
  604. },
  605. "successProbe-1": {
  606. ProbeInfo: ProbeInfo{
  607. Name: "successProbe-1",
  608. Interval: probeInterval,
  609. Status: ProbeStatusSucceeded,
  610. RecentResults: []bool{true, true},
  611. },
  612. PreviousSuccessRatio: 1,
  613. },
  614. },
  615. },
  616. wantPlaintextResponse: "Probe successProbe-0: succeeded\n\tLast run: 0s\n\tPrevious success rate: 100.0%\n\tPrevious median latency: 0s\nProbe successProbe-1: succeeded\n\tLast run: 0s\n\tPrevious success rate: 100.0%\n\tPrevious median latency: 0s\n\n",
  617. },
  618. {
  619. name: "successAndFailureProbes",
  620. probeFunc: []func(context.Context) error{func(context.Context) error { return nil }, func(context.Context) error { return fmt.Errorf("error2") }},
  621. wantResponseCode: http.StatusFailedDependency,
  622. wantJSONResponse: RunHandlerAllResponse{
  623. Results: map[string]RunHandlerResponse{
  624. "successAndFailureProbes-0": {
  625. ProbeInfo: ProbeInfo{
  626. Name: "successAndFailureProbes-0",
  627. Interval: probeInterval,
  628. Status: ProbeStatusSucceeded,
  629. RecentResults: []bool{true, true},
  630. },
  631. PreviousSuccessRatio: 1,
  632. },
  633. "successAndFailureProbes-1": {
  634. ProbeInfo: ProbeInfo{
  635. Name: "successAndFailureProbes-1",
  636. Interval: probeInterval,
  637. Status: ProbeStatusFailed,
  638. Error: "error2",
  639. RecentResults: []bool{false, false},
  640. },
  641. },
  642. },
  643. },
  644. wantPlaintextResponse: "Probe successAndFailureProbes-0: succeeded\n\tLast run: 0s\n\tPrevious success rate: 100.0%\n\tPrevious median latency: 0s\nProbe successAndFailureProbes-1: failed\n\tLast run: 0s\n\tPrevious success rate: 0.0%\n\tPrevious median latency: 0s\n\n\tLast error: error2\n\n",
  645. },
  646. }
  647. for _, tc := range tests {
  648. t.Run(tc.name, func(t *testing.T) {
  649. p := newForTest(clk.Now, clk.NewTicker).WithOnce(true)
  650. for i, pfunc := range tc.probeFunc {
  651. probe := p.Run(fmt.Sprintf("%s-%d", tc.name, i), probeInterval, nil, FuncProbe(pfunc))
  652. defer probe.Close()
  653. <-probe.stopped // wait for the first run.
  654. }
  655. mux := http.NewServeMux()
  656. server := httptest.NewServer(mux)
  657. defer server.Close()
  658. mux.Handle("/prober/runall/", tsweb.StdHandler(tsweb.ReturnHandlerFunc(p.RunAllHandler), tsweb.HandlerOptions{}))
  659. req, err := http.NewRequest("GET", server.URL+"/prober/runall", nil)
  660. if err != nil {
  661. t.Fatalf("failed to create request: %v", err)
  662. }
  663. resp, err := http.DefaultClient.Do(req)
  664. if err != nil {
  665. t.Fatalf("failed to make request: %v", err)
  666. }
  667. if resp.StatusCode != tc.wantResponseCode {
  668. t.Errorf("unexpected response code: got %d, want %d", resp.StatusCode, tc.wantResponseCode)
  669. }
  670. if resp.Header.Get("Content-Type") != "application/json" {
  671. t.Errorf("unexpected content type: got %q, want application/json", resp.Header.Get("Content-Type"))
  672. }
  673. var gotJSON RunHandlerAllResponse
  674. body, err := io.ReadAll(resp.Body)
  675. if err != nil {
  676. t.Fatalf("failed to read response body: %v", err)
  677. }
  678. if err := json.Unmarshal(body, &gotJSON); err != nil {
  679. t.Fatalf("failed to unmarshal JSON response: %v; body: %s", err, body)
  680. }
  681. if diff := cmp.Diff(tc.wantJSONResponse, gotJSON, cmpopts.IgnoreFields(ProbeInfo{}, "Start", "End", "Labels", "RecentLatencies")); diff != "" {
  682. t.Errorf("unexpected JSON response (-want +got):\n%s", diff)
  683. }
  684. })
  685. }
  686. }
  687. func TestExcludeInRunAll(t *testing.T) {
  688. clk := newFakeTime()
  689. p := newForTest(clk.Now, clk.NewTicker).WithOnce(true)
  690. wantJSONResponse := RunHandlerAllResponse{
  691. Results: map[string]RunHandlerResponse{
  692. "includedProbe": {
  693. ProbeInfo: ProbeInfo{
  694. Name: "includedProbe",
  695. Interval: probeInterval,
  696. Status: ProbeStatusSucceeded,
  697. RecentResults: []bool{true, true},
  698. },
  699. PreviousSuccessRatio: 1,
  700. },
  701. },
  702. }
  703. p.Run("includedProbe", probeInterval, nil, FuncProbe(func(context.Context) error { return nil }))
  704. p.Run("excludedProbe", probeInterval, nil, FuncProbe(func(context.Context) error { return nil }))
  705. p.Run("excludedOtherProbe", probeInterval, nil, FuncProbe(func(context.Context) error { return nil }))
  706. mux := http.NewServeMux()
  707. server := httptest.NewServer(mux)
  708. defer server.Close()
  709. mux.Handle("/prober/runall", tsweb.StdHandler(tsweb.ReturnHandlerFunc(p.RunAllHandler), tsweb.HandlerOptions{}))
  710. req, err := http.NewRequest("GET", server.URL+"/prober/runall", nil)
  711. if err != nil {
  712. t.Fatalf("failed to create request: %v", err)
  713. }
  714. // Exclude probes with "excluded" in their name
  715. req.URL.RawQuery = url.Values{
  716. "exclude": []string{"excludedProbe", "excludedOtherProbe"},
  717. }.Encode()
  718. resp, err := http.DefaultClient.Do(req)
  719. if err != nil {
  720. t.Fatalf("failed to make request: %v", err)
  721. }
  722. if resp.StatusCode != http.StatusOK {
  723. t.Errorf("unexpected response code: got %d, want %d", resp.StatusCode, http.StatusOK)
  724. }
  725. var gotJSON RunHandlerAllResponse
  726. body, err := io.ReadAll(resp.Body)
  727. if err != nil {
  728. t.Fatalf("failed to read response body: %v", err)
  729. }
  730. if err := json.Unmarshal(body, &gotJSON); err != nil {
  731. t.Fatalf("failed to unmarshal JSON response: %v; body: %s", err, body)
  732. }
  733. if resp.Header.Get("Content-Type") != "application/json" {
  734. t.Errorf("unexpected content type: got %q, want application/json", resp.Header.Get("Content-Type"))
  735. }
  736. if diff := cmp.Diff(wantJSONResponse, gotJSON, cmpopts.IgnoreFields(ProbeInfo{}, "Start", "End", "Labels", "RecentLatencies")); diff != "" {
  737. t.Errorf("unexpected JSON response (-want +got):\n%s", diff)
  738. }
  739. }
  // fakeTicker is a manually driven ticker used together with fakeTime.
  // It only emits a tick when fire is called.
  type fakeTicker struct {
  	ch       chan time.Time
  	interval time.Duration

  	sync.Mutex
  	next    time.Time // advanced by fire while the mutex is held
  	stopped bool      // set by Stop while the mutex is held
  }

  // Chan returns the receive side of the tick channel.
  func (ft *fakeTicker) Chan() <-chan time.Time {
  	return ft.ch
  }

  // Stop marks the ticker as stopped. It does not close the channel.
  func (ft *fakeTicker) Stop() {
  	ft.Lock()
  	ft.stopped = true
  	ft.Unlock()
  }

  // fire offers now on the tick channel, dropping it if the channel cannot
  // accept a value, and then moves next forward in whole intervals until it
  // is no longer before now.
  func (ft *fakeTicker) fire(now time.Time) {
  	ft.Lock()
  	defer ft.Unlock()

  	// Slight deviation from the stdlib ticker: time.Ticker will
  	// adjust t.next to make up for missed ticks, whereas we tick on a
  	// fixed interval regardless of receiver behavior. In our case
  	// this is fine, since we're using the ticker as a wakeup
  	// mechanism and not a precise timekeeping system.
  	select {
  	case ft.ch <- now:
  	default:
  		// Receiver is behind; drop this tick.
  	}

  	for ft.next.Before(now) {
  		ft.next = ft.next.Add(ft.interval)
  	}
  }
  771. type fakeTime struct {
  772. sync.Mutex
  773. *sync.Cond
  774. curTime time.Time
  775. tickers []*fakeTicker
  776. }
  777. func newFakeTime() *fakeTime {
  778. ret := &fakeTime{
  779. curTime: epoch,
  780. }
  781. ret.Cond = &sync.Cond{L: &ret.Mutex}
  782. return ret
  783. }
  784. func (t *fakeTime) Now() time.Time {
  785. t.Lock()
  786. defer t.Unlock()
  787. ret := t.curTime
  788. return ret
  789. }
  790. func (t *fakeTime) NewTicker(d time.Duration) ticker {
  791. t.Lock()
  792. defer t.Unlock()
  793. ret := &fakeTicker{
  794. ch: make(chan time.Time, 1),
  795. interval: d,
  796. next: t.curTime.Add(d),
  797. }
  798. t.tickers = append(t.tickers, ret)
  799. t.Cond.Broadcast()
  800. return ret
  801. }
  802. func (t *fakeTime) Advance(d time.Duration) {
  803. t.Lock()
  804. defer t.Unlock()
  805. t.curTime = t.curTime.Add(d)
  806. for _, tick := range t.tickers {
  807. if t.curTime.After(tick.next) {
  808. tick.fire(t.curTime)
  809. }
  810. }
  811. }
  812. func (t *fakeTime) activeTickers() (count int) {
  813. t.Lock()
  814. defer t.Unlock()
  815. for _, tick := range t.tickers {
  816. if !tick.stopped {
  817. count += 1
  818. }
  819. }
  820. return
  821. }
  822. func waitActiveProbes(t *testing.T, p *Prober, clk *fakeTime, want int) {
  823. t.Helper()
  824. err := tstest.WaitFor(convergenceTimeout, func() error {
  825. if got := p.activeProbes(); got != want {
  826. return fmt.Errorf("installed probe count is %d, want %d", got, want)
  827. }
  828. if got := clk.activeTickers(); got != want {
  829. return fmt.Errorf("active ticker count is %d, want %d", got, want)
  830. }
  831. return nil
  832. })
  833. if err != nil {
  834. t.Fatal(err)
  835. }
  836. }