testwrapper.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. // Copyright (c) Tailscale Inc & contributors
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. // testwrapper is a wrapper for retrying flaky tests. It is an alternative to
  4. // `go test` and re-runs failed marked flaky tests (using the flakytest pkg). It
  5. // takes different arguments than go test and requires the first positional
  6. // argument to be the pattern to test.
  7. package main
  8. import (
  9. "bufio"
  10. "bytes"
  11. "cmp"
  12. "context"
  13. "encoding/json"
  14. "errors"
  15. "fmt"
  16. "io"
  17. "log"
  18. "os"
  19. "os/exec"
  20. "slices"
  21. "sort"
  22. "strings"
  23. "time"
  24. "tailscale.com/cmd/testwrapper/flakytest"
  25. "tailscale.com/util/slicesx"
  26. )
  27. const (
  28. maxAttempts = 3
  29. )
  30. type testAttempt struct {
  31. pkg string // "tailscale.com/types/key"
  32. testName string // "TestFoo"
  33. outcome string // "pass", "fail", "skip"
  34. cached bool // whether package-level (non-testName specific) was pass due to being cached
  35. logs bytes.Buffer
  36. start, end time.Time
  37. isMarkedFlaky bool // set if the test is marked as flaky
  38. issueURL string // set if the test is marked as flaky
  39. pkgFinished bool
  40. }
  41. // packageTests describes what to run.
  42. // It's also JSON-marshalled to output for analysys tools to parse
  43. // so the fields are all exported.
  44. // TODO(bradfitz): move this type to its own types package?
  45. type packageTests struct {
  46. // Pattern is the package Pattern to run.
  47. // Must be a single Pattern, not a list of patterns.
  48. Pattern string // "./...", "./types/key"
  49. // Tests is a list of Tests to run. If empty, all Tests in the package are
  50. // run.
  51. Tests []string // ["TestFoo", "TestBar"]
  52. // IssueURLs maps from a test name to a URL tracking its flake.
  53. IssueURLs map[string]string // "TestFoo" => "https://github.com/foo/bar/issue/123"
  54. }
  55. type goTestOutput struct {
  56. Time time.Time
  57. Action string
  58. ImportPath string
  59. Package string
  60. Test string
  61. Output string
  62. }
  63. var debug = os.Getenv("TS_TESTWRAPPER_DEBUG") != ""
  64. // runTests runs the tests in pt and sends the results on ch. It sends a
  65. // testAttempt for each test and a final testAttempt per pkg with pkgFinished
  66. // set to true. Package build errors will not emit a testAttempt (as no valid
  67. // JSON is produced) but the [os/exec.ExitError] will be returned.
  68. // It calls close(ch) when it's done.
  69. func runTests(ctx context.Context, attempt int, pt *packageTests, goTestArgs, testArgs []string, ch chan<- *testAttempt) error {
  70. defer close(ch)
  71. args := []string{"test"}
  72. args = append(args, goTestArgs...)
  73. args = append(args, pt.Pattern)
  74. if len(pt.Tests) > 0 {
  75. runArg := strings.Join(pt.Tests, "|")
  76. args = append(args, "--run", runArg)
  77. }
  78. args = append(args, testArgs...)
  79. args = append(args, "-json")
  80. if debug {
  81. fmt.Println("running", strings.Join(args, " "))
  82. }
  83. cmd := exec.CommandContext(ctx, "go", args...)
  84. if len(pt.Tests) > 0 {
  85. cmd.Env = append(os.Environ(), "TS_TEST_SHARD=") // clear test shard; run all tests we say to run
  86. }
  87. r, err := cmd.StdoutPipe()
  88. if err != nil {
  89. log.Printf("error creating stdout pipe: %v", err)
  90. }
  91. defer r.Close()
  92. cmd.Stderr = os.Stderr
  93. cmd.Env = os.Environ()
  94. cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%d", flakytest.FlakeAttemptEnv, attempt))
  95. if err := cmd.Start(); err != nil {
  96. log.Printf("error starting test: %v", err)
  97. os.Exit(1)
  98. }
  99. pkgCached := map[string]bool{}
  100. s := bufio.NewScanner(r)
  101. resultMap := make(map[string]map[string]*testAttempt) // pkg -> test -> testAttempt
  102. for s.Scan() {
  103. var goOutput goTestOutput
  104. if err := json.Unmarshal(s.Bytes(), &goOutput); err != nil {
  105. return fmt.Errorf("failed to parse go test output %q: %w", s.Bytes(), err)
  106. }
  107. pkg := cmp.Or(
  108. goOutput.Package,
  109. "build:"+goOutput.ImportPath, // can be "./cmd" while Package is "tailscale.com/cmd" so use separate namespace
  110. )
  111. pkgTests := resultMap[pkg]
  112. if pkgTests == nil {
  113. pkgTests = map[string]*testAttempt{
  114. "": {}, // Used for start time and build logs.
  115. }
  116. resultMap[pkg] = pkgTests
  117. }
  118. if goOutput.Test == "" {
  119. // Detect output lines like:
  120. // ok \ttailscale.com/cmd/testwrapper\t(cached)
  121. // ok \ttailscale.com/cmd/testwrapper\t(cached)\tcoverage: 17.0% of statements
  122. if goOutput.Package != "" && strings.Contains(goOutput.Output, fmt.Sprintf("%s\t(cached)", goOutput.Package)) {
  123. pkgCached[goOutput.Package] = true
  124. }
  125. switch goOutput.Action {
  126. case "start":
  127. pkgTests[""].start = goOutput.Time
  128. case "build-output":
  129. pkgTests[""].logs.WriteString(goOutput.Output)
  130. case "build-fail", "fail", "pass", "skip":
  131. for _, test := range pkgTests {
  132. if test.testName != "" && test.outcome == "" {
  133. test.outcome = "fail"
  134. ch <- test
  135. }
  136. }
  137. outcome := goOutput.Action
  138. if outcome == "build-fail" {
  139. outcome = "fail"
  140. }
  141. pkgTests[""].logs.WriteString(goOutput.Output)
  142. ch <- &testAttempt{
  143. pkg: goOutput.Package,
  144. outcome: outcome,
  145. start: pkgTests[""].start,
  146. end: goOutput.Time,
  147. logs: pkgTests[""].logs,
  148. pkgFinished: true,
  149. cached: pkgCached[goOutput.Package],
  150. }
  151. case "output":
  152. // Capture all output from the package except for the final
  153. // "FAIL tailscale.io/control 0.684s" line, as
  154. // printPkgOutcome will output a similar line
  155. if !strings.HasPrefix(goOutput.Output, fmt.Sprintf("FAIL\t%s\t", goOutput.Package)) {
  156. pkgTests[""].logs.WriteString(goOutput.Output)
  157. }
  158. }
  159. continue
  160. }
  161. testName := goOutput.Test
  162. if test, _, isSubtest := strings.Cut(goOutput.Test, "/"); isSubtest {
  163. testName = test
  164. if goOutput.Action == "output" {
  165. resultMap[pkg][testName].logs.WriteString(goOutput.Output)
  166. }
  167. continue
  168. }
  169. switch goOutput.Action {
  170. case "start":
  171. // ignore
  172. case "run":
  173. pkgTests[testName] = &testAttempt{
  174. pkg: pkg,
  175. testName: testName,
  176. start: goOutput.Time,
  177. }
  178. case "skip", "pass", "fail":
  179. pkgTests[testName].end = goOutput.Time
  180. pkgTests[testName].outcome = goOutput.Action
  181. ch <- pkgTests[testName]
  182. case "output":
  183. if suffix, ok := strings.CutPrefix(strings.TrimSpace(goOutput.Output), flakytest.FlakyTestLogMessage); ok {
  184. pkgTests[testName].isMarkedFlaky = true
  185. pkgTests[testName].issueURL = strings.TrimPrefix(suffix, ": ")
  186. } else {
  187. pkgTests[testName].logs.WriteString(goOutput.Output)
  188. }
  189. }
  190. }
  191. if err := cmd.Wait(); err != nil {
  192. return err
  193. }
  194. if err := s.Err(); err != nil {
  195. return fmt.Errorf("reading go test stdout: %w", err)
  196. }
  197. return nil
  198. }
  199. func main() {
  200. goTestArgs, packages, testArgs, err := splitArgs(os.Args[1:])
  201. if err != nil {
  202. log.Fatal(err)
  203. return
  204. }
  205. if len(packages) == 0 {
  206. fmt.Println("testwrapper: no packages specified")
  207. return
  208. }
  209. // As a special case, if the packages looks like "sharded:1/2" then shell out to
  210. // ./tool/listpkgs to cut up the package list pieces for each sharded builder.
  211. if nOfM, ok := strings.CutPrefix(packages[0], "sharded:"); ok && len(packages) == 1 {
  212. out, err := exec.Command("go", "run", "tailscale.com/tool/listpkgs", "-shard", nOfM, "./...").Output()
  213. if err != nil {
  214. log.Fatalf("failed to list packages for sharded test: %v", err)
  215. }
  216. packages = strings.Split(strings.TrimSpace(string(out)), "\n")
  217. }
  218. ctx := context.Background()
  219. type nextRun struct {
  220. tests []*packageTests
  221. attempt int // starting at 1
  222. }
  223. firstRun := &nextRun{
  224. attempt: 1,
  225. }
  226. for _, pkg := range packages {
  227. firstRun.tests = append(firstRun.tests, &packageTests{Pattern: pkg})
  228. }
  229. toRun := []*nextRun{firstRun}
  230. printPkgOutcome := func(pkg, outcome string, cached bool, attempt int, testDur time.Duration) {
  231. if pkg == "" {
  232. return // We reach this path on a build error.
  233. }
  234. if outcome == "skip" {
  235. fmt.Printf("?\t%s [skipped/no tests] \n", pkg)
  236. return
  237. }
  238. if outcome == "pass" {
  239. outcome = "ok"
  240. }
  241. if outcome == "fail" {
  242. outcome = "FAIL"
  243. }
  244. if attempt > 1 {
  245. fmt.Printf("%s\t%s\t%.3fs\t[attempt=%d]\n", outcome, pkg, testDur.Seconds(), attempt)
  246. return
  247. }
  248. var lastCol string
  249. if cached {
  250. lastCol = "(cached)"
  251. } else {
  252. lastCol = fmt.Sprintf("%.3f", testDur.Seconds())
  253. }
  254. fmt.Printf("%s\t%s\t%v\n", outcome, pkg, lastCol)
  255. }
  256. for len(toRun) > 0 {
  257. var thisRun *nextRun
  258. thisRun, toRun = toRun[0], toRun[1:]
  259. if thisRun.attempt > maxAttempts {
  260. fmt.Println("max attempts reached")
  261. os.Exit(1)
  262. }
  263. if thisRun.attempt > 1 {
  264. j, _ := json.Marshal(thisRun.tests)
  265. fmt.Printf("\n\nAttempt #%d: Retrying flaky tests:\n\nflakytest failures JSON: %s\n\n", thisRun.attempt, j)
  266. }
  267. fatalFailures := make(map[string]struct{}) // pkg.Test key
  268. toRetry := make(map[string][]*testAttempt) // pkg -> tests to retry
  269. for _, pt := range thisRun.tests {
  270. ch := make(chan *testAttempt)
  271. runErr := make(chan error, 1)
  272. go func() {
  273. defer close(runErr)
  274. runErr <- runTests(ctx, thisRun.attempt, pt, goTestArgs, testArgs, ch)
  275. }()
  276. var failed bool
  277. for tr := range ch {
  278. // Go assigns the package name "command-line-arguments" when you
  279. // `go test FILE` rather than `go test PKG`. It's more
  280. // convenient for us to to specify files in tests, so fix tr.pkg
  281. // so that subsequent testwrapper attempts run correctly.
  282. if tr.pkg == "command-line-arguments" {
  283. tr.pkg = packages[0]
  284. }
  285. if tr.pkgFinished {
  286. if tr.outcome == "fail" && len(toRetry[tr.pkg]) == 0 {
  287. // If a package fails and we don't have any tests to
  288. // retry, then we should fail. This typically happens
  289. // when a package times out.
  290. failed = true
  291. }
  292. if testingVerbose || tr.outcome == "fail" {
  293. // Output package-level output which is where e.g.
  294. // panics outside tests will be printed
  295. io.Copy(os.Stdout, &tr.logs)
  296. }
  297. printPkgOutcome(tr.pkg, tr.outcome, tr.cached, thisRun.attempt, tr.end.Sub(tr.start))
  298. continue
  299. }
  300. if testingVerbose || tr.outcome == "fail" {
  301. io.Copy(os.Stdout, &tr.logs)
  302. }
  303. if tr.outcome != "fail" {
  304. continue
  305. }
  306. if tr.isMarkedFlaky {
  307. toRetry[tr.pkg] = append(toRetry[tr.pkg], tr)
  308. } else {
  309. fatalFailures[tr.pkg+"."+tr.testName] = struct{}{}
  310. failed = true
  311. }
  312. }
  313. if failed {
  314. fmt.Println("\n\nNot retrying flaky tests because non-flaky tests failed.")
  315. // Print the list of non-flakytest failures.
  316. // We will later analyze the retried GitHub Action runs to see
  317. // if non-flakytest failures succeeded upon retry. This will
  318. // highlight tests which are flaky but not yet flagged as such.
  319. if len(fatalFailures) > 0 {
  320. tests := slicesx.MapKeys(fatalFailures)
  321. sort.Strings(tests)
  322. j, _ := json.Marshal(tests)
  323. fmt.Printf("non-flakytest failures: %s\n", j)
  324. }
  325. fmt.Println()
  326. os.Exit(1)
  327. }
  328. // If there's nothing to retry and no non-retryable tests have
  329. // failed then we've probably hit a build error.
  330. if err := <-runErr; len(toRetry) == 0 && err != nil {
  331. var exit *exec.ExitError
  332. if errors.As(err, &exit) {
  333. if code := exit.ExitCode(); code > -1 {
  334. os.Exit(exit.ExitCode())
  335. }
  336. }
  337. log.Printf("testwrapper: %s", err)
  338. os.Exit(1)
  339. }
  340. }
  341. if len(toRetry) == 0 {
  342. continue
  343. }
  344. pkgs := slicesx.MapKeys(toRetry)
  345. sort.Strings(pkgs)
  346. nextRun := &nextRun{
  347. attempt: thisRun.attempt + 1,
  348. }
  349. for _, pkg := range pkgs {
  350. tests := toRetry[pkg]
  351. slices.SortFunc(tests, func(a, b *testAttempt) int { return strings.Compare(a.testName, b.testName) })
  352. issueURLs := map[string]string{} // test name => URL
  353. var testNames []string
  354. for _, ta := range tests {
  355. issueURLs[ta.testName] = ta.issueURL
  356. testNames = append(testNames, ta.testName)
  357. }
  358. nextRun.tests = append(nextRun.tests, &packageTests{
  359. Pattern: pkg,
  360. Tests: testNames,
  361. IssueURLs: issueURLs,
  362. })
  363. }
  364. toRun = append(toRun, nextRun)
  365. }
  366. }