bench.go 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. // Create two wgengine instances and pass data through them, measuring
  4. // throughput, latency, and packet loss.
  5. package main
  6. import (
  7. "bufio"
  8. "io"
  9. "log"
  10. "net"
  11. "net/http"
  12. "net/http/pprof"
  13. "net/netip"
  14. "os"
  15. "strconv"
  16. "sync"
  17. "time"
  18. "tailscale.com/types/logger"
  19. )
  20. const PayloadSize = 1000
  21. const ICMPMinSize = 24
  22. var Addr1 = netip.MustParsePrefix("100.64.1.1/32")
  23. var Addr2 = netip.MustParsePrefix("100.64.1.2/32")
  24. func main() {
  25. var logf logger.Logf = log.Printf
  26. log.SetFlags(0)
  27. debugMux := newDebugMux()
  28. go runDebugServer(debugMux, "0.0.0.0:8999")
  29. mode, err := strconv.Atoi(os.Args[1])
  30. if err != nil {
  31. log.Fatalf("%q: %v", os.Args[1], err)
  32. }
  33. traf := NewTrafficGen(nil)
  34. // Sample test results below are using GOMAXPROCS=2 (for some
  35. // tests, including wireguard-go, higher GOMAXPROCS goes slower)
  36. // on apenwarr's old Linux box:
  37. // Intel(R) Core(TM) i7-4785T CPU @ 2.20GHz
  38. // My 2019 Mac Mini is about 20% faster on most tests.
  39. switch mode {
  40. // tx=8786325 rx=8786326 (0 = 0.00% loss) (70768.7 Mbits/sec)
  41. case 1:
  42. setupTrivialNoAllocTest(logf, traf)
  43. // tx=6476293 rx=6476293 (0 = 0.00% loss) (52249.7 Mbits/sec)
  44. case 2:
  45. setupTrivialTest(logf, traf)
  46. // tx=1957974 rx=1958379 (0 = 0.00% loss) (15939.8 Mbits/sec)
  47. case 11:
  48. setupBlockingChannelTest(logf, traf)
  49. // tx=728621 rx=701825 (26620 = 3.65% loss) (5525.2 Mbits/sec)
  50. // (much faster on macOS??)
  51. case 12:
  52. setupNonblockingChannelTest(logf, traf)
  53. // tx=1024260 rx=941098 (83334 = 8.14% loss) (7516.6 Mbits/sec)
  54. // (much faster on macOS??)
  55. case 13:
  56. setupDoubleChannelTest(logf, traf)
  57. // tx=265468 rx=263189 (2279 = 0.86% loss) (2162.0 Mbits/sec)
  58. case 21:
  59. setupUDPTest(logf, traf)
  60. // tx=1493580 rx=1493580 (0 = 0.00% loss) (12210.4 Mbits/sec)
  61. case 31:
  62. setupBatchTCPTest(logf, traf)
  63. // tx=134236 rx=133166 (1070 = 0.80% loss) (1088.9 Mbits/sec)
  64. case 101:
  65. setupWGTest(nil, logf, traf, Addr1, Addr2)
  66. default:
  67. log.Fatalf("provide a valid test number (0..n)")
  68. }
  69. logf("initialized ok.")
  70. traf.Start(Addr1.Addr(), Addr2.Addr(), PayloadSize+ICMPMinSize, 0)
  71. var cur, prev Snapshot
  72. var pps int64
  73. i := 0
  74. for {
  75. i += 1
  76. time.Sleep(10 * time.Millisecond)
  77. if (i % 100) == 0 {
  78. prev = cur
  79. cur = traf.Snap()
  80. d := cur.Sub(prev)
  81. if prev.WhenNsec == 0 {
  82. logf("tx=%-6d rx=%-6d", d.TxPackets, d.RxPackets)
  83. } else {
  84. logf("%v @%7d pkt/s", d, pps)
  85. }
  86. }
  87. pps = traf.Adjust()
  88. }
  89. }
  90. func newDebugMux() *http.ServeMux {
  91. mux := http.NewServeMux()
  92. mux.HandleFunc("/debug/pprof/", pprof.Index)
  93. mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
  94. mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
  95. mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
  96. mux.HandleFunc("/debug/pprof/trace", pprof.Trace)
  97. return mux
  98. }
  99. func runDebugServer(mux *http.ServeMux, addr string) {
  100. srv := &http.Server{
  101. Addr: addr,
  102. Handler: mux,
  103. }
  104. if err := srv.ListenAndServe(); err != nil {
  105. log.Fatal(err)
  106. }
  107. }
  108. // The absolute minimal test of the traffic generator: have it fill
  109. // a packet buffer, then absorb it again. Zero packet loss.
  110. func setupTrivialNoAllocTest(logf logger.Logf, traf *TrafficGen) {
  111. go func() {
  112. b := make([]byte, 1600)
  113. for {
  114. n := traf.Generate(b, 16)
  115. if n == 0 {
  116. break
  117. }
  118. traf.GotPacket(b[0:n+16], 16)
  119. }
  120. }()
  121. }
  122. // Almost the same, but this time allocate a fresh buffer each time
  123. // through the loop. Still zero packet loss. Runs about 2/3 as fast for me.
  124. func setupTrivialTest(logf logger.Logf, traf *TrafficGen) {
  125. go func() {
  126. for {
  127. b := make([]byte, 1600)
  128. n := traf.Generate(b, 16)
  129. if n == 0 {
  130. break
  131. }
  132. traf.GotPacket(b[0:n+16], 16)
  133. }
  134. }()
  135. }
  136. // Pass packets through a blocking channel between sender and receiver.
  137. // Still zero packet loss since the sender stops when the channel is full.
  138. // Max speed depends on channel length (I'm not sure why).
  139. func setupBlockingChannelTest(logf logger.Logf, traf *TrafficGen) {
  140. ch := make(chan []byte, 1000)
  141. go func() {
  142. // transmitter
  143. for {
  144. b := make([]byte, 1600)
  145. n := traf.Generate(b, 16)
  146. if n == 0 {
  147. close(ch)
  148. break
  149. }
  150. ch <- b[0 : n+16]
  151. }
  152. }()
  153. go func() {
  154. // receiver
  155. for b := range ch {
  156. traf.GotPacket(b, 16)
  157. }
  158. }()
  159. }
  160. // Same as setupBlockingChannelTest, but now we drop packets whenever the
  161. // channel is full. Max speed is about the same as the above test, but
  162. // now with nonzero packet loss.
  163. func setupNonblockingChannelTest(logf logger.Logf, traf *TrafficGen) {
  164. ch := make(chan []byte, 1000)
  165. go func() {
  166. // transmitter
  167. for {
  168. b := make([]byte, 1600)
  169. n := traf.Generate(b, 16)
  170. if n == 0 {
  171. close(ch)
  172. break
  173. }
  174. select {
  175. case ch <- b[0 : n+16]:
  176. default:
  177. }
  178. }
  179. }()
  180. go func() {
  181. // receiver
  182. for b := range ch {
  183. traf.GotPacket(b, 16)
  184. }
  185. }()
  186. }
  187. // Same as above, but at an intermediate blocking channel and goroutine
  188. // to make things a little more like wireguard-go. Roughly 20% slower than
  189. // the single-channel version.
  190. func setupDoubleChannelTest(logf logger.Logf, traf *TrafficGen) {
  191. ch := make(chan []byte, 1000)
  192. ch2 := make(chan []byte, 1000)
  193. go func() {
  194. // transmitter
  195. for {
  196. b := make([]byte, 1600)
  197. n := traf.Generate(b, 16)
  198. if n == 0 {
  199. close(ch)
  200. break
  201. }
  202. select {
  203. case ch <- b[0 : n+16]:
  204. default:
  205. }
  206. }
  207. }()
  208. go func() {
  209. // intermediary
  210. for b := range ch {
  211. ch2 <- b
  212. }
  213. close(ch2)
  214. }()
  215. go func() {
  216. // receiver
  217. for b := range ch2 {
  218. traf.GotPacket(b, 16)
  219. }
  220. }()
  221. }
  222. // Instead of a channel, pass packets through a UDP socket.
  223. func setupUDPTest(logf logger.Logf, traf *TrafficGen) {
  224. la, err := net.ResolveUDPAddr("udp", ":0")
  225. if err != nil {
  226. log.Fatalf("resolve: %v", err)
  227. }
  228. s1, err := net.ListenUDP("udp", la)
  229. if err != nil {
  230. log.Fatalf("listen1: %v", err)
  231. }
  232. s2, err := net.ListenUDP("udp", la)
  233. if err != nil {
  234. log.Fatalf("listen2: %v", err)
  235. }
  236. a2 := s2.LocalAddr()
  237. // On macOS (but not Linux), you can't transmit to 0.0.0.0:port,
  238. // which is what returns from .LocalAddr() above. We have to
  239. // force it to localhost instead.
  240. a2.(*net.UDPAddr).IP = net.ParseIP("127.0.0.1")
  241. s1.SetWriteBuffer(1024 * 1024)
  242. s2.SetReadBuffer(1024 * 1024)
  243. go func() {
  244. // transmitter
  245. b := make([]byte, 1600)
  246. for {
  247. n := traf.Generate(b, 16)
  248. if n == 0 {
  249. break
  250. }
  251. s1.WriteTo(b[16:n+16], a2)
  252. }
  253. }()
  254. go func() {
  255. // receiver
  256. b := make([]byte, 1600)
  257. for traf.Running() {
  258. // Use ReadFrom instead of Read, to be more like
  259. // how wireguard-go does it, even though we're not
  260. // going to actually look at the address.
  261. n, _, err := s2.ReadFrom(b)
  262. if err != nil {
  263. log.Fatalf("s2.Read: %v", err)
  264. }
  265. traf.GotPacket(b[:n], 0)
  266. }
  267. }()
  268. }
  269. // Instead of a channel, pass packets through a TCP socket.
  270. // TCP is a single stream, so we can amortize one syscall across
  271. // multiple packets. 10x amortization seems to make it go ~10x faster,
  272. // as expected, getting us close to the speed of the channel tests above.
  273. // There's also zero packet loss.
  274. func setupBatchTCPTest(logf logger.Logf, traf *TrafficGen) {
  275. sl, err := net.Listen("tcp", ":0")
  276. if err != nil {
  277. log.Fatalf("listen: %v", err)
  278. }
  279. var slCloseOnce sync.Once
  280. slClose := func() {
  281. slCloseOnce.Do(func() {
  282. sl.Close()
  283. })
  284. }
  285. s1, err := net.Dial("tcp", sl.Addr().String())
  286. if err != nil {
  287. log.Fatalf("dial: %v", err)
  288. }
  289. s2, err := sl.Accept()
  290. if err != nil {
  291. log.Fatalf("accept: %v", err)
  292. }
  293. s1.(*net.TCPConn).SetWriteBuffer(1024 * 1024)
  294. s2.(*net.TCPConn).SetReadBuffer(1024 * 1024)
  295. ch := make(chan int)
  296. go func() {
  297. // transmitter
  298. defer slClose()
  299. defer s1.Close()
  300. bs1 := bufio.NewWriterSize(s1, 1024*1024)
  301. b := make([]byte, 1600)
  302. i := 0
  303. for {
  304. i += 1
  305. n := traf.Generate(b, 16)
  306. if n == 0 {
  307. break
  308. }
  309. if i == 1 {
  310. ch <- n
  311. }
  312. bs1.Write(b[16 : n+16])
  313. // TODO: this is a pretty half-baked batching
  314. // function, which we'd never want to employ in
  315. // a real-life program.
  316. //
  317. // In real life, we'd probably want to flush
  318. // immediately when there are no more packets to
  319. // generate, and queue up only if we fall behind.
  320. //
  321. // In our case however, we just want to see the
  322. // technical benefits of batching 10 syscalls
  323. // into 1, so a fixed ratio makes more sense.
  324. if (i % 10) == 0 {
  325. bs1.Flush()
  326. }
  327. }
  328. }()
  329. go func() {
  330. // receiver
  331. defer slClose()
  332. defer s2.Close()
  333. bs2 := bufio.NewReaderSize(s2, 1024*1024)
  334. // Find out the packet size (we happen to know they're
  335. // all the same size)
  336. packetSize := <-ch
  337. b := make([]byte, packetSize)
  338. for traf.Running() {
  339. // TODO: can't use ReadFrom() here, which is
  340. // unfair compared to UDP. (ReadFrom for UDP
  341. // apparently allocates memory per packet, which
  342. // this test does not.)
  343. n, err := io.ReadFull(bs2, b)
  344. if err != nil {
  345. log.Fatalf("s2.Read: %v", err)
  346. }
  347. traf.GotPacket(b[:n], 0)
  348. }
  349. }()
  350. }