udp_rio_windows.go 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419
  1. //go:build !e2e_testing
  2. // +build !e2e_testing
  3. // Inspired by https://git.zx2c4.com/wireguard-go/tree/conn/bind_windows.go
  4. package udp
  5. import (
  6. "errors"
  7. "fmt"
  8. "io"
  9. "net"
  10. "net/netip"
  11. "sync"
  12. "sync/atomic"
  13. "syscall"
  14. "time"
  15. "unsafe"
  16. "github.com/sirupsen/logrus"
  17. "github.com/slackhq/nebula/config"
  18. "github.com/slackhq/nebula/firewall"
  19. "github.com/slackhq/nebula/header"
  20. "golang.org/x/sys/windows"
  21. "golang.zx2c4.com/wireguard/conn/winrio"
  22. )
  23. // Assert we meet the standard conn interface
  24. var _ Conn = &RIOConn{}
  25. //go:linkname procyield runtime.procyield
  26. func procyield(cycles uint32)
  27. const (
  28. packetsPerRing = 1024
  29. bytesPerPacket = 2048 - 32
  30. receiveSpins = 15
  31. )
  32. type ringPacket struct {
  33. addr windows.RawSockaddrInet6
  34. data [bytesPerPacket]byte
  35. }
  36. type ringBuffer struct {
  37. packets uintptr
  38. head, tail uint32
  39. id winrio.BufferId
  40. iocp windows.Handle
  41. isFull bool
  42. cq winrio.Cq
  43. mu sync.Mutex
  44. overlapped windows.Overlapped
  45. }
  46. type RIOConn struct {
  47. isOpen atomic.Bool
  48. l *logrus.Logger
  49. sock windows.Handle
  50. rx, tx ringBuffer
  51. rq winrio.Rq
  52. results [packetsPerRing]winrio.Result
  53. }
  54. func NewRIOListener(l *logrus.Logger, addr netip.Addr, port int) (*RIOConn, error) {
  55. if !winrio.Initialize() {
  56. return nil, errors.New("could not initialize winrio")
  57. }
  58. u := &RIOConn{l: l}
  59. err := u.bind(&windows.SockaddrInet6{Addr: addr.As16(), Port: port})
  60. if err != nil {
  61. return nil, fmt.Errorf("bind: %w", err)
  62. }
  63. for i := 0; i < packetsPerRing; i++ {
  64. err = u.insertReceiveRequest()
  65. if err != nil {
  66. return nil, fmt.Errorf("init rx ring: %w", err)
  67. }
  68. }
  69. u.isOpen.Store(true)
  70. return u, nil
  71. }
  72. func (u *RIOConn) bind(sa windows.Sockaddr) error {
  73. var err error
  74. u.sock, err = winrio.Socket(windows.AF_INET6, windows.SOCK_DGRAM, windows.IPPROTO_UDP)
  75. if err != nil {
  76. return err
  77. }
  78. // Enable v4 for this socket
  79. syscall.SetsockoptInt(syscall.Handle(u.sock), syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 0)
  80. err = u.rx.Open()
  81. if err != nil {
  82. return err
  83. }
  84. err = u.tx.Open()
  85. if err != nil {
  86. return err
  87. }
  88. u.rq, err = winrio.CreateRequestQueue(u.sock, packetsPerRing, 1, packetsPerRing, 1, u.rx.cq, u.tx.cq, 0)
  89. if err != nil {
  90. return err
  91. }
  92. err = windows.Bind(u.sock, sa)
  93. if err != nil {
  94. return err
  95. }
  96. return nil
  97. }
  98. func (u *RIOConn) ListenOut(r EncReader, lhf LightHouseHandlerFunc, cache *firewall.ConntrackCacheTicker, q int) {
  99. plaintext := make([]byte, MTU)
  100. buffer := make([]byte, MTU)
  101. h := &header.H{}
  102. fwPacket := &firewall.Packet{}
  103. nb := make([]byte, 12, 12)
  104. consecutiveErrors := 0
  105. for {
  106. // Just read one packet at a time
  107. n, rua, err := u.receive(buffer)
  108. if err != nil {
  109. if errors.Is(err, net.ErrClosed) {
  110. u.l.WithError(err).Debug("udp socket is closed, exiting read loop")
  111. return
  112. }
  113. // Try to suss out whether this is a transient error or something more permanent
  114. consecutiveErrors++
  115. u.l.WithError(err).WithField("consecutiveErrors", consecutiveErrors).Error("unexpected udp socket recieve error")
  116. if consecutiveErrors > 15 {
  117. panic("too many consecutive UDP receive errors")
  118. } else if consecutiveErrors > 10 {
  119. time.Sleep(100 * time.Millisecond)
  120. }
  121. continue
  122. }
  123. consecutiveErrors = 0
  124. r(
  125. netip.AddrPortFrom(netip.AddrFrom16(rua.Addr).Unmap(), (rua.Port>>8)|((rua.Port&0xff)<<8)),
  126. plaintext[:0],
  127. buffer[:n],
  128. h,
  129. fwPacket,
  130. lhf,
  131. nb,
  132. q,
  133. cache.Get(u.l),
  134. )
  135. }
  136. }
  137. func (u *RIOConn) insertReceiveRequest() error {
  138. packet := u.rx.Push()
  139. dataBuffer := &winrio.Buffer{
  140. Id: u.rx.id,
  141. Offset: uint32(uintptr(unsafe.Pointer(&packet.data[0])) - u.rx.packets),
  142. Length: uint32(len(packet.data)),
  143. }
  144. addressBuffer := &winrio.Buffer{
  145. Id: u.rx.id,
  146. Offset: uint32(uintptr(unsafe.Pointer(&packet.addr)) - u.rx.packets),
  147. Length: uint32(unsafe.Sizeof(packet.addr)),
  148. }
  149. return winrio.ReceiveEx(u.rq, dataBuffer, 1, nil, addressBuffer, nil, nil, 0, uintptr(unsafe.Pointer(packet)))
  150. }
  151. func (u *RIOConn) receive(buf []byte) (int, windows.RawSockaddrInet6, error) {
  152. if !u.isOpen.Load() {
  153. return 0, windows.RawSockaddrInet6{}, net.ErrClosed
  154. }
  155. u.rx.mu.Lock()
  156. defer u.rx.mu.Unlock()
  157. var err error
  158. var count uint32
  159. var results [1]winrio.Result
  160. retry:
  161. count = 0
  162. for tries := 0; count == 0 && tries < receiveSpins; tries++ {
  163. if tries > 0 {
  164. if !u.isOpen.Load() {
  165. return 0, windows.RawSockaddrInet6{}, net.ErrClosed
  166. }
  167. procyield(1)
  168. }
  169. count = winrio.DequeueCompletion(u.rx.cq, results[:])
  170. }
  171. if count == 0 {
  172. err = winrio.Notify(u.rx.cq)
  173. if err != nil {
  174. return 0, windows.RawSockaddrInet6{}, err
  175. }
  176. var bytes uint32
  177. var key uintptr
  178. var overlapped *windows.Overlapped
  179. err = windows.GetQueuedCompletionStatus(u.rx.iocp, &bytes, &key, &overlapped, windows.INFINITE)
  180. if err != nil {
  181. return 0, windows.RawSockaddrInet6{}, err
  182. }
  183. if !u.isOpen.Load() {
  184. return 0, windows.RawSockaddrInet6{}, net.ErrClosed
  185. }
  186. count = winrio.DequeueCompletion(u.rx.cq, results[:])
  187. if count == 0 {
  188. return 0, windows.RawSockaddrInet6{}, io.ErrNoProgress
  189. }
  190. }
  191. u.rx.Return(1)
  192. err = u.insertReceiveRequest()
  193. if err != nil {
  194. return 0, windows.RawSockaddrInet6{}, err
  195. }
  196. // We limit the MTU well below the 65k max for practicality, but this means a remote host can still send us
  197. // huge packets. Just try again when this happens. The infinite loop this could cause is still limited to
  198. // attacker bandwidth, just like the rest of the receive path.
  199. if windows.Errno(results[0].Status) == windows.WSAEMSGSIZE {
  200. goto retry
  201. }
  202. if results[0].Status != 0 {
  203. return 0, windows.RawSockaddrInet6{}, windows.Errno(results[0].Status)
  204. }
  205. packet := (*ringPacket)(unsafe.Pointer(uintptr(results[0].RequestContext)))
  206. ep := packet.addr
  207. n := copy(buf, packet.data[:results[0].BytesTransferred])
  208. return n, ep, nil
  209. }
  210. func (u *RIOConn) WriteTo(buf []byte, ip netip.AddrPort) error {
  211. if !u.isOpen.Load() {
  212. return net.ErrClosed
  213. }
  214. if len(buf) > bytesPerPacket {
  215. return io.ErrShortBuffer
  216. }
  217. u.tx.mu.Lock()
  218. defer u.tx.mu.Unlock()
  219. count := winrio.DequeueCompletion(u.tx.cq, u.results[:])
  220. if count == 0 && u.tx.isFull {
  221. err := winrio.Notify(u.tx.cq)
  222. if err != nil {
  223. return err
  224. }
  225. var bytes uint32
  226. var key uintptr
  227. var overlapped *windows.Overlapped
  228. err = windows.GetQueuedCompletionStatus(u.tx.iocp, &bytes, &key, &overlapped, windows.INFINITE)
  229. if err != nil {
  230. return err
  231. }
  232. if !u.isOpen.Load() {
  233. return net.ErrClosed
  234. }
  235. count = winrio.DequeueCompletion(u.tx.cq, u.results[:])
  236. if count == 0 {
  237. return io.ErrNoProgress
  238. }
  239. }
  240. if count > 0 {
  241. u.tx.Return(count)
  242. }
  243. packet := u.tx.Push()
  244. packet.addr.Family = windows.AF_INET6
  245. packet.addr.Addr = ip.Addr().As16()
  246. port := ip.Port()
  247. packet.addr.Port = (port >> 8) | ((port & 0xff) << 8)
  248. copy(packet.data[:], buf)
  249. dataBuffer := &winrio.Buffer{
  250. Id: u.tx.id,
  251. Offset: uint32(uintptr(unsafe.Pointer(&packet.data[0])) - u.tx.packets),
  252. Length: uint32(len(buf)),
  253. }
  254. addressBuffer := &winrio.Buffer{
  255. Id: u.tx.id,
  256. Offset: uint32(uintptr(unsafe.Pointer(&packet.addr)) - u.tx.packets),
  257. Length: uint32(unsafe.Sizeof(packet.addr)),
  258. }
  259. return winrio.SendEx(u.rq, dataBuffer, 1, nil, addressBuffer, nil, nil, 0, 0)
  260. }
  261. func (u *RIOConn) LocalAddr() (netip.AddrPort, error) {
  262. sa, err := windows.Getsockname(u.sock)
  263. if err != nil {
  264. return netip.AddrPort{}, err
  265. }
  266. v6 := sa.(*windows.SockaddrInet6)
  267. return netip.AddrPortFrom(netip.AddrFrom16(v6.Addr).Unmap(), uint16(v6.Port)), nil
  268. }
  269. func (u *RIOConn) Rebind() error {
  270. return nil
  271. }
  272. func (u *RIOConn) ReloadConfig(*config.C) {}
  273. func (u *RIOConn) Close() error {
  274. if !u.isOpen.CompareAndSwap(true, false) {
  275. return nil
  276. }
  277. windows.PostQueuedCompletionStatus(u.rx.iocp, 0, 0, nil)
  278. windows.PostQueuedCompletionStatus(u.tx.iocp, 0, 0, nil)
  279. u.rx.CloseAndZero()
  280. u.tx.CloseAndZero()
  281. if u.sock != 0 {
  282. windows.CloseHandle(u.sock)
  283. }
  284. return nil
  285. }
  286. func (ring *ringBuffer) Push() *ringPacket {
  287. for ring.isFull {
  288. panic("ring is full")
  289. }
  290. ret := (*ringPacket)(unsafe.Pointer(ring.packets + (uintptr(ring.tail%packetsPerRing) * unsafe.Sizeof(ringPacket{}))))
  291. ring.tail += 1
  292. if ring.tail%packetsPerRing == ring.head%packetsPerRing {
  293. ring.isFull = true
  294. }
  295. return ret
  296. }
  297. func (ring *ringBuffer) Return(count uint32) {
  298. if ring.head%packetsPerRing == ring.tail%packetsPerRing && !ring.isFull {
  299. return
  300. }
  301. ring.head += count
  302. ring.isFull = false
  303. }
  304. func (ring *ringBuffer) CloseAndZero() {
  305. if ring.cq != 0 {
  306. winrio.CloseCompletionQueue(ring.cq)
  307. ring.cq = 0
  308. }
  309. if ring.iocp != 0 {
  310. windows.CloseHandle(ring.iocp)
  311. ring.iocp = 0
  312. }
  313. if ring.id != 0 {
  314. winrio.DeregisterBuffer(ring.id)
  315. ring.id = 0
  316. }
  317. if ring.packets != 0 {
  318. windows.VirtualFree(ring.packets, 0, windows.MEM_RELEASE)
  319. ring.packets = 0
  320. }
  321. ring.head = 0
  322. ring.tail = 0
  323. ring.isFull = false
  324. }
  325. func (ring *ringBuffer) Open() error {
  326. var err error
  327. packetsLen := unsafe.Sizeof(ringPacket{}) * packetsPerRing
  328. ring.packets, err = windows.VirtualAlloc(0, packetsLen, windows.MEM_COMMIT|windows.MEM_RESERVE, windows.PAGE_READWRITE)
  329. if err != nil {
  330. return err
  331. }
  332. ring.id, err = winrio.RegisterPointer(unsafe.Pointer(ring.packets), uint32(packetsLen))
  333. if err != nil {
  334. return err
  335. }
  336. ring.iocp, err = windows.CreateIoCompletionPort(windows.InvalidHandle, 0, 0, 0)
  337. if err != nil {
  338. return err
  339. }
  340. ring.cq, err = winrio.CreateIOCPCompletionQueue(packetsPerRing, ring.iocp, 0, &ring.overlapped)
  341. if err != nil {
  342. return err
  343. }
  344. return nil
  345. }