udp_rio_windows.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440
  1. //go:build !e2e_testing
  2. // +build !e2e_testing
  3. // Inspired by https://git.zx2c4.com/wireguard-go/tree/conn/bind_windows.go
  4. package udp
  5. import (
  6. "errors"
  7. "fmt"
  8. "io"
  9. "net"
  10. "net/netip"
  11. "sync"
  12. "sync/atomic"
  13. "syscall"
  14. "time"
  15. "unsafe"
  16. "github.com/sirupsen/logrus"
  17. "github.com/slackhq/nebula/config"
  18. "github.com/slackhq/nebula/firewall"
  19. "github.com/slackhq/nebula/header"
  20. "golang.org/x/sys/windows"
  21. "golang.zx2c4.com/wireguard/conn/winrio"
  22. )
  23. // Assert we meet the standard conn interface
  24. var _ Conn = &RIOConn{}
  // procyield has no body here; the go:linkname directive below binds it to runtime.procyield.
  // receive calls procyield(1) between polls of the rx completion queue.
  25. //go:linkname procyield runtime.procyield
  26. func procyield(cycles uint32)
  // Sizing and polling parameters shared by the rx and tx rings.
  const (
  	// packetsPerRing is the number of ringPacket slots in each ring. The same value sizes
  	// the completion queues, the request queue limits and RIOConn.results.
  	packetsPerRing = 1024

  	// bytesPerPacket is the payload capacity of one ringPacket and therefore the largest
  	// datagram WriteTo accepts.
  	bytesPerPacket = 2048 - 32

  	// receiveSpins is how many times receive polls the completion queue before it falls
  	// back to blocking on the completion port.
  	receiveSpins = 15
  )
  32. type ringPacket struct {
  33. addr windows.RawSockaddrInet6
  34. data [bytesPerPacket]byte
  35. }
  36. type ringBuffer struct {
  37. packets uintptr
  38. head, tail uint32
  39. id winrio.BufferId
  40. iocp windows.Handle
  41. isFull bool
  42. cq winrio.Cq
  43. mu sync.Mutex
  44. overlapped windows.Overlapped
  45. }
  46. type RIOConn struct {
  47. isOpen atomic.Bool
  48. l *logrus.Logger
  49. sock windows.Handle
  50. rx, tx ringBuffer
  51. rq winrio.Rq
  52. results [packetsPerRing]winrio.Result
  53. }
  54. func NewRIOListener(l *logrus.Logger, addr netip.Addr, port int) (*RIOConn, error) {
  55. if !winrio.Initialize() {
  56. return nil, errors.New("could not initialize winrio")
  57. }
  58. u := &RIOConn{l: l}
  59. err := u.bind(l, &windows.SockaddrInet6{Addr: addr.As16(), Port: port})
  60. if err != nil {
  61. return nil, fmt.Errorf("bind: %w", err)
  62. }
  63. for i := 0; i < packetsPerRing; i++ {
  64. err = u.insertReceiveRequest()
  65. if err != nil {
  66. return nil, fmt.Errorf("init rx ring: %w", err)
  67. }
  68. }
  69. u.isOpen.Store(true)
  70. return u, nil
  71. }
  72. func (u *RIOConn) bind(l *logrus.Logger, sa windows.Sockaddr) error {
  73. var err error
  74. u.sock, err = winrio.Socket(windows.AF_INET6, windows.SOCK_DGRAM, windows.IPPROTO_UDP)
  75. if err != nil {
  76. return fmt.Errorf("winrio.Socket error: %w", err)
  77. }
  78. // Enable v4 for this socket
  79. syscall.SetsockoptInt(syscall.Handle(u.sock), syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 0)
  80. // Disable reporting of PORT_UNREACHABLE and NET_UNREACHABLE errors from the UDP socket receive call.
  81. // These errors are returned on Windows during UDP receives based on the receipt of ICMP packets. Disable
  82. // the UDP receive error returns with these ioctl calls.
  83. ret := uint32(0)
  84. flag := uint32(0)
  85. size := uint32(unsafe.Sizeof(flag))
  86. err = syscall.WSAIoctl(syscall.Handle(u.sock), syscall.SIO_UDP_CONNRESET, (*byte)(unsafe.Pointer(&flag)), size, nil, 0, &ret, nil, 0)
  87. if err != nil {
  88. // This is a best-effort to prevent errors from being returned by the udp recv operation.
  89. // Quietly log a failure and continue.
  90. l.WithError(err).Debug("failed to set UDP_CONNRESET ioctl")
  91. }
  92. ret = 0
  93. flag = 0
  94. size = uint32(unsafe.Sizeof(flag))
  95. SIO_UDP_NETRESET := uint32(syscall.IOC_IN | syscall.IOC_VENDOR | 15)
  96. err = syscall.WSAIoctl(syscall.Handle(u.sock), SIO_UDP_NETRESET, (*byte)(unsafe.Pointer(&flag)), size, nil, 0, &ret, nil, 0)
  97. if err != nil {
  98. // This is a best-effort to prevent errors from being returned by the udp recv operation.
  99. // Quietly log a failure and continue.
  100. l.WithError(err).Debug("failed to set UDP_NETRESET ioctl")
  101. }
  102. err = u.rx.Open()
  103. if err != nil {
  104. return fmt.Errorf("error rx.Open(): %w", err)
  105. }
  106. err = u.tx.Open()
  107. if err != nil {
  108. return fmt.Errorf("error tx.Open(): %w", err)
  109. }
  110. u.rq, err = winrio.CreateRequestQueue(u.sock, packetsPerRing, 1, packetsPerRing, 1, u.rx.cq, u.tx.cq, 0)
  111. if err != nil {
  112. return fmt.Errorf("error CreateRequestQueue: %w", err)
  113. }
  114. err = windows.Bind(u.sock, sa)
  115. if err != nil {
  116. return fmt.Errorf("error windows.Bind(): %w", err)
  117. }
  118. return nil
  119. }
  120. func (u *RIOConn) ListenOut(r EncReader, lhf LightHouseHandlerFunc, cache *firewall.ConntrackCacheTicker, q int) {
  121. plaintext := make([]byte, MTU)
  122. buffer := make([]byte, MTU)
  123. h := &header.H{}
  124. fwPacket := &firewall.Packet{}
  125. nb := make([]byte, 12, 12)
  126. var lastRecvErr time.Time
  127. for {
  128. // Just read one packet at a time
  129. n, rua, err := u.receive(buffer)
  130. if err != nil {
  131. if errors.Is(err, net.ErrClosed) {
  132. u.l.WithError(err).Debug("udp socket is closed, exiting read loop")
  133. return
  134. }
  135. // Dampen unexpected message warns to once per minute
  136. if lastRecvErr.IsZero() || time.Since(lastRecvErr) > time.Minute {
  137. lastRecvErr = time.Now()
  138. u.l.WithError(err).Warn("unexpected udp socket receive error")
  139. }
  140. continue
  141. }
  142. r(
  143. netip.AddrPortFrom(netip.AddrFrom16(rua.Addr).Unmap(), (rua.Port>>8)|((rua.Port&0xff)<<8)),
  144. plaintext[:0],
  145. buffer[:n],
  146. h,
  147. fwPacket,
  148. lhf,
  149. nb,
  150. q,
  151. cache.Get(u.l),
  152. )
  153. }
  154. }
  155. func (u *RIOConn) insertReceiveRequest() error {
  156. packet := u.rx.Push()
  157. dataBuffer := &winrio.Buffer{
  158. Id: u.rx.id,
  159. Offset: uint32(uintptr(unsafe.Pointer(&packet.data[0])) - u.rx.packets),
  160. Length: uint32(len(packet.data)),
  161. }
  162. addressBuffer := &winrio.Buffer{
  163. Id: u.rx.id,
  164. Offset: uint32(uintptr(unsafe.Pointer(&packet.addr)) - u.rx.packets),
  165. Length: uint32(unsafe.Sizeof(packet.addr)),
  166. }
  167. return winrio.ReceiveEx(u.rq, dataBuffer, 1, nil, addressBuffer, nil, nil, 0, uintptr(unsafe.Pointer(packet)))
  168. }
  169. func (u *RIOConn) receive(buf []byte) (int, windows.RawSockaddrInet6, error) {
  170. if !u.isOpen.Load() {
  171. return 0, windows.RawSockaddrInet6{}, net.ErrClosed
  172. }
  173. u.rx.mu.Lock()
  174. defer u.rx.mu.Unlock()
  175. var err error
  176. var count uint32
  177. var results [1]winrio.Result
  178. retry:
  179. count = 0
  180. for tries := 0; count == 0 && tries < receiveSpins; tries++ {
  181. if tries > 0 {
  182. if !u.isOpen.Load() {
  183. return 0, windows.RawSockaddrInet6{}, net.ErrClosed
  184. }
  185. procyield(1)
  186. }
  187. count = winrio.DequeueCompletion(u.rx.cq, results[:])
  188. }
  189. if count == 0 {
  190. err = winrio.Notify(u.rx.cq)
  191. if err != nil {
  192. return 0, windows.RawSockaddrInet6{}, err
  193. }
  194. var bytes uint32
  195. var key uintptr
  196. var overlapped *windows.Overlapped
  197. err = windows.GetQueuedCompletionStatus(u.rx.iocp, &bytes, &key, &overlapped, windows.INFINITE)
  198. if err != nil {
  199. return 0, windows.RawSockaddrInet6{}, err
  200. }
  201. if !u.isOpen.Load() {
  202. return 0, windows.RawSockaddrInet6{}, net.ErrClosed
  203. }
  204. count = winrio.DequeueCompletion(u.rx.cq, results[:])
  205. if count == 0 {
  206. return 0, windows.RawSockaddrInet6{}, io.ErrNoProgress
  207. }
  208. }
  209. u.rx.Return(1)
  210. err = u.insertReceiveRequest()
  211. if err != nil {
  212. return 0, windows.RawSockaddrInet6{}, err
  213. }
  214. // We limit the MTU well below the 65k max for practicality, but this means a remote host can still send us
  215. // huge packets. Just try again when this happens. The infinite loop this could cause is still limited to
  216. // attacker bandwidth, just like the rest of the receive path.
  217. if windows.Errno(results[0].Status) == windows.WSAEMSGSIZE {
  218. goto retry
  219. }
  220. if results[0].Status != 0 {
  221. return 0, windows.RawSockaddrInet6{}, windows.Errno(results[0].Status)
  222. }
  223. packet := (*ringPacket)(unsafe.Pointer(uintptr(results[0].RequestContext)))
  224. ep := packet.addr
  225. n := copy(buf, packet.data[:results[0].BytesTransferred])
  226. return n, ep, nil
  227. }
  228. func (u *RIOConn) WriteTo(buf []byte, ip netip.AddrPort) error {
  229. if !u.isOpen.Load() {
  230. return net.ErrClosed
  231. }
  232. if len(buf) > bytesPerPacket {
  233. return io.ErrShortBuffer
  234. }
  235. u.tx.mu.Lock()
  236. defer u.tx.mu.Unlock()
  237. count := winrio.DequeueCompletion(u.tx.cq, u.results[:])
  238. if count == 0 && u.tx.isFull {
  239. err := winrio.Notify(u.tx.cq)
  240. if err != nil {
  241. return err
  242. }
  243. var bytes uint32
  244. var key uintptr
  245. var overlapped *windows.Overlapped
  246. err = windows.GetQueuedCompletionStatus(u.tx.iocp, &bytes, &key, &overlapped, windows.INFINITE)
  247. if err != nil {
  248. return err
  249. }
  250. if !u.isOpen.Load() {
  251. return net.ErrClosed
  252. }
  253. count = winrio.DequeueCompletion(u.tx.cq, u.results[:])
  254. if count == 0 {
  255. return io.ErrNoProgress
  256. }
  257. }
  258. if count > 0 {
  259. u.tx.Return(count)
  260. }
  261. packet := u.tx.Push()
  262. packet.addr.Family = windows.AF_INET6
  263. packet.addr.Addr = ip.Addr().As16()
  264. port := ip.Port()
  265. packet.addr.Port = (port >> 8) | ((port & 0xff) << 8)
  266. copy(packet.data[:], buf)
  267. dataBuffer := &winrio.Buffer{
  268. Id: u.tx.id,
  269. Offset: uint32(uintptr(unsafe.Pointer(&packet.data[0])) - u.tx.packets),
  270. Length: uint32(len(buf)),
  271. }
  272. addressBuffer := &winrio.Buffer{
  273. Id: u.tx.id,
  274. Offset: uint32(uintptr(unsafe.Pointer(&packet.addr)) - u.tx.packets),
  275. Length: uint32(unsafe.Sizeof(packet.addr)),
  276. }
  277. return winrio.SendEx(u.rq, dataBuffer, 1, nil, addressBuffer, nil, nil, 0, 0)
  278. }
  279. func (u *RIOConn) LocalAddr() (netip.AddrPort, error) {
  280. sa, err := windows.Getsockname(u.sock)
  281. if err != nil {
  282. return netip.AddrPort{}, err
  283. }
  284. v6 := sa.(*windows.SockaddrInet6)
  285. return netip.AddrPortFrom(netip.AddrFrom16(v6.Addr).Unmap(), uint16(v6.Port)), nil
  286. }
  287. func (u *RIOConn) Rebind() error {
  288. return nil
  289. }
  290. func (u *RIOConn) ReloadConfig(*config.C) {}
  291. func (u *RIOConn) Close() error {
  292. if !u.isOpen.CompareAndSwap(true, false) {
  293. return nil
  294. }
  295. windows.PostQueuedCompletionStatus(u.rx.iocp, 0, 0, nil)
  296. windows.PostQueuedCompletionStatus(u.tx.iocp, 0, 0, nil)
  297. u.rx.CloseAndZero()
  298. u.tx.CloseAndZero()
  299. if u.sock != 0 {
  300. windows.CloseHandle(u.sock)
  301. }
  302. return nil
  303. }
  304. func (ring *ringBuffer) Push() *ringPacket {
  305. for ring.isFull {
  306. panic("ring is full")
  307. }
  308. ret := (*ringPacket)(unsafe.Pointer(ring.packets + (uintptr(ring.tail%packetsPerRing) * unsafe.Sizeof(ringPacket{}))))
  309. ring.tail += 1
  310. if ring.tail%packetsPerRing == ring.head%packetsPerRing {
  311. ring.isFull = true
  312. }
  313. return ret
  314. }
  315. func (ring *ringBuffer) Return(count uint32) {
  316. if ring.head%packetsPerRing == ring.tail%packetsPerRing && !ring.isFull {
  317. return
  318. }
  319. ring.head += count
  320. ring.isFull = false
  321. }
  322. func (ring *ringBuffer) CloseAndZero() {
  323. if ring.cq != 0 {
  324. winrio.CloseCompletionQueue(ring.cq)
  325. ring.cq = 0
  326. }
  327. if ring.iocp != 0 {
  328. windows.CloseHandle(ring.iocp)
  329. ring.iocp = 0
  330. }
  331. if ring.id != 0 {
  332. winrio.DeregisterBuffer(ring.id)
  333. ring.id = 0
  334. }
  335. if ring.packets != 0 {
  336. windows.VirtualFree(ring.packets, 0, windows.MEM_RELEASE)
  337. ring.packets = 0
  338. }
  339. ring.head = 0
  340. ring.tail = 0
  341. ring.isFull = false
  342. }
  343. func (ring *ringBuffer) Open() error {
  344. var err error
  345. packetsLen := unsafe.Sizeof(ringPacket{}) * packetsPerRing
  346. ring.packets, err = windows.VirtualAlloc(0, packetsLen, windows.MEM_COMMIT|windows.MEM_RESERVE, windows.PAGE_READWRITE)
  347. if err != nil {
  348. return err
  349. }
  350. ring.id, err = winrio.RegisterPointer(unsafe.Pointer(ring.packets), uint32(packetsLen))
  351. if err != nil {
  352. return err
  353. }
  354. ring.iocp, err = windows.CreateIoCompletionPort(windows.InvalidHandle, 0, 0, 0)
  355. if err != nil {
  356. return err
  357. }
  358. ring.cq, err = winrio.CreateIOCPCompletionQueue(packetsPerRing, ring.iocp, 0, &ring.overlapped)
  359. if err != nil {
  360. return err
  361. }
  362. return nil
  363. }