tap_linux.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. // Package tap registers Tailscale's experimental (demo) Linux TAP (Layer 2) support.
  4. package tap
  5. import (
  6. "bytes"
  7. "errors"
  8. "fmt"
  9. "net"
  10. "net/netip"
  11. "os"
  12. "os/exec"
  13. "sync"
  14. "github.com/insomniacslk/dhcp/dhcpv4"
  15. "github.com/tailscale/wireguard-go/tun"
  16. "golang.org/x/sys/unix"
  17. "gvisor.dev/gvisor/pkg/tcpip"
  18. "gvisor.dev/gvisor/pkg/tcpip/checksum"
  19. "gvisor.dev/gvisor/pkg/tcpip/header"
  20. "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
  21. "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
  22. "gvisor.dev/gvisor/pkg/tcpip/transport/udp"
  23. "tailscale.com/net/netaddr"
  24. "tailscale.com/net/packet"
  25. "tailscale.com/net/tsaddr"
  26. "tailscale.com/net/tstun"
  27. "tailscale.com/syncs"
  28. "tailscale.com/types/ipproto"
  29. "tailscale.com/types/logger"
  30. )
// ourMAC is the fixed Ethernet address this TAP endpoint uses as the source MAC
// of every frame it originates: ARP replies, DHCP replies, and IP packets
// framed by Write.
//
// TODO: this was randomly generated once. Maybe do it per process start? But
// then an upgraded tailscaled would be visible to devices behind it. So
// maybe instead make it a function of the tailscaled's wireguard public key?
// For now just hard code it.
var ourMAC = net.HardwareAddr{0x30, 0x2D, 0x66, 0xEC, 0x7A, 0x93}

// tapDebug mirrors tstun.TAPDebug and gates the verbose "tap: ..." log lines
// in this file.
const tapDebug = tstun.TAPDebug
// init registers createTAPLinux as the TAP device constructor held in
// tstun.CreateTAP.
func init() {
	tstun.CreateTAP.Set(createTAPLinux)
}
  40. func createTAPLinux(logf logger.Logf, tapName, bridgeName string) (tun.Device, error) {
  41. fd, err := unix.Open("/dev/net/tun", unix.O_RDWR, 0)
  42. if err != nil {
  43. return nil, err
  44. }
  45. dev, err := openDevice(logf, fd, tapName, bridgeName)
  46. if err != nil {
  47. unix.Close(fd)
  48. return nil, err
  49. }
  50. return dev, nil
  51. }
  52. func openDevice(logf logger.Logf, fd int, tapName, bridgeName string) (tun.Device, error) {
  53. ifr, err := unix.NewIfreq(tapName)
  54. if err != nil {
  55. return nil, err
  56. }
  57. // Flags are stored as a uint16 in the ifreq union.
  58. ifr.SetUint16(unix.IFF_TAP | unix.IFF_NO_PI)
  59. if err := unix.IoctlIfreq(fd, unix.TUNSETIFF, ifr); err != nil {
  60. return nil, err
  61. }
  62. if err := run("ip", "link", "set", "dev", tapName, "up"); err != nil {
  63. return nil, err
  64. }
  65. if bridgeName != "" {
  66. if err := run("brctl", "addif", bridgeName, tapName); err != nil {
  67. return nil, err
  68. }
  69. }
  70. return newTAPDevice(logf, fd, tapName)
  71. }
// etherType is the two-byte EtherType field of an Ethernet header, held in the
// same byte order as it appears in the frame.
type etherType [2]byte

// EtherType values that handleTAPFrame dispatches on.
var (
	etherTypeARP  = etherType{0x08, 0x06}
	etherTypeIPv4 = etherType{0x08, 0x00}
	etherTypeIPv6 = etherType{0x86, 0xDD}
)

const (
	ipv4HeaderLen     = 20 // minimum (option-less) IPv4 header length used for size checks
	ethernetFrameSize = 14 // 2 six byte MACs, 2 bytes ethertype
)

// Verdicts returned by handleTAPFrame and handleDHCPRequest.
const (
	consumePacket = true  // frame was handled or dropped here; do not pass it on
	passOnPacket  = false // frame was not handled here; pass it on
)
  86. // handleTAPFrame handles receiving a raw TAP ethernet frame and reports whether
  87. // it's been handled (that is, whether it should NOT be passed to wireguard).
  88. func (t *tapDevice) handleTAPFrame(ethBuf []byte) bool {
  89. if len(ethBuf) < ethernetFrameSize {
  90. // Corrupt. Ignore.
  91. if tapDebug {
  92. t.logf("tap: short TAP frame")
  93. }
  94. return consumePacket
  95. }
  96. ethDstMAC, ethSrcMAC := ethBuf[:6], ethBuf[6:12]
  97. _ = ethDstMAC
  98. et := etherType{ethBuf[12], ethBuf[13]}
  99. switch et {
  100. default:
  101. if tapDebug {
  102. t.logf("tap: ignoring etherType %v", et)
  103. }
  104. return consumePacket // filter out packet we should ignore
  105. case etherTypeIPv6:
  106. // TODO: support DHCPv6/ND/etc later. For now pass all to WireGuard.
  107. if tapDebug {
  108. t.logf("tap: ignoring IPv6 %v", et)
  109. }
  110. return passOnPacket
  111. case etherTypeIPv4:
  112. if len(ethBuf) < ethernetFrameSize+ipv4HeaderLen {
  113. // Bogus IPv4. Eat.
  114. if tapDebug {
  115. t.logf("tap: short ipv4")
  116. }
  117. return consumePacket
  118. }
  119. return t.handleDHCPRequest(ethBuf)
  120. case etherTypeARP:
  121. arpPacket := header.ARP(ethBuf[ethernetFrameSize:])
  122. if !arpPacket.IsValid() {
  123. // Bogus ARP. Eat.
  124. return consumePacket
  125. }
  126. switch arpPacket.Op() {
  127. case header.ARPRequest:
  128. req := arpPacket // better name at this point
  129. buf := make([]byte, header.EthernetMinimumSize+header.ARPSize)
  130. // Our ARP "Table" of one:
  131. var srcMAC [6]byte
  132. copy(srcMAC[:], ethSrcMAC)
  133. if old := t.destMAC(); old != srcMAC {
  134. t.destMACAtomic.Store(srcMAC)
  135. }
  136. eth := header.Ethernet(buf)
  137. eth.Encode(&header.EthernetFields{
  138. SrcAddr: tcpip.LinkAddress(ourMAC[:]),
  139. DstAddr: tcpip.LinkAddress(ethSrcMAC),
  140. Type: 0x0806, // arp
  141. })
  142. res := header.ARP(buf[header.EthernetMinimumSize:])
  143. res.SetIPv4OverEthernet()
  144. res.SetOp(header.ARPReply)
  145. // If the client's asking about their own IP, tell them it's
  146. // their own MAC. TODO(bradfitz): remove String allocs.
  147. if net.IP(req.ProtocolAddressTarget()).String() == t.clientIPv4.Load() {
  148. copy(res.HardwareAddressSender(), ethSrcMAC)
  149. } else {
  150. copy(res.HardwareAddressSender(), ourMAC[:])
  151. }
  152. copy(res.ProtocolAddressSender(), req.ProtocolAddressTarget())
  153. copy(res.HardwareAddressTarget(), req.HardwareAddressSender())
  154. copy(res.ProtocolAddressTarget(), req.ProtocolAddressSender())
  155. n, err := t.WriteEthernet(buf)
  156. if tapDebug {
  157. t.logf("tap: wrote ARP reply %v, %v", n, err)
  158. }
  159. }
  160. return consumePacket
  161. }
  162. }
var (
	// routerIP is the address (tsaddr.TailscaleServiceIPString) that the
	// DHCP replies built in this file advertise as the DHCP server, the
	// default router, and the DNS server.
	routerIP = net.ParseIP(tsaddr.TailscaleServiceIPString)
	// cgnatNetMask is the netmask of the 100.64.0.0/10 CGNAT range.
	cgnatNetMask = net.IPMask(net.ParseIP("255.192.0.0").To4())
)

// parsedPacketPool holds a pool of Parsed structs for use in filtering.
// This is needed because escape analysis cannot see that parsed packets
// do not escape through {Pre,Post}Filter{In,Out}.
//
// In this file it is used by handleDHCPRequest to decode the IP packet
// carried inside an incoming frame.
var parsedPacketPool = sync.Pool{New: func() any { return new(packet.Parsed) }}
  173. // handleDHCPRequest handles receiving a raw TAP ethernet frame and reports whether
  174. // it's been handled as a DHCP request. That is, it reports whether the frame should
  175. // be ignored by the caller and not passed on.
  176. func (t *tapDevice) handleDHCPRequest(ethBuf []byte) bool {
  177. const udpHeader = 8
  178. if len(ethBuf) < ethernetFrameSize+ipv4HeaderLen+udpHeader {
  179. if tapDebug {
  180. t.logf("tap: DHCP short")
  181. }
  182. return passOnPacket
  183. }
  184. ethDstMAC, ethSrcMAC := ethBuf[:6], ethBuf[6:12]
  185. if string(ethDstMAC) != "\xff\xff\xff\xff\xff\xff" {
  186. // Not a broadcast
  187. if tapDebug {
  188. t.logf("tap: dhcp no broadcast")
  189. }
  190. return passOnPacket
  191. }
  192. p := parsedPacketPool.Get().(*packet.Parsed)
  193. defer parsedPacketPool.Put(p)
  194. p.Decode(ethBuf[ethernetFrameSize:])
  195. if p.IPProto != ipproto.UDP || p.Src.Port() != 68 || p.Dst.Port() != 67 {
  196. // Not a DHCP request.
  197. if tapDebug {
  198. t.logf("tap: DHCP wrong meta: %+v", p)
  199. }
  200. return passOnPacket
  201. }
  202. dp, err := dhcpv4.FromBytes(ethBuf[ethernetFrameSize+ipv4HeaderLen+udpHeader:])
  203. if err != nil {
  204. // Bogus. Trash it.
  205. if tapDebug {
  206. t.logf("tap: DHCP FromBytes bad")
  207. }
  208. return consumePacket
  209. }
  210. if tapDebug {
  211. t.logf("tap: DHCP request: %+v", dp)
  212. }
  213. switch dp.MessageType() {
  214. case dhcpv4.MessageTypeDiscover:
  215. ips := t.clientIPv4.Load()
  216. if ips == "" {
  217. t.logf("tap: DHCP no client IP")
  218. return consumePacket
  219. }
  220. offer, err := dhcpv4.New(
  221. dhcpv4.WithReply(dp),
  222. dhcpv4.WithMessageType(dhcpv4.MessageTypeOffer),
  223. dhcpv4.WithRouter(routerIP), // the default route
  224. dhcpv4.WithDNS(routerIP),
  225. dhcpv4.WithServerIP(routerIP), // TODO: what is this?
  226. dhcpv4.WithOption(dhcpv4.OptServerIdentifier(routerIP)),
  227. dhcpv4.WithYourIP(net.ParseIP(ips)),
  228. dhcpv4.WithLeaseTime(3600), // hour works
  229. //dhcpv4.WithHwAddr(ethSrcMAC),
  230. dhcpv4.WithNetmask(cgnatNetMask),
  231. //dhcpv4.WithTransactionID(dp.TransactionID),
  232. )
  233. if err != nil {
  234. t.logf("error building DHCP offer: %v", err)
  235. return consumePacket
  236. }
  237. // Make a layer 2 packet to write out:
  238. pkt := packLayer2UDP(
  239. offer.ToBytes(),
  240. ourMAC, ethSrcMAC,
  241. netip.AddrPortFrom(netaddr.IPv4(100, 100, 100, 100), 67), // src
  242. netip.AddrPortFrom(netaddr.IPv4(255, 255, 255, 255), 68), // dst
  243. )
  244. n, err := t.WriteEthernet(pkt)
  245. if tapDebug {
  246. t.logf("tap: wrote DHCP OFFER %v, %v", n, err)
  247. }
  248. case dhcpv4.MessageTypeRequest:
  249. ips := t.clientIPv4.Load()
  250. if ips == "" {
  251. t.logf("tap: DHCP no client IP")
  252. return consumePacket
  253. }
  254. ack, err := dhcpv4.New(
  255. dhcpv4.WithReply(dp),
  256. dhcpv4.WithMessageType(dhcpv4.MessageTypeAck),
  257. dhcpv4.WithDNS(routerIP),
  258. dhcpv4.WithRouter(routerIP), // the default route
  259. dhcpv4.WithServerIP(routerIP), // TODO: what is this?
  260. dhcpv4.WithOption(dhcpv4.OptServerIdentifier(routerIP)),
  261. dhcpv4.WithYourIP(net.ParseIP(ips)), // Hello world
  262. dhcpv4.WithLeaseTime(3600), // hour works
  263. dhcpv4.WithNetmask(cgnatNetMask),
  264. )
  265. if err != nil {
  266. t.logf("error building DHCP ack: %v", err)
  267. return consumePacket
  268. }
  269. // Make a layer 2 packet to write out:
  270. pkt := packLayer2UDP(
  271. ack.ToBytes(),
  272. ourMAC, ethSrcMAC,
  273. netip.AddrPortFrom(netaddr.IPv4(100, 100, 100, 100), 67), // src
  274. netip.AddrPortFrom(netaddr.IPv4(255, 255, 255, 255), 68), // dst
  275. )
  276. n, err := t.WriteEthernet(pkt)
  277. if tapDebug {
  278. t.logf("tap: wrote DHCP ACK %v, %v", n, err)
  279. }
  280. default:
  281. if tapDebug {
  282. t.logf("tap: unknown DHCP type")
  283. }
  284. }
  285. return consumePacket
  286. }
  287. func writeEthernetFrame(buf []byte, srcMAC, dstMAC net.HardwareAddr, proto tcpip.NetworkProtocolNumber) {
  288. // Ethernet header
  289. eth := header.Ethernet(buf)
  290. eth.Encode(&header.EthernetFields{
  291. SrcAddr: tcpip.LinkAddress(srcMAC),
  292. DstAddr: tcpip.LinkAddress(dstMAC),
  293. Type: proto,
  294. })
  295. }
  296. func packLayer2UDP(payload []byte, srcMAC, dstMAC net.HardwareAddr, src, dst netip.AddrPort) []byte {
  297. buf := make([]byte, header.EthernetMinimumSize+header.UDPMinimumSize+header.IPv4MinimumSize+len(payload))
  298. payloadStart := len(buf) - len(payload)
  299. copy(buf[payloadStart:], payload)
  300. srcB := src.Addr().As4()
  301. srcIP := tcpip.AddrFromSlice(srcB[:])
  302. dstB := dst.Addr().As4()
  303. dstIP := tcpip.AddrFromSlice(dstB[:])
  304. // Ethernet header
  305. writeEthernetFrame(buf, srcMAC, dstMAC, ipv4.ProtocolNumber)
  306. // IP header
  307. ipbuf := buf[header.EthernetMinimumSize:]
  308. ip := header.IPv4(ipbuf)
  309. ip.Encode(&header.IPv4Fields{
  310. TotalLength: uint16(len(ipbuf)),
  311. TTL: 65,
  312. Protocol: uint8(udp.ProtocolNumber),
  313. SrcAddr: srcIP,
  314. DstAddr: dstIP,
  315. })
  316. ip.SetChecksum(^ip.CalculateChecksum())
  317. // UDP header
  318. u := header.UDP(buf[header.EthernetMinimumSize+header.IPv4MinimumSize:])
  319. u.Encode(&header.UDPFields{
  320. SrcPort: src.Port(),
  321. DstPort: dst.Port(),
  322. Length: uint16(header.UDPMinimumSize + len(payload)),
  323. })
  324. // Calculate the UDP pseudo-header checksum.
  325. xsum := header.PseudoHeaderChecksum(udp.ProtocolNumber, srcIP, dstIP, uint16(len(u)))
  326. // Calculate the UDP checksum and set it.
  327. xsum = checksum.Checksum(payload, xsum)
  328. u.SetChecksum(^u.CalculateChecksum(xsum))
  329. return []byte(buf)
  330. }
  331. func run(prog string, args ...string) error {
  332. cmd := exec.Command(prog, args...)
  333. cmd.Stdout = os.Stdout
  334. cmd.Stderr = os.Stderr
  335. if err := cmd.Run(); err != nil {
  336. return fmt.Errorf("error running %v: %v", cmd, err)
  337. }
  338. return nil
  339. }
// destMAC returns the MAC address currently stored in destMACAtomic, which
// handleTAPFrame updates from the source MAC of incoming ARP requests and
// which Write uses as the destination of outgoing frames.
func (t *tapDevice) destMAC() [6]byte {
	return t.destMACAtomic.Load()
}
  343. func newTAPDevice(logf logger.Logf, fd int, tapName string) (tun.Device, error) {
  344. err := unix.SetNonblock(fd, true)
  345. if err != nil {
  346. return nil, err
  347. }
  348. file := os.NewFile(uintptr(fd), "/dev/tap")
  349. d := &tapDevice{
  350. logf: logf,
  351. file: file,
  352. events: make(chan tun.Event),
  353. name: tapName,
  354. }
  355. return d, nil
  356. }
// tapDevice is the tun.Device implementation returned by newTAPDevice. It
// reads and writes raw Ethernet frames on a TAP descriptor and answers ARP and
// DHCP itself (see handleTAPFrame).
type tapDevice struct {
	file      *os.File                         // the TAP descriptor; frames are read from and written to it
	logf      func(format string, args ...any) // log sink
	events    chan tun.Event                   // returned by Events; closed by Close
	name      string                           // interface name, reported by Name and used by MTU
	closeOnce sync.Once                        // ensures Close tears down only once

	clientIPv4 syncs.AtomicValue[string] // set by SetIP; the address handed out over DHCP

	destMACAtomic syncs.AtomicValue[[6]byte] // source MAC of the last ARP request; destination MAC for Write
}
  366. var _ tstun.SetIPer = (*tapDevice)(nil)
  367. func (t *tapDevice) SetIP(ipV4, ipV6TODO netip.Addr) error {
  368. t.clientIPv4.Store(ipV4.String())
  369. return nil
  370. }
// File returns the *os.File wrapping the TAP descriptor.
func (t *tapDevice) File() *os.File {
	return t.file
}
// Name returns the interface name the device was created with. The error is
// always nil.
func (t *tapDevice) Name() (string, error) {
	return t.name, nil
}
  377. // Read reads an IP packet from the TAP device. It strips the ethernet frame header.
  378. func (t *tapDevice) Read(buffs [][]byte, sizes []int, offset int) (int, error) {
  379. n, err := t.ReadEthernet(buffs, sizes, offset)
  380. if err != nil || n == 0 {
  381. return n, err
  382. }
  383. // Strip the ethernet frame header.
  384. copy(buffs[0][offset:], buffs[0][offset+ethernetFrameSize:offset+sizes[0]])
  385. sizes[0] -= ethernetFrameSize
  386. return 1, nil
  387. }
  388. // ReadEthernet reads a raw ethernet frame from the TAP device.
  389. func (t *tapDevice) ReadEthernet(buffs [][]byte, sizes []int, offset int) (int, error) {
  390. n, err := t.file.Read(buffs[0][offset:])
  391. if err != nil {
  392. return 0, err
  393. }
  394. if t.handleTAPFrame(buffs[0][offset : offset+n]) {
  395. return 0, nil
  396. }
  397. sizes[0] = n
  398. return 1, nil
  399. }
// WriteEthernet writes a raw ethernet frame to the TAP device.
// buf must already contain the Ethernet header; it is written to the
// underlying file unchanged and the file's Write result is returned.
func (t *tapDevice) WriteEthernet(buf []byte) (int, error) {
	return t.file.Write(buf)
}
// ethBufPool holds a pool of bytes.Buffers for use in [tapDevice.Write],
// which uses one as scratch space to prepend an Ethernet header to each
// outgoing IP packet.
var ethBufPool = syncs.Pool[*bytes.Buffer]{New: func() *bytes.Buffer { return new(bytes.Buffer) }}
  406. // Write writes a raw IP packet to the TAP device. It adds the ethernet frame header.
  407. func (t *tapDevice) Write(buffs [][]byte, offset int) (int, error) {
  408. errs := make([]error, 0)
  409. wrote := 0
  410. m := t.destMAC()
  411. dstMac := net.HardwareAddr(m[:])
  412. buf := ethBufPool.Get()
  413. defer ethBufPool.Put(buf)
  414. for _, buff := range buffs {
  415. buf.Reset()
  416. buf.Grow(header.EthernetMinimumSize + len(buff) - offset)
  417. var ebuf [14]byte
  418. switch buff[offset] >> 4 {
  419. case 4:
  420. writeEthernetFrame(ebuf[:], ourMAC, dstMac, ipv4.ProtocolNumber)
  421. case 6:
  422. writeEthernetFrame(ebuf[:], ourMAC, dstMac, ipv6.ProtocolNumber)
  423. default:
  424. continue
  425. }
  426. buf.Write(ebuf[:])
  427. buf.Write(buff[offset:])
  428. _, err := t.WriteEthernet(buf.Bytes())
  429. if err != nil {
  430. errs = append(errs, err)
  431. } else {
  432. wrote++
  433. }
  434. }
  435. return wrote, errors.Join(errs...)
  436. }
  437. func (t *tapDevice) MTU() (int, error) {
  438. ifr, err := unix.NewIfreq(t.name)
  439. if err != nil {
  440. return 0, err
  441. }
  442. if err := unix.IoctlIfreq(int(t.file.Fd()), unix.SIOCGIFMTU, ifr); err != nil {
  443. return 0, err
  444. }
  445. return int(ifr.Uint32()), nil
  446. }
// Events returns the device's event channel. Nothing in this file sends on
// it; it is closed by Close.
func (t *tapDevice) Events() <-chan tun.Event {
	return t.events
}
  450. func (t *tapDevice) Close() error {
  451. var err error
  452. t.closeOnce.Do(func() {
  453. close(t.events)
  454. err = t.file.Close()
  455. })
  456. return err
  457. }
// BatchSize reports that the device handles one packet per call; Read and
// ReadEthernet only ever fill buffs[0].
func (t *tapDevice) BatchSize() int {
	return 1
}