NetconEthernetTap.hpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469
  1. /*
  2. * ZeroTier One - Network Virtualization Everywhere
  3. * Copyright (C) 2011-2015 ZeroTier, Inc.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. * --
  19. *
  20. * ZeroTier may be used and distributed under the terms of the GPLv3, which
  21. * are available at: http://www.gnu.org/licenses/gpl-3.0.html
  22. *
  23. * If you would like to embed ZeroTier into a commercial application or
  24. * redistribute it in a modified binary form, please contact ZeroTier Networks
  25. * LLC. Start here: http://www.zerotier.com/
  26. */
  27. #ifndef ZT_NETCONETHERNETTAP_HPP
  28. #define ZT_NETCONETHERNETTAP_HPP
  #include <stdio.h>
  #include <stdlib.h>
  #include <stdint.h>

  #include <map>
  #include <stdexcept>
  #include <string>
  #include <utility>
  #include <vector>

  #include "../node/Constants.hpp"
  #include "../node/MulticastGroup.hpp"
  #include "../node/Mutex.hpp"
  #include "../node/InetAddress.hpp"
  #include "../osdep/Thread.hpp"
  #include "../osdep/Phy.hpp"
  #include "netif/etharp.h"
  #include "RPC.h"
  44. struct tcp_pcb;
  45. struct socket_st;
  46. struct listen_st;
  47. struct bind_st;
  48. struct connect_st;
  49. struct getsockname_st;
  50. struct accept_st;
  // Tunables for the Network Containers tap. Units are only stated where the
  // original source states them.
  #define APPLICATION_POLL_FREQ 50
  #define ZT_LWIP_TCP_TIMER_INTERVAL 5
  #define STATUS_TMR_INTERVAL 1000 // How often we check connection statuses (in ms)
  // Size in bytes (2 MiB) of each TcpConnection::txbuf / rxbuf array.
  // Parenthesized so the macro expands as a single value: without the
  // parentheses, expressions such as `x / DEFAULT_BUF_SZ` or `x % DEFAULT_BUF_SZ`
  // would bind to the leading 1024 only and silently compute the wrong result.
  #define DEFAULT_BUF_SZ (1024 * 1024 * 2)
  55. namespace ZeroTier {
  56. class NetconEthernetTap;
  57. class LWIPStack;
  58. /*
  59. * TCP connection administered by service
  60. */
  61. struct TcpConnection
  62. {
  63. bool listening;
  64. int pid, txsz, rxsz;
  65. PhySocket *rpcSock, *sock;
  66. struct tcp_pcb *pcb;
  67. struct sockaddr_storage *addr;
  68. unsigned char txbuf[DEFAULT_BUF_SZ];
  69. unsigned char rxbuf[DEFAULT_BUF_SZ];
  70. };
  71. /*
  72. * A helper for handing the tap and a TcpConnection to LWIP callbacks as their "state" argument
  73. */
  74. struct Larg
  75. {
  76. NetconEthernetTap *tap;
  77. TcpConnection *conn;
  78. Larg(NetconEthernetTap *_tap, TcpConnection *conn) : tap(_tap), conn(conn) {}
  79. };
  80. /*
  81. * Network Containers instance -- emulates an Ethernet tap device as far as OneService knows
  82. */
  83. class NetconEthernetTap
  84. {
  85. friend class Phy<NetconEthernetTap *>;
  86. public:
  87. NetconEthernetTap(
  88. const char *homePath,
  89. const MAC &mac,
  90. unsigned int mtu,
  91. unsigned int metric,
  92. uint64_t nwid,
  93. const char *friendlyName,
  94. void (*handler)(void *,uint64_t,const MAC &,const MAC &,unsigned int,unsigned int,const void *,unsigned int),
  95. void *arg);
  96. ~NetconEthernetTap();
  97. void setEnabled(bool en);
  98. bool enabled() const;
  99. bool addIp(const InetAddress &ip);
  100. bool removeIp(const InetAddress &ip);
  101. std::vector<InetAddress> ips() const;
  102. void put(const MAC &from,const MAC &to,unsigned int etherType,const void *data,unsigned int len);
  103. std::string deviceName() const;
  104. void setFriendlyName(const char *friendlyName);
  105. void scanMulticastGroups(std::vector<MulticastGroup> &added,std::vector<MulticastGroup> &removed);
  106. void threadMain()
  107. throw();
  108. LWIPStack *lwipstack;
  109. uint64_t _nwid;
  110. void (*_handler)(void *,uint64_t,const MAC &,const MAC &,unsigned int,unsigned int,const void *,unsigned int);
  111. void *_arg;
  112. private:
  113. // LWIP callbacks
  114. // NOTE: these are called from within LWIP, meaning that lwipstack->_lock is ALREADY
  115. // locked in this case!
  116. /*
  117. * Callback from LWIP for when a connection has been accepted and the PCB has been
  118. * put into an ACCEPT state.
  119. *
  120. * A socketpair is created, one end is kept and wrapped into a PhySocket object
  121. * for use in the main ZT I/O loop, and one end is sent to the client. The client
  122. * is then required to tell the service what new file descriptor it has allocated
  123. * for this connection. After the mapping is complete, the accepted socket can be
  124. * used.
  125. *
  126. * @param associated service state object
  127. * @param newly allocated PCB
  128. * @param error code
  129. * @return ERR_OK if everything is ok, -1 otherwise
  130. *
  131. * i := should be implemented in intercept lib
  132. * I := is implemented in intercept lib
  133. * X := is implemented in service
  134. * ? := required treatment Unknown
  135. * - := Not needed
  136. *
  137. * [ ] EAGAIN or EWOULDBLOCK - The socket is marked nonblocking and no connections are present
  138. * to be accepted. POSIX.1-2001 allows either error to be returned for
  139. * this case, and does not require these constants to have the same value,
  140. * so a portable application should check for both possibilities.
  141. * [I] EBADF - The descriptor is invalid.
  142. * [I] ECONNABORTED - A connection has been aborted.
  143. * [i] EFAULT - The addr argument is not in a writable part of the user address space.
  144. * [-] EINTR - The system call was interrupted by a signal that was caught before a valid connection arrived; see signal(7).
  145. * [I] EINVAL - Socket is not listening for connections, or addrlen is invalid (e.g., is negative).
  146. * [I] EINVAL - (accept4()) invalid value in flags.
  147. * [I] EMFILE - The per-process limit of open file descriptors has been reached.
  148. * [ ] ENFILE - The system limit on the total number of open files has been reached.
  149. * [ ] ENOBUFS, ENOMEM - Not enough free memory. This often means that the memory allocation is
  150. * limited by the socket buffer limits, not by the system memory.
  151. * [I] ENOTSOCK - The descriptor references a file, not a socket.
  152. * [I] EOPNOTSUPP - The referenced socket is not of type SOCK_STREAM.
  153. * [ ] EPROTO - Protocol error.
  154. *
  155. */
  156. static err_t nc_accept(void *arg, struct tcp_pcb *newPCB, err_t err);
  157. /*
  158. * Callback from LWIP for when data is available to be read from the network.
  159. *
  160. * Data is in the form of a linked list of struct pbufs, it is then recombined and
  161. * send to the client over the associated unix socket.
  162. *
  163. * @param associated service state object
  164. * @param allocated PCB
  165. * @param chain of pbufs
  166. * @param error code
  167. * @return ERR_OK if everything is ok, -1 otherwise
  168. *
  169. */
  170. static err_t nc_recved(void *arg, struct tcp_pcb *PCB, struct pbuf *p, err_t err);
  171. /*
  172. * Callback from LWIP when an internal error is associtated with the given (arg)
  173. *
  174. * Since the PCB related to this error might no longer exist, only its perviously
  175. * associated (arg) is provided to us.
  176. *
  177. * @param associated service state object
  178. * @param error code
  179. *
  180. */
  181. static void nc_err(void *arg, err_t err);
  182. /*
  183. * Callback from LWIP to do whatever work we might need to do.
  184. *
  185. * @param associated service state object
  186. * @param PCB we're polling on
  187. * @return ERR_OK if everything is ok, -1 otherwise
  188. *
  189. */
  190. static err_t nc_poll(void* arg, struct tcp_pcb *PCB);
  191. /*
  192. * Callback from LWIP to signal that 'len' bytes have successfully been sent.
  193. * As a result, we should put our socket back into a notify-on-readability state
  194. * since there is now room on the PCB buffer to write to.
  195. *
  196. * NOTE: This could be used to track the amount of data sent by a connection.
  197. *
  198. * @param associated service state object
  199. * @param relevant PCB
  200. * @param length of data sent
  201. * @return ERR_OK if everything is ok, -1 otherwise
  202. *
  203. */
  204. static err_t nc_sent(void *arg, struct tcp_pcb *PCB, u16_t len);
  205. /*
  206. * Callback from LWIP which sends a return value to the client to signal that
  207. * a connection was established for this PCB
  208. *
  209. * @param associated service state object
  210. * @param relevant PCB
  211. * @param error code
  212. * @return ERR_OK if everything is ok, -1 otherwise
  213. *
  214. */
  215. static err_t nc_connected(void *arg, struct tcp_pcb *PCB, err_t err);
  216. //static void nc_close(struct tcp_pcb *PCB);
  217. //static err_t nc_send(struct tcp_pcb *PCB);
  218. /*
  219. * Handles an RPC to bind an LWIP PCB to a given address and port
  220. *
  221. * @param PhySocket associated with this RPC connection
  222. * @param structure containing the data and parameters for this client's RPC
  223. *
  224. i := should be implemented in intercept lib
  225. I := is implemented in intercept lib
  226. X := is implemented in service
  227. ? := required treatment Unknown
  228. - := Not needed
  229. [ ] EACCES - The address is protected, and the user is not the superuser.
  230. [X] EADDRINUSE - The given address is already in use.
  231. [I] EBADF - sockfd is not a valid descriptor.
  232. [X] EINVAL - The socket is already bound to an address.
  233. [I] ENOTSOCK - sockfd is a descriptor for a file, not a socket.
  234. [X] ENOMEM - Insufficient kernel memory was available.
  235. - The following errors are specific to UNIX domain (AF_UNIX) sockets:
  236. [-] EACCES - Search permission is denied on a component of the path prefix. (See also path_resolution(7).)
  237. [-] EADDRNOTAVAIL - A nonexistent interface was requested or the requested address was not local.
  238. [-] EFAULT - addr points outside the user's accessible address space.
  239. [-] EINVAL - The addrlen is wrong, or the socket was not in the AF_UNIX family.
  240. [-] ELOOP - Too many symbolic links were encountered in resolving addr.
  241. [-] ENAMETOOLONG - s addr is too long.
  242. [-] ENOENT - The file does not exist.
  243. [-] ENOTDIR - A component of the path prefix is not a directory.
  244. [-] EROFS - The socket inode would reside on a read-only file system.
  245. */
  246. void handleBind(PhySocket *sock, PhySocket *rpcsock, void **uptr, struct bind_st *bind_rpc);
  247. /*
  248. * Handles an RPC to put an LWIP PCB into LISTEN mode
  249. *
  250. * @param PhySocket associated with this RPC connection
  251. * @param structure containing the data and parameters for this client's RPC
  252. *
  253. i := should be implemented in intercept lib
  254. I := is implemented in intercept lib
  255. X := is implemented in service
  256. ? := required treatment Unknown
  257. - := Not needed
  258. [?] EADDRINUSE - Another socket is already listening on the same port.
  259. [IX] EBADF - The argument sockfd is not a valid descriptor.
  260. [I] ENOTSOCK - The argument sockfd is not a socket.
  261. [I] EOPNOTSUPP - The socket is not of a type that supports the listen() operation.
  262. */
  263. void handleListen(PhySocket *sock, PhySocket *rpcsock, void **uptr, struct listen_st *listen_rpc);
  264. /*
  265. * Handles an RPC to create a socket (LWIP PCB and associated socketpair)
  266. *
  267. * A socketpair is created, one end is kept and wrapped into a PhySocket object
  268. * for use in the main ZT I/O loop, and one end is sent to the client. The client
  269. * is then required to tell the service what new file descriptor it has allocated
  270. * for this connection. After the mapping is complete, the socket can be used.
  271. *
  272. * @param PhySocket associated with this RPC connection
  273. * @param structure containing the data and parameters for this client's RPC
  274. *
  275. i := should be implemented in intercept lib
  276. I := is implemented in intercept lib
  277. X := is implemented in service
  278. ? := required treatment Unknown
  279. - := Not needed
  280. [-] EACCES - Permission to create a socket of the specified type and/or protocol is denied.
  281. [I] EAFNOSUPPORT - The implementation does not support the specified address family.
  282. [I] EINVAL - Unknown protocol, or protocol family not available.
  283. [I] EINVAL - Invalid flags in type.
  284. [I] EMFILE - Process file table overflow.
  285. [?] ENFILE - The system limit on the total number of open files has been reached.
  286. [X] ENOBUFS or ENOMEM - Insufficient memory is available. The socket cannot be created until sufficient resources are freed.
  287. [?] EPROTONOSUPPORT - The protocol type or the specified protocol is not supported within this domain.
  288. */
  289. TcpConnection * handleSocket(PhySocket *sock, void **uptr, struct socket_st* socket_rpc);
  290. /*
  291. * Handles an RPC to connect to a given address and port
  292. *
  293. * @param PhySocket associated with this RPC connection
  294. * @param structure containing the data and parameters for this client's RPC
  295. --- Error handling in this method will only catch problems which are immedately
  296. apprent. Some errors will need to be caught in the nc_connected(0 callback
  297. i := should be implemented in intercept lib
  298. I := is implemented in intercept lib
  299. X := is implemented in service
  300. ? := required treatment Unknown
  301. - := Not needed
  302. [-] EACCES - For UNIX domain sockets, which are identified by pathname: Write permission is denied ...
  303. [?] EACCES, EPERM - The user tried to connect to a broadcast address without having the socket broadcast flag enabled ...
  304. [X] EADDRINUSE - Local address is already in use.
  305. [I] EAFNOSUPPORT - The passed address didn't have the correct address family in its sa_family field.
  306. [X] EAGAIN - No more free local ports or insufficient entries in the routing cache.
  307. [ ] EALREADY - The socket is nonblocking and a previous connection attempt has not yet been completed.
  308. [IX] EBADF - The file descriptor is not a valid index in the descriptor table.
  309. [ ] ECONNREFUSED - No-one listening on the remote address.
  310. [i] EFAULT - The socket structure address is outside the user's address space.
  311. [ ] EINPROGRESS - The socket is nonblocking and the connection cannot be completed immediately.
  312. [-] EINTR - The system call was interrupted by a signal that was caught.
  313. [X] EISCONN - The socket is already connected.
  314. [X] ENETUNREACH - Network is unreachable.
  315. [I] ENOTSOCK - The file descriptor is not associated with a socket.
  316. [X] ETIMEDOUT - Timeout while attempting connection.
  317. [X] EINVAL - Invalid argument, SVr4, generally makes sense to set this
  318. */
  319. void handleConnect(PhySocket *sock, PhySocket *rpcsock, TcpConnection *conn, struct connect_st* connect_rpc);
  320. /*
  321. * Return the address that the socket is bound to
  322. */
  323. void handleGetsockname(PhySocket *sock, PhySocket *rpcsock, void **uptr, struct getsockname_st *getsockname_rpc);
  324. /*
  325. * Writes data from the application's socket to the LWIP connection
  326. */
  327. void handleWrite(TcpConnection *conn);
  328. /*
  329. * Sends a return value to the intercepted application
  330. */
  331. int sendReturnValue(PhySocket *sock, int retval, int _errno);
  332. int sendReturnValue(int fd, int retval, int _errno);
  333. /*
  334. * Unpacks the buffer from an RPC command
  335. */
  336. void unloadRPC(void *data, pid_t &pid, pid_t &tid,
  337. int &rpc_count, char (timestamp[RPC_TIMESTAMP_SZ]), char (magic[sizeof(uint64_t)]), char &cmd, void* &payload);
  338. // Unused -- no UDP or TCP from this thread/Phy<>
  339. void phyOnDatagram(PhySocket *sock,void **uptr,const struct sockaddr *from,void *data,unsigned long len);
  340. void phyOnTcpConnect(PhySocket *sock,void **uptr,bool success);
  341. void phyOnTcpAccept(PhySocket *sockL,PhySocket *sockN,void **uptrL,void **uptrN,const struct sockaddr *from);
  342. void phyOnTcpClose(PhySocket *sock,void **uptr);
  343. void phyOnTcpData(PhySocket *sock,void **uptr,void *data,unsigned long len);
  344. void phyOnTcpWritable(PhySocket *sock,void **uptr);
  345. /*
  346. * Signals us to close the TcpConnection associated with this PhySocket
  347. */
  348. void phyOnUnixClose(PhySocket *sock,void **uptr);
  349. /*
  350. * Notifies us that there is data to be read from an application's socket
  351. */
  352. void phyOnUnixData(PhySocket *sock,void **uptr,void *data,unsigned long len);
  353. /*
  354. * Notifies us that we can write to an application's socket
  355. */
  356. void phyOnUnixWritable(PhySocket *sock,void **uptr);
  357. /*
  358. * Returns a pointer to a TcpConnection associated with a given PhySocket
  359. */
  360. TcpConnection *getConnection(PhySocket *sock);
  361. /*
  362. * Safely adds a new TcpConnection to _TcpConnections
  363. */
  364. TcpConnection *addConnection(TcpConnection *conn);
  365. /*
  366. * Safely removes a TcpConnection from _TcpConnections
  367. */
  368. void removeConnection(TcpConnection *conn);
  369. /*
  370. * Closes a TcpConnection, associated LWIP PCB strcuture,
  371. * PhySocket, and underlying file descriptor
  372. */
  373. void closeConnection(PhySocket *sock);
  374. ip_addr_t convert_ip(struct sockaddr_in * addr)
  375. {
  376. ip_addr_t conn_addr;
  377. struct sockaddr_in *ipv4 = addr;
  378. short a = ip4_addr1(&(ipv4->sin_addr));
  379. short b = ip4_addr2(&(ipv4->sin_addr));
  380. short c = ip4_addr3(&(ipv4->sin_addr));
  381. short d = ip4_addr4(&(ipv4->sin_addr));
  382. IP4_ADDR(&conn_addr, a,b,c,d);
  383. return conn_addr;
  384. }
  385. Phy<NetconEthernetTap *> _phy;
  386. PhySocket *_unixListenSocket;
  387. std::vector<TcpConnection*> _TcpConnections;
  388. std::map<uint64_t, std::pair<PhySocket*, void*> > jobmap;
  389. pid_t rpcCounter;
  390. netif interface;
  391. MAC _mac;
  392. Thread _thread;
  393. std::string _homePath;
  394. std::string _dev; // path to Unix domain socket
  395. std::vector<MulticastGroup> _multicastGroups;
  396. Mutex _multicastGroups_m;
  397. std::vector<InetAddress> _ips;
  398. Mutex _ips_m, _tcpconns_m, _rx_buf_m;
  399. unsigned int _mtu;
  400. volatile bool _enabled;
  401. volatile bool _run;
  402. };
  403. } // namespace ZeroTier
  404. #endif