/*
 * ZeroTier One - Network Virtualization Everywhere
 * Copyright (C) 2011-2015 ZeroTier, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * --
 *
 * ZeroTier may be used and distributed under the terms of the GPLv3, which
 * are available at: http://www.gnu.org/licenses/gpl-3.0.html
 *
 * If you would like to embed ZeroTier into a commercial application or
 * redistribute it in a modified binary form, please contact ZeroTier Networks
 * LLC.
Start here: http://www.zerotier.com/ */ #ifndef ZT_NETCONETHERNETTAP_HPP #define ZT_NETCONETHERNETTAP_HPP #include #include #include #include #include #include #include #include "../node/Constants.hpp" #include "../node/MulticastGroup.hpp" #include "../node/Mutex.hpp" #include "../node/InetAddress.hpp" #include "../osdep/Thread.hpp" #include "../osdep/Phy.hpp" #include "netif/etharp.h" #include "RPC.h" struct tcp_pcb; struct socket_st; struct listen_st; struct bind_st; struct connect_st; struct getsockname_st; struct accept_st; #define APPLICATION_POLL_FREQ 2 #define ZT_LWIP_TCP_TIMER_INTERVAL 5 #define STATUS_TMR_INTERVAL 250 // How often we check connection statuses (in ms) #define DEFAULT_BUF_SZ 1024 * 1024 * 2 #define DEFAULT_BUF_SOFTMAX DEFAULT_BUF_SZ / 2 namespace ZeroTier { class NetconEthernetTap; class LWIPStack; /* * TCP connection administered by service */ struct TcpConnection { bool listening, probation; int pid, txsz, rxsz; PhySocket *rpcSock, *sock; struct tcp_pcb *pcb; struct sockaddr_storage *addr; unsigned char txbuf[DEFAULT_BUF_SZ]; unsigned char rxbuf[DEFAULT_BUF_SZ]; }; /* * A helper for passing a reference to _phy to LWIP callbacks as a "state" */ struct Larg { NetconEthernetTap *tap; TcpConnection *conn; Larg(NetconEthernetTap *_tap, TcpConnection *conn) : tap(_tap), conn(conn) {} }; /* * Network Containers instance -- emulates an Ethernet tap device as far as OneService knows */ class NetconEthernetTap { friend class Phy; public: NetconEthernetTap( const char *homePath, const MAC &mac, unsigned int mtu, unsigned int metric, uint64_t nwid, const char *friendlyName, void (*handler)(void *,uint64_t,const MAC &,const MAC &,unsigned int,unsigned int,const void *,unsigned int), void *arg); ~NetconEthernetTap(); void setEnabled(bool en); bool enabled() const; bool addIp(const InetAddress &ip); bool removeIp(const InetAddress &ip); std::vector ips() const; void put(const MAC &from,const MAC &to,unsigned int etherType,const void *data,unsigned int 
len); std::string deviceName() const; void setFriendlyName(const char *friendlyName); void scanMulticastGroups(std::vector &added,std::vector &removed); void threadMain() throw(); LWIPStack *lwipstack; uint64_t _nwid; void (*_handler)(void *,uint64_t,const MAC &,const MAC &,unsigned int,unsigned int,const void *,unsigned int); void *_arg; private: // LWIP callbacks // NOTE: these are called from within LWIP, meaning that lwipstack->_lock is ALREADY // locked in this case! /* * Callback from LWIP for when a connection has been accepted and the PCB has been * put into an ACCEPT state. * * A socketpair is created, one end is kept and wrapped into a PhySocket object * for use in the main ZT I/O loop, and one end is sent to the client. The client * is then required to tell the service what new file descriptor it has allocated * for this connection. After the mapping is complete, the accepted socket can be * used. * * @param associated service state object * @param newly allocated PCB * @param error code * @return ERR_OK if everything is ok, -1 otherwise * * i := should be implemented in intercept lib * I := is implemented in intercept lib * X := is implemented in service * ? := required treatment Unknown * - := Not needed * * [ ] EAGAIN or EWOULDBLOCK - The socket is marked nonblocking and no connections are present * to be accepted. POSIX.1-2001 allows either error to be returned for * this case, and does not require these constants to have the same value, * so a portable application should check for both possibilities. * [I] EBADF - The descriptor is invalid. * [I] ECONNABORTED - A connection has been aborted. * [i] EFAULT - The addr argument is not in a writable part of the user address space. * [-] EINTR - The system call was interrupted by a signal that was caught before a valid connection arrived; see signal(7). * [I] EINVAL - Socket is not listening for connections, or addrlen is invalid (e.g., is negative). * [I] EINVAL - (accept4()) invalid value in flags. 
* [I] EMFILE - The per-process limit of open file descriptors has been reached. * [ ] ENFILE - The system limit on the total number of open files has been reached. * [ ] ENOBUFS, ENOMEM - Not enough free memory. This often means that the memory allocation is * limited by the socket buffer limits, not by the system memory. * [I] ENOTSOCK - The descriptor references a file, not a socket. * [I] EOPNOTSUPP - The referenced socket is not of type SOCK_STREAM. * [ ] EPROTO - Protocol error. * */ static err_t nc_accept(void *arg, struct tcp_pcb *newPCB, err_t err); /* * Callback from LWIP for when data is available to be read from the network. * * Data is in the form of a linked list of struct pbufs, it is then recombined and * send to the client over the associated unix socket. * * @param associated service state object * @param allocated PCB * @param chain of pbufs * @param error code * @return ERR_OK if everything is ok, -1 otherwise * */ static err_t nc_recved(void *arg, struct tcp_pcb *PCB, struct pbuf *p, err_t err); /* * Callback from LWIP when an internal error is associtated with the given (arg) * * Since the PCB related to this error might no longer exist, only its perviously * associated (arg) is provided to us. * * @param associated service state object * @param error code * */ static void nc_err(void *arg, err_t err); /* * Callback from LWIP to do whatever work we might need to do. * * @param associated service state object * @param PCB we're polling on * @return ERR_OK if everything is ok, -1 otherwise * */ static err_t nc_poll(void* arg, struct tcp_pcb *PCB); /* * Callback from LWIP to signal that 'len' bytes have successfully been sent. * As a result, we should put our socket back into a notify-on-readability state * since there is now room on the PCB buffer to write to. * * NOTE: This could be used to track the amount of data sent by a connection. 
* * @param associated service state object * @param relevant PCB * @param length of data sent * @return ERR_OK if everything is ok, -1 otherwise * */ static err_t nc_sent(void *arg, struct tcp_pcb *PCB, u16_t len); /* * Callback from LWIP which sends a return value to the client to signal that * a connection was established for this PCB * * @param associated service state object * @param relevant PCB * @param error code * @return ERR_OK if everything is ok, -1 otherwise * */ static err_t nc_connected(void *arg, struct tcp_pcb *PCB, err_t err); //static void nc_close(struct tcp_pcb *PCB); //static err_t nc_send(struct tcp_pcb *PCB); /* * Handles an RPC to bind an LWIP PCB to a given address and port * * @param PhySocket associated with this RPC connection * @param structure containing the data and parameters for this client's RPC * i := should be implemented in intercept lib I := is implemented in intercept lib X := is implemented in service ? := required treatment Unknown - := Not needed [ ] EACCES - The address is protected, and the user is not the superuser. [X] EADDRINUSE - The given address is already in use. [I] EBADF - sockfd is not a valid descriptor. [X] EINVAL - The socket is already bound to an address. [I] ENOTSOCK - sockfd is a descriptor for a file, not a socket. [X] ENOMEM - Insufficient kernel memory was available. - The following errors are specific to UNIX domain (AF_UNIX) sockets: [-] EACCES - Search permission is denied on a component of the path prefix. (See also path_resolution(7).) [-] EADDRNOTAVAIL - A nonexistent interface was requested or the requested address was not local. [-] EFAULT - addr points outside the user's accessible address space. [-] EINVAL - The addrlen is wrong, or the socket was not in the AF_UNIX family. [-] ELOOP - Too many symbolic links were encountered in resolving addr. [-] ENAMETOOLONG - s addr is too long. [-] ENOENT - The file does not exist. [-] ENOTDIR - A component of the path prefix is not a directory. 
[-] EROFS - The socket inode would reside on a read-only file system. */ void handleBind(PhySocket *sock, PhySocket *rpcsock, void **uptr, struct bind_st *bind_rpc); /* * Handles an RPC to put an LWIP PCB into LISTEN mode * * @param PhySocket associated with this RPC connection * @param structure containing the data and parameters for this client's RPC * i := should be implemented in intercept lib I := is implemented in intercept lib X := is implemented in service ? := required treatment Unknown - := Not needed [?] EADDRINUSE - Another socket is already listening on the same port. [IX] EBADF - The argument sockfd is not a valid descriptor. [I] ENOTSOCK - The argument sockfd is not a socket. [I] EOPNOTSUPP - The socket is not of a type that supports the listen() operation. */ void handleListen(PhySocket *sock, PhySocket *rpcsock, void **uptr, struct listen_st *listen_rpc); /* * Handles an RPC to create a socket (LWIP PCB and associated socketpair) * * A socketpair is created, one end is kept and wrapped into a PhySocket object * for use in the main ZT I/O loop, and one end is sent to the client. The client * is then required to tell the service what new file descriptor it has allocated * for this connection. After the mapping is complete, the socket can be used. * * @param PhySocket associated with this RPC connection * @param structure containing the data and parameters for this client's RPC * i := should be implemented in intercept lib I := is implemented in intercept lib X := is implemented in service ? := required treatment Unknown - := Not needed [-] EACCES - Permission to create a socket of the specified type and/or protocol is denied. [I] EAFNOSUPPORT - The implementation does not support the specified address family. [I] EINVAL - Unknown protocol, or protocol family not available. [I] EINVAL - Invalid flags in type. [I] EMFILE - Process file table overflow. [?] ENFILE - The system limit on the total number of open files has been reached. 
[X] ENOBUFS or ENOMEM - Insufficient memory is available. The socket cannot be created until sufficient resources are freed. [?] EPROTONOSUPPORT - The protocol type or the specified protocol is not supported within this domain. */ TcpConnection * handleSocket(PhySocket *sock, void **uptr, struct socket_st* socket_rpc); /* * Handles an RPC to connect to a given address and port * * @param PhySocket associated with this RPC connection * @param structure containing the data and parameters for this client's RPC --- Error handling in this method will only catch problems which are immedately apprent. Some errors will need to be caught in the nc_connected(0 callback i := should be implemented in intercept lib I := is implemented in intercept lib X := is implemented in service ? := required treatment Unknown - := Not needed [-] EACCES - For UNIX domain sockets, which are identified by pathname: Write permission is denied ... [?] EACCES, EPERM - The user tried to connect to a broadcast address without having the socket broadcast flag enabled ... [X] EADDRINUSE - Local address is already in use. [I] EAFNOSUPPORT - The passed address didn't have the correct address family in its sa_family field. [X] EAGAIN - No more free local ports or insufficient entries in the routing cache. [ ] EALREADY - The socket is nonblocking and a previous connection attempt has not yet been completed. [IX] EBADF - The file descriptor is not a valid index in the descriptor table. [ ] ECONNREFUSED - No-one listening on the remote address. [i] EFAULT - The socket structure address is outside the user's address space. [ ] EINPROGRESS - The socket is nonblocking and the connection cannot be completed immediately. [-] EINTR - The system call was interrupted by a signal that was caught. [X] EISCONN - The socket is already connected. [X] ENETUNREACH - Network is unreachable. [I] ENOTSOCK - The file descriptor is not associated with a socket. [X] ETIMEDOUT - Timeout while attempting connection. 
[X] EINVAL - Invalid argument, SVr4, generally makes sense to set this */ void handleConnect(PhySocket *sock, PhySocket *rpcsock, TcpConnection *conn, struct connect_st* connect_rpc); /* * Return the address that the socket is bound to */ void handleGetsockname(PhySocket *sock, PhySocket *rpcsock, void **uptr, struct getsockname_st *getsockname_rpc); /* * Writes data from the application's socket to the LWIP connection */ void handleWrite(TcpConnection *conn); /* * Sends a return value to the intercepted application */ int sendReturnValue(PhySocket *sock, int retval, int _errno); int sendReturnValue(int fd, int retval, int _errno); /* * Unpacks the buffer from an RPC command */ void unloadRPC(void *data, pid_t &pid, pid_t &tid, int &rpc_count, char (timestamp[RPC_TIMESTAMP_SZ]), char (magic[sizeof(uint64_t)]), char &cmd, void* &payload); // Unused -- no UDP or TCP from this thread/Phy<> void phyOnDatagram(PhySocket *sock,void **uptr,const struct sockaddr *from,void *data,unsigned long len); void phyOnTcpConnect(PhySocket *sock,void **uptr,bool success); void phyOnTcpAccept(PhySocket *sockL,PhySocket *sockN,void **uptrL,void **uptrN,const struct sockaddr *from); void phyOnTcpClose(PhySocket *sock,void **uptr); void phyOnTcpData(PhySocket *sock,void **uptr,void *data,unsigned long len); void phyOnTcpWritable(PhySocket *sock,void **uptr); /* * Signals us to close the TcpConnection associated with this PhySocket */ void phyOnUnixClose(PhySocket *sock,void **uptr); /* * Notifies us that there is data to be read from an application's socket */ void phyOnUnixData(PhySocket *sock,void **uptr,void *data,unsigned long len); /* * Notifies us that we can write to an application's socket */ void phyOnUnixWritable(PhySocket *sock,void **uptr,bool lwip_invoked); /* * Returns a pointer to a TcpConnection associated with a given PhySocket */ TcpConnection *getConnection(PhySocket *sock); /* * Closes a TcpConnection, associated LWIP PCB strcuture, * PhySocket, and underlying file 
descriptor */ void closeConnection(PhySocket *sock); ip_addr_t convert_ip(struct sockaddr_in * addr) { ip_addr_t conn_addr; struct sockaddr_in *ipv4 = addr; short a = ip4_addr1(&(ipv4->sin_addr)); short b = ip4_addr2(&(ipv4->sin_addr)); short c = ip4_addr3(&(ipv4->sin_addr)); short d = ip4_addr4(&(ipv4->sin_addr)); IP4_ADDR(&conn_addr, a,b,c,d); return conn_addr; } Phy _phy; PhySocket *_unixListenSocket; std::vector _TcpConnections; std::map > jobmap; pid_t rpcCounter; netif interface; MAC _mac; Thread _thread; std::string _homePath; std::string _dev; // path to Unix domain socket std::vector _multicastGroups; Mutex _multicastGroups_m; std::vector _ips; Mutex _ips_m, _tcpconns_m, _rx_buf_m; unsigned int _mtu; volatile bool _enabled; volatile bool _run; }; } // namespace ZeroTier #endif