/* * Copyright (c)2019 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * * Change Date: 2026-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. */ /****/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../node/Constants.hpp" #include "../node/Utils.hpp" #include "../node/Mutex.hpp" #include "OSUtils.hpp" #include "BSDEthernetTap.hpp" #define ZT_BASE32_CHARS "0123456789abcdefghijklmnopqrstuv" #define ZT_TAP_BUF_SIZE (1024 * 16) // ff:ff:ff:ff:ff:ff with no ADI static const ZeroTier::MulticastGroup _blindWildcardMulticastGroup(ZeroTier::MAC(0xff),0); namespace ZeroTier { BSDEthernetTap::BSDEthernetTap( const char *homePath, unsigned int concurrency, bool pinning, const MAC &mac, unsigned int mtu, unsigned int metric, uint64_t nwid, const char *friendlyName, void (*handler)(void *,void *,uint64_t,const MAC &,const MAC &,unsigned int,unsigned int,const void *,unsigned int), void *arg) : _handler(handler), _concurrency(concurrency), _pinning(pinning), _arg(arg), _nwid(nwid), _mtu(mtu), _metric(metric), _fd(0), _enabled(true), _lastIfAddrsUpdate(0) { static Mutex globalTapCreateLock; char devpath[64],ethaddr[64],mtustr[32],metstr[32],tmpdevname[32]; Mutex::Lock _gl(globalTapCreateLock); #ifdef __FreeBSD__ /* FreeBSD allows long interface names and interface renaming */ _dev = "zt"; _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 60) & 0x1f)]); _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 55) & 0x1f)]); _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 50) & 0x1f)]); _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 45) & 0x1f)]); _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 40) & 0x1f)]); _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 35) & 0x1f)]); _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 30) & 0x1f)]); _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 25) & 0x1f)]); _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 20) & 0x1f)]); _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 15) & 0x1f)]); _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 10) & 0x1f)]); _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 5) & 0x1f)]); _dev.push_back(ZT_BASE32_CHARS[(unsigned long)(nwid & 0x1f)]); std::vector devFiles(OSUtils::listDirectory("/dev")); for(int i=9993;i<(9993+128);++i) { OSUtils::ztsnprintf(tmpdevname,sizeof(tmpdevname),"tap%d",i); OSUtils::ztsnprintf(devpath,sizeof(devpath),"/dev/%s",tmpdevname); if (std::find(devFiles.begin(),devFiles.end(),std::string(tmpdevname)) == devFiles.end()) { long cpid = (long)vfork(); if (cpid == 0) { #ifdef ZT_TRACE fprintf(stderr, "DEBUG: ifconfig %s create" ZT_EOL_S, tmpdevname); #endif ::execl("/sbin/ifconfig","/sbin/ifconfig",tmpdevname,"create",(const char *)0); ::_exit(-1); } else if (cpid > 0) { int exitcode = -1; ::waitpid(cpid,&exitcode,0); } else throw std::runtime_error("fork() failed"); struct stat stattmp; if (!stat(devpath,&stattmp)) { cpid = (long)vfork(); if (cpid == 0) { #ifdef ZT_TRACE fprintf(stderr, "DEBUG: ifconfig %s name %s" ZT_EOL_S, tmpdevname, _dev.c_str()); #endif ::execl("/sbin/ifconfig","/sbin/ifconfig",tmpdevname,"name",_dev.c_str(),(const char *)0); ::_exit(-1); } else if (cpid > 0) { int exitcode = -1; ::waitpid(cpid,&exitcode,0); if (exitcode) throw std::runtime_error("ifconfig rename operation failed"); } else throw std::runtime_error("fork() failed"); _fd = ::open(devpath,O_RDWR); if (_fd > 0) break; else throw std::runtime_error("unable to open created tap device"); } else { throw std::runtime_error("cannot find /dev node for newly created tap device"); } } } #else /* Other BSDs like OpenBSD only have a limited number of tap devices that cannot be renamed */ for(int i=0;i<64;++i) { OSUtils::ztsnprintf(tmpdevname,sizeof(tmpdevname),"tap%d",i); OSUtils::ztsnprintf(devpath,sizeof(devpath),"/dev/%s",tmpdevname); _fd = ::open(devpath,O_RDWR); if (_fd > 0) { _dev = tmpdevname; break; } } #endif if (_fd <= 0) throw std::runtime_error("unable to open TAP device or no more devices available"); if (fcntl(_fd,F_SETFL,fcntl(_fd,F_GETFL) & ~O_NONBLOCK) == -1) { ::close(_fd); throw std::runtime_error("unable to set flags on file descriptor for TAP device"); } // Configure MAC address and MTU, bring interface up OSUtils::ztsnprintf(ethaddr,sizeof(ethaddr),"%.2x:%.2x:%.2x:%.2x:%.2x:%.2x",(int)mac[0],(int)mac[1],(int)mac[2],(int)mac[3],(int)mac[4],(int)mac[5]); OSUtils::ztsnprintf(mtustr,sizeof(mtustr),"%u",_mtu); OSUtils::ztsnprintf(metstr,sizeof(metstr),"%u",_metric); long cpid = (long)vfork(); if (cpid == 0) { #ifdef ZT_TRACE fprintf(stderr, "DEBUG: ifconfig %s lladdr %s mtu %s metric %s up" ZT_EOL_S, _dev.c_str(), ethaddr, mtustr, metstr); #endif ::execl("/sbin/ifconfig","/sbin/ifconfig",_dev.c_str(),"lladdr",ethaddr,"mtu",mtustr,"metric",metstr,"up",(const char *)0); ::_exit(-1); } else if (cpid > 0) { int exitcode = -1; ::waitpid(cpid,&exitcode,0); if (exitcode) { ::close(_fd); throw std::runtime_error("ifconfig failure setting link-layer address and activating tap interface"); } } // Set close-on-exec so that devices cannot persist if we fork/exec for update fcntl(_fd,F_SETFD,fcntl(_fd,F_GETFD) | FD_CLOEXEC); ::pipe(_shutdownSignalPipe); _thread = Thread::start(this); } BSDEthernetTap::~BSDEthernetTap() { ::write(_shutdownSignalPipe[1],"\0",1); // causes thread to exit ::close(_fd); ::close(_shutdownSignalPipe[0]); ::close(_shutdownSignalPipe[1]); long cpid = (long)vfork(); if (cpid == 0) { #ifdef ZT_TRACE fprintf(stderr, "DEBUG: ifconfig %s destroy" ZT_EOL_S, _dev.c_str()); #endif ::execl("/sbin/ifconfig","/sbin/ifconfig",_dev.c_str(),"destroy",(const char *)0); ::_exit(-1); } else if (cpid > 0) { int exitcode = -1; ::waitpid(cpid,&exitcode,0); } Thread::join(_thread); for (std::thread &t : _rxThreads) { t.join(); } } void BSDEthernetTap::setEnabled(bool en) { _enabled = en; } bool BSDEthernetTap::enabled() const { return _enabled; } static bool ___removeIp(const std::string &_dev,const InetAddress &ip) { long cpid = (long)vfork(); if (cpid == 0) { char ipbuf[64]; #ifdef ZT_TRACE fprintf(stderr, "DEBUG: ifconfig %s inet %s -alias" ZT_EOL_S, _dev.c_str(), ip.toIpString(ipbuf)); #endif execl("/sbin/ifconfig","/sbin/ifconfig",_dev.c_str(),"inet",ip.toIpString(ipbuf),"-alias",(const char *)0); _exit(-1); } else if (cpid > 0) { int exitcode = -1; waitpid(cpid,&exitcode,0); return (exitcode == 0); } return false; // never reached, make compiler shut up about return value } bool BSDEthernetTap::addIp(const InetAddress &ip) { if (!ip) return false; std::vector allIps(ips()); if (std::find(allIps.begin(),allIps.end(),ip) != allIps.end()) return true; // IP/netmask already assigned // Remove and reconfigure if address is the same but netmask is different for(std::vector::iterator i(allIps.begin());i!=allIps.end();++i) { if ((i->ipsEqual(ip))&&(i->netmaskBits() != ip.netmaskBits())) { if (___removeIp(_dev,*i)) break; } } long cpid = (long)vfork(); if (cpid == 0) { char tmp[128]; #ifdef ZT_TRACE fprintf(stderr, "DEBUG: ifconfig %s %s %s alias" ZT_EOL_S, _dev.c_str(), ip.isV4() ? "inet" : "inet6", ip.toString(tmp)); #endif ::execl("/sbin/ifconfig","/sbin/ifconfig",_dev.c_str(),ip.isV4() ? "inet" : "inet6",ip.toString(tmp),"alias",(const char *)0); ::_exit(-1); } else if (cpid > 0) { int exitcode = -1; ::waitpid(cpid,&exitcode,0); return (exitcode == 0); } return false; } bool BSDEthernetTap::removeIp(const InetAddress &ip) { if (!ip) return false; std::vector allIps(ips()); if (std::find(allIps.begin(),allIps.end(),ip) != allIps.end()) { if (___removeIp(_dev,ip)) return true; } return false; } std::vector BSDEthernetTap::ips() const { uint64_t now = OSUtils::now(); if ((now - _lastIfAddrsUpdate) <= GETIFADDRS_CACHE_TIME) { return _ifaddrs; } _lastIfAddrsUpdate = now; struct ifaddrs *ifa = (struct ifaddrs *)0; if (getifaddrs(&ifa)) return std::vector(); std::vector r; struct ifaddrs *p = ifa; while (p) { if ((!strcmp(p->ifa_name,_dev.c_str()))&&(p->ifa_addr)&&(p->ifa_netmask)&&(p->ifa_addr->sa_family == p->ifa_netmask->sa_family)) { switch(p->ifa_addr->sa_family) { case AF_INET: { struct sockaddr_in *sin = (struct sockaddr_in *)p->ifa_addr; struct sockaddr_in *nm = (struct sockaddr_in *)p->ifa_netmask; r.push_back(InetAddress(&(sin->sin_addr.s_addr),4,Utils::countBits((uint32_t)nm->sin_addr.s_addr))); } break; case AF_INET6: { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)p->ifa_addr; struct sockaddr_in6 *nm = (struct sockaddr_in6 *)p->ifa_netmask; uint32_t b[4]; memcpy(b,nm->sin6_addr.s6_addr,sizeof(b)); r.push_back(InetAddress(sin->sin6_addr.s6_addr,16,Utils::countBits(b[0]) + Utils::countBits(b[1]) + Utils::countBits(b[2]) + Utils::countBits(b[3]))); } break; } } p = p->ifa_next; } if (ifa) freeifaddrs(ifa); std::sort(r.begin(),r.end()); std::unique(r.begin(),r.end()); _ifaddrs = r; return r; } void BSDEthernetTap::put(const MAC &from,const MAC &to,unsigned int etherType,const void *data,unsigned int len) { char putBuf[ZT_MAX_MTU + 64]; if ((_fd > 0)&&(len <= _mtu)&&(_enabled)) { to.copyTo(putBuf,6); from.copyTo(putBuf + 6,6); *((uint16_t *)(putBuf + 12)) = htons((uint16_t)etherType); memcpy(putBuf + 14,data,len); len += 14; ::write(_fd,putBuf,len); } } std::string BSDEthernetTap::deviceName() const { return _dev; } void BSDEthernetTap::setFriendlyName(const char *friendlyName) { } void BSDEthernetTap::scanMulticastGroups(std::vector &added,std::vector &removed) { std::vector newGroups; #ifndef __OpenBSD__ struct ifmaddrs *ifmap = (struct ifmaddrs *)0; if (!getifmaddrs(&ifmap)) { struct ifmaddrs *p = ifmap; while (p) { if (p->ifma_addr->sa_family == AF_LINK) { struct sockaddr_dl *in = (struct sockaddr_dl *)p->ifma_name; struct sockaddr_dl *la = (struct sockaddr_dl *)p->ifma_addr; if ((la->sdl_alen == 6)&&(in->sdl_nlen <= _dev.length())&&(!memcmp(_dev.data(),in->sdl_data,in->sdl_nlen))) newGroups.push_back(MulticastGroup(MAC(la->sdl_data + la->sdl_nlen,6),0)); } p = p->ifma_next; } freeifmaddrs(ifmap); } #endif // __OpenBSD__ std::vector allIps(ips()); for(std::vector::iterator ip(allIps.begin());ip!=allIps.end();++ip) newGroups.push_back(MulticastGroup::deriveMulticastGroupForAddressResolution(*ip)); std::sort(newGroups.begin(),newGroups.end()); std::unique(newGroups.begin(),newGroups.end()); for(std::vector::iterator m(newGroups.begin());m!=newGroups.end();++m) { if (!std::binary_search(_multicastGroups.begin(),_multicastGroups.end(),*m)) added.push_back(*m); } for(std::vector::iterator m(_multicastGroups.begin());m!=_multicastGroups.end();++m) { if (!std::binary_search(newGroups.begin(),newGroups.end(),*m)) removed.push_back(*m); } _multicastGroups.swap(newGroups); } void BSDEthernetTap::setMtu(unsigned int mtu) { if (mtu != _mtu) { _mtu = mtu; long cpid = (long)vfork(); if (cpid == 0) { char tmp[64]; OSUtils::ztsnprintf(tmp,sizeof(tmp),"%u",mtu); #ifdef ZT_TRACE fprintf(stderr, "DEBUG: ifconfig %s mtu %s" ZT_EOL_S, _dev.c_str(), tmp); #endif execl("/sbin/ifconfig","/sbin/ifconfig",_dev.c_str(),"mtu",tmp,(const char *)0); _exit(-1); } else if (cpid > 0) { int exitcode = -1; waitpid(cpid,&exitcode,0); } } } void BSDEthernetTap::threadMain() throw() { // Wait for a moment after startup -- wait for Network to finish // constructing itself. Thread::sleep(500); bool pinning = _pinning; for (unsigned int i = 0; i < _concurrency; ++i) { _rxThreads.push_back(std::thread([this, i, pinning] { if (pinning) { int pinCore = i % _concurrency; fprintf(stderr, "Pinning thread %d to core %d\n", i, pinCore); pthread_t self = pthread_self(); cpu_set_t cpuset; CPU_ZERO(&cpuset); CPU_SET(pinCore, &cpuset); //int rc = sched_setaffinity(self, sizeof(cpu_set_t), &cpuset); int rc = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset); if (rc != 0) { fprintf(stderr, "Failed to pin thread %d to core %d: %s\n", i, pinCore, strerror(errno)); exit(1); } } uint8_t b[ZT_TAP_BUF_SIZE]; MAC to, from; fd_set readfds, nullfds; int n, nfds, r; FD_ZERO(&readfds); FD_ZERO(&nullfds); nfds = (int)std::max(_shutdownSignalPipe[0],_fd) + 1; r = 0; for(;;) { FD_SET(_shutdownSignalPipe[0],&readfds); FD_SET(_fd,&readfds); select(nfds,&readfds,&nullfds,&nullfds,(struct timeval *)0); if (FD_ISSET(_shutdownSignalPipe[0],&readfds)) // writes to shutdown pipe terminate thread break; if (FD_ISSET(_fd,&readfds)) { n = (int)::read(_fd,b + r,sizeof(b) - r); if (n < 0) { if ((errno != EINTR)&&(errno != ETIMEDOUT)) break; } else { // Some tap drivers like to send the ethernet frame and the // payload in two chunks, so handle that by accumulating // data until we have at least a frame. r += n; if (r > 14) { if (r > ((int)_mtu + 14)) // sanity check for weird TAP behavior on some platforms r = _mtu + 14; if (_enabled) { to.setTo(b,6); from.setTo(b + 6,6); unsigned int etherType = ntohs(((const uint16_t *)b)[6]); _handler(_arg,(void *)0,_nwid,from,to,etherType,0,(const void *)(b + 14),r - 14); } r = 0; } } } } })); } } } // namespace ZeroTier