Add multi-core concurrent packet processing

This commit is contained in:
Joseph Henry 2024-02-23 09:57:39 -08:00
parent ac6d532651
commit 683d332abc
No known key found for this signature in database
GPG Key ID: C45B33FF5EBC9344
12 changed files with 400 additions and 190 deletions

View File

@ -39,7 +39,9 @@
#include <net/if_dl.h> #include <net/if_dl.h>
#include <net/if_media.h> #include <net/if_media.h>
#include <net/route.h> #include <net/route.h>
#include <pthread_np.h>
#include <sched.h>
#include <string> #include <string>
#include <map> #include <map>
#include <set> #include <set>
@ -53,6 +55,7 @@
#include "BSDEthernetTap.hpp" #include "BSDEthernetTap.hpp"
#define ZT_BASE32_CHARS "0123456789abcdefghijklmnopqrstuv" #define ZT_BASE32_CHARS "0123456789abcdefghijklmnopqrstuv"
#define ZT_TAP_BUF_SIZE (1024 * 16)
// ff:ff:ff:ff:ff:ff with no ADI // ff:ff:ff:ff:ff:ff with no ADI
static const ZeroTier::MulticastGroup _blindWildcardMulticastGroup(ZeroTier::MAC(0xff),0); static const ZeroTier::MulticastGroup _blindWildcardMulticastGroup(ZeroTier::MAC(0xff),0);
@ -61,6 +64,7 @@ namespace ZeroTier {
BSDEthernetTap::BSDEthernetTap( BSDEthernetTap::BSDEthernetTap(
const char *homePath, const char *homePath,
unsigned int concurrency,
const MAC &mac, const MAC &mac,
unsigned int mtu, unsigned int mtu,
unsigned int metric, unsigned int metric,
@ -69,6 +73,7 @@ BSDEthernetTap::BSDEthernetTap(
void (*handler)(void *,void *,uint64_t,const MAC &,const MAC &,unsigned int,unsigned int,const void *,unsigned int), void (*handler)(void *,void *,uint64_t,const MAC &,const MAC &,unsigned int,unsigned int,const void *,unsigned int),
void *arg) : void *arg) :
_handler(handler), _handler(handler),
_concurrency(concurrency),
_arg(arg), _arg(arg),
_nwid(nwid), _nwid(nwid),
_mtu(mtu), _mtu(mtu),
@ -195,11 +200,9 @@ BSDEthernetTap::BSDEthernetTap(
BSDEthernetTap::~BSDEthernetTap() BSDEthernetTap::~BSDEthernetTap()
{ {
::write(_shutdownSignalPipe[1],"\0",1); // causes thread to exit ::write(_shutdownSignalPipe[1],"\0",1); // causes thread to exit
Thread::join(_thread);
::close(_fd); ::close(_fd);
::close(_shutdownSignalPipe[0]); ::close(_shutdownSignalPipe[0]);
::close(_shutdownSignalPipe[1]); ::close(_shutdownSignalPipe[1]);
long cpid = (long)vfork(); long cpid = (long)vfork();
if (cpid == 0) { if (cpid == 0) {
#ifdef ZT_TRACE #ifdef ZT_TRACE
@ -211,6 +214,10 @@ BSDEthernetTap::~BSDEthernetTap()
int exitcode = -1; int exitcode = -1;
::waitpid(cpid,&exitcode,0); ::waitpid(cpid,&exitcode,0);
} }
Thread::join(_thread);
for (std::thread &t : _rxThreads) {
t.join();
}
} }
void BSDEthernetTap::setEnabled(bool en) void BSDEthernetTap::setEnabled(bool en)
@ -418,53 +425,84 @@ void BSDEthernetTap::setMtu(unsigned int mtu)
void BSDEthernetTap::threadMain() void BSDEthernetTap::threadMain()
throw() throw()
{ {
fd_set readfds,nullfds; bool _enablePinning = false;
MAC to,from; char* envvar = std::getenv("ZT_CPU_PINNING");
int n,nfds,r; if (envvar) {
char getBuf[ZT_MAX_MTU + 64]; int tmp = atoi(envvar);
if (tmp > 0) {
_enablePinning = true;
}
}
// Wait for a moment after startup -- wait for Network to finish // Wait for a moment after startup -- wait for Network to finish
// constructing itself. // constructing itself.
Thread::sleep(500); Thread::sleep(500);
FD_ZERO(&readfds); for (unsigned int i = 0; i < _concurrency; ++i) {
FD_ZERO(&nullfds); _rxThreads.push_back(std::thread([this, i, _enablePinning] {
nfds = (int)std::max(_shutdownSignalPipe[0],_fd) + 1;
r = 0; if (_enablePinning) {
for(;;) { int pinCore = i % _concurrency;
FD_SET(_shutdownSignalPipe[0],&readfds); fprintf(stderr, "pinning thread %d to core %d\n", i, pinCore);
FD_SET(_fd,&readfds); pthread_t self = pthread_self();
select(nfds,&readfds,&nullfds,&nullfds,(struct timeval *)0); cpu_set_t cpuset;
CPU_ZERO(&cpuset);
if (FD_ISSET(_shutdownSignalPipe[0],&readfds)) // writes to shutdown pipe terminate thread CPU_SET(pinCore, &cpuset);
break; //int rc = sched_setaffinity(self, sizeof(cpu_set_t), &cpuset);
int rc = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
if (FD_ISSET(_fd,&readfds)) { if (rc != 0)
n = (int)::read(_fd,getBuf + r,sizeof(getBuf) - r); {
if (n < 0) { fprintf(stderr, "failed to pin thread %d to core %d: %s\n", i, pinCore, strerror(errno));
if ((errno != EINTR)&&(errno != ETIMEDOUT)) exit(1);
break;
} else {
// Some tap drivers like to send the ethernet frame and the
// payload in two chunks, so handle that by accumulating
// data until we have at least a frame.
r += n;
if (r > 14) {
if (r > ((int)_mtu + 14)) // sanity check for weird TAP behavior on some platforms
r = _mtu + 14;
if (_enabled) {
to.setTo(getBuf,6);
from.setTo(getBuf + 6,6);
unsigned int etherType = ntohs(((const uint16_t *)getBuf)[6]);
_handler(_arg,(void *)0,_nwid,from,to,etherType,0,(const void *)(getBuf + 14),r - 14);
}
r = 0;
} }
} }
}
uint8_t b[ZT_TAP_BUF_SIZE];
MAC to, from;
fd_set readfds, nullfds;
int n, nfds, r;
FD_ZERO(&readfds);
FD_ZERO(&nullfds);
nfds = (int)std::max(_shutdownSignalPipe[0],_fd) + 1;
r = 0;
for(;;) {
FD_SET(_shutdownSignalPipe[0],&readfds);
FD_SET(_fd,&readfds);
select(nfds,&readfds,&nullfds,&nullfds,(struct timeval *)0);
if (FD_ISSET(_shutdownSignalPipe[0],&readfds)) // writes to shutdown pipe terminate thread
break;
if (FD_ISSET(_fd,&readfds)) {
n = (int)::read(_fd,b + r,sizeof(b) - r);
if (n < 0) {
if ((errno != EINTR)&&(errno != ETIMEDOUT))
break;
} else {
// Some tap drivers like to send the ethernet frame and the
// payload in two chunks, so handle that by accumulating
// data until we have at least a frame.
r += n;
if (r > 14) {
if (r > ((int)_mtu + 14)) // sanity check for weird TAP behavior on some platforms
r = _mtu + 14;
if (_enabled) {
to.setTo(b,6);
from.setTo(b + 6,6);
unsigned int etherType = ntohs(((const uint16_t *)b)[6]);
_handler(_arg,(void *)0,_nwid,from,to,etherType,0,(const void *)(b + 14),r - 14);
}
r = 0;
}
}
}
}
}));
} }
} }

View File

@ -20,6 +20,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <stdexcept> #include <stdexcept>
#include <thread>
#include "../node/Constants.hpp" #include "../node/Constants.hpp"
#include "../node/MulticastGroup.hpp" #include "../node/MulticastGroup.hpp"
@ -34,6 +35,7 @@ class BSDEthernetTap : public EthernetTap
public: public:
BSDEthernetTap( BSDEthernetTap(
const char *homePath, const char *homePath,
unsigned int concurrency,
const MAC &mac, const MAC &mac,
unsigned int mtu, unsigned int mtu,
unsigned int metric, unsigned int metric,
@ -62,6 +64,7 @@ public:
private: private:
void (*_handler)(void *,void *,uint64_t,const MAC &,const MAC &,unsigned int,unsigned int,const void *,unsigned int); void (*_handler)(void *,void *,uint64_t,const MAC &,const MAC &,unsigned int,unsigned int,const void *,unsigned int);
void *_arg; void *_arg;
unsigned int _concurrency;
uint64_t _nwid; uint64_t _nwid;
Thread _thread; Thread _thread;
std::string _dev; std::string _dev;
@ -73,6 +76,7 @@ private:
volatile bool _enabled; volatile bool _enabled;
mutable std::vector<InetAddress> _ifaddrs; mutable std::vector<InetAddress> _ifaddrs;
mutable uint64_t _lastIfAddrsUpdate; mutable uint64_t _lastIfAddrsUpdate;
std::vector<std::thread> _rxThreads;
}; };
} // namespace ZeroTier } // namespace ZeroTier

View File

@ -58,6 +58,7 @@ namespace ZeroTier {
std::shared_ptr<EthernetTap> EthernetTap::newInstance( std::shared_ptr<EthernetTap> EthernetTap::newInstance(
const char *tapDeviceType, // OS-specific, NULL for default const char *tapDeviceType, // OS-specific, NULL for default
const char *homePath, const char *homePath,
unsigned int concurrency,
const MAC &mac, const MAC &mac,
unsigned int mtu, unsigned int mtu,
unsigned int metric, unsigned int metric,
@ -83,16 +84,16 @@ std::shared_ptr<EthernetTap> EthernetTap::newInstance(
// The "feth" virtual Ethernet device type appeared in Darwin 17.x.x. Older versions // The "feth" virtual Ethernet device type appeared in Darwin 17.x.x. Older versions
// (Sierra and earlier) must use a kernel extension. // (Sierra and earlier) must use a kernel extension.
if (strtol(osrelease,(char **)0,10) < 17) { if (strtol(osrelease,(char **)0,10) < 17) {
return std::shared_ptr<EthernetTap>(new MacKextEthernetTap(homePath,mac,mtu,metric,nwid,friendlyName,handler,arg)); return std::shared_ptr<EthernetTap>(new MacKextEthernetTap(homePath,concurrency,mac,mtu,metric,nwid,friendlyName,handler,arg));
} else { } else {
return std::shared_ptr<EthernetTap>(new MacEthernetTap(homePath,mac,mtu,metric,nwid,friendlyName,handler,arg)); return std::shared_ptr<EthernetTap>(new MacEthernetTap(homePath,concurrency,mac,mtu,metric,nwid,friendlyName,handler,arg));
} }
} }
} }
#endif // __APPLE__ #endif // __APPLE__
#ifdef __LINUX__ #ifdef __LINUX__
return std::shared_ptr<EthernetTap>(new LinuxEthernetTap(homePath,mac,mtu,metric,nwid,friendlyName,handler,arg)); return std::shared_ptr<EthernetTap>(new LinuxEthernetTap(homePath,concurrency,mac,mtu,metric,nwid,friendlyName,handler,arg));
#endif // __LINUX__ #endif // __LINUX__
#ifdef __WINDOWS__ #ifdef __WINDOWS__
@ -126,19 +127,19 @@ std::shared_ptr<EthernetTap> EthernetTap::newInstance(
_comInit = true; _comInit = true;
} }
} }
return std::shared_ptr<EthernetTap>(new WindowsEthernetTap(homePath,mac,mtu,metric,nwid,friendlyName,handler,arg)); return std::shared_ptr<EthernetTap>(new WindowsEthernetTap(homePath,concurrency,mac,mtu,metric,nwid,friendlyName,handler,arg));
#endif // __WINDOWS__ #endif // __WINDOWS__
#ifdef __FreeBSD__ #ifdef __FreeBSD__
return std::shared_ptr<EthernetTap>(new BSDEthernetTap(homePath,mac,mtu,metric,nwid,friendlyName,handler,arg)); return std::shared_ptr<EthernetTap>(new BSDEthernetTap(homePath,concurrency,mac,mtu,metric,nwid,friendlyName,handler,arg));
#endif // __FreeBSD__ #endif // __FreeBSD__
#ifdef __NetBSD__ #ifdef __NetBSD__
return std::shared_ptr<EthernetTap>(new NetBSDEthernetTap(homePath,mac,mtu,metric,nwid,friendlyName,handler,arg)); return std::shared_ptr<EthernetTap>(new NetBSDEthernetTap(homePath,concurrency,mac,mtu,metric,nwid,friendlyName,handler,arg));
#endif // __NetBSD__ #endif // __NetBSD__
#ifdef __OpenBSD__ #ifdef __OpenBSD__
return std::shared_ptr<EthernetTap>(new BSDEthernetTap(homePath,mac,mtu,metric,nwid,friendlyName,handler,arg)); return std::shared_ptr<EthernetTap>(new BSDEthernetTap(homePath,concurrency,mac,mtu,metric,nwid,friendlyName,handler,arg));
#endif // __OpenBSD__ #endif // __OpenBSD__
#endif // ZT_SDK? #endif // ZT_SDK?

View File

@ -33,6 +33,7 @@ public:
static std::shared_ptr<EthernetTap> newInstance( static std::shared_ptr<EthernetTap> newInstance(
const char *tapDeviceType, // OS-specific, NULL for default const char *tapDeviceType, // OS-specific, NULL for default
const char *homePath, const char *homePath,
unsigned int concurrency,
const MAC &mac, const MAC &mac,
unsigned int mtu, unsigned int mtu,
unsigned int metric, unsigned int metric,

View File

@ -60,7 +60,7 @@
#define IFNAMSIZ 16 #define IFNAMSIZ 16
#endif #endif
#define ZT_TAP_BUF_SIZE 16384 #define ZT_TAP_BUF_SIZE (1024 * 16)
// ff:ff:ff:ff:ff:ff with no ADI // ff:ff:ff:ff:ff:ff with no ADI
static const ZeroTier::MulticastGroup _blindWildcardMulticastGroup(ZeroTier::MAC(0xff),0); static const ZeroTier::MulticastGroup _blindWildcardMulticastGroup(ZeroTier::MAC(0xff),0);
@ -68,7 +68,7 @@ static const ZeroTier::MulticastGroup _blindWildcardMulticastGroup(ZeroTier::MAC
namespace ZeroTier { namespace ZeroTier {
// determine if we're running a really old linux kernel. // determine if we're running a really old linux kernel.
// Kernels in the 2.6.x series don't behave the same when bringing up // Kernels in the 2.6.x series don't behave the same when bringing up
// the tap devices. // the tap devices.
// //
// Returns true if the kernel major version is < 3 // Returns true if the kernel major version is < 3
@ -111,6 +111,7 @@ static void _base32_5_to_8(const uint8_t *in,char *out)
LinuxEthernetTap::LinuxEthernetTap( LinuxEthernetTap::LinuxEthernetTap(
const char *homePath, const char *homePath,
unsigned int concurrency,
const MAC &mac, const MAC &mac,
unsigned int mtu, unsigned int mtu,
unsigned int metric, unsigned int metric,
@ -127,6 +128,7 @@ LinuxEthernetTap::LinuxEthernetTap(
_fd(0), _fd(0),
_enabled(true), _enabled(true),
_run(true), _run(true),
_concurrency(concurrency),
_lastIfAddrsUpdate(0) _lastIfAddrsUpdate(0)
{ {
static std::mutex s_tapCreateLock; static std::mutex s_tapCreateLock;
@ -220,135 +222,164 @@ LinuxEthernetTap::LinuxEthernetTap(
(void)::pipe(_shutdownSignalPipe); (void)::pipe(_shutdownSignalPipe);
_tapReaderThread = std::thread([this]{ bool _enablePinning = false;
uint8_t b[ZT_TAP_BUF_SIZE]; char* envvar = std::getenv("ZT_CPU_PINNING");
fd_set readfds,nullfds; if (envvar) {
int n,nfds,r; int tmp = atoi(envvar);
std::vector<void *> buffers; if (tmp > 0) {
struct ifreq ifr; _enablePinning = true;
memset(&ifr,0,sizeof(ifr));
strcpy(ifr.ifr_name,_dev.c_str());
const int sock = socket(AF_INET,SOCK_DGRAM,0);
if (sock <= 0)
return;
if (ioctl(sock,SIOCGIFFLAGS,(void *)&ifr) < 0) {
::close(sock);
printf("WARNING: ioctl() failed setting up Linux tap device (bring interface up)\n");
return;
} }
}
ifr.ifr_ifru.ifru_hwaddr.sa_family = ARPHRD_ETHER; for (unsigned int i = 0; i < _concurrency; ++i) {
_mac.copyTo(ifr.ifr_ifru.ifru_hwaddr.sa_data,6); _rxThreads.push_back(std::thread([this, i, _enablePinning] {
if (ioctl(sock,SIOCSIFHWADDR,(void *)&ifr) < 0) {
::close(sock);
printf("WARNING: ioctl() failed setting up Linux tap device (set MAC)\n");
return;
}
usleep(100000); if (_enablePinning) {
int pinCore = i % _concurrency;
fprintf(stderr, "pinning thread %d to core %d\n", i, pinCore);
pthread_t self = pthread_self();
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
CPU_SET(pinCore, &cpuset);
int rc = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
if (rc != 0)
{
fprintf(stderr, "failed to pin thread %d to core %d: %s\n", i, pinCore, strerror(errno));
exit(1);
}
}
uint8_t b[ZT_TAP_BUF_SIZE];
fd_set readfds, nullfds;
int n, nfds, r;
if (i == 0) {
struct ifreq ifr;
memset(&ifr, 0, sizeof(ifr));
strcpy(ifr.ifr_name, _dev.c_str());
const int sock = socket(AF_INET, SOCK_DGRAM, 0);
if (sock <= 0)
return;
if (ioctl(sock, SIOCGIFFLAGS, (void*)&ifr) < 0) {
::close(sock);
printf("WARNING: ioctl() failed setting up Linux tap device (bring interface up)\n");
return;
}
ifr.ifr_ifru.ifru_hwaddr.sa_family = ARPHRD_ETHER;
_mac.copyTo(ifr.ifr_ifru.ifru_hwaddr.sa_data, 6);
if (ioctl(sock, SIOCSIFHWADDR, (void*)&ifr) < 0) {
::close(sock);
printf("WARNING: ioctl() failed setting up Linux tap device (set MAC)\n");
return;
}
usleep(100000);
if (isOldLinuxKernel()) {
ifr.ifr_ifru.ifru_mtu = (int)_mtu;
if (ioctl(sock, SIOCSIFMTU, (void*)&ifr) < 0) {
::close(sock);
printf("WARNING: ioctl() failed setting up Linux tap device (set MTU)\n");
return;
}
usleep(100000);
}
ifr.ifr_flags |= IFF_MULTICAST;
ifr.ifr_flags |= IFF_UP;
if (ioctl(sock, SIOCSIFFLAGS, (void*)&ifr) < 0) {
::close(sock);
printf("WARNING: ioctl() failed setting up Linux tap device (bring interface up)\n");
return;
}
usleep(100000);
if (! isOldLinuxKernel()) {
ifr.ifr_ifru.ifru_hwaddr.sa_family = ARPHRD_ETHER;
_mac.copyTo(ifr.ifr_ifru.ifru_hwaddr.sa_data, 6);
if (ioctl(sock, SIOCSIFHWADDR, (void*)&ifr) < 0) {
::close(sock);
printf("WARNING: ioctl() failed setting up Linux tap device (set MAC)\n");
return;
}
ifr.ifr_ifru.ifru_mtu = (int)_mtu;
if (ioctl(sock, SIOCSIFMTU, (void*)&ifr) < 0) {
::close(sock);
printf("WARNING: ioctl() failed setting up Linux tap device (set MTU)\n");
return;
}
}
fcntl(_fd, F_SETFL, O_NONBLOCK);
if (isOldLinuxKernel()) {
ifr.ifr_ifru.ifru_mtu = (int)_mtu;
if (ioctl(sock,SIOCSIFMTU,(void *)&ifr) < 0) {
::close(sock); ::close(sock);
printf("WARNING: ioctl() failed setting up Linux tap device (set MTU)\n"); }
if (! _run) {
return; return;
} }
usleep(100000); FD_ZERO(&readfds);
} FD_ZERO(&nullfds);
nfds = (int)std::max(_shutdownSignalPipe[0], _fd) + 1;
ifr.ifr_flags |= IFF_MULTICAST; r = 0;
ifr.ifr_flags |= IFF_UP; for (;;) {
if (ioctl(sock,SIOCSIFFLAGS,(void *)&ifr) < 0) { FD_SET(_shutdownSignalPipe[0], &readfds);
::close(sock); FD_SET(_fd, &readfds);
printf("WARNING: ioctl() failed setting up Linux tap device (bring interface up)\n"); select(nfds, &readfds, &nullfds, &nullfds, (struct timeval*)0);
return;
}
usleep(100000); if (FD_ISSET(_shutdownSignalPipe[0], &readfds)) {
break;
}
if (FD_ISSET(_fd, &readfds)) {
for (;;) {
// read until there are no more packets, then return to outer select() loop
n = (int)::read(_fd, b + r, ZT_TAP_BUF_SIZE - r);
if (n > 0) {
// Some tap drivers like to send the ethernet frame and the
// payload in two chunks, so handle that by accumulating
// data until we have at least a frame.
r += n;
if (r > 14) {
if (r > ((int)_mtu + 14)) // sanity check for weird TAP behavior on some platforms
r = _mtu + 14;
if (!isOldLinuxKernel()) { if (_enabled) {
ifr.ifr_ifru.ifru_hwaddr.sa_family = ARPHRD_ETHER; MAC to(b, 6), from(b + 6, 6);
_mac.copyTo(ifr.ifr_ifru.ifru_hwaddr.sa_data,6); unsigned int etherType = Utils::ntoh(((const uint16_t*)b)[6]);
if (ioctl(sock,SIOCSIFHWADDR,(void *)&ifr) < 0) { _handler(_arg, nullptr, _nwid, from, to, etherType, 0, (const void*)(b + 14), (unsigned int)(r - 14));
::close(sock); }
printf("WARNING: ioctl() failed setting up Linux tap device (set MAC)\n");
return;
}
ifr.ifr_ifru.ifru_mtu = (int)_mtu; r = 0;
if (ioctl(sock,SIOCSIFMTU,(void *)&ifr) < 0) {
::close(sock);
printf("WARNING: ioctl() failed setting up Linux tap device (set MTU)\n");
return;
}
}
fcntl(_fd,F_SETFL,O_NONBLOCK);
::close(sock);
if (!_run)
return;
FD_ZERO(&readfds);
FD_ZERO(&nullfds);
nfds = (int)std::max(_shutdownSignalPipe[0],_fd) + 1;
r = 0;
for(;;) {
FD_SET(_shutdownSignalPipe[0],&readfds);
FD_SET(_fd,&readfds);
select(nfds,&readfds,&nullfds,&nullfds,(struct timeval *)0);
if (FD_ISSET(_shutdownSignalPipe[0],&readfds))
break;
if (FD_ISSET(_fd,&readfds)) {
for(;;) { // read until there are no more packets, then return to outer select() loop
n = (int)::read(_fd,b + r,ZT_TAP_BUF_SIZE - r);
if (n > 0) {
// Some tap drivers like to send the ethernet frame and the
// payload in two chunks, so handle that by accumulating
// data until we have at least a frame.
r += n;
if (r > 14) {
if (r > ((int)_mtu + 14)) // sanity check for weird TAP behavior on some platforms
r = _mtu + 14;
if (_enabled) {
//_tapq.post(std::pair<void *,int>(buf,r));
//buf = nullptr;
MAC to(b, 6),from(b + 6, 6);
unsigned int etherType = Utils::ntoh(((const uint16_t *)b)[6]);
_handler(_arg, nullptr, _nwid, from, to, etherType, 0, (const void *)(b + 14),(unsigned int)(r - 14));
} }
r = 0;
} }
} else { else {
r = 0; r = 0;
break; break;
}
} }
} }
} }
} }));
}); }
} }
LinuxEthernetTap::~LinuxEthernetTap() LinuxEthernetTap::~LinuxEthernetTap()
{ {
_run = false; _run = false;
(void)::write(_shutdownSignalPipe[1],"\0",1); (void)::write(_shutdownSignalPipe[1],"\0",1);
_tapReaderThread.join();
::close(_fd); ::close(_fd);
::close(_shutdownSignalPipe[0]); ::close(_shutdownSignalPipe[0]);
::close(_shutdownSignalPipe[1]); ::close(_shutdownSignalPipe[1]);
for (std::thread &t : _rxThreads) {
t.join();
}
} }
void LinuxEthernetTap::setEnabled(bool en) void LinuxEthernetTap::setEnabled(bool en)

View File

@ -26,6 +26,7 @@
#include <mutex> #include <mutex>
#include "../node/MulticastGroup.hpp" #include "../node/MulticastGroup.hpp"
#include "EthernetTap.hpp" #include "EthernetTap.hpp"
#include "BlockingQueue.hpp"
namespace ZeroTier { namespace ZeroTier {
@ -34,6 +35,7 @@ class LinuxEthernetTap : public EthernetTap
public: public:
LinuxEthernetTap( LinuxEthernetTap(
const char *homePath, const char *homePath,
unsigned int _concurrency,
const MAC &mac, const MAC &mac,
unsigned int mtu, unsigned int mtu,
unsigned int metric, unsigned int metric,
@ -57,9 +59,6 @@ public:
virtual void setMtu(unsigned int mtu); virtual void setMtu(unsigned int mtu);
virtual void setDns(const char *domain, const std::vector<InetAddress> &servers) {} virtual void setDns(const char *domain, const std::vector<InetAddress> &servers) {}
private: private:
void (*_handler)(void *,void *,uint64_t,const MAC &,const MAC &,unsigned int,unsigned int,const void *,unsigned int); void (*_handler)(void *,void *,uint64_t,const MAC &,const MAC &,unsigned int,unsigned int,const void *,unsigned int);
void *_arg; void *_arg;
@ -69,13 +68,14 @@ private:
std::string _dev; std::string _dev;
std::vector<MulticastGroup> _multicastGroups; std::vector<MulticastGroup> _multicastGroups;
unsigned int _mtu; unsigned int _mtu;
unsigned int _concurrency;
int _fd; int _fd;
int _shutdownSignalPipe[2]; int _shutdownSignalPipe[2];
std::atomic_bool _enabled; std::atomic_bool _enabled;
std::atomic_bool _run; std::atomic_bool _run;
std::thread _tapReaderThread;
mutable std::vector<InetAddress> _ifaddrs; mutable std::vector<InetAddress> _ifaddrs;
mutable uint64_t _lastIfAddrsUpdate; mutable uint64_t _lastIfAddrsUpdate;
std::vector<std::thread> _rxThreads;
}; };
} // namespace ZeroTier } // namespace ZeroTier

View File

@ -69,6 +69,7 @@ static bool fethMaxMtuAdjusted = false;
MacEthernetTap::MacEthernetTap( MacEthernetTap::MacEthernetTap(
const char *homePath, const char *homePath,
unsigned int concurrency,
const MAC &mac, const MAC &mac,
unsigned int mtu, unsigned int mtu,
unsigned int metric, unsigned int metric,
@ -77,6 +78,7 @@ MacEthernetTap::MacEthernetTap(
void (*handler)(void *,void *,uint64_t,const MAC &,const MAC &,unsigned int,unsigned int,const void *data,unsigned int len), void (*handler)(void *,void *,uint64_t,const MAC &,const MAC &,unsigned int,unsigned int,const void *data,unsigned int len),
void *arg) : void *arg) :
_handler(handler), _handler(handler),
_concurrency(concurrency),
_arg(arg), _arg(arg),
_nwid(nwid), _nwid(nwid),
_homePath(homePath), _homePath(homePath),
@ -286,6 +288,9 @@ MacEthernetTap::~MacEthernetTap()
} }
Thread::join(_thread); Thread::join(_thread);
for (std::thread &t : _rxThreads) {
t.join();
}
} }
void MacEthernetTap::setEnabled(bool en) { _enabled = en; } void MacEthernetTap::setEnabled(bool en) { _enabled = en; }
@ -474,17 +479,25 @@ void MacEthernetTap::setMtu(unsigned int mtu)
void MacEthernetTap::threadMain() void MacEthernetTap::threadMain()
throw() throw()
{ {
Thread::sleep(250);
for (unsigned int i = 0; i < _concurrency; ++i) {
_rxThreads.push_back(std::thread([this, i] {
fprintf(stderr, "starting thread %d\n", i);
char agentReadBuf[ZT_MACETHERNETTAP_AGENT_READ_BUF_SIZE]; char agentReadBuf[ZT_MACETHERNETTAP_AGENT_READ_BUF_SIZE];
char agentStderrBuf[256]; char agentStderrBuf[256];
fd_set readfds,nullfds; fd_set readfds,nullfds;
MAC to,from; MAC to,from;
Thread::sleep(250);
const int nfds = std::max(std::max(_shutdownSignalPipe[0],_agentStdout),_agentStderr) + 1; const int nfds = std::max(std::max(_shutdownSignalPipe[0],_agentStdout),_agentStderr) + 1;
long agentReadPtr = 0; long agentReadPtr = 0;
fcntl(_agentStdout,F_SETFL,fcntl(_agentStdout,F_GETFL)|O_NONBLOCK);
fcntl(_agentStderr,F_SETFL,fcntl(_agentStderr,F_GETFL)|O_NONBLOCK); if (i == 0) {
fcntl(_agentStdout,F_SETFL,fcntl(_agentStdout,F_GETFL)|O_NONBLOCK);
fcntl(_agentStderr,F_SETFL,fcntl(_agentStderr,F_GETFL)|O_NONBLOCK);
}
FD_ZERO(&readfds); FD_ZERO(&readfds);
FD_ZERO(&nullfds); FD_ZERO(&nullfds);
@ -533,6 +546,7 @@ void MacEthernetTap::threadMain()
*/ */
} }
} }
}));}
::close(_agentStdin); ::close(_agentStdin);
::close(_agentStdout); ::close(_agentStdout);

View File

@ -28,6 +28,7 @@
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
#include <vector> #include <vector>
#include <thread>
namespace ZeroTier { namespace ZeroTier {
@ -36,6 +37,7 @@ class MacEthernetTap : public EthernetTap
public: public:
MacEthernetTap( MacEthernetTap(
const char *homePath, const char *homePath,
unsigned int concurrency,
const MAC &mac, const MAC &mac,
unsigned int mtu, unsigned int mtu,
unsigned int metric, unsigned int metric,
@ -67,6 +69,7 @@ private:
uint64_t _nwid; uint64_t _nwid;
Thread _thread; Thread _thread;
std::string _homePath; std::string _homePath;
unsigned int _concurrency;
std::string _dev; std::string _dev;
std::vector<MulticastGroup> _multicastGroups; std::vector<MulticastGroup> _multicastGroups;
Mutex _putLock; Mutex _putLock;
@ -79,6 +82,7 @@ private:
volatile bool _enabled; volatile bool _enabled;
mutable std::vector<InetAddress> _ifaddrs; mutable std::vector<InetAddress> _ifaddrs;
mutable uint64_t _lastIfAddrsUpdate; mutable uint64_t _lastIfAddrsUpdate;
std::vector<std::thread> _rxThreads;
}; };

View File

@ -32,7 +32,7 @@
* All this stuff is basically undocumented. A lot of tracing through * All this stuff is basically undocumented. A lot of tracing through
* the Darwin/XNU kernel source was required to figure out how to make * the Darwin/XNU kernel source was required to figure out how to make
* this actually work. * this actually work.
* *
* We hope to develop a DriverKit-based driver in the near-mid future to * We hope to develop a DriverKit-based driver in the near-mid future to
* replace this weird hack, but it works for now through Big Sur in our * replace this weird hack, but it works for now through Big Sur in our
* testing. * testing.

View File

@ -306,6 +306,7 @@ static Mutex globalTapCreateLock;
MacKextEthernetTap::MacKextEthernetTap( MacKextEthernetTap::MacKextEthernetTap(
const char *homePath, const char *homePath,
unsigned int concurrency,
const MAC &mac, const MAC &mac,
unsigned int mtu, unsigned int mtu,
unsigned int metric, unsigned int metric,
@ -317,6 +318,7 @@ MacKextEthernetTap::MacKextEthernetTap(
_arg(arg), _arg(arg),
_nwid(nwid), _nwid(nwid),
_homePath(homePath), _homePath(homePath),
_concurrency(concurrency),
_mtu(mtu), _mtu(mtu),
_metric(metric), _metric(metric),
_fd(0), _fd(0),
@ -447,7 +449,9 @@ MacKextEthernetTap::~MacKextEthernetTap()
::write(_shutdownSignalPipe[1],"\0",1); // causes thread to exit ::write(_shutdownSignalPipe[1],"\0",1); // causes thread to exit
Thread::join(_thread); Thread::join(_thread);
for (std::thread &t : _rxThreads) {
t.join();
}
::close(_fd); ::close(_fd);
::close(_shutdownSignalPipe[0]); ::close(_shutdownSignalPipe[0]);
::close(_shutdownSignalPipe[1]); ::close(_shutdownSignalPipe[1]);

View File

@ -20,6 +20,7 @@
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
#include <vector> #include <vector>
#include <thread>
#include "../node/Constants.hpp" #include "../node/Constants.hpp"
#include "../node/MAC.hpp" #include "../node/MAC.hpp"
@ -36,6 +37,7 @@ class MacKextEthernetTap : public EthernetTap
public: public:
MacKextEthernetTap( MacKextEthernetTap(
const char *homePath, const char *homePath,
unsigned int concurrency,
const MAC &mac, const MAC &mac,
unsigned int mtu, unsigned int mtu,
unsigned int metric, unsigned int metric,
@ -70,11 +72,13 @@ private:
std::string _homePath; std::string _homePath;
std::string _dev; std::string _dev;
std::vector<MulticastGroup> _multicastGroups; std::vector<MulticastGroup> _multicastGroups;
unsigned int _concurrency;
unsigned int _mtu; unsigned int _mtu;
unsigned int _metric; unsigned int _metric;
int _fd; int _fd;
int _shutdownSignalPipe[2]; int _shutdownSignalPipe[2];
volatile bool _enabled; volatile bool _enabled;
std::vector<std::thread> _rxThreads;
}; };
} // namespace ZeroTier } // namespace ZeroTier

View File

@ -16,7 +16,6 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <stdint.h> #include <stdint.h>
#include <string> #include <string>
#include <map> #include <map>
#include <vector> #include <vector>
@ -26,6 +25,11 @@
#include <mutex> #include <mutex>
#include <condition_variable> #include <condition_variable>
#ifdef __FreeBSD__
#include <sched.h>
#include <pthread_np.h>
#endif
#include "../version.h" #include "../version.h"
#include "../include/ZeroTierOne.h" #include "../include/ZeroTierOne.h"
@ -758,7 +762,7 @@ struct TcpConnection
Mutex writeq_m; Mutex writeq_m;
}; };
struct OneServiceIncomingPacket struct PacketRecord
{ {
uint64_t now; uint64_t now;
int64_t sock; int64_t sock;
@ -785,14 +789,20 @@ public:
SoftwareUpdater *_updater; SoftwareUpdater *_updater;
bool _updateAutoApply; bool _updateAutoApply;
httplib::Server _controlPlane; httplib::Server _controlPlane;
httplib::Server _controlPlaneV6; httplib::Server _controlPlaneV6;
std::thread _serverThread; std::thread _serverThread;
std::thread _serverThreadV6; std::thread _serverThreadV6;
bool _serverThreadRunning; bool _serverThreadRunning;
bool _serverThreadRunningV6; bool _serverThreadRunningV6;
bool _allowTcpFallbackRelay; unsigned int _rxThreadCount;
BlockingQueue<PacketRecord *> _rxPacketQueue;
std::vector<PacketRecord *> _rxPacketVector;
std::vector<std::thread> _rxPacketThreads;
Mutex _rxPacketVector_m,_rxPacketThreads_m;
bool _allowTcpFallbackRelay;
bool _forceTcpRelay; bool _forceTcpRelay;
bool _allowSecondaryPort; bool _allowSecondaryPort;
@ -842,8 +852,6 @@ public:
// Deadline for the next background task service function // Deadline for the next background task service function
volatile int64_t _nextBackgroundTaskDeadline; volatile int64_t _nextBackgroundTaskDeadline;
std::map<uint64_t,NetworkState> _nets; std::map<uint64_t,NetworkState> _nets;
Mutex _nets_m; Mutex _nets_m;
@ -890,9 +898,9 @@ public:
,_node((Node *)0) ,_node((Node *)0)
,_updater((SoftwareUpdater *)0) ,_updater((SoftwareUpdater *)0)
,_updateAutoApply(false) ,_updateAutoApply(false)
,_controlPlane() ,_controlPlane()
,_controlPlaneV6() ,_controlPlaneV6()
,_serverThread() ,_serverThread()
,_serverThreadV6() ,_serverThreadV6()
,_serverThreadRunning(false) ,_serverThreadRunning(false)
,_serverThreadRunningV6(false) ,_serverThreadRunningV6(false)
@ -926,9 +934,79 @@ public:
_ports[1] = 0; _ports[1] = 0;
_ports[2] = 0; _ports[2] = 0;
prometheus::simpleapi::saver.set_registry(prometheus::simpleapi::registry_ptr); bool _enablePinning = false;
prometheus::simpleapi::saver.set_delay(std::chrono::seconds(5)); char* pinningVar = std::getenv("ZT_CPU_PINNING");
prometheus::simpleapi::saver.set_out_file(_homePath + ZT_PATH_SEPARATOR + "metrics.prom"); if (pinningVar) {
int tmp = atoi(pinningVar);
if (tmp > 0) {
_enablePinning = true;
}
}
char* concurrencyVar = std::getenv("ZT_PACKET_PROCESSING_CONCURRENCY");
if (concurrencyVar) {
int tmp = atoi(concurrencyVar);
if (tmp > 0) {
_rxThreadCount = tmp;
}
else {
_rxThreadCount = std::thread::hardware_concurrency();
}
}
else {
_rxThreadCount = std::thread::hardware_concurrency();
}
for (unsigned int i = 0; i < _rxThreadCount; ++i) {
_rxPacketThreads.push_back(std::thread([this, i]() {
#if defined(__LINUX__) || defined(__FreeBSD__) /* || defined(__APPLE__) */
int pinCore = i % _rxThreadCount;
fprintf(stderr, "pinning thread %d to core %d\n", i, pinCore);
pthread_t self = pthread_self();
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
CPU_SET(pinCore, &cpuset);
#endif
#ifdef __LINUX__
int rc = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
#elif __FreeBSD__
int rc = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
#endif
#if defined(__LINUX__) || defined(__FreeBSD__) /* || defined(__APPLE__) */
if (rc != 0)
{
fprintf(stderr, "failed to pin thread %d to core %d: %s\n", i, pinCore, strerror(errno));
exit(1);
}
#endif
PacketRecord* packet = nullptr;
for (;;) {
if (! _rxPacketQueue.get(packet)) {
break;
}
if (! packet) {
break;
}
const ZT_ResultCode err = _node->processWirePacket(nullptr, packet->now, packet->sock, &(packet->from), packet->data, packet->size, &_nextBackgroundTaskDeadline);
{
Mutex::Lock l(_rxPacketVector_m);
_rxPacketVector.push_back(packet);
}
if (ZT_ResultCode_isFatal(err)) {
char tmp[256];
OSUtils::ztsnprintf(tmp, sizeof(tmp), "error processing packet: %d", (int)err);
Mutex::Lock _l(_termReason_m);
_termReason = ONE_UNRECOVERABLE_ERROR;
_fatalErrorMessage = tmp;
this->terminate();
break;
}
}
}));
}
prometheus::simpleapi::saver.set_registry(prometheus::simpleapi::registry_ptr);
prometheus::simpleapi::saver.set_delay(std::chrono::seconds(5));
prometheus::simpleapi::saver.set_out_file(_homePath + ZT_PATH_SEPARATOR + "metrics.prom");
#if ZT_VAULT_SUPPORT #if ZT_VAULT_SUPPORT
curl_global_init(CURL_GLOBAL_DEFAULT); curl_global_init(CURL_GLOBAL_DEFAULT);
@ -940,20 +1018,34 @@ public:
#ifdef __WINDOWS__ #ifdef __WINDOWS__
WinFWHelper::removeICMPRules(); WinFWHelper::removeICMPRules();
#endif #endif
_rxPacketQueue.stop();
_rxPacketThreads_m.lock();
for(auto t=_rxPacketThreads.begin();t!=_rxPacketThreads.end();++t) {
t->join();
}
_rxPacketThreads_m.unlock();
_binder.closeAll(_phy); _binder.closeAll(_phy);
#if ZT_VAULT_SUPPORT #if ZT_VAULT_SUPPORT
curl_global_cleanup(); curl_global_cleanup();
#endif #endif
_controlPlane.stop(); _controlPlane.stop();
if (_serverThreadRunning) { if (_serverThreadRunning) {
_serverThread.join(); _serverThread.join();
} }
_controlPlaneV6.stop(); _controlPlaneV6.stop();
if (_serverThreadRunningV6) { if (_serverThreadRunningV6) {
_serverThreadV6.join(); _serverThreadV6.join();
} }
_rxPacketVector_m.lock();
while (!_rxPacketVector.empty()) {
delete _rxPacketVector.back();
_rxPacketVector.pop_back();
}
_rxPacketVector_m.unlock();
#ifdef ZT_USE_MINIUPNPC #ifdef ZT_USE_MINIUPNPC
delete _portMapper; delete _portMapper;
@ -1270,6 +1362,9 @@ public:
const unsigned long delay = (dl > now) ? (unsigned long)(dl - now) : 500; const unsigned long delay = (dl > now) ? (unsigned long)(dl - now) : 500;
clockShouldBe = now + (int64_t)delay; clockShouldBe = now + (int64_t)delay;
_phy.poll(delay); _phy.poll(delay);
} }
} catch (std::exception &e) { } catch (std::exception &e) {
Mutex::Lock _l(_termReason_m); Mutex::Lock _l(_termReason_m);
@ -2756,25 +2851,37 @@ public:
// Handlers for Node and Phy<> callbacks // Handlers for Node and Phy<> callbacks
// ========================================================================= // =========================================================================
/**
 * Phy<> callback for a UDP datagram received on one of the service's sockets.
 *
 * The datagram is not processed on the calling thread. Its contents and metadata are copied
 * into a PacketRecord, which is posted to _rxPacketQueue for the receive worker threads.
 * Nothing is queued while _forceTcpRelay is set.
 *
 * @param sock Socket the datagram arrived on; stored in the record as an int64_t
 * @param uptr Unused
 * @param localAddr Unused
 * @param from Sender address; sizeof(struct sockaddr_storage) bytes are copied from it
 * @param data Datagram payload
 * @param len Payload length in bytes
 */
inline void phyOnDatagram(PhySocket* sock, void** uptr, const struct sockaddr* localAddr, const struct sockaddr* from, void* data, unsigned long len)
{
	if (_forceTcpRelay) {
		return;
	}
	Metrics::udp_recv += len;
	const uint64_t now = OSUtils::now();
	if ((len >= 16) && (reinterpret_cast<const InetAddress*>(from)->ipScope() == InetAddress::IP_SCOPE_GLOBAL)) {
		_lastDirectReceiveFromGlobal = now;
	}

	// Take a recycled record from the pool if one is available. The scoped lock releases the
	// mutex on every exit path, and the allocation below happens outside the critical section
	// so a throwing `new` can no longer leave _rxPacketVector_m locked.
	PacketRecord* packet = nullptr;
	{
		Mutex::Lock l(_rxPacketVector_m);
		if (! _rxPacketVector.empty()) {
			packet = _rxPacketVector.back();
			_rxPacketVector.pop_back();
		}
	}
	if (! packet) {
		packet = new PacketRecord;
	}

	packet->sock = reinterpret_cast<int64_t>(sock);
	packet->now = now;
	memcpy(&(packet->from), from, sizeof(struct sockaddr_storage));
	packet->size = static_cast<unsigned int>(len);
	// NOTE(review): len is copied without being checked against the capacity of
	// PacketRecord::data, which is declared outside this view — confirm the buffer can hold
	// the largest datagram Phy<> may deliver.
	memcpy(packet->data, data, len);

	// Cap the backlog at 256 queued packets per worker thread.
	_rxPacketQueue.postLimit(packet, 256 * _rxThreadCount);
}
inline void phyOnTcpConnect(PhySocket *sock,void **uptr,bool success) inline void phyOnTcpConnect(PhySocket *sock,void **uptr,bool success)
@ -2996,6 +3103,7 @@ public:
n.setTap(EthernetTap::newInstance( n.setTap(EthernetTap::newInstance(
nullptr, nullptr,
_homePath.c_str(), _homePath.c_str(),
_rxThreadCount,
MAC(nwc->mac), MAC(nwc->mac),
nwc->mtu, nwc->mtu,
(unsigned int)ZT_IF_METRIC, (unsigned int)ZT_IF_METRIC,
@ -3509,8 +3617,9 @@ public:
inline void nodeVirtualNetworkFrameFunction(uint64_t nwid,void **nuptr,uint64_t sourceMac,uint64_t destMac,unsigned int etherType,unsigned int vlanId,const void *data,unsigned int len) inline void nodeVirtualNetworkFrameFunction(uint64_t nwid,void **nuptr,uint64_t sourceMac,uint64_t destMac,unsigned int etherType,unsigned int vlanId,const void *data,unsigned int len)
{ {
NetworkState *n = reinterpret_cast<NetworkState *>(*nuptr); NetworkState *n = reinterpret_cast<NetworkState *>(*nuptr);
if ((!n)||(!n->tap())) if ((!n)||(!n->tap())) {
return; return;
}
n->tap()->put(MAC(sourceMac),MAC(destMac),etherType,data,len); n->tap()->put(MAC(sourceMac),MAC(destMac),etherType,data,len);
} }