Fix for possible high CPU usage on multicast queries.

Adam Ierymenko 2015-11-09 14:54:05 -08:00
parent 35c4e28f31
commit 94f4316a0e
4 changed files with 20 additions and 29 deletions

View File: node/IncomingPacket.cpp

@@ -57,9 +57,8 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,bool deferred)
         if ((cipher() == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE)&&(verb() == Packet::VERB_HELLO)) {
             // Unencrypted HELLOs require some potentially expensive verification, so
             // do this in the background if background processing is enabled.
-            DeferredPackets *const dp = RR->dp; // read volatile pointer
-            if ((dp)&&(!deferred)) {
-                dp->enqueue(this);
+            if ((RR->dpEnabled > 0)&&(!deferred)) {
+                RR->dp->enqueue(this);
                 return true; // 'handled' via deferring to background thread(s)
             } else {
                 // A null pointer for peer to _doHELLO() tells it to run its own
@@ -405,12 +404,12 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,const SharedPtr<Peer> &peer)
         } break;

         case Packet::VERB_WHOIS: {
-            /* Right now only root servers are allowed to send OK(WHOIS) to prevent
-             * poisoning attacks. Further decentralization will require some other
-             * kind of trust mechanism. */
             if (RR->topology->isRoot(peer->identity())) {
                 const Identity id(*this,ZT_PROTO_VERB_WHOIS__OK__IDX_IDENTITY);
-                if (id.locallyValidate())
+                // Right now we can skip this since OK(WHOIS) is only accepted from
+                // roots. In the future it should be done if we query less trusted
+                // sources.
+                //if (id.locallyValidate())
                     RR->sw->doAnythingWaitingForPeer(RR->topology->addPeer(SharedPtr<Peer>(new Peer(RR->identity,id))));
             }
         } break;

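The change above replaces "read a volatile DeferredPackets pointer that may still be null" with a test of the RR->dpEnabled counter, since RR->dp now always exists. Below is a minimal, self-contained sketch of that gating pattern; Runtime, Worklist, Packet and handle() are illustrative stand-ins rather than ZeroTier's classes, and std::atomic<int> stands in for the plain volatile int used in the real header.

// Illustrative sketch only -- not the ZeroTier classes themselves.
#include <atomic>
#include <iostream>
#include <mutex>
#include <queue>

struct Packet { int id; };

struct Worklist {                       // stands in for DeferredPackets
    std::mutex m;
    std::queue<Packet> q;
    void enqueue(const Packet &p) { std::lock_guard<std::mutex> l(m); q.push(p); }
};

struct Runtime {                        // stands in for RuntimeEnvironment
    Worklist dp;                        // always constructed, like RR->dp after this commit
    std::atomic<int> dpEnabled{0};      // >0 only while background threads are running
};

// Mirrors the decision in tryDecode(): defer expensive work only if a
// background worker has advertised itself, otherwise handle it inline.
bool handle(Runtime &rt, const Packet &p)
{
    if (rt.dpEnabled.load() > 0) {
        rt.dp.enqueue(p);               // a background thread will pick it up
        return true;                    // "handled" by deferral
    }
    return false;                       // caller must process synchronously
}

int main()
{
    Runtime rt;
    std::cout << handle(rt, Packet{1}) << "\n"; // prints 0: no worker, handled inline
    ++rt.dpEnabled;                              // pretend a worker has started
    std::cout << handle(rt, Packet{2}) << "\n"; // prints 1: deferred to the queue
}

The point of the counter is that the hot packet path no longer takes dpSetLock or dereferences a pointer that might not exist yet: it either sees a positive count and defers, or falls through to inline processing.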
View File: node/Node.cpp

@@ -116,7 +116,9 @@ Node::Node(
         RR->antiRec = new AntiRecursion();
         RR->topology = new Topology(RR);
         RR->sa = new SelfAwareness(RR);
+        RR->dp = new DeferredPackets(RR);
     } catch ( ... ) {
+        delete RR->dp;
         delete RR->sa;
         delete RR->topology;
         delete RR->antiRec;
@@ -131,14 +133,11 @@ Node::Node(
 Node::~Node()
 {
     Mutex::Lock _l(_networks_m);
-    Mutex::Lock _l2(RR->dpSetLock);

     _networks.clear(); // ensure that networks are destroyed before shutdown

-    DeferredPackets *dp = RR->dp;
-    RR->dp = (DeferredPackets *)0;
-    delete dp;
-
+    RR->dpEnabled = 0;
+    delete RR->dp;
     delete RR->sa;
     delete RR->topology;
     delete RR->antiRec;
@@ -647,23 +646,14 @@ void Node::clusterStatus(ZT_ClusterStatus *cs)

 void Node::backgroundThreadMain()
 {
-    RR->dpSetLock.lock();
-    if (!RR->dp) {
-        try {
-            RR->dp = new DeferredPackets(RR);
-        } catch ( ... ) { // sanity check -- could only really happen if out of memory
-            RR->dpSetLock.unlock();
-            return;
-        }
-    }
-    RR->dpSetLock.unlock();
-
+    ++RR->dpEnabled;
     for(;;) {
         try {
             if (RR->dp->process() < 0)
                 break;
         } catch ( ... ) {} // sanity check -- should not throw
     }
+    --RR->dpEnabled;
 }

 /****************************************************************************/

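With DeferredPackets now allocated unconditionally in the Node constructor and freed in the destructor, backgroundThreadMain() shrinks to: bump the counter, loop on process() until it returns a negative value, then decrement on the way out. The sketch below mimics that lifecycle with placeholder types; WorkQueue is not ZeroTier's DeferredPackets, and the shutdown signalling is a guess at the general shape rather than the library's actual mechanism.

// Illustrative sketch only; names and synchronization are simplified.
#include <atomic>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>

class WorkQueue {                         // plays the role of DeferredPackets here
public:
    void enqueue(int job) {
        { std::lock_guard<std::mutex> l(m_); q_.push(job); }
        cv_.notify_one();
    }
    void shutdown() {
        { std::lock_guard<std::mutex> l(m_); stop_ = true; }
        cv_.notify_all();
    }
    // Returns <0 once shutdown is requested and the queue is drained,
    // otherwise processes one queued job and returns 0.
    int process() {
        std::unique_lock<std::mutex> l(m_);
        cv_.wait(l, [&] { return stop_ || !q_.empty(); });
        if (!q_.empty()) {
            int job = q_.front();
            q_.pop();
            l.unlock();
            std::cout << "processed job " << job << "\n";
            return 0;
        }
        return -1;                        // stopped and fully drained
    }
private:
    std::mutex m_;
    std::condition_variable cv_;
    std::queue<int> q_;
    bool stop_ = false;
};

int main()
{
    WorkQueue dp;                         // exists for the node's whole lifetime
    std::atomic<int> dpEnabled{0};

    std::thread worker([&] {              // equivalent of backgroundThreadMain()
        ++dpEnabled;                      // deferral is now available
        for (;;) {
            if (dp.process() < 0)
                break;                    // shutdown requested
        }
        --dpEnabled;                      // last worker out disables deferral
    });

    dp.enqueue(1);
    dp.enqueue(2);
    dp.shutdown();                        // like the destructor path: stop, then join
    worker.join();
    std::cout << "workers active: " << dpEnabled << "\n"; // prints 0
}

The std::thread above only exists to make the sketch runnable; in the real code, whatever calls Node::backgroundThreadMain() supplies the thread.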
View File: node/RuntimeEnvironment.hpp

@@ -57,12 +57,12 @@ public:
         node(n)
         ,identity()
         ,localNetworkController((NetworkController *)0)
-        ,dp((DeferredPackets *)0)
         ,sw((Switch *)0)
         ,mc((Multicaster *)0)
         ,antiRec((AntiRecursion *)0)
         ,topology((Topology *)0)
         ,sa((SelfAwareness *)0)
+        ,dp((DeferredPackets *)0)
 #ifdef ZT_ENABLE_CLUSTER
         ,cluster((Cluster *)0)
 #endif
@@ -80,10 +80,6 @@ public:
     // This is set externally to an instance of this base class
     NetworkController *localNetworkController;

-    // This is created if background threads call Node::backgroundThreadMain().
-    DeferredPackets *volatile dp; // can be read without lock but not written
-    Mutex dpSetLock;
-
     /*
      * Order matters a bit here. These are constructed in this order
      * and then deleted in the opposite order on Node exit. The order ensures
@@ -97,10 +93,15 @@ public:
     AntiRecursion *antiRec;
     Topology *topology;
     SelfAwareness *sa;
+    DeferredPackets *dp;
 #ifdef ZT_ENABLE_CLUSTER
     Cluster *cluster;
 #endif

+    // This is set to >0 if background threads are waiting on deferred
+    // packets, otherwise 'dp' should not be used.
+    volatile int dpEnabled;
+
 };

 } // namespace ZeroTier

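In the header, dp moves out of its own lock-guarded slot and into the group whose comment says members are constructed in this order and deleted in the opposite order; dp is declared last, which matches Node::~Node() above, where delete RR->dp runs before the other subsystems are torn down. A tiny generic illustration of that construct-forward, tear-down-in-reverse convention, with made-up type names rather than ZeroTier's:

// Generic illustration of "construct in declaration order, delete in reverse".
#include <iostream>

struct Topology     { ~Topology()     { std::cout << "Topology down\n"; } };
struct DeferredWork { ~DeferredWork() { std::cout << "DeferredWork down\n"; } };

struct Env {
    Topology     *topology = nullptr;
    DeferredWork *dp       = nullptr;   // declared (and created) last
};

int main()
{
    Env e;
    e.topology = new Topology();        // constructed first
    e.dp       = new DeferredWork();    // constructed last

    // Teardown mirrors Node::~Node(): reverse order, so dp goes away first,
    // presumably before the subsystems its queued work might still reference.
    delete e.dp;
    delete e.topology;
}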
View File

@@ -4,9 +4,10 @@
 # 250 with a 16GB RAM VM like Amazon m4.xlarge seems good
 NUM_CONTAINERS=250
 CONTAINER_IMAGE=zerotier/http-test
+SCALE_UP_DELAY=4

 export PATH=/bin:/usr/bin:/usr/local/bin:/usr/sbin:/sbin

-pssh -h big-test-hosts -x '-t -t' -i -OUserKnownHostsFile=/dev/null -OStrictHostKeyChecking=no -t 0 -p 256 "sudo sysctl -w net.netfilter.nf_conntrack_max=262144 ; for ((n=0;n<$NUM_CONTAINERS;n++)); do sudo docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep 10; done"
+pssh -h big-test-hosts -x '-t -t' -i -OUserKnownHostsFile=/dev/null -OStrictHostKeyChecking=no -t 0 -p 256 "sudo sysctl -w net.netfilter.nf_conntrack_max=262144 ; for ((n=0;n<$NUM_CONTAINERS;n++)); do sudo docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep $SCALE_UP_DELAY; done"

 exit 0