Fix for possible high CPU usage on multicast queries.

Adam Ierymenko 2015-11-09 14:54:05 -08:00
parent 35c4e28f31
commit 94f4316a0e
4 changed files with 20 additions and 29 deletions


@@ -57,9 +57,8 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,bool deferred)
 	if ((cipher() == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE)&&(verb() == Packet::VERB_HELLO)) {
 		// Unencrypted HELLOs require some potentially expensive verification, so
 		// do this in the background if background processing is enabled.
-		DeferredPackets *const dp = RR->dp; // read volatile pointer
-		if ((dp)&&(!deferred)) {
-			dp->enqueue(this);
+		if ((RR->dpEnabled > 0)&&(!deferred)) {
+			RR->dp->enqueue(this);
 			return true; // 'handled' via deferring to background thread(s)
 		} else {
 			// A null pointer for peer to _doHELLO() tells it to run its own
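The heart of the fix is visible in this hunk: the old code deferred a packet whenever the volatile dp pointer happened to be non-null, even if no background thread was still servicing the queue, which could leave work queued with no consumer. The new code defers only while a worker is actually running. Below is a minimal self-contained sketch of that gating pattern; the names (dpEnabled, tryDefer) mirror the diff, and std::atomic<int> stands in for the volatile int the real code uses:

#include <atomic>
#include <cstdio>

static std::atomic<int> dpEnabled{0}; // stand-in for RR->dpEnabled

// Defer a packet to background processing only when at least one worker
// has announced itself by incrementing dpEnabled.
static bool tryDefer(int packet)
{
	if (dpEnabled.load() > 0) {
		// the real code would do RR->dp->enqueue(this) here
		std::printf("packet %d deferred to background thread\n", packet);
		return true; // 'handled' asynchronously
	}
	return false; // no workers running: caller must process inline
}

int main()
{
	tryDefer(1); // no worker yet: falls through to inline processing
	++dpEnabled; // a worker entered its main loop
	tryDefer(2); // now safe to enqueue
	--dpEnabled;
	return 0;
}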
@@ -405,12 +404,12 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,const SharedPtr<Peer> &p
 		} break;
 		case Packet::VERB_WHOIS: {
 			/* Right now only root servers are allowed to send OK(WHOIS) to prevent
 			 * poisoning attacks. Further decentralization will require some other
 			 * kind of trust mechanism. */
 			if (RR->topology->isRoot(peer->identity())) {
 				const Identity id(*this,ZT_PROTO_VERB_WHOIS__OK__IDX_IDENTITY);
-				if (id.locallyValidate())
+				// Right now we can skip this since OK(WHOIS) is only accepted from
+				// roots. In the future it should be done if we query less trusted
+				// sources.
+				//if (id.locallyValidate())
 				RR->sw->doAnythingWaitingForPeer(RR->topology->addPeer(SharedPtr<Peer>(new Peer(RR->identity,id))));
 			}
 		} break;
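The commented-out locallyValidate() records a trust decision: the expensive identity check is skipped because OK(WHOIS) is currently accepted only from roots. If replies from less trusted peers were ever accepted, the guard might look like this hedged sketch, where Identity is a stub rather than ZeroTier's real class:

// Stub standing in for ZeroTier's Identity; locallyValidate() models the
// potentially expensive self-consistency check commented out above.
struct Identity {
	bool locallyValidate() const { return true; }
};

// Trusted roots skip the expensive check, as in the diff; any less trusted
// source would still have to pay for full validation.
static bool acceptWhoisIdentity(const Identity &id, bool fromTrustedRoot)
{
	if (!fromTrustedRoot)
		return id.locallyValidate();
	return true;
}

int main()
{
	Identity id;
	return acceptWhoisIdentity(id, true) ? 0 : 1; // root path: no validation
}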


@@ -116,7 +116,9 @@ Node::Node(
 	RR->antiRec = new AntiRecursion();
 	RR->topology = new Topology(RR);
 	RR->sa = new SelfAwareness(RR);
+	RR->dp = new DeferredPackets(RR);
 } catch ( ... ) {
+	delete RR->dp;
 	delete RR->sa;
 	delete RR->topology;
 	delete RR->antiRec;
@@ -131,14 +133,11 @@ Node::Node(
 Node::~Node()
 {
 	Mutex::Lock _l(_networks_m);
-	Mutex::Lock _l2(RR->dpSetLock);
 	_networks.clear(); // ensure that networks are destroyed before shutdown
-	DeferredPackets *dp = RR->dp;
-	RR->dp = (DeferredPackets *)0;
-	delete dp;
+	RR->dpEnabled = 0;
+	delete RR->dp;
 	delete RR->sa;
 	delete RR->topology;
 	delete RR->antiRec;
@@ -647,23 +646,14 @@ void Node::clusterStatus(ZT_ClusterStatus *cs)
 void Node::backgroundThreadMain()
 {
-	RR->dpSetLock.lock();
-	if (!RR->dp) {
-		try {
-			RR->dp = new DeferredPackets(RR);
-		} catch ( ... ) { // sanity check -- could only really happen if out of memory
-			RR->dpSetLock.unlock();
-			return;
-		}
-	}
-	RR->dpSetLock.unlock();
+	++RR->dpEnabled;
 	for(;;) {
 		try {
 			if (RR->dp->process() < 0)
 				break;
 		} catch ( ... ) {} // sanity check -- should not throw
 	}
+	--RR->dpEnabled;
 }

/****************************************************************************/
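backgroundThreadMain() now brackets its whole lifetime with the counter: packets can be deferred only between the increment and the decrement, so no queue manipulation under dpSetLock is needed at all. The sketch below shows the same shape with a stand-in queue, assuming — as the "process() < 0" loop implies but this diff does not show — that process() blocks on a condition variable until work arrives and returns a negative value at shutdown; it is not the real DeferredPackets implementation:

#include <atomic>
#include <condition_variable>
#include <deque>
#include <functional>
#include <mutex>
#include <thread>

// Stand-in for DeferredPackets: enqueue() wakes one worker; process()
// sleeps instead of spinning, and returns -1 once shutdown() has been
// called -- mirroring how the loop above breaks on a negative result.
class DeferredQueue {
public:
	void enqueue(int pkt)
	{
		{ std::lock_guard<std::mutex> l(_m); _q.push_back(pkt); }
		_cv.notify_one();
	}
	void shutdown()
	{
		{ std::lock_guard<std::mutex> l(_m); _die = true; }
		_cv.notify_all();
	}
	int process()
	{
		std::unique_lock<std::mutex> l(_m);
		_cv.wait(l, [this] { return _die || !_q.empty(); });
		if (_die)
			return -1;  // tells workers to exit their loop
		_q.pop_front(); // a real implementation would decode the packet here
		return 1;
	}
private:
	std::mutex _m;
	std::condition_variable _cv;
	std::deque<int> _q;
	bool _die = false;
};

static std::atomic<int> dpEnabled{0};

void backgroundThreadMain(DeferredQueue &dp)
{
	++dpEnabled;            // from here on, packets may be deferred to us
	for(;;) {
		if (dp.process() < 0)
			break;
	}
	--dpEnabled;            // no more deferrals once we are gone
}

int main()
{
	DeferredQueue dp;
	std::thread worker(backgroundThreadMain, std::ref(dp));
	dp.enqueue(42); // wakes the worker
	dp.shutdown();  // process() then returns -1 and the loop exits
	worker.join();
	return 0;
}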


@@ -57,12 +57,12 @@ public:
 		node(n)
 		,identity()
 		,localNetworkController((NetworkController *)0)
-		,dp((DeferredPackets *)0)
 		,sw((Switch *)0)
 		,mc((Multicaster *)0)
 		,antiRec((AntiRecursion *)0)
 		,topology((Topology *)0)
 		,sa((SelfAwareness *)0)
+		,dp((DeferredPackets *)0)
 #ifdef ZT_ENABLE_CLUSTER
 		,cluster((Cluster *)0)
 #endif
@@ -80,10 +80,6 @@ public:
 	// This is set externally to an instance of this base class
 	NetworkController *localNetworkController;
-	// This is created if background threads call Node::backgroundThreadMain().
-	DeferredPackets *volatile dp; // can be read without lock but not written
-	Mutex dpSetLock;
 	/*
 	 * Order matters a bit here. These are constructed in this order
 	 * and then deleted in the opposite order on Node exit. The order ensures
@@ -97,10 +93,15 @@ public:
 	AntiRecursion *antiRec;
 	Topology *topology;
 	SelfAwareness *sa;
+	DeferredPackets *dp;
 #ifdef ZT_ENABLE_CLUSTER
 	Cluster *cluster;
 #endif
+	// This is set to >0 if background threads are waiting on deferred
+	// packets, otherwise 'dp' should not be used.
+	volatile int dpEnabled;
 };

} // namespace ZeroTier
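Moving dp to the end of the subsystem list is deliberate: per the "order matters" comment, these objects are constructed in declaration order and deleted in the opposite order in ~Node(), so the deferred-packet queue — which calls into the other subsystems — is now torn down first. A toy illustration of that rule, using hypothetical stand-in classes rather than the real ZeroTier ones:

#include <cstdio>

struct Topology {
	Topology()  { std::puts("topology constructed"); }
	~Topology() { std::puts("topology destroyed"); }
};

// Depends on Topology, so it must be built after it and destroyed before it.
struct DeferredPackets {
	explicit DeferredPackets(Topology *t) : topo(t) { std::puts("dp constructed"); }
	~DeferredPackets() { std::puts("dp destroyed"); }
	Topology *topo;
};

int main()
{
	// Construct in dependency order...
	Topology *topology = new Topology();
	DeferredPackets *dp = new DeferredPackets(topology);
	// ...and delete in reverse, as the Node.cpp destructor hunk does,
	// so dp never outlives a subsystem it might still reference.
	delete dp;
	delete topology;
	return 0;
}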


@@ -4,9 +4,10 @@
 # 250 with a 16GB RAM VM like Amazon m4.xlarge seems good
 NUM_CONTAINERS=250
 CONTAINER_IMAGE=zerotier/http-test
+SCALE_UP_DELAY=4

 export PATH=/bin:/usr/bin:/usr/local/bin:/usr/sbin:/sbin

-pssh -h big-test-hosts -x '-t -t' -i -OUserKnownHostsFile=/dev/null -OStrictHostKeyChecking=no -t 0 -p 256 "sudo sysctl -w net.netfilter.nf_conntrack_max=262144 ; for ((n=0;n<$NUM_CONTAINERS;n++)); do sudo docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep 10; done"
+pssh -h big-test-hosts -x '-t -t' -i -OUserKnownHostsFile=/dev/null -OStrictHostKeyChecking=no -t 0 -p 256 "sudo sysctl -w net.netfilter.nf_conntrack_max=262144 ; for ((n=0;n<$NUM_CONTAINERS;n++)); do sudo docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep $SCALE_UP_DELAY; done"

 exit 0