Tweak some more timings for better reliability.

Adam Ierymenko 2015-11-02 15:38:53 -08:00
parent 60ce886605
commit 7fbe2f7adf
8 changed files with 22 additions and 19 deletions

View File

@@ -55,7 +55,7 @@
 /**
  * How often should we announce that we have a peer?
  */
-#define ZT_CLUSTER_HAVE_PEER_ANNOUNCE_PERIOD (ZT_PEER_DIRECT_PING_DELAY / 2)
+#define ZT_CLUSTER_HAVE_PEER_ANNOUNCE_PERIOD ZT_PEER_DIRECT_PING_DELAY
 
 /**
  * Desired period between doPeriodicTasks() in milliseconds
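
With the ZT_PEER_DIRECT_PING_DELAY of 60000 ms shown in the next file, this changes the cluster's HAVE_PEER announcement from every 30 seconds to every 60 seconds, matching the direct ping delay. A minimal standalone sketch of the arithmetic (illustrative only, not part of the commit):

#include <cstdio>

#define ZT_PEER_DIRECT_PING_DELAY 60000

int main()
{
	const long oldPeriod = ZT_PEER_DIRECT_PING_DELAY / 2; // 30000 ms -> HAVE_PEER every 30 s
	const long newPeriod = ZT_PEER_DIRECT_PING_DELAY;     // 60000 ms -> HAVE_PEER every 60 s
	std::printf("announce period: old %ld ms, new %ld ms\n",oldPeriod,newPeriod);
	return 0;
}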

View File

@@ -266,16 +266,16 @@
  */
 #define ZT_PEER_DIRECT_PING_DELAY 60000
 
-/**
- * Timeout for overall peer activity (measured from last receive)
- */
-#define ZT_PEER_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 4) + ZT_PING_CHECK_INVERVAL)
-
 /**
  * Delay between requests for updated network autoconf information
  */
 #define ZT_NETWORK_AUTOCONF_DELAY 60000
 
+/**
+ * Timeout for overall peer activity (measured from last receive)
+ */
+#define ZT_PEER_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 3) + (ZT_PING_CHECK_INVERVAL * 2))
+
 /**
  * Minimum interval between attempts by relays to unite peers
  *
@@ -283,7 +283,7 @@
  * a RENDEZVOUS message no more than this often. This instructs the peers
  * to attempt NAT-t and gives each the other's corresponding IP:port pair.
  */
-#define ZT_MIN_UNITE_INTERVAL 60000
+#define ZT_MIN_UNITE_INTERVAL 30000
 
 /**
  * Delay between initial direct NAT-t packet and more aggressive techniques
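
Two timing changes in this file. The activity timeout trades one ZT_PEER_DIRECT_PING_DELAY for a second ZT_PING_CHECK_INVERVAL, so a peer now stays "active" across two missed ping-check passes instead of one; halving ZT_MIN_UNITE_INTERVAL lets relays send RENDEZVOUS for a given peer pair twice as often. A standalone sketch of the timeout arithmetic; ZT_PING_CHECK_INVERVAL's value is not visible in this diff, so the 62500 ms below is an assumption for illustration only:

#include <cstdio>

#define ZT_PEER_DIRECT_PING_DELAY 60000
#define ZT_PING_CHECK_INVERVAL 62500 // assumed value -- not shown in this diff

int main()
{
	const long oldTimeout = (ZT_PEER_DIRECT_PING_DELAY * 4) + ZT_PING_CHECK_INVERVAL;       // 302500 ms under the assumption
	const long newTimeout = (ZT_PEER_DIRECT_PING_DELAY * 3) + (ZT_PING_CHECK_INVERVAL * 2); // 305000 ms under the assumption
	std::printf("activity timeout: old %ld ms, new %ld ms\n",oldTimeout,newTimeout);
	return 0;
}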

View File

@@ -263,7 +263,7 @@ public:
 			}
 			lastReceiveFromUpstream = std::max(p->lastReceive(),lastReceiveFromUpstream);
-		} else if (p->alive(_now)) {
+		} else if (p->activelyTransferringFrames(_now)) {
 			// Normal nodes get their preferred link kept alive if the node has generated frame traffic recently
 			p->doPingAndKeepalive(RR,_now,0);
 		}

View File

@@ -231,9 +231,9 @@ public:
 	inline uint64_t lastAnnouncedTo() const throw() { return _lastAnnouncedTo; }
 
 	/**
-	 * @return True if peer has received an actual data frame within ZT_PEER_ACTIVITY_TIMEOUT milliseconds
+	 * @return True if this peer is actively sending real network frames
 	 */
-	inline uint64_t alive(uint64_t now) const throw() { return ((now - lastFrame()) < ZT_PEER_ACTIVITY_TIMEOUT); }
+	inline uint64_t activelyTransferringFrames(uint64_t now) const throw() { return ((now - lastFrame()) < ZT_PEER_ACTIVITY_TIMEOUT); }
 
 	/**
 	 * @return Current latency or 0 if unknown (max: 65535)
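
The alive() to activelyTransferringFrames() rename is behavior-preserving: the comparison against ZT_PEER_ACTIVITY_TIMEOUT is unchanged, and the new name says what is actually tested, namely recent real frame traffic rather than link liveness. A simplified standalone model (the struct and timeout value below are illustrative stand-ins, not the real Peer class):

#include <cstdint>
#include <cstdio>

#define ZT_PEER_ACTIVITY_TIMEOUT 305000 // illustrative value only

struct PeerModel
{
	uint64_t lastFrameTime; // updated whenever an actual data frame arrives from this peer

	// Same comparison as the old alive(); only the name changed to reflect
	// that this tests recent frame traffic, not reachability.
	bool activelyTransferringFrames(uint64_t now) const
	{
		return ((now - lastFrameTime) < ZT_PEER_ACTIVITY_TIMEOUT);
	}
};

int main()
{
	PeerModel p{1000000};
	std::printf("%d\n",(int)p.activelyTransferringFrames(1200000)); // 1: last frame 200 s ago
	std::printf("%d\n",(int)p.activelyTransferringFrames(1400000)); // 0: last frame 400 s ago
	return 0;
}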

View File

@@ -128,7 +128,7 @@ void SelfAwareness::iam(const Address &reporter,const InetAddress &reporterPhysi
 	// links to be re-established if possible, possibly using a root server or some
 	// other relay.
 	for(std::vector< SharedPtr<Peer> >::const_iterator p(rset.peersReset.begin());p!=rset.peersReset.end();++p) {
-		if ((*p)->alive(now)) {
+		if ((*p)->activelyTransferringFrames(now)) {
 			Packet outp((*p)->address(),RR->identity.address(),Packet::VERB_NOP);
 			RR->sw->send(outp,true,0);
 		}

View File

@@ -442,8 +442,8 @@ unsigned long Switch::doTimerTasks(uint64_t now)
 		Mutex::Lock _l(_contactQueue_m);
 		for(std::list<ContactQueueEntry>::iterator qi(_contactQueue.begin());qi!=_contactQueue.end();) {
 			if (now >= qi->fireAtTime) {
-				if ((!qi->peer->alive(now))||(qi->peer->hasActiveDirectPath(now))) {
-					// Cancel attempt if we've already connected or peer is no longer "alive"
+				if (qi->peer->hasActiveDirectPath(now)) {
+					// Cancel if connection has succeeded
 					_contactQueue.erase(qi++);
 					continue;
 				} else {
@@ -539,7 +539,7 @@ unsigned long Switch::doTimerTasks(uint64_t now)
 		_LastUniteKey *k = (_LastUniteKey *)0;
 		uint64_t *v = (uint64_t *)0;
 		while (i.next(k,v)) {
-			if ((now - *v) >= (ZT_MIN_UNITE_INTERVAL * 16))
+			if ((now - *v) >= (ZT_MIN_UNITE_INTERVAL * 8))
 				_lastUniteAttempt.erase(*k);
 		}
 	}
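
Two behavioral changes in Switch::doTimerTasks(): a queued NAT-t contact attempt is now canceled only once a direct path actually exists, rather than also when the peer has gone quiet, and stale last-unite records are pruned after 8 rather than 16 unite intervals (with the halved interval above, 4 minutes instead of 16). The contact queue loop relies on the erase(qi++) idiom for removing from a std::list mid-iteration; a self-contained sketch of that idiom:

#include <cstdio>
#include <list>

int main()
{
	std::list<int> contactQueue = {1,2,3,4,5};
	for(std::list<int>::iterator qi(contactQueue.begin());qi!=contactQueue.end();) {
		if ((*qi % 2) == 0) {
			// erase(qi++) hands the current iterator to erase() after qi
			// has already advanced, so iteration survives the removal
			contactQueue.erase(qi++);
			continue;
		} else ++qi;
	}
	for(int v : contactQueue)
		std::printf("%d ",v); // prints: 1 3 5
	std::printf("\n");
	return 0;
}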

View File

@@ -81,6 +81,11 @@ public:
 	/**
 	 * Get a peer only if it is presently in memory (no disk cache)
 	 *
+	 * This also does not update the lastUsed() time for peers, which means
+	 * that it won't prevent them from falling out of RAM. This is currently
+	 * used in the Cluster code to update peer info without forcing all peers
+	 * across the entire cluster to remain in memory cache.
+	 *
 	 * @param zta ZeroTier address
 	 * @param now Current time
 	 */
@@ -88,10 +93,8 @@ public:
 	{
 		Mutex::Lock _l(_lock);
 		const SharedPtr<Peer> *const ap = _peers.get(zta);
-		if (ap) {
-			(*ap)->use(now);
+		if (ap)
 			return *ap;
-		}
 		return SharedPtr<Peer>();
 	}
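
The added doc comment is the substance of this change: getPeerNoCache() no longer bumps lastUsed(), so cluster code can consult many peers without pinning them all in the memory cache. A simplified standalone model of the distinction (the types and container below are stand-ins for the real Topology internals):

#include <cstdint>
#include <map>
#include <memory>

struct Peer
{
	uint64_t lastUsedTime = 0;
	void use(uint64_t now) { lastUsedTime = now; } // keeps the peer eligible to stay resident
};

struct TopologyModel
{
	std::map<uint64_t,std::shared_ptr<Peer>> peers;

	// Normal lookup: bumps lastUsed() so the peer stays in the RAM cache.
	std::shared_ptr<Peer> getPeer(uint64_t zta,uint64_t now)
	{
		auto it = peers.find(zta);
		if (it == peers.end())
			return std::shared_ptr<Peer>();
		it->second->use(now);
		return it->second;
	}

	// No-cache lookup: returns the peer without touching lastUsed(), so a
	// scan over many peers (e.g. from cluster code) won't force them all
	// to remain in memory.
	std::shared_ptr<Peer> getPeerNoCache(uint64_t zta)
	{
		auto it = peers.find(zta);
		return ((it == peers.end()) ? std::shared_ptr<Peer>() : it->second);
	}
};

int main()
{
	TopologyModel topo;
	topo.peers[0x1122334455ULL] = std::make_shared<Peer>();
	topo.getPeerNoCache(0x1122334455ULL); // lastUsedTime stays 0
	topo.getPeer(0x1122334455ULL,1000);   // lastUsedTime becomes 1000
	return 0;
}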

View File

@@ -1,7 +1,7 @@
 #!/bin/bash
 
 # Edit as needed -- note that >1000 per host is likely problematic due to Linux kernel limits
-NUM_CONTAINERS=25
+NUM_CONTAINERS=50
 CONTAINER_IMAGE=zerotier/http-test
 #
@@ -25,6 +25,6 @@ export PATH=/bin:/usr/bin:/usr/local/bin:/usr/sbin:/sbin
 # docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE
 #done
 
-pssh -h big-test-hosts -i -t 0 -p 256 "for ((n=0;n<$NUM_CONTAINERS;n++)); do docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep 0.25; done"
+pssh -h big-test-hosts -o big-test-out -t 0 -p 256 "for ((n=0;n<$NUM_CONTAINERS;n++)); do docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep 0.25; done"
 
 exit 0