Such ping logic. So edge case.

This commit is contained in:
Adam Ierymenko 2014-04-03 14:36:52 -07:00
parent b1088a6bd7
commit c96d3ebf8c
5 changed files with 82 additions and 27 deletions

View File

@ -263,11 +263,11 @@ error_no_byte_order_defined;
#define ZT_NETWORK_FINGERPRINT_CHECK_DELAY 5000
/**
* Delay between pings (actually HELLOs) to direct links
* Delay between ordinary case pings of direct links
*/
#define ZT_PEER_DIRECT_PING_DELAY 120000
/**
/**
* Delay in ms between firewall opener packets to direct links
*
* This should be lower than the UDP conversation entry timeout in most
@ -297,12 +297,12 @@ error_no_byte_order_defined;
*
* A link that hasn't spoken in this long is simply considered inactive.
*/
#define ZT_PEER_PATH_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 2) + 1000)
#define ZT_PEER_PATH_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 2) + 10000)
/**
* Close TCP tunnels if unused for this long
*/
#define ZT_TCP_TUNNEL_ACTIVITY_TIMEOUT ZT_PEER_PATH_ACTIVITY_TIMEOUT
#define ZT_TCP_TUNNEL_ACTIVITY_TIMEOUT (ZT_PEER_PATH_ACTIVITY_TIMEOUT + 10000)
/**
* Try TCP tunnels if nothing received for this long
@ -314,6 +314,11 @@ error_no_byte_order_defined;
*/
#define ZT_STARTUP_AGGRO 5000
/**
* Time in ms since a ping was sent after which it is considered unanswered
*/
#define ZT_PING_UNANSWERED_AFTER 2500
/**
* Stop relaying via peers that have not responded to direct sends in this long
*/

View File

@ -536,10 +536,10 @@ Node::ReasonForTermination Node::run()
uint64_t lastNetworkAutoconfCheck = Utils::now() - 5000ULL; // check autoconf again after 5s for startup
uint64_t lastPingCheck = 0;
uint64_t lastSupernodePing = 0;
uint64_t lastClean = Utils::now(); // don't need to do this immediately
uint64_t lastNetworkFingerprintCheck = 0;
uint64_t lastMulticastCheck = 0;
uint64_t lastSupernodePingCheck = 0;
long lastDelayDelta = 0;
uint64_t networkConfigurationFingerprint = 0;
@ -592,13 +592,9 @@ Node::ReasonForTermination Node::run()
/* Ping supernodes separately, and do so more aggressively if we haven't
* heard anything from anyone since our last resynchronize / startup. */
if ( ((now - lastSupernodePing) >= ZT_PEER_DIRECT_PING_DELAY) ||
((_r->timeOfLastResynchronize > _r->timeOfLastPacketReceived) && ((now - lastSupernodePing) >= ZT_STARTUP_AGGRO)) ) {
lastSupernodePing = now;
std::vector< SharedPtr<Peer> > sns(_r->topology->supernodePeers());
TRACE("pinging %d supernodes",(int)sns.size());
for(std::vector< SharedPtr<Peer> >::const_iterator p(sns.begin());p!=sns.end();++p)
(*p)->sendPing(_r,now);
if ((now - lastSupernodePingCheck) >= ZT_STARTUP_AGGRO) {
lastSupernodePingCheck = now;
_r->topology->eachSupernodePeer(Topology::PingSupernodesThatNeedPing(_r,now));
}
if (resynchronize) {

View File

@ -216,7 +216,7 @@ bool Peer::isTcpFailoverTime(const RuntimeEnvironment *_r,uint64_t now) const
void Peer::clean(uint64_t now)
{
Mutex::Lock _l(_lock);
unsigned long i = 0,o = 0,l = _paths.size();
unsigned long i = 0,o = 0,l = (unsigned long)_paths.size();
while (i != l) {
if (_paths[i].active(now))
_paths[o++] = _paths[i];

View File

@ -226,6 +226,26 @@ public:
return x;
}
/**
 * Determine whether the most recent ping sent to this peer is unanswered
 *
 * The newest ping time and newest receive time are collected across all
 * paths while holding the peer lock. The ping counts as unanswered when
 * it was sent after the last resynchronize, nothing has been received
 * since it was sent, and at least ZT_PING_UNANSWERED_AFTER has elapsed
 * since it was sent.
 *
 * @param _r Runtime environment (timeOfLastResynchronize is read from it)
 * @param now Current time
 * @return True if the last ping is unanswered
 */
inline bool pingUnanswered(const RuntimeEnvironment *_r,uint64_t now)
	throw()
{
	uint64_t lp = 0; // most recent ping sent on any path
	uint64_t lr = 0; // most recent receive on any path
	{
		Mutex::Lock _l(_lock);
		for(std::vector<Path>::const_iterator p(_paths.begin());p!=_paths.end();++p) {
			lp = std::max(p->lastPing(),lp);
			lr = std::max(p->lastReceived(),lr);
		}
	}

	// Pings sent before (or at) the last resynchronize are not considered.
	if (lp <= _r->timeOfLastResynchronize)
		return false;

	// Something was received at or after the ping was sent: it was answered.
	if (lr >= lp)
		return false;

	// Measure elapsed time since the ping itself (not the gap between the
	// last receive and the ping), and guard the unsigned subtraction in
	// case the supplied time is earlier than the recorded ping time.
	return ( (now >= lp) && ((now - lp) >= ZT_PING_UNANSWERED_AFTER) );
}
/**
* @return Time of most recent unicast frame received
*/

View File

@ -180,6 +180,20 @@ public:
f(*this,p->second);
}
/**
* Apply a function or function object to all supernode peers
*
* @param f Function to apply
* @tparam F Function or function object type
*/
template<typename F>
inline void eachSupernodePeer(F f)
{
Mutex::Lock _l(_supernodes_m);
for(std::vector< SharedPtr<Peer> >::const_iterator p(_supernodePeers.begin());p!=_supernodePeers.end();++p)
f(*this,*p);
}
/**
* Function object to collect peers that need a firewall opener sent
*/
@ -214,20 +228,16 @@ public:
/**
 * Function call operator: examines one peer and sends it a ping via
 * p->sendPing(_r,_now) when the conditions below hold.
 *
 * NOTE(review): this span appears to contain both the earlier nested
 * condition and its replacement (it looks like it was captured from a
 * diff view). Read literally, the two `if` statements are independent
 * and each may call sendPing() -- confirm that only the second one is
 * meant to remain.
 *
 * @param t Topology supplied by the caller (not referenced in this body)
 * @param p Peer to examine
 */
inline void operator()(Topology &t,const SharedPtr<Peer> &p)
{
if (
/* 1: we have not heard anything directly in ZT_PEER_DIRECT_PING_DELAY ms */
((_now - p->lastDirectReceive()) >= ZT_PEER_DIRECT_PING_DELAY) &&
/* 2: */
(
/* 2a: peer has direct path, and has sent us something recently */
(
(p->hasDirectPath())&&
((_now - p->lastFrame()) < ZT_PEER_PATH_ACTIVITY_TIMEOUT)
) &&
/* 2b: peer is not a supernode */
(!_supernodeAddresses.count(p->address()))
)
) { p->sendPing(_r,_now); }
/* For ordinary nodes we ping if they've sent us a frame recently,
* otherwise they are stale and we let the link die.
*
* Note that we measure ping time from time of last receive rather
* than time of last send in order to only count full round trips. */
if ( (!_supernodeAddresses.count(p->address())) &&
((_now - p->lastFrame()) < ZT_PEER_PATH_ACTIVITY_TIMEOUT) &&
((_now - p->lastDirectReceive()) > ZT_PEER_DIRECT_PING_DELAY) ) {
p->sendPing(_r,_now);
}
}
private:
@ -236,6 +246,30 @@ public:
const RuntimeEnvironment *_r;
};
/**
 * Function object that pings a supernode peer when it is due for one
 *
 * A ping is sent if the peer reports its last ping as unanswered, or if
 * more than ZT_PEER_DIRECT_PING_DELAY has passed since the last direct
 * receive from it. No check for recently seen frames is made here.
 */
class PingSupernodesThatNeedPing
{
public:
	PingSupernodesThatNeedPing(const RuntimeEnvironment *renv,uint64_t now) throw() :
		_now(now),
		_r(renv)
	{
	}

	/**
	 * @param t Topology supplied by the caller (not referenced here)
	 * @param p Supernode peer to examine and possibly ping
	 */
	inline void operator()(Topology &t,const SharedPtr<Peer> &p)
	{
		// Check for an unanswered ping first; only when there is none do
		// we fall back to the ordinary direct-receive age test.
		bool due = p->pingUnanswered(_r,_now);
		if (!due)
			due = ((_now - p->lastDirectReceive()) > ZT_PEER_DIRECT_PING_DELAY);

		if (due)
			p->sendPing(_r,_now);
	}

private:
	uint64_t _now;
	const RuntimeEnvironment *_r;
};
/**
* Function object to forget direct links to active peers and then ping them indirectly
*