mirror of
https://github.com/zerotier/ZeroTierOne.git
synced 2024-12-22 06:17:48 +00:00
More tweaks to algorithm for determining when to fail over to TCP, and stop supernodes from resynchronizing unless explicitly ordered.
This commit is contained in:
parent
0e1fc06a6f
commit
700a450806
@ -305,9 +305,14 @@ error_no_byte_order_defined;
|
||||
#define ZT_TCP_TUNNEL_ACTIVITY_TIMEOUT ZT_PEER_PATH_ACTIVITY_TIMEOUT
|
||||
|
||||
/**
|
||||
* Try TCP tunnels if no response to UDP PINGs in this many milliseconds
|
||||
* Try TCP tunnels if nothing received for this long
|
||||
*/
|
||||
#define ZT_PING_UNANSWERED_AFTER 5000
|
||||
#define ZT_TCP_TUNNEL_FAILOVER_TIMEOUT 5000
|
||||
|
||||
/**
|
||||
* Try to ping supernodes this often until we get something from somewhere
|
||||
*/
|
||||
#define ZT_STARTUP_AGGRO 5000
|
||||
|
||||
/**
|
||||
* Stop relaying via peers that have not responded to direct sends in this long
|
||||
|
@ -540,7 +540,7 @@ Node::ReasonForTermination Node::run()
|
||||
long lastDelayDelta = 0;
|
||||
|
||||
uint64_t networkConfigurationFingerprint = 0;
|
||||
_r->timeOfLastResynchronize = 0;
|
||||
_r->timeOfLastResynchronize = Utils::now();
|
||||
|
||||
while (impl->reasonForTermination == NODE_RUNNING) {
|
||||
if (Utils::fileExists(shutdownIfUnreadablePath.c_str(),false)) {
|
||||
@ -551,13 +551,7 @@ Node::ReasonForTermination Node::run()
|
||||
}
|
||||
|
||||
uint64_t now = Utils::now();
|
||||
|
||||
// Did the user send SIGHUP or otherwise order network resync? (mostly for debugging)
|
||||
bool resynchronize = impl->resynchronize;
|
||||
impl->resynchronize = false;
|
||||
if (resynchronize) {
|
||||
LOG("manual resynchronize ordered, resyncing with network");
|
||||
}
|
||||
bool resynchronize = false;
|
||||
|
||||
// If it looks like the computer slept and woke, resynchronize.
|
||||
if (lastDelayDelta >= ZT_SLEEP_WAKE_DETECTION_THRESHOLD) {
|
||||
@ -577,18 +571,29 @@ Node::ReasonForTermination Node::run()
|
||||
}
|
||||
}
|
||||
|
||||
// Supernodes do not resynchronize unless explicitly ordered via SIGHUP.
|
||||
if ((resynchronize)&&(_r->topology->amSupernode()))
|
||||
resynchronize = false;
|
||||
|
||||
// Check for SIGHUP / force resync.
|
||||
if (impl->resynchronize) {
|
||||
impl->resynchronize = false;
|
||||
resynchronize = true;
|
||||
LOG("resynchronize forced by user, syncing with network");
|
||||
}
|
||||
|
||||
if (resynchronize)
|
||||
_r->timeOfLastResynchronize = now;
|
||||
|
||||
/* Ping supernodes separately, and do so more aggressively if we haven't
|
||||
* heard anything from anyone since our last resynchronize / startup. */
|
||||
if ( ((now - lastSupernodePing) >= ZT_PEER_DIRECT_PING_DELAY) ||
|
||||
((_r->timeOfLastResynchronize > _r->timeOfLastPacketReceived) && ((now - lastSupernodePing) >= ZT_PING_UNANSWERED_AFTER)) ) {
|
||||
((_r->timeOfLastResynchronize > _r->timeOfLastPacketReceived) && ((now - lastSupernodePing) >= ZT_STARTUP_AGGRO)) ) {
|
||||
lastSupernodePing = now;
|
||||
std::vector< SharedPtr<Peer> > sns(_r->topology->supernodePeers());
|
||||
TRACE("pinging %d supernodes",(int)sns.size());
|
||||
for(std::vector< SharedPtr<Peer> >::const_iterator p(sns.begin());p!=sns.end();++p)
|
||||
(*p)->sendPing(_r,now,resynchronize);
|
||||
(*p)->sendPing(_r,now);
|
||||
}
|
||||
|
||||
if (resynchronize) {
|
||||
@ -625,7 +630,7 @@ Node::ReasonForTermination Node::run()
|
||||
if ((now - lastPingCheck) >= ZT_PING_CHECK_DELAY) {
|
||||
lastPingCheck = now;
|
||||
try {
|
||||
_r->topology->eachPeer(Topology::PingPeersThatNeedPing(_r,now,resynchronize));
|
||||
_r->topology->eachPeer(Topology::PingPeersThatNeedPing(_r,now));
|
||||
_r->topology->eachPeer(Topology::OpenPeersThatNeedFirewallOpener(_r,now));
|
||||
} catch (std::exception &exc) {
|
||||
LOG("unexpected exception running ping check cycle: %s",exc.what());
|
||||
|
@ -490,6 +490,7 @@ bool PacketDecoder::_doMULTICAST_FRAME(const RuntimeEnvironment *_r,const Shared
|
||||
const unsigned int signatureLen = at<uint16_t>(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME + frameLen);
|
||||
const unsigned char *const signature = field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME + frameLen + 2,signatureLen);
|
||||
|
||||
/*
|
||||
TRACE("MULTICAST_FRAME @%.16llx #%.16llx from %s<%s> via %s(%s) to %s [ %s, %d bytes, depth %d ]",
|
||||
(unsigned long long)nwid,
|
||||
(unsigned long long)guid,
|
||||
@ -499,6 +500,7 @@ bool PacketDecoder::_doMULTICAST_FRAME(const RuntimeEnvironment *_r,const Shared
|
||||
Switch::etherTypeName(etherType),
|
||||
(int)frameLen,
|
||||
(int)depth);
|
||||
*/
|
||||
|
||||
SharedPtr<Network> network(_r->nc->network(nwid));
|
||||
|
||||
|
@ -117,8 +117,13 @@ void Peer::receive(
|
||||
_lastMulticastFrame = now;
|
||||
}
|
||||
|
||||
|
||||
bool Peer::send(const RuntimeEnvironment *_r,const void *data,unsigned int len,uint64_t now)
|
||||
{
|
||||
// Note: we'll still use TCP here if that's all we have, but if this
|
||||
// is false we will prefer UDP.
|
||||
bool useTcp = isTcpFailoverTime(_r,now);
|
||||
|
||||
Mutex::Lock _l(_lock);
|
||||
|
||||
std::vector<Path>::iterator p(_paths.begin());
|
||||
@ -127,11 +132,13 @@ bool Peer::send(const RuntimeEnvironment *_r,const void *data,unsigned int len,u
|
||||
|
||||
uint64_t bestPathLastReceived = p->lastReceived();
|
||||
std::vector<Path>::iterator bestPath = p;
|
||||
bool bestPathIsTcp = p->tcp();
|
||||
while (++p != _paths.end()) {
|
||||
uint64_t lr = p->lastReceived();
|
||||
if (lr > bestPathLastReceived) {
|
||||
if ( (lr > bestPathLastReceived) || ((bestPathIsTcp)&&(!useTcp)) ) {
|
||||
bestPathLastReceived = lr;
|
||||
bestPath = p;
|
||||
bestPathIsTcp = p->tcp();
|
||||
}
|
||||
}
|
||||
|
||||
@ -156,35 +163,19 @@ bool Peer::sendFirewallOpener(const RuntimeEnvironment *_r,uint64_t now)
|
||||
return sent;
|
||||
}
|
||||
|
||||
bool Peer::sendPing(const RuntimeEnvironment *_r,uint64_t now,bool firstSinceReset)
|
||||
bool Peer::sendPing(const RuntimeEnvironment *_r,uint64_t now)
|
||||
{
|
||||
bool sent = false;
|
||||
SharedPtr<Peer> self(this);
|
||||
|
||||
// In the ping case we will never send TCP unless this returns true.
|
||||
bool useTcp = isTcpFailoverTime(_r,now);
|
||||
|
||||
TRACE("PING %s (useTcp==%d)",_id.address().toString().c_str(),(int)useTcp);
|
||||
|
||||
Mutex::Lock _l(_lock);
|
||||
|
||||
// NOTE: this will never ping a peer that has *only* TCP paths. Right
|
||||
// now there's never such a thing as TCP is only for failover.
|
||||
|
||||
bool pingTcp;
|
||||
if (!firstSinceReset) {
|
||||
uint64_t lastUdp = 0;
|
||||
uint64_t lastTcp = 0;
|
||||
uint64_t lastPing = 0;
|
||||
for(std::vector<Path>::iterator p(_paths.begin());p!=_paths.end();++p) {
|
||||
if (p->tcp())
|
||||
lastTcp = std::max(p->lastReceived(),lastTcp);
|
||||
else lastUdp = std::max(p->lastReceived(),lastUdp);
|
||||
lastPing = std::max(p->lastPing(),lastPing);
|
||||
}
|
||||
uint64_t lastAny = std::max(lastUdp,lastTcp);
|
||||
pingTcp = ( ( (lastAny < lastPing) && ((lastPing - lastAny) >= ZT_PING_UNANSWERED_AFTER) ) || (lastTcp > lastUdp) );
|
||||
} else pingTcp = false;
|
||||
|
||||
TRACE("PING %s (pingTcp==%d)",_id.address().toString().c_str(),(int)pingTcp);
|
||||
|
||||
for(std::vector<Path>::iterator p(_paths.begin());p!=_paths.end();++p) {
|
||||
if ((pingTcp)||(!p->tcp())) {
|
||||
if ((useTcp)||(!p->tcp())) {
|
||||
if (_r->sw->sendHELLO(self,*p)) {
|
||||
p->sent(now);
|
||||
p->pinged(now);
|
||||
|
@ -160,10 +160,9 @@ public:
|
||||
*
|
||||
* @param _r Runtime environment
|
||||
* @param now Current time
|
||||
* @param firstSinceReset If true, this is the first ping sent since a network reset
|
||||
* @return True if send appears successful for at least one address type
|
||||
*/
|
||||
bool sendPing(const RuntimeEnvironment *_r,uint64_t now,bool firstSinceReset);
|
||||
bool sendPing(const RuntimeEnvironment *_r,uint64_t now);
|
||||
|
||||
/**
|
||||
* Called periodically by Topology::clean() to remove stale paths and do other cleanup
|
||||
@ -263,6 +262,33 @@ public:
|
||||
return _lastAnnouncedTo;
|
||||
}
|
||||
|
||||
/**
* Decide whether UDP looks unresponsive enough that TCP paths should be tried.
*
* Returns false until at least ZT_TCP_TUNNEL_FAILOVER_TIMEOUT has elapsed since
* _r->timeOfLastResynchronize. After that, only paths whose type() is
* Path::PATH_TYPE_UDP are examined: the result is true when the newest UDP
* ping is more recent than the newest UDP receive and that ping is itself at
* least ZT_TCP_TUNNEL_FAILOVER_TIMEOUT old. A peer with no UDP paths leaves
* both timestamps at 0 and therefore yields false.
*
* This method does not check whether any TCP path actually exists; that is
* left to the caller.
*
* Takes _lock internally while scanning _paths.
* NOTE(review): presumably callers must not already hold _lock when calling
* this -- confirm whether Mutex is recursive.
*
|
||||
* @param _r Runtime environment
|
||||
* @param now Current time
|
||||
* @return True if it's time to attempt TCP failover (if we have TCP_OUT paths)
|
||||
*/
|
||||
inline bool isTcpFailoverTime(const RuntimeEnvironment *_r,uint64_t now) const
|
||||
throw()
|
||||
{
|
||||
// Grace period: never fail over until the timeout has elapsed since the last resynchronize.
if ((now - _r->timeOfLastResynchronize) >= ZT_TCP_TUNNEL_FAILOVER_TIMEOUT) {
|
||||
uint64_t lastUdpPingSent = 0;
|
||||
uint64_t lastUdpReceive = 0;
|
||||
|
||||
// Inner scope so the lock is released before the final comparison.
{
|
||||
Mutex::Lock _l(_lock);
|
||||
// Track the newest ping and newest receive timestamps across UDP paths only.
for(std::vector<Path>::const_iterator p(_paths.begin());p!=_paths.end();++p) {
|
||||
if (p->type() == Path::PATH_TYPE_UDP) {
|
||||
lastUdpPingSent = std::max(lastUdpPingSent,p->lastPing());
|
||||
lastUdpReceive = std::max(lastUdpReceive,p->lastReceived());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// True only if the newest UDP ping postdates the newest UDP receive and is at least the timeout old.
return ( (lastUdpPingSent > lastUdpReceive) && ((now - lastUdpPingSent) >= ZT_TCP_TUNNEL_FAILOVER_TIMEOUT) );
|
||||
}
|
||||
// Still within the grace period after the last resynchronize.
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Current latency or 0 if unknown (max: 65535)
|
||||
*/
|
||||
|
@ -207,11 +207,10 @@ public:
|
||||
class PingPeersThatNeedPing
|
||||
{
|
||||
public:
|
||||
PingPeersThatNeedPing(const RuntimeEnvironment *renv,uint64_t now,bool firstSinceReset) throw() :
|
||||
PingPeersThatNeedPing(const RuntimeEnvironment *renv,uint64_t now) throw() :
|
||||
_now(now),
|
||||
_supernodeAddresses(renv->topology->supernodeAddresses()),
|
||||
_r(renv),
|
||||
_firstSinceReset(firstSinceReset) {}
|
||||
_r(renv) {}
|
||||
|
||||
inline void operator()(Topology &t,const SharedPtr<Peer> &p)
|
||||
{
|
||||
@ -228,14 +227,13 @@ public:
|
||||
/* 2b: peer is not a supernode */
|
||||
(!_supernodeAddresses.count(p->address()))
|
||||
)
|
||||
) { p->sendPing(_r,_now,_firstSinceReset); }
|
||||
) { p->sendPing(_r,_now); }
|
||||
}
|
||||
|
||||
private:
|
||||
uint64_t _now;
|
||||
std::set<Address> _supernodeAddresses;
|
||||
const RuntimeEnvironment *_r;
|
||||
bool _firstSinceReset;
|
||||
};
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user