diff --git a/node/Constants.hpp b/node/Constants.hpp index a003c88c8..f0db9d3f0 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -305,9 +305,14 @@ error_no_byte_order_defined; #define ZT_TCP_TUNNEL_ACTIVITY_TIMEOUT ZT_PEER_PATH_ACTIVITY_TIMEOUT /** - * Try TCP tunnels if no response to UDP PINGs in this many milliseconds + * Try TCP tunnels if nothing received for this long */ -#define ZT_PING_UNANSWERED_AFTER 5000 +#define ZT_TCP_TUNNEL_FAILOVER_TIMEOUT 5000 + +/** + * Try to ping supernodes this often until we get something from somewhere + */ +#define ZT_STARTUP_AGGRO 5000 /** * Stop relaying via peers that have not responded to direct sends in this long diff --git a/node/Node.cpp b/node/Node.cpp index a59789360..a0dd14c17 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -540,7 +540,7 @@ Node::ReasonForTermination Node::run() long lastDelayDelta = 0; uint64_t networkConfigurationFingerprint = 0; - _r->timeOfLastResynchronize = 0; + _r->timeOfLastResynchronize = Utils::now(); while (impl->reasonForTermination == NODE_RUNNING) { if (Utils::fileExists(shutdownIfUnreadablePath.c_str(),false)) { @@ -551,13 +551,7 @@ Node::ReasonForTermination Node::run() } uint64_t now = Utils::now(); - - // Did the user send SIGHUP or otherwise order network resync? (mostly for debugging) - bool resynchronize = impl->resynchronize; - impl->resynchronize = false; - if (resynchronize) { - LOG("manual resynchronize ordered, resyncing with network"); - } + bool resynchronize = false; // If it looks like the computer slept and woke, resynchronize. if (lastDelayDelta >= ZT_SLEEP_WAKE_DETECTION_THRESHOLD) { @@ -577,18 +571,29 @@ Node::ReasonForTermination Node::run() } } + // Supernodes do not resynchronize unless explicitly ordered via SIGHUP. + if ((resynchronize)&&(_r->topology->amSupernode())) + resynchronize = false; + + // Check for SIGHUP / force resync. + if (impl->resynchronize) { + impl->resynchronize = false; + resynchronize = true; + LOG("resynchronize forced by user, syncing with network"); + } + if (resynchronize) _r->timeOfLastResynchronize = now; /* Ping supernodes separately, and do so more aggressively if we haven't * heard anything from anyone since our last resynchronize / startup. */ if ( ((now - lastSupernodePing) >= ZT_PEER_DIRECT_PING_DELAY) || - ((_r->timeOfLastResynchronize > _r->timeOfLastPacketReceived) && ((now - lastSupernodePing) >= ZT_PING_UNANSWERED_AFTER)) ) { + ((_r->timeOfLastResynchronize > _r->timeOfLastPacketReceived) && ((now - lastSupernodePing) >= ZT_STARTUP_AGGRO)) ) { lastSupernodePing = now; std::vector< SharedPtr > sns(_r->topology->supernodePeers()); TRACE("pinging %d supernodes",(int)sns.size()); for(std::vector< SharedPtr >::const_iterator p(sns.begin());p!=sns.end();++p) - (*p)->sendPing(_r,now,resynchronize); + (*p)->sendPing(_r,now); } if (resynchronize) { @@ -625,7 +630,7 @@ Node::ReasonForTermination Node::run() if ((now - lastPingCheck) >= ZT_PING_CHECK_DELAY) { lastPingCheck = now; try { - _r->topology->eachPeer(Topology::PingPeersThatNeedPing(_r,now,resynchronize)); + _r->topology->eachPeer(Topology::PingPeersThatNeedPing(_r,now)); _r->topology->eachPeer(Topology::OpenPeersThatNeedFirewallOpener(_r,now)); } catch (std::exception &exc) { LOG("unexpected exception running ping check cycle: %s",exc.what()); diff --git a/node/PacketDecoder.cpp b/node/PacketDecoder.cpp index 2b8cba515..c7d3ffda5 100644 --- a/node/PacketDecoder.cpp +++ b/node/PacketDecoder.cpp @@ -490,6 +490,7 @@ bool PacketDecoder::_doMULTICAST_FRAME(const RuntimeEnvironment *_r,const Shared const unsigned int signatureLen = at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME + frameLen); const unsigned char *const signature = field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME + frameLen + 2,signatureLen); + /* TRACE("MULTICAST_FRAME @%.16llx #%.16llx from %s<%s> via %s(%s) to %s [ %s, %d bytes, depth %d ]", (unsigned long long)nwid, (unsigned long long)guid, @@ -499,6 +500,7 @@ bool PacketDecoder::_doMULTICAST_FRAME(const RuntimeEnvironment *_r,const Shared Switch::etherTypeName(etherType), (int)frameLen, (int)depth); + */ SharedPtr network(_r->nc->network(nwid)); diff --git a/node/Peer.cpp b/node/Peer.cpp index 28926f97c..3fad7c050 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -117,8 +117,13 @@ void Peer::receive( _lastMulticastFrame = now; } + bool Peer::send(const RuntimeEnvironment *_r,const void *data,unsigned int len,uint64_t now) { + // Note: we'll still use TCP here if that's all we have, but if this + // is false we will prefer UDP. + bool useTcp = isTcpFailoverTime(_r,now); + Mutex::Lock _l(_lock); std::vector::iterator p(_paths.begin()); @@ -127,11 +132,13 @@ bool Peer::send(const RuntimeEnvironment *_r,const void *data,unsigned int len,u uint64_t bestPathLastReceived = p->lastReceived(); std::vector::iterator bestPath = p; + bool bestPathIsTcp = p->tcp(); while (++p != _paths.end()) { uint64_t lr = p->lastReceived(); - if (lr > bestPathLastReceived) { + if ( (lr > bestPathLastReceived) || ((bestPathIsTcp)&&(!useTcp)) ) { bestPathLastReceived = lr; bestPath = p; + bestPathIsTcp = p->tcp(); } } @@ -156,35 +163,19 @@ bool Peer::sendFirewallOpener(const RuntimeEnvironment *_r,uint64_t now) return sent; } -bool Peer::sendPing(const RuntimeEnvironment *_r,uint64_t now,bool firstSinceReset) +bool Peer::sendPing(const RuntimeEnvironment *_r,uint64_t now) { bool sent = false; SharedPtr self(this); + // In the ping case we will never send TCP unless this returns true. + bool useTcp = isTcpFailoverTime(_r,now); + + TRACE("PING %s (useTcp==%d)",_id.address().toString().c_str(),(int)useTcp); + Mutex::Lock _l(_lock); - - // NOTE: this will never ping a peer that has *only* TCP paths. Right - // now there's never such a thing as TCP is only for failover. - - bool pingTcp; - if (!firstSinceReset) { - uint64_t lastUdp = 0; - uint64_t lastTcp = 0; - uint64_t lastPing = 0; - for(std::vector::iterator p(_paths.begin());p!=_paths.end();++p) { - if (p->tcp()) - lastTcp = std::max(p->lastReceived(),lastTcp); - else lastUdp = std::max(p->lastReceived(),lastUdp); - lastPing = std::max(p->lastPing(),lastPing); - } - uint64_t lastAny = std::max(lastUdp,lastTcp); - pingTcp = ( ( (lastAny < lastPing) && ((lastPing - lastAny) >= ZT_PING_UNANSWERED_AFTER) ) || (lastTcp > lastUdp) ); - } else pingTcp = false; - - TRACE("PING %s (pingTcp==%d)",_id.address().toString().c_str(),(int)pingTcp); - for(std::vector::iterator p(_paths.begin());p!=_paths.end();++p) { - if ((pingTcp)||(!p->tcp())) { + if ((useTcp)||(!p->tcp())) { if (_r->sw->sendHELLO(self,*p)) { p->sent(now); p->pinged(now); diff --git a/node/Peer.hpp b/node/Peer.hpp index d5d0a2914..8b8bf5780 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -160,10 +160,9 @@ public: * * @param _r Runtime environment * @param now Current time - * @param firstSinceReset If true, this is the first ping sent since a network reset * @return True if send appears successful for at least one address type */ - bool sendPing(const RuntimeEnvironment *_r,uint64_t now,bool firstSinceReset); + bool sendPing(const RuntimeEnvironment *_r,uint64_t now); /** * Called periodically by Topology::clean() to remove stale paths and do other cleanup @@ -263,6 +262,33 @@ public: return _lastAnnouncedTo; } + /** + * @param _r Runtime environment + * @param now Current time + * @return True if it's time to attempt TCP failover (if we have TCP_OUT paths) + */ + inline bool isTcpFailoverTime(const RuntimeEnvironment *_r,uint64_t now) const + throw() + { + if ((now - _r->timeOfLastResynchronize) >= ZT_TCP_TUNNEL_FAILOVER_TIMEOUT) { + uint64_t lastUdpPingSent = 0; + uint64_t lastUdpReceive = 0; + + { + Mutex::Lock _l(_lock); + for(std::vector::const_iterator p(_paths.begin());p!=_paths.end();++p) { + if (p->type() == Path::PATH_TYPE_UDP) { + lastUdpPingSent = std::max(lastUdpPingSent,p->lastPing()); + lastUdpReceive = std::max(lastUdpReceive,p->lastReceived()); + } + } + } + + return ( (lastUdpPingSent > lastUdpReceive) && ((now - lastUdpPingSent) >= ZT_TCP_TUNNEL_FAILOVER_TIMEOUT) ); + } + return false; + } + /** * @return Current latency or 0 if unknown (max: 65535) */ diff --git a/node/Topology.hpp b/node/Topology.hpp index bd39d0ec7..35914cc91 100644 --- a/node/Topology.hpp +++ b/node/Topology.hpp @@ -207,11 +207,10 @@ public: class PingPeersThatNeedPing { public: - PingPeersThatNeedPing(const RuntimeEnvironment *renv,uint64_t now,bool firstSinceReset) throw() : + PingPeersThatNeedPing(const RuntimeEnvironment *renv,uint64_t now) throw() : _now(now), _supernodeAddresses(renv->topology->supernodeAddresses()), - _r(renv), - _firstSinceReset(firstSinceReset) {} + _r(renv) {} inline void operator()(Topology &t,const SharedPtr &p) { @@ -228,14 +227,13 @@ public: /* 2b: peer is not a supernode */ (!_supernodeAddresses.count(p->address())) ) - ) { p->sendPing(_r,_now,_firstSinceReset); } + ) { p->sendPing(_r,_now); } } private: uint64_t _now; std::set
_supernodeAddresses; const RuntimeEnvironment *_r; - bool _firstSinceReset; }; /**