mirror of
https://github.com/zerotier/ZeroTierOne.git
synced 2025-01-02 03:06:40 +00:00
Tighten up dead path detection. Should now auto-detect dead paths in less than 10 seconds at a very small cost in ECHO requests (or HELLOs for older peers). GitHib issue #272
This commit is contained in:
parent
4d94ae77b4
commit
05b2c0743f
@ -274,7 +274,7 @@
|
|||||||
/**
|
/**
|
||||||
* No answer timeout to trigger dead path detection
|
* No answer timeout to trigger dead path detection
|
||||||
*/
|
*/
|
||||||
#define ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT 3000
|
#define ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT 2500
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Probation threshold after which a path becomes dead
|
* Probation threshold after which a path becomes dead
|
||||||
|
@ -66,6 +66,8 @@ class Path
|
|||||||
public:
|
public:
|
||||||
Path() :
|
Path() :
|
||||||
_lastSend(0),
|
_lastSend(0),
|
||||||
|
_lastPing(0),
|
||||||
|
_lastKeepalive(0),
|
||||||
_lastReceived(0),
|
_lastReceived(0),
|
||||||
_addr(),
|
_addr(),
|
||||||
_localAddress(),
|
_localAddress(),
|
||||||
@ -76,6 +78,8 @@ public:
|
|||||||
|
|
||||||
Path(const InetAddress &localAddress,const InetAddress &addr) :
|
Path(const InetAddress &localAddress,const InetAddress &addr) :
|
||||||
_lastSend(0),
|
_lastSend(0),
|
||||||
|
_lastPing(0),
|
||||||
|
_lastKeepalive(0),
|
||||||
_lastReceived(0),
|
_lastReceived(0),
|
||||||
_addr(addr),
|
_addr(addr),
|
||||||
_localAddress(localAddress),
|
_localAddress(localAddress),
|
||||||
@ -98,10 +102,21 @@ public:
|
|||||||
*
|
*
|
||||||
* @param t Time of send
|
* @param t Time of send
|
||||||
*/
|
*/
|
||||||
inline void sent(uint64_t t)
|
inline void sent(uint64_t t) { _lastSend = t; }
|
||||||
{
|
|
||||||
_lastSend = t;
|
/**
|
||||||
}
|
* Called when we've sent a ping or echo
|
||||||
|
*
|
||||||
|
* @param t Time of send
|
||||||
|
*/
|
||||||
|
inline void pinged(uint64_t t) { _lastPing = t; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called when we send a NAT keepalive
|
||||||
|
*
|
||||||
|
* @param t Time of send
|
||||||
|
*/
|
||||||
|
inline void sentKeepalive(uint64_t t) { _lastKeepalive = t; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called when a packet is received from this remote path
|
* Called when a packet is received from this remote path
|
||||||
@ -145,6 +160,16 @@ public:
|
|||||||
*/
|
*/
|
||||||
inline uint64_t lastSend() const throw() { return _lastSend; }
|
inline uint64_t lastSend() const throw() { return _lastSend; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return Time we last pinged or dead path checked this link
|
||||||
|
*/
|
||||||
|
inline uint64_t lastPing() const throw() { return _lastPing; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return Time of last keepalive
|
||||||
|
*/
|
||||||
|
inline uint64_t lastKeepalive() const throw() { return _lastKeepalive; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return Time of last receive from this path
|
* @return Time of last receive from this path
|
||||||
*/
|
*/
|
||||||
@ -260,8 +285,10 @@ public:
|
|||||||
template<unsigned int C>
|
template<unsigned int C>
|
||||||
inline void serialize(Buffer<C> &b) const
|
inline void serialize(Buffer<C> &b) const
|
||||||
{
|
{
|
||||||
b.append((uint8_t)1); // version
|
b.append((uint8_t)2); // version
|
||||||
b.append((uint64_t)_lastSend);
|
b.append((uint64_t)_lastSend);
|
||||||
|
b.append((uint64_t)_lastPing);
|
||||||
|
b.append((uint64_t)_lastKeepalive);
|
||||||
b.append((uint64_t)_lastReceived);
|
b.append((uint64_t)_lastReceived);
|
||||||
_addr.serialize(b);
|
_addr.serialize(b);
|
||||||
_localAddress.serialize(b);
|
_localAddress.serialize(b);
|
||||||
@ -273,9 +300,11 @@ public:
|
|||||||
inline unsigned int deserialize(const Buffer<C> &b,unsigned int startAt = 0)
|
inline unsigned int deserialize(const Buffer<C> &b,unsigned int startAt = 0)
|
||||||
{
|
{
|
||||||
unsigned int p = startAt;
|
unsigned int p = startAt;
|
||||||
if (b[p++] != 1)
|
if (b[p++] != 2)
|
||||||
throw std::invalid_argument("invalid serialized Path");
|
throw std::invalid_argument("invalid serialized Path");
|
||||||
_lastSend = b.template at<uint64_t>(p); p += 8;
|
_lastSend = b.template at<uint64_t>(p); p += 8;
|
||||||
|
_lastPing = b.template at<uint64_t>(p); p += 8;
|
||||||
|
_lastKeepalive = b.template at<uint64_t>(p); p += 8;
|
||||||
_lastReceived = b.template at<uint64_t>(p); p += 8;
|
_lastReceived = b.template at<uint64_t>(p); p += 8;
|
||||||
p += _addr.deserialize(b,p);
|
p += _addr.deserialize(b,p);
|
||||||
p += _localAddress.deserialize(b,p);
|
p += _localAddress.deserialize(b,p);
|
||||||
@ -290,6 +319,8 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
uint64_t _lastSend;
|
uint64_t _lastSend;
|
||||||
|
uint64_t _lastPing;
|
||||||
|
uint64_t _lastKeepalive;
|
||||||
uint64_t _lastReceived;
|
uint64_t _lastReceived;
|
||||||
InetAddress _addr;
|
InetAddress _addr;
|
||||||
InetAddress _localAddress;
|
InetAddress _localAddress;
|
||||||
|
@ -168,7 +168,10 @@ void Peer::received(
|
|||||||
} else {
|
} else {
|
||||||
uint64_t slotLRmin = 0xffffffffffffffffULL;
|
uint64_t slotLRmin = 0xffffffffffffffffULL;
|
||||||
for(unsigned int p=0;p<ZT_MAX_PEER_NETWORK_PATHS;++p) {
|
for(unsigned int p=0;p<ZT_MAX_PEER_NETWORK_PATHS;++p) {
|
||||||
if (_paths[p].lastReceived() <= slotLRmin) {
|
if (!_paths[p].active(now)) {
|
||||||
|
slot = &(_paths[p]);
|
||||||
|
break;
|
||||||
|
} else if (_paths[p].lastReceived() <= slotLRmin) {
|
||||||
slotLRmin = _paths[p].lastReceived();
|
slotLRmin = _paths[p].lastReceived();
|
||||||
slot = &(_paths[p]);
|
slot = &(_paths[p]);
|
||||||
}
|
}
|
||||||
@ -249,11 +252,12 @@ bool Peer::doPingAndKeepalive(uint64_t now,int inetAddressFamily)
|
|||||||
//TRACE("PING %s(%s) after %llums/%llums send/receive inactivity",_id.address().toString().c_str(),p->address().toString().c_str(),now - p->lastSend(),now - p->lastReceived());
|
//TRACE("PING %s(%s) after %llums/%llums send/receive inactivity",_id.address().toString().c_str(),p->address().toString().c_str(),now - p->lastSend(),now - p->lastReceived());
|
||||||
sendHELLO(p->localAddress(),p->address(),now);
|
sendHELLO(p->localAddress(),p->address(),now);
|
||||||
p->sent(now);
|
p->sent(now);
|
||||||
} else if (((now - p->lastSend()) >= ZT_NAT_KEEPALIVE_DELAY)&&(!p->reliable())) {
|
p->pinged(now);
|
||||||
|
} else if (((now - std::max(p->lastSend(),p->lastKeepalive())) >= ZT_NAT_KEEPALIVE_DELAY)&&(!p->reliable())) {
|
||||||
//TRACE("NAT keepalive %s(%s) after %llums/%llums send/receive inactivity",_id.address().toString().c_str(),p->address().toString().c_str(),now - p->lastSend(),now - p->lastReceived());
|
//TRACE("NAT keepalive %s(%s) after %llums/%llums send/receive inactivity",_id.address().toString().c_str(),p->address().toString().c_str(),now - p->lastSend(),now - p->lastReceived());
|
||||||
_natKeepaliveBuf += (uint32_t)((now * 0x9e3779b1) >> 1); // tumble this around to send constantly varying (meaningless) payloads
|
_natKeepaliveBuf += (uint32_t)((now * 0x9e3779b1) >> 1); // tumble this around to send constantly varying (meaningless) payloads
|
||||||
RR->node->putPacket(p->localAddress(),p->address(),&_natKeepaliveBuf,sizeof(_natKeepaliveBuf));
|
RR->node->putPacket(p->localAddress(),p->address(),&_natKeepaliveBuf,sizeof(_natKeepaliveBuf));
|
||||||
p->sent(now);
|
p->sentKeepalive(now);
|
||||||
} else {
|
} else {
|
||||||
//TRACE("no PING or NAT keepalive: addr==%s reliable==%d %llums/%llums send/receive inactivity",p->address().toString().c_str(),(int)p->reliable(),now - p->lastSend(),now - p->lastReceived());
|
//TRACE("no PING or NAT keepalive: addr==%s reliable==%d %llums/%llums send/receive inactivity",p->address().toString().c_str(),(int)p->reliable(),now - p->lastSend(),now - p->lastReceived());
|
||||||
}
|
}
|
||||||
@ -339,6 +343,8 @@ bool Peer::resetWithinScope(InetAddress::IpScope scope,uint64_t now)
|
|||||||
unsigned int y = 0;
|
unsigned int y = 0;
|
||||||
while (x < np) {
|
while (x < np) {
|
||||||
if (_paths[x].address().ipScope() == scope) {
|
if (_paths[x].address().ipScope() == scope) {
|
||||||
|
// Resetting a path means sending a HELLO and then forgetting it. If we
|
||||||
|
// get OK(HELLO) then it will be re-learned.
|
||||||
sendHELLO(_paths[x].localAddress(),_paths[x].address(),now);
|
sendHELLO(_paths[x].localAddress(),_paths[x].address(),now);
|
||||||
} else {
|
} else {
|
||||||
_paths[y++] = _paths[x];
|
_paths[y++] = _paths[x];
|
||||||
@ -491,7 +497,22 @@ bool Peer::_checkPath(Path &p,const uint64_t now)
|
|||||||
if (!p.active(now))
|
if (!p.active(now))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if ( (p.lastSend() > p.lastReceived()) && ((p.lastSend() - p.lastReceived()) >= ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT) ) {
|
/* Dead path detection: if we have sent something to this peer and have not
|
||||||
|
* yet received a reply, double check this path. The majority of outbound
|
||||||
|
* packets including Ethernet frames do generate some kind of reply either
|
||||||
|
* immediately or at some point in the near future. This will occasionally
|
||||||
|
* (every NO_ANSWER_TIMEOUT ms) check paths unnecessarily if traffic that
|
||||||
|
* does not generate a response is being sent such as multicast announcements
|
||||||
|
* or frames belonging to unidirectional UDP protocols, but the cost is very
|
||||||
|
* tiny and the benefit in reliability is very large. This takes care of many
|
||||||
|
* failure modes including crap NATs that forget links and spurious changes
|
||||||
|
* to physical network topology that cannot be otherwise detected.
|
||||||
|
*
|
||||||
|
* Each time we do this we increment a probation counter in the path. This
|
||||||
|
* counter is reset on any packet receive over this path. If it reaches the
|
||||||
|
* MAX_PROBATION threshold the path is considred dead. */
|
||||||
|
|
||||||
|
if ( (p.lastSend() > p.lastReceived()) && ((p.lastSend() - p.lastReceived()) >= ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT) && ((now - p.lastPing()) >= ZT_PEER_DEAD_PATH_DETECTION_NO_ANSWER_TIMEOUT) ) {
|
||||||
TRACE("%s(%s) has not answered, checking if dead (probation: %u)",_id.address().toString().c_str(),p.address().toString().c_str(),p.probation());
|
TRACE("%s(%s) has not answered, checking if dead (probation: %u)",_id.address().toString().c_str(),p.address().toString().c_str(),p.probation());
|
||||||
|
|
||||||
if ( (_vProto >= 5) && ( !((_vMajor == 1)&&(_vMinor == 1)&&(_vRevision == 0)) ) ) {
|
if ( (_vProto >= 5) && ( !((_vMajor == 1)&&(_vMinor == 1)&&(_vRevision == 0)) ) ) {
|
||||||
@ -499,9 +520,11 @@ bool Peer::_checkPath(Path &p,const uint64_t now)
|
|||||||
Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ECHO);
|
Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ECHO);
|
||||||
outp.armor(_key,true);
|
outp.armor(_key,true);
|
||||||
p.send(RR,outp.data(),outp.size(),now);
|
p.send(RR,outp.data(),outp.size(),now);
|
||||||
|
p.pinged(now);
|
||||||
} else {
|
} else {
|
||||||
sendHELLO(p.localAddress(),p.address(),now);
|
sendHELLO(p.localAddress(),p.address(),now);
|
||||||
p.sent(now);
|
p.sent(now);
|
||||||
|
p.pinged(now);
|
||||||
}
|
}
|
||||||
|
|
||||||
p.increaseProbation();
|
p.increaseProbation();
|
||||||
|
Loading…
Reference in New Issue
Block a user