Bunch more path refactoring. Peers no longer forget paths, but do not normally use expired paths. Expired paths might still be tried if nothing else is reachable.

This commit is contained in:
Adam Ierymenko 2016-09-07 11:13:17 -07:00
parent f2d2df2b11
commit b5c86b6ba4
11 changed files with 126 additions and 143 deletions

View File

@ -1053,6 +1053,11 @@ typedef struct
*/
uint64_t trustedPathId;
/**
* Is path expired?
*/
int expired;
/**
* Is path preferred?
*/

View File

@ -244,17 +244,22 @@
* This is also how often pings will be retried to upstream peers (relays, roots)
* constantly until something is heard.
*/
#define ZT_PING_CHECK_INVERVAL 9000
#define ZT_PING_CHECK_INVERVAL 10000
/**
* How frequently to send heartbeats over in-use paths
*/
#define ZT_PATH_HEARTBEAT_PERIOD 15000
#define ZT_PATH_HEARTBEAT_PERIOD 10000
/**
* Paths are considered inactive if they have not received traffic in this long
*/
#define ZT_PATH_ALIVE_TIMEOUT 35000
#define ZT_PATH_ALIVE_TIMEOUT 25000
/**
* Minimum time between attempts to check dead paths to see if they can be re-awakened
*/
#define ZT_PATH_MIN_REACTIVATE_INTERVAL 2500
/**
* Delay between full-fledged pings of directly connected peers
@ -262,10 +267,15 @@
#define ZT_PEER_PING_PERIOD 60000
/**
* Peers forget paths that have not spoken in this long
* Paths are considered expired if they have not produced a real packet in this long
*/
#define ZT_PEER_PATH_EXPIRATION ((ZT_PEER_PING_PERIOD * 4) + 3000)
/**
* How often to retry expired paths that we're still remembering
*/
#define ZT_PEER_EXPIRED_PATH_TRIAL_PERIOD (ZT_PEER_PING_PERIOD * 10)
/**
* Timeout for overall peer activity (measured from last receive)
*/

View File

@ -1163,7 +1163,7 @@ bool IncomingPacket::_doCIRCUIT_TEST(const RuntimeEnvironment *RR,const SharedPt
remainingHopsPtr += ZT_ADDRESS_LENGTH;
SharedPtr<Peer> nhp(RR->topology->getPeer(nextHop[h]));
if (nhp) {
SharedPtr<Path> nhbp(nhp->getBestPath(now));
SharedPtr<Path> nhbp(nhp->getBestPath(now,false));
if ((nhbp)&&(nhbp->alive(now)))
nextHopBestPathAddress[h] = nhbp->address();
}

View File

@ -202,14 +202,6 @@ public:
}
}
if (!upstream) {
// If I am a root server, only ping other root servers -- roots don't ping "down"
// since that would just be a waste of bandwidth and could potentially cause route
// flapping in Cluster mode.
if (RR->topology->amRoot())
return;
}
if (upstream) {
// "Upstream" devices are roots and relays and get special treatment -- they stay alive
// forever and we try to keep (if available) both IPv4 and IPv6 channels open to them.
@ -415,15 +407,16 @@ ZT_PeerList *Node::peers() const
p->latency = pi->second->latency();
p->role = RR->topology->isRoot(pi->second->identity()) ? ZT_PEER_ROLE_ROOT : ZT_PEER_ROLE_LEAF;
std::vector< SharedPtr<Path> > paths(pi->second->paths());
SharedPtr<Path> bestp(pi->second->getBestPath(_now));
std::vector< std::pair< SharedPtr<Path>,bool > > paths(pi->second->paths(_now));
SharedPtr<Path> bestp(pi->second->getBestPath(_now,false));
p->pathCount = 0;
for(std::vector< SharedPtr<Path> >::iterator path(paths.begin());path!=paths.end();++path) {
memcpy(&(p->paths[p->pathCount].address),&((*path)->address()),sizeof(struct sockaddr_storage));
p->paths[p->pathCount].lastSend = (*path)->lastOut();
p->paths[p->pathCount].lastReceive = (*path)->lastIn();
p->paths[p->pathCount].preferred = (*path == bestp) ? 1 : 0;
p->paths[p->pathCount].trustedPathId = RR->topology->getOutboundPathTrust((*path)->address());
for(std::vector< std::pair< SharedPtr<Path>,bool > >::iterator path(paths.begin());path!=paths.end();++path) {
memcpy(&(p->paths[p->pathCount].address),&(path->first->address()),sizeof(struct sockaddr_storage));
p->paths[p->pathCount].lastSend = path->first->lastOut();
p->paths[p->pathCount].lastReceive = path->first->lastIn();
p->paths[p->pathCount].expired = path->second;
p->paths[p->pathCount].preferred = (path->first == bestp) ? 1 : 0;
p->paths[p->pathCount].trustedPathId = RR->topology->getOutboundPathTrust(path->first->address());
++p->pathCount;
}
}

View File

@ -27,6 +27,14 @@
#include "Cluster.hpp"
#include "Packet.hpp"
// Fallback for builds where AF_MAX is not already defined: use whichever of
// AF_INET and AF_INET6 has the larger numeric value.
#ifndef AF_MAX
#if AF_INET > AF_INET6
#define AF_MAX AF_INET
#else
#define AF_MAX AF_INET6
#endif
#endif
namespace ZeroTier {
// Used to send varying values for NAT keepalive
@ -150,7 +158,7 @@ void Peer::received(
uint64_t worstScore = 0xffffffffffffffffULL;
for(unsigned int p=0;p<_numPaths;++p) {
if (_paths[p].path->address().ss_family == path->address().ss_family) {
const uint64_t s = _pathScore(p);
const uint64_t s = _pathScore(p,now);
if (s < worstScore) {
worstScore = s;
worstSlot = (int)p;
@ -163,7 +171,7 @@ void Peer::received(
// If we can't find one with the same family, replace the worst of any family
slot = ZT_MAX_PEER_NETWORK_PATHS - 1;
for(unsigned int p=0;p<_numPaths;++p) {
const uint64_t s = _pathScore(p);
const uint64_t s = _pathScore(p,now);
if (s < worstScore) {
worstScore = s;
slot = p;
@ -210,7 +218,7 @@ bool Peer::hasActivePathTo(uint64_t now,const InetAddress &addr) const
{
Mutex::Lock _l(_paths_m);
for(unsigned int p=0;p<_numPaths;++p) {
if ( (_paths[p].path->address() == addr) && (_paths[p].path->alive(now)) )
if ( (_paths[p].path->address() == addr) && ((now - _paths[p].lastReceive) <= ZT_PEER_PATH_EXPIRATION) && (_paths[p].path->alive(now)) )
return true;
}
return false;
@ -223,8 +231,8 @@ bool Peer::sendDirect(const void *data,unsigned int len,uint64_t now,bool forceE
int bestp = -1;
uint64_t best = 0ULL;
for(unsigned int p=0;p<_numPaths;++p) {
if (_paths[p].path->alive(now)||(forceEvenIfDead)) {
const uint64_t s = _pathScore(p);
if ( ((now - _paths[p].lastReceive) <= ZT_PEER_PATH_EXPIRATION) && (_paths[p].path->alive(now)||(forceEvenIfDead)) ) {
const uint64_t s = _pathScore(p,now);
if (s >= best) {
best = s;
bestp = (int)p;
@ -239,19 +247,21 @@ bool Peer::sendDirect(const void *data,unsigned int len,uint64_t now,bool forceE
}
}
SharedPtr<Path> Peer::getBestPath(uint64_t now)
SharedPtr<Path> Peer::getBestPath(uint64_t now,bool includeExpired)
{
Mutex::Lock _l(_paths_m);
int bestp = -1;
uint64_t best = 0ULL;
for(unsigned int p=0;p<_numPaths;++p) {
const uint64_t s = _pathScore(p);
if ( ((now - _paths[p].lastReceive) < ZT_PEER_PATH_EXPIRATION) || (includeExpired) ) {
const uint64_t s = _pathScore(p,now);
if (s >= best) {
best = s;
bestp = (int)p;
}
}
}
if (bestp >= 0) {
return _paths[bestp].path;
@ -283,8 +293,8 @@ bool Peer::doPingAndKeepalive(uint64_t now,int inetAddressFamily)
int bestp = -1;
uint64_t best = 0ULL;
for(unsigned int p=0;p<_numPaths;++p) {
if ((inetAddressFamily < 0)||((int)_paths[p].path->address().ss_family == inetAddressFamily)) {
const uint64_t s = _pathScore(p);
if ( ((now - _paths[p].lastReceive) <= ZT_PEER_PATH_EXPIRATION) && ((inetAddressFamily < 0)||((int)_paths[p].path->address().ss_family == inetAddressFamily)) ) {
const uint64_t s = _pathScore(p,now);
if (s >= best) {
best = s;
bestp = (int)p;
@ -293,7 +303,7 @@ bool Peer::doPingAndKeepalive(uint64_t now,int inetAddressFamily)
}
if (bestp >= 0) {
if ((now - _paths[bestp].lastReceive) >= ZT_PEER_PING_PERIOD) {
// NOTE(review): `best` is the winning path *score* (uint64_t) while `bestp` is the winning slot index used by the adjacent lines; indexing _paths with `best` looks like a typo for `bestp` and would read out of bounds -- confirm and fix.
if ((now - _paths[best].lastReceive) >= ZT_PEER_PING_PERIOD) {
sendHELLO(_paths[bestp].path->localAddress(),_paths[bestp].path->address(),now);
} else if (_paths[bestp].path->needsHeartbeat(now)) {
_natKeepaliveBuf += (uint32_t)((now * 0x9e3779b1) >> 1); // tumble this around to send constantly varying (meaningless) payloads
@ -309,39 +319,24 @@ bool Peer::hasActiveDirectPath(uint64_t now) const
{
Mutex::Lock _l(_paths_m);
for(unsigned int p=0;p<_numPaths;++p) {
if (_paths[p].path->alive(now))
if (((now - _paths[p].lastReceive) <= ZT_PEER_PATH_EXPIRATION)&&(_paths[p].path->alive(now)))
return true;
}
return false;
}
bool Peer::resetWithinScope(InetAddress::IpScope scope,uint64_t now)
bool Peer::resetWithinScope(InetAddress::IpScope scope,int inetAddressFamily,uint64_t now)
{
Mutex::Lock _l(_paths_m);
unsigned int np = _numPaths;
unsigned int x = 0;
unsigned int y = 0;
while (x < np) {
if (_paths[x].path->address().ipScope() == scope) {
// Resetting a path means sending a HELLO and then forgetting it. If we
// get OK(HELLO) then it will be re-learned.
sendHELLO(_paths[x].path->localAddress(),_paths[x].path->address(),now);
} else {
if (x != y) {
_paths[y].lastReceive = _paths[x].lastReceive;
_paths[y].path = _paths[x].path;
#ifdef ZT_ENABLE_CLUSTER
_paths[y].localClusterSuboptimal = _paths[x].localClusterSuboptimal;
#endif
bool resetSomething = false;
for(unsigned int p=0;p<_numPaths;++p) {
if ( (_paths[p].path->address().ss_family == inetAddressFamily) && (_paths[p].path->address().ipScope() == scope) ) {
sendHELLO(_paths[p].path->localAddress(),_paths[p].path->address(),now);
_paths[p].lastReceive >>= 2; // de-prioritize heavily vs. other paths, will get reset if we get OK(HELLO) or other traffic
resetSomething = true;
}
++y;
}
++x;
}
_numPaths = y;
while (y < ZT_MAX_PEER_NETWORK_PATHS)
_paths[y++].path.zero(); // let go of unused SmartPtr<>'s
return (_numPaths < np);
return resetSomething;
}
void Peer::getBestActiveAddresses(uint64_t now,InetAddress &v4,InetAddress &v6) const
@ -351,20 +346,22 @@ void Peer::getBestActiveAddresses(uint64_t now,InetAddress &v4,InetAddress &v6)
int bestp4 = -1,bestp6 = -1;
uint64_t best4 = 0ULL,best6 = 0ULL;
for(unsigned int p=0;p<_numPaths;++p) {
if ( ((now - _paths[p].lastReceive) <= ZT_PEER_PATH_EXPIRATION) && (_paths[p].path->alive(now)) ) {
if (_paths[p].path->address().ss_family == AF_INET) {
const uint64_t s = _pathScore(p);
const uint64_t s = _pathScore(p,now);
if (s >= best4) {
best4 = s;
bestp4 = (int)p;
}
} else if (_paths[p].path->address().ss_family == AF_INET6) {
const uint64_t s = _pathScore(p);
const uint64_t s = _pathScore(p,now);
if (s >= best6) {
best6 = s;
bestp6 = (int)p;
}
}
}
}
if (bestp4 >= 0)
v4 = _paths[bestp4].path->address();
@ -372,30 +369,6 @@ void Peer::getBestActiveAddresses(uint64_t now,InetAddress &v4,InetAddress &v6)
v6 = _paths[bestp6].path->address();
}
/**
 * Drop paths whose last receive is older than ZT_PEER_PATH_EXPIRATION
 *
 * Entries that are kept are compacted toward the front of _paths in their
 * original order, _numPaths is set to the number kept, and the path pointer
 * of every slot past that point is zeroed.
 *
 * @param now Current time
 */
void Peer::clean(uint64_t now)
{
Mutex::Lock _l(_paths_m);
unsigned int np = _numPaths;
unsigned int x = 0; // read index: walks every currently known path
unsigned int y = 0; // write index: next slot for a path that is kept
while (x < np) {
// Keep only paths that have received something within the expiration window
if ((now - _paths[x].lastReceive) <= ZT_PEER_PATH_EXPIRATION) {
// Move the entry down only if an earlier entry was dropped
if (y != x) {
_paths[y].lastReceive = _paths[x].lastReceive;
_paths[y].path = _paths[x].path;
#ifdef ZT_ENABLE_CLUSTER
_paths[y].localClusterSuboptimal = _paths[x].localClusterSuboptimal;
#endif
}
++y;
}
++x;
}
_numPaths = y;
// Clear the path pointers in every slot beyond the kept entries
while (y < ZT_MAX_PEER_NETWORK_PATHS)
_paths[y++].path.zero(); // let go of unused SmartPtr<>'s
}
bool Peer::_pushDirectPaths(const SharedPtr<Path> &path,uint64_t now)
{
#ifdef ZT_ENABLE_CLUSTER

View File

@ -149,9 +149,10 @@ public:
* Get the best current direct path
*
* @param now Current time
* @param includeExpired If true, include even expired paths
* @return Best current path or NULL if none
*/
SharedPtr<Path> getBestPath(uint64_t now);
SharedPtr<Path> getBestPath(uint64_t now,bool includeExpired);
/**
* Send a HELLO to this peer at a specified physical address
@ -175,18 +176,22 @@ public:
/**
* @param now Current time
* @return True if this peer has at least one active direct path
* @return True if this peer has at least one active and alive direct path
*/
bool hasActiveDirectPath(uint64_t now) const;
/**
* Reset paths within a given scope
* Reset paths within a given IP scope and address family
*
* @param scope IP scope of paths to reset
* Resetting a path involves sending a HELLO to it and then de-prioritizing
* it vs. other paths.
*
* @param scope IP scope
* @param inetAddressFamily Family e.g. AF_INET
* @param now Current time
* @return True if at least one path was forgotten
* @return True if at least one path was reset
*/
bool resetWithinScope(InetAddress::IpScope scope,uint64_t now);
bool resetWithinScope(InetAddress::IpScope scope,int inetAddressFamily,uint64_t now);
/**
* Get most recently active path addresses for IPv4 and/or IPv6
@ -201,21 +206,15 @@ public:
void getBestActiveAddresses(uint64_t now,InetAddress &v4,InetAddress &v6) const;
/**
* Perform periodic cleaning operations
*
* @param now Current time
* @return All known direct paths to this peer and whether they are expired (true == expired)
*/
void clean(uint64_t now);
/**
* @return All known direct paths to this peer (active or inactive)
*/
inline std::vector< SharedPtr<Path> > paths() const
inline std::vector< std::pair< SharedPtr<Path>,bool > > paths(const uint64_t now) const
{
std::vector< SharedPtr<Path> > pp;
std::vector< std::pair< SharedPtr<Path>,bool > > pp;
Mutex::Lock _l(_paths_m);
for(unsigned int p=0,np=_numPaths;p<np;++p)
pp.push_back(_paths[p].path);
pp.push_back(std::pair< SharedPtr<Path>,bool >(_paths[p].path,(now - _paths[p].lastReceive) > ZT_PEER_PATH_EXPIRATION));
return pp;
}
@ -370,11 +369,12 @@ public:
private:
bool _pushDirectPaths(const SharedPtr<Path> &path,uint64_t now);
inline uint64_t _pathScore(const unsigned int p) const
inline uint64_t _pathScore(const unsigned int p,const uint64_t now) const
{
uint64_t s = ZT_PEER_PING_PERIOD;
uint64_t s = ZT_PEER_PING_PERIOD + _paths[p].lastReceive + (uint64_t)(_paths[p].path->preferenceRank() * (ZT_PEER_PING_PERIOD / ZT_PATH_MAX_PREFERENCE_RANK));
if (_paths[p].path->address().ss_family == AF_INET) {
s += _paths[p].lastReceive + (uint64_t)(_paths[p].path->preferenceRank() * (ZT_PEER_PING_PERIOD / ZT_PATH_MAX_PREFERENCE_RANK)) + (uint64_t)(ZT_PEER_PING_PERIOD * (unsigned long)(reinterpret_cast<const struct sockaddr_in *>(&(_paths[p].path->address()))->sin_addr.s_addr == _remoteClusterOptimal4));
s += (uint64_t)(ZT_PEER_PING_PERIOD * (unsigned long)(reinterpret_cast<const struct sockaddr_in *>(&(_paths[p].path->address()))->sin_addr.s_addr == _remoteClusterOptimal4));
} else if (_paths[p].path->address().ss_family == AF_INET6) {
uint64_t clusterWeight = ZT_PEER_PING_PERIOD;
const uint8_t *a = reinterpret_cast<const uint8_t *>(reinterpret_cast<const struct sockaddr_in6 *>(&(_paths[p].path->address()))->sin6_addr.s6_addr);
@ -384,13 +384,15 @@ private:
break;
}
}
s += _paths[p].lastReceive + (uint64_t)(_paths[p].path->preferenceRank() * (ZT_PEER_PING_PERIOD / ZT_PATH_MAX_PREFERENCE_RANK)) + clusterWeight;
} else {
s += _paths[p].lastReceive + (uint64_t)(_paths[p].path->preferenceRank() * (ZT_PEER_PING_PERIOD / ZT_PATH_MAX_PREFERENCE_RANK));
s += clusterWeight;
}
s += (ZT_PEER_PING_PERIOD / 2) * (uint64_t)_paths[p].path->alive(now);
#ifdef ZT_ENABLE_CLUSTER
s -= ZT_PEER_PING_PERIOD * (uint64_t)_paths[p].localClusterSuboptimal;
#endif
return s;
}

View File

@ -33,37 +33,31 @@
#include "Switch.hpp"
// Entry timeout -- make it fairly long since this is just to prevent stale buildup
#define ZT_SELFAWARENESS_ENTRY_TIMEOUT 3600000
#define ZT_SELFAWARENESS_ENTRY_TIMEOUT 600000
namespace ZeroTier {
class _ResetWithinScope
{
public:
_ResetWithinScope(uint64_t now,InetAddress::IpScope scope) :
_ResetWithinScope(uint64_t now,int inetAddressFamily,InetAddress::IpScope scope) :
_now(now),
_family(inetAddressFamily),
_scope(scope) {}
inline void operator()(Topology &t,const SharedPtr<Peer> &p)
{
if (p->resetWithinScope(_scope,_now))
peersReset.push_back(p);
}
inline void operator()(Topology &t,const SharedPtr<Peer> &p) { if (p->resetWithinScope(_scope,_family,_now)) peersReset.push_back(p); }
std::vector< SharedPtr<Peer> > peersReset;
private:
uint64_t _now;
int _family;
InetAddress::IpScope _scope;
};
SelfAwareness::SelfAwareness(const RuntimeEnvironment *renv) :
RR(renv),
_phy(32)
{
}
SelfAwareness::~SelfAwareness()
_phy(128)
{
}
@ -98,8 +92,8 @@ void SelfAwareness::iam(const Address &reporter,const InetAddress &receivedOnLoc
}
}
// Reset all paths within this scope
_ResetWithinScope rset(now,(InetAddress::IpScope)scope);
// Reset all paths within this scope and address family
_ResetWithinScope rset(now,myPhysicalAddress.ss_family,(InetAddress::IpScope)scope);
RR->topology->eachPeer<_ResetWithinScope &>(rset);
// Send a NOP to all peers for whom we forgot a path. This will cause direct

View File

@ -36,7 +36,6 @@ class SelfAwareness
{
public:
SelfAwareness(const RuntimeEnvironment *renv);
~SelfAwareness();
/**
* Called when a trusted remote peer informs us of our external network address

View File

@ -747,14 +747,20 @@ Address Switch::_sendWhoisRequest(const Address &addr,const Address *peersAlread
bool Switch::_trySend(const Packet &packet,bool encrypt)
{
SharedPtr<Peer> peer(RR->topology->getPeer(packet.destination()));
const SharedPtr<Peer> peer(RR->topology->getPeer(packet.destination()));
if (peer) {
const uint64_t now = RR->node->now();
SharedPtr<Path> viaPath(peer->getBestPath(now));
// First get the best path, and if it's dead (and this is not a root)
// we attempt to re-activate that path but this packet will flow
// upstream. If the path comes back alive, it will be used in the future.
// For roots we don't do the alive check since roots are not required
// to send heartbeats "down" and because we have to at least try to
// go somewhere.
SharedPtr<Path> viaPath(peer->getBestPath(now,false));
if ( (viaPath) && (!viaPath->alive(now)) && (!RR->topology->isRoot(peer->identity())) ) {
if ((now - viaPath->lastOut()) > 5000) {
if ((now - viaPath->lastOut()) > std::max((now - viaPath->lastIn()) >> 2,(uint64_t)ZT_PATH_MIN_REACTIVATE_INTERVAL)) {
Packet outp(peer->address(),RR->identity.address(),Packet::VERB_ECHO);
outp.armor(peer->key(),true);
viaPath->send(RR,outp.data(),outp.size(),now);
@ -763,9 +769,11 @@ bool Switch::_trySend(const Packet &packet,bool encrypt)
}
if (!viaPath) {
SharedPtr<Peer> relay(RR->topology->getBestRoot());
if ( (!relay) || (!(viaPath = relay->getBestPath(now))) )
if ( (!relay) || (!(viaPath = relay->getBestPath(now,false))) ) {
if (!(viaPath = peer->getBestPath(now,true)))
return false;
}
}
Packet tmp(packet);
@ -787,7 +795,7 @@ bool Switch::_trySend(const Packet &packet,bool encrypt)
unsigned int fragsRemaining = (remaining / (ZT_UDP_DEFAULT_PAYLOAD_MTU - ZT_PROTO_MIN_FRAGMENT_LENGTH));
if ((fragsRemaining * (ZT_UDP_DEFAULT_PAYLOAD_MTU - ZT_PROTO_MIN_FRAGMENT_LENGTH)) < remaining)
++fragsRemaining;
unsigned int totalFragments = fragsRemaining + 1;
const unsigned int totalFragments = fragsRemaining + 1;
for(unsigned int fno=1;fno<totalFragments;++fno) {
chunkSize = std::min(remaining,(unsigned int)(ZT_UDP_DEFAULT_PAYLOAD_MTU - ZT_PROTO_MIN_FRAGMENT_LENGTH));

View File

@ -256,11 +256,8 @@ void Topology::clean(uint64_t now)
Address *a = (Address *)0;
SharedPtr<Peer> *p = (SharedPtr<Peer> *)0;
while (i.next(a,p)) {
if (((now - (*p)->lastUsed()) >= ZT_PEER_IN_MEMORY_EXPIRATION)&&(std::find(_rootAddresses.begin(),_rootAddresses.end(),*a) == _rootAddresses.end())) {
if (((now - (*p)->lastUsed()) >= ZT_PEER_IN_MEMORY_EXPIRATION)&&(std::find(_rootAddresses.begin(),_rootAddresses.end(),*a) == _rootAddresses.end()))
_peers.erase(*a);
} else {
(*p)->clean(now);
}
}
}
{

View File

@ -183,14 +183,16 @@ static std::string _jsonEnumerate(unsigned int depth,const ZT_PeerPhysicalPath *
"%s\t\"address\": \"%s\",\n"
"%s\t\"lastSend\": %llu,\n"
"%s\t\"lastReceive\": %llu,\n"
"%s\t\"active\": true,\n"
"%s\t\"active\": %s,\n"
"%s\t\"expired\": %s,\n"
"%s\t\"preferred\": %s,\n"
"%s\t\"trustedPathId\": %llu\n"
"%s}",
prefix,_jsonEscape(reinterpret_cast<const InetAddress *>(&(pp[i].address))->toString()).c_str(),
prefix,pp[i].lastSend,
prefix,pp[i].lastReceive,
prefix,
prefix,(pp[i].expired != 0) ? "false" : "true",
prefix,(pp[i].expired == 0) ? "false" : "true",
prefix,(pp[i].preferred == 0) ? "false" : "true",
prefix,pp[i].trustedPathId,
prefix);