Improved path selection, more efficient traffic allocation, lower QoS/ACK overhead

This commit is contained in:
Joseph Henry 2018-06-22 16:30:20 -07:00
parent 52264d5e28
commit bdcdccfcc3
7 changed files with 143 additions and 91 deletions

View File

@ -1315,6 +1315,11 @@ typedef struct
*/
unsigned int pathCount;
/**
* Whether this peer was ever reachable via an aggregate link
*/
bool hadAggregateLink;
/**
* Known network paths to peer
*/

View File

@ -274,6 +274,19 @@
*/
#define ZT_MULTIPATH_BINDER_REFRESH_PERIOD 5000
/**
* Packets are only used for QoS/ACK statistical sampling if their packet ID is divisible by
* this integer. This is to provide a mechanism for both peers to agree on which packets need
* special treatment without having to exchange information. Changing this value would be
* a breaking change and would necessitate a protocol version upgrade. Since each incoming and
* outgoing packet ID is checked against this value its evaluation is of the form:
* (id & (divisor - 1)) == 0, thus the divisor must be a power of 2.
*
* This value is set at (16) so that given a uniformly-distributed RNG output we will sample
* 1/16th (or 6.25%) of packets.
*/
#define ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR 0x10
/**
* Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processing cutoff
*/
@ -384,7 +397,7 @@
/**
* Minimum amount of time between each ACK packet
*/
#define ZT_PATH_ACK_INTERVAL 250
#define ZT_PATH_ACK_INTERVAL 1000
/**
* How often an aggregate link statistics report is emitted into this tracing system

View File

@ -450,6 +450,7 @@ ZT_PeerList *Node::peers() const
for(std::vector< std::pair< Address,SharedPtr<Peer> > >::iterator pi(peers.begin());pi!=peers.end();++pi) {
ZT_Peer *p = &(pl->peers[pl->peerCount++]);
p->address = pi->second->address().toInt();
p->hadAggregateLink = 0;
if (pi->second->remoteVersionKnown()) {
p->versionMajor = pi->second->remoteVersionMajor();
p->versionMinor = pi->second->remoteVersionMinor();
@ -466,6 +467,7 @@ ZT_PeerList *Node::peers() const
std::vector< SharedPtr<Path> > paths(pi->second->paths(_now));
SharedPtr<Path> bestp(pi->second->getAppropriatePath(_now,false));
p->hadAggregateLink |= pi->second->hasAggregateLink();
p->pathCount = 0;
for(std::vector< SharedPtr<Path> >::iterator path(paths.begin());path!=paths.end();++path) {
ZT_FAST_MEMCPY(&(p->paths[p->pathCount].address),&((*path)->address()),sizeof(struct sockaddr_storage));
@ -475,14 +477,14 @@ ZT_PeerList *Node::peers() const
p->paths[p->pathCount].expired = 0;
p->paths[p->pathCount].preferred = ((*path) == bestp) ? 1 : 0;
p->paths[p->pathCount].latency = (*path)->latency();
p->paths[p->pathCount].packetDelayVariance = (*path)->packetDelayVariance();
p->paths[p->pathCount].packetDelayVariance = (*path)->packetDelayVariance();
p->paths[p->pathCount].throughputDisturbCoeff = (*path)->throughputDisturbanceCoefficient();
p->paths[p->pathCount].packetErrorRatio = (*path)->packetErrorRatio();
p->paths[p->pathCount].packetLossRatio = (*path)->packetLossRatio();
p->paths[p->pathCount].stability = (*path)->lastComputedStability();
p->paths[p->pathCount].throughput = (*path)->meanThroughput();
p->paths[p->pathCount].maxThroughput = (*path)->maxLifetimeThroughput();
p->paths[p->pathCount].allocation = (*path)->allocation();
p->paths[p->pathCount].allocation = (float)(*path)->allocation() / (float)255;
p->paths[p->pathCount].ifname = (*path)->getName();
++p->pathCount;

View File

@ -121,7 +121,7 @@ public:
_lastComputedStability(0.0),
_lastComputedRelativeQuality(0),
_lastComputedThroughputDistCoeff(0.0),
_lastAllocation(0.0)
_lastAllocation(0)
{
prepareBuffers();
}
@ -153,7 +153,7 @@ public:
_lastComputedStability(0.0),
_lastComputedRelativeQuality(0),
_lastComputedThroughputDistCoeff(0.0),
_lastAllocation(0.0)
_lastAllocation(0)
{
prepareBuffers();
_phy->getIfName((PhySocket *)((uintptr_t)_localSocket), _ifname, 16);
@ -316,12 +316,10 @@ public:
{
Mutex::Lock _l(_statistics_m);
if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) {
if (packetId % 2 == 0) { // even -> use for ACK
if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) {
_unackedBytes += payloadLength;
// Take note that we're expecting a VERB_ACK on this path as of a specific time
_expectingAckAsOf = ackAge(now) > ZT_PATH_ACK_INTERVAL ? _expectingAckAsOf : now;
}
else { // odd -> use for QoS
if (_outQoSRecords.size() < ZT_PATH_MAX_OUTSTANDING_QOS_RECORDS) {
_outQoSRecords[packetId] = now;
}
@ -341,11 +339,9 @@ public:
{
Mutex::Lock _l(_statistics_m);
if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) {
if (packetId % 2 == 0) { // even -> use for ACK
if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) {
_inACKRecords[packetId] = payloadLength;
_packetsReceivedSinceLastAck++;
}
else { // odd -> use for QoS
_inQoSRecords[packetId] = now;
_packetsReceivedSinceLastQoS++;
}
@ -527,12 +523,12 @@ public:
*
* @param allocation Share of traffic to be sent over this path to a peer (as an unsigned char, scaled so that 255 is all traffic)
*/
inline void updateComponentAllocationOfAggregateLink(float allocation) { _lastAllocation = allocation; }
inline void updateComponentAllocationOfAggregateLink(unsigned char allocation) { _lastAllocation = allocation; }
/**
* @return Share of traffic allocated to this path in the aggregate link (as an unsigned char, scaled so that 255 is all traffic)
*/
inline float allocation() { return _lastAllocation; }
inline unsigned char allocation() { return _lastAllocation; }
/**
* @return Stability estimates can become expensive to compute, so we cache the most recent result.
@ -704,7 +700,9 @@ private:
float _lastComputedStability;
float _lastComputedRelativeQuality;
float _lastComputedThroughputDistCoeff;
float _lastAllocation;
unsigned char _lastAllocation;
// cached human-readable strings for tracing purposes
char _ifname[16];

View File

@ -56,6 +56,12 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident
_lastSentFullHello(0),
_lastACKWindowReset(0),
_lastQoSWindowReset(0),
_lastMultipathCompatibilityCheck(0),
_freeRandomByte(0),
_uniqueAlivePathCount(0),
_localMultipathSupported(false),
_remoteMultipathSupported(false),
_canUseMultipath(false),
_vProto(0),
_vMajor(0),
_vMinor(0),
@ -69,6 +75,7 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident
_lastAggregateStatsReport(0),
_lastAggregateAllocation(0)
{
Utils::getSecureRandom(&_freeRandomByte, 1);
if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH))
throw ZT_EXCEPTION_INVALID_ARGUMENT;
_pathChoiceHist = new RingBuffer<int>(ZT_MULTIPATH_PROPORTION_WIN_SZ);
@ -110,7 +117,7 @@ void Peer::received(
recordIncomingPacket(tPtr, path, packetId, payloadLength, verb, now);
if (canUseMultipath()) {
if (_canUseMultipath) {
if (path->needsToSendQoS(now)) {
sendQOS_MEASUREMENT(tPtr, path, path->localSocket(), path->address(), now);
}
@ -145,17 +152,23 @@ void Peer::received(
// Paths are redundant if they duplicate an alive path to the same IP or
// with the same local socket and address family.
bool redundant = false;
unsigned int replacePath = ZT_MAX_PEER_NETWORK_PATHS;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
if ( (_paths[i].p->alive(now)) && ( ((_paths[i].p->localSocket() == path->localSocket())&&(_paths[i].p->address().ss_family == path->address().ss_family)) || (_paths[i].p->address().ipsEqual2(path->address())) ) ) {
if ( (_paths[i].p->alive(now)) && ( ((_paths[i].p->localSocket() == path->localSocket())&&(_paths[i].p->address().ss_family == path->address().ss_family)) || (_paths[i].p->address().ipsEqual2(path->address())) ) ) {
redundant = true;
break;
}
// If the path is the same address and port, simply assume this is a replacement
if ( (_paths[i].p->address().ipsEqual2(path->address()) && (_paths[i].p->address().port() == path->address().port()))) {
replacePath = i;
break;
}
} else break;
}
if (!redundant) {
unsigned int replacePath = ZT_MAX_PEER_NETWORK_PATHS;
// If the path isn't a duplicate of the same localSocket AND we haven't already determined a replacePath,
// then find the worst path and replace it.
if (!redundant && replacePath == ZT_MAX_PEER_NETWORK_PATHS) {
int replacePathQuality = 0;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
@ -169,29 +182,15 @@ void Peer::received(
break;
}
}
// If we find a pre-existing path with the same address, just replace it.
// If we don't find anything we can replace, just use the replacePath that we previously decided on.
if (canUseMultipath()) {
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
if ( _paths[i].p->address().ss_family == path->address().ss_family && _paths[i].p->address().ipsEqual2(path->address())) {
replacePath = i;
break;
}
}
}
}
if (replacePath != ZT_MAX_PEER_NETWORK_PATHS) {
if (verb == Packet::VERB_OK) {
RR->t->peerLearnedNewPath(tPtr,networkId,*this,path,packetId);
_paths[replacePath].lr = now;
_paths[replacePath].p = path;
_paths[replacePath].priority = 1;
} else {
attemptToContact = true;
}
}
if (replacePath != ZT_MAX_PEER_NETWORK_PATHS) {
if (verb == Packet::VERB_OK) {
RR->t->peerLearnedNewPath(tPtr,networkId,*this,path,packetId);
_paths[replacePath].lr = now;
_paths[replacePath].p = path;
_paths[replacePath].priority = 1;
} else {
attemptToContact = true;
}
}
}
@ -274,7 +273,9 @@ void Peer::received(
void Peer::recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, int64_t now)
{
if (localMultipathSupport()) {
// Grab second byte from packetId to use as a source of entropy in the next path selection
_freeRandomByte = (packetId & 0xFF00) >> 8;
if (_canUseMultipath) {
path->recordOutgoingPacket(now, packetId, payloadLength, verb);
}
}
@ -282,7 +283,7 @@ void Peer::recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t pack
void Peer::recordIncomingPacket(void *tPtr, const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, int64_t now)
{
if (localMultipathSupport()) {
if (_canUseMultipath) {
if (path->needsToSendAck(now)) {
sendACK(tPtr, path, path->localSocket(), path->address(), now);
}
@ -323,6 +324,9 @@ void Peer::computeAggregateProportionalAllocation(int64_t now)
+ (fmax(1, relThroughput[i]) * ZT_PATH_CONTRIB_THROUGHPUT)
+ relScope * ZT_PATH_CONTRIB_SCOPE;
relQuality *= age_contrib;
// Arbitrary cutoffs
relQuality = relQuality > (1.00 / 100.0) ? relQuality : 0.0;
relQuality = relQuality < (99.0 / 100.0) ? relQuality : 1.0;
totalRelativeQuality += relQuality;
_paths[i].p->updateRelativeQuality(relQuality);
}
@ -330,12 +334,12 @@ void Peer::computeAggregateProportionalAllocation(int64_t now)
// Convert set of relative performances into an allocation set
for(uint16_t i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
_paths[i].p->updateComponentAllocationOfAggregateLink(_paths[i].p->relativeQuality() / totalRelativeQuality);
_paths[i].p->updateComponentAllocationOfAggregateLink((_paths[i].p->relativeQuality() / totalRelativeQuality) * 255);
}
}
}
float Peer::computeAggregateLinkPacketDelayVariance()
int Peer::computeAggregateLinkPacketDelayVariance()
{
float pdv = 0.0;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
@ -346,9 +350,9 @@ float Peer::computeAggregateLinkPacketDelayVariance()
return pdv;
}
float Peer::computeAggregateLinkMeanLatency()
int Peer::computeAggregateLinkMeanLatency()
{
float ml = 0.0;
int ml = 0;
int pathCount = 0;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
@ -396,7 +400,7 @@ SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired)
* Send traffic across the highest quality path only. This algorithm will still
* use the old path quality metric from protocol version 9.
*/
if (!canUseMultipath()) {
if (!_canUseMultipath) {
long bestPathQuality = 2147483647;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
@ -443,15 +447,13 @@ SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired)
}
}
}
unsigned int r;
Utils::getSecureRandom(&r, 1);
unsigned int r = _freeRandomByte;
if (numAlivePaths > 0) {
// pick a random out of the set deemed "alive"
int rf = r % numAlivePaths;
return _paths[alivePaths[rf]].p;
}
else if(numStalePaths > 0) {
// resort to trying any non-expired path
// Resort to trying any non-expired path
int rf = r % numStalePaths;
return _paths[stalePaths[rf]].p;
}
@ -461,40 +463,12 @@ SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired)
* Proportionally allocate traffic according to dynamic path quality measurements
*/
if (RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED) {
int numAlivePaths = 0;
int numStalePaths = 0;
int alivePaths[ZT_MAX_PEER_NETWORK_PATHS];
int stalePaths[ZT_MAX_PEER_NETWORK_PATHS];
memset(&alivePaths, -1, sizeof(alivePaths));
memset(&stalePaths, -1, sizeof(stalePaths));
// Attempt to find an excuse not to use the rest of this algorithm
// Alive or Stale?
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
if (_paths[i].p->alive(now)) {
alivePaths[numAlivePaths] = i;
numAlivePaths++;
} else {
stalePaths[numStalePaths] = i;
numStalePaths++;
}
// Record a default path to use as a short-circuit for the rest of the algorithm (if needed)
bestPath = i;
}
}
if ((now - _lastAggregateAllocation) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) {
_lastAggregateAllocation = now;
computeAggregateProportionalAllocation(now);
}
if (numAlivePaths == 0 && numStalePaths == 0) {
return SharedPtr<Path>();
} if (numAlivePaths == 1 || numStalePaths == 1) {
return _paths[bestPath].p;
}
// Randomly choose path according to their allocations
unsigned int r;
Utils::getSecureRandom(&r, 1);
float rf = (float)(r %= 100) / 100;
float rf = _freeRandomByte;
for(int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
if (rf < _paths[i].p->allocation()) {
@ -676,6 +650,41 @@ void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr<Peer> &o
}
}
inline void Peer::processBackgroundPeerTasks(int64_t now)
{
// Determine current multipath compatibility with other peer
if ((now - _lastMultipathCompatibilityCheck) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) {
// Cache number of available paths so that we can short-circuit multipath logic elsewhere
//
// We also take notice of duplicate paths (same IP only) because we may have
// recently received a direct path push from a peer and our list might contain
// a dead path which hasn't been fully recognized as such. In this case we
// don't want the duplicate to trigger execution of multipath code prematurely.
//
// This is done to support the behavior of auto multipath enable/disable
// without user intervention.
int currAlivePathCount = 0;
int duplicatePathsFound = 0;
for (unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
currAlivePathCount++;
for (unsigned int j=0;j<ZT_MAX_PEER_NETWORK_PATHS;++j) {
if (_paths[i].p && _paths[j].p && _paths[i].p->address().ipsEqual2(_paths[j].p->address()) && i != j) {
duplicatePathsFound+=1;
break;
}
}
}
}
_uniqueAlivePathCount = (currAlivePathCount - (duplicatePathsFound / 2));
_lastMultipathCompatibilityCheck = now;
_localMultipathSupported = ((RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) && (ZT_PROTO_VERSION > 9));
_remoteMultipathSupported = _vProto > 9;
// If both peers support multipath and more than one path exist, we can use multipath logic
_canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1);
}
}
void Peer::sendACK(void *tPtr,const SharedPtr<Path> &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now)
{
Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ACK);
@ -774,14 +783,15 @@ void Peer::tryMemorizedPath(void *tPtr,int64_t now)
unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now)
{
unsigned int sent = 0;
Mutex::Lock _l(_paths_m);
const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD);
_lastSentFullHello = now;
processBackgroundPeerTasks(now);
// Emit traces regarding aggregate link status
if (canUseMultipath()) {
if (_canUseMultipath) {
int alivePathCount = aggregateLinkPhysicalPathCount();
if ((now - _lastAggregateStatsReport) > ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL) {
_lastAggregateStatsReport = now;

View File

@ -203,12 +203,12 @@ public:
/**
* @return The aggregate link Packet Delay Variance (PDV)
*/
float computeAggregateLinkPacketDelayVariance();
int computeAggregateLinkPacketDelayVariance();
/**
* @return The aggregate link mean latency
*/
float computeAggregateLinkMeanLatency();
int computeAggregateLinkMeanLatency();
/**
* @return The number of currently alive "physical" paths in the aggregate link
@ -357,7 +357,7 @@ public:
*/
inline unsigned int latency(const int64_t now)
{
if (RR->node->getMultipathMode()) {
if (_canUseMultipath) {
return (int)computeAggregateLinkMeanLatency();
} else {
SharedPtr<Path> bp(getAppropriatePath(now,false));
@ -417,6 +417,14 @@ public:
inline bool remoteVersionKnown() const { return ((_vMajor > 0)||(_vMinor > 0)||(_vRevision > 0)); }
/**
* Periodically update known multipath activation constraints. This is done so that we know when and when
* not to use multipath logic. Doing this once every few seconds is sufficient.
*
* @param now Current time
*/
inline void processBackgroundPeerTasks(int64_t now);
/**
* Record that the remote peer does have multipath enabled, as evidenced by the receipt of a VERB_ACK
* or a VERB_QOS_MEASUREMENT packet at some point in the past. Until this flag is set, the local client
@ -427,18 +435,18 @@ public:
/**
* @return Whether the local client supports and is configured to use multipath
*/
inline bool localMultipathSupport() { return ((RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) && (ZT_PROTO_VERSION > 9)); }
inline bool localMultipathSupport() { return _localMultipathSupported; }
/**
* @return Whether the remote peer supports and is configured to use multipath
*/
inline bool remoteMultipathSupport() { return (_remotePeerMultipathEnabled && (_vProto > 9)); }
inline bool remoteMultipathSupport() { return _remoteMultipathSupported; }
/**
* @return Whether this client can use multipath to communicate with this peer. This is the cached result of
* the periodic background check: true only if both peers support multipath and more than one unique path exists.
*/
inline bool canUseMultipath() { return (localMultipathSupport() && remoteMultipathSupport()); }
inline bool canUseMultipath() { return _canUseMultipath; }
/**
* @return True if peer has received a trust established packet (e.g. common network membership) in the past ZT_TRUST_EXPIRATION ms
@ -557,6 +565,13 @@ public:
return (_QoSCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT);
}
/**
* @return Whether this peer is reachable via an aggregate link
*/
inline bool hasAggregateLink() {
	// All three conditions must hold: the cached local support flag, the cached
	// remote support flag, and the remote-peer-multipath-enabled flag.
	if (!_localMultipathSupported)
		return false;
	if (!_remoteMultipathSupported)
		return false;
	return _remotePeerMultipathEnabled;
}
/**
* Serialize a peer for storage in local cache
*
@ -658,6 +673,15 @@ private:
int64_t _lastPathPrune;
int64_t _lastACKWindowReset;
int64_t _lastQoSWindowReset;
int64_t _lastMultipathCompatibilityCheck;
unsigned char _freeRandomByte;
int _uniqueAlivePathCount;
bool _localMultipathSupported;
bool _remoteMultipathSupported;
bool _canUseMultipath;
uint16_t _vProto;
uint16_t _vMajor;

View File

@ -1229,7 +1229,7 @@ public:
char peerAddrStr[256];
if (pl) {
for(unsigned long i=0;i<pl->peerCount;++i) {
if (pl->peers[i].role == ZT_PEER_ROLE_LEAF) {
if (pl->peers[i].hadAggregateLink) {
nlohmann::json pj;
_peerAggregateLinkToJson(pj,&(pl->peers[i]));
OSUtils::ztsnprintf(peerAddrStr,sizeof(peerAddrStr),"%.10llx",pl->peers[i].address);