Remove bonds for peers that have fully expired. Remove notion of bond health

This commit is contained in:
Joseph Henry 2022-03-02 09:55:23 -08:00
parent f8e24f4629
commit 8148c658cf
No known key found for this signature in database
GPG Key ID: C45B33FF5EBC9344
8 changed files with 57 additions and 88 deletions

View File

@ -1404,11 +1404,6 @@ typedef struct
*/
int bondingPolicy;
/**
* The health status of the bond to this peer
*/
bool isHealthy;
/**
* The number of links that comprise the bond to this peer that are considered alive
*/

View File

@ -90,7 +90,7 @@ SharedPtr<Bond> Bond::getBondByPeerId(int64_t identity)
return _bonds.count(identity) ? _bonds[identity] : SharedPtr<Bond>();
}
SharedPtr<Bond> Bond::createTransportTriggeredBond(const RuntimeEnvironment* renv, const SharedPtr<Peer>& peer)
SharedPtr<Bond> Bond::createBond(const RuntimeEnvironment* renv, const SharedPtr<Peer>& peer)
{
Mutex::Lock _l(_bonds_m);
int64_t identity = peer->identity().address().toInt();
@ -145,6 +145,12 @@ SharedPtr<Bond> Bond::createTransportTriggeredBond(const RuntimeEnvironment* ren
return SharedPtr<Bond>();
}
void Bond::destroyBond(uint64_t peerId)
{
Mutex::Lock _l(_bonds_m);
_bonds.erase(peerId);
}
SharedPtr<Link> Bond::getLinkBySocket(const std::string& policyAlias, uint64_t localSocket)
{
Mutex::Lock _l(_links_m);
@ -816,6 +822,17 @@ void Bond::curateBond(int64_t now, bool rebuildBond)
if (! _paths[i].p) {
continue;
}
/**
* Remove expired links from bond
*/
if ((now - _paths[i].p->_lastIn) > (ZT_PEER_EXPIRED_PATH_TRIAL_PERIOD)) {
log("link %s has expired, removing from bond", pathToStr(_paths[i].p).c_str());
_paths[i] = NominatedPath();
_paths[i].p = SharedPtr<Path>();
continue;
}
tmpNumTotalLinks++;
if (_paths[i].eligible) {
tmpNumAliveLinks++;
@ -876,42 +893,18 @@ void Bond::curateBond(int64_t now, bool rebuildBond)
}
/**
* Determine health status to report to user
* Trigger status report if number of links change
*/
_numAliveLinks = tmpNumAliveLinks;
_numTotalLinks = tmpNumTotalLinks;
bool tmpHealthStatus = true;
if (_policy == ZT_BOND_POLICY_BROADCAST) {
if (_numAliveLinks < 1) {
// Considered healthy if we're able to send frames at all
tmpHealthStatus = false;
}
}
if ((_policy == ZT_BOND_POLICY_BALANCE_RR) || (_policy == ZT_BOND_POLICY_BALANCE_XOR) || (_policy == ZT_BOND_POLICY_BALANCE_AWARE || (_policy == ZT_BOND_POLICY_ACTIVE_BACKUP))) {
if (_numAliveLinks < _numTotalLinks) {
tmpHealthStatus = false;
}
}
if (tmpHealthStatus != _isHealthy) {
std::string healthStatusStr;
if (tmpHealthStatus == true) {
healthStatusStr = "HEALTHY";
}
else {
healthStatusStr = "DEGRADED";
}
log("bond is %s (%d/%d links)", healthStatusStr.c_str(), _numAliveLinks, _numTotalLinks);
if ((_numAliveLinks != tmpNumAliveLinks) || (_numTotalLinks != tmpNumTotalLinks)) {
dumpInfo(now, true);
}
_isHealthy = tmpHealthStatus;
/**
* Curate the set of paths that are part of the bond proper. Select a set of paths
* per logical link according to eligibility and user-specified constraints.
*/
if ((_policy == ZT_BOND_POLICY_BALANCE_RR) || (_policy == ZT_BOND_POLICY_BALANCE_XOR) || (_policy == ZT_BOND_POLICY_BALANCE_AWARE)) {
if (! _numBondedPaths) {
rebuildBond = true;
@ -1009,14 +1002,14 @@ void Bond::estimatePathQuality(int64_t now)
uint32_t totUserSpecifiedLinkSpeed = 0;
if (_numBondedPaths) { // Compute relative user-specified speeds of links
for (unsigned int i = 0; i < _numBondedPaths; ++i) {
SharedPtr<Link> link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket());
if (_paths[i].p && _paths[i].allowed()) {
SharedPtr<Link> link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket());
totUserSpecifiedLinkSpeed += link->speed();
}
}
for (unsigned int i = 0; i < _numBondedPaths; ++i) {
SharedPtr<Link> link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket());
if (_paths[i].p && _paths[i].allowed()) {
SharedPtr<Link> link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket());
link->setRelativeSpeed((uint8_t)round(((float)link->speed() / (float)totUserSpecifiedLinkSpeed) * 255));
}
}
@ -1213,6 +1206,11 @@ void Bond::processActiveBackupTasks(void* tPtr, int64_t now)
int nonPreferredPathIdx;
bool bFoundPrimaryLink = false;
if (_abPathIdx != ZT_MAX_PEER_NETWORK_PATHS && !_paths[_abPathIdx].p) {
_abPathIdx = ZT_MAX_PEER_NETWORK_PATHS;
log("main active-backup path has been removed");
}
/**
* Generate periodic status report
*/
@ -1242,7 +1240,6 @@ void Bond::processActiveBackupTasks(void* tPtr, int64_t now)
* simply find the next eligible path.
*/
if (! userHasSpecifiedLinks()) {
debug("no user-specified links");
for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) {
if (_paths[i].p && _paths[i].eligible) {
SharedPtr<Link> link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket());
@ -1575,7 +1572,6 @@ void Bond::setBondParameters(int policy, SharedPtr<Bond> templateBond, bool useT
// Bond status
_isHealthy = false;
_numAliveLinks = 0;
_numTotalLinks = 0;
_numBondedPaths = 0;
@ -1685,11 +1681,14 @@ SharedPtr<Link> Bond::getLink(const SharedPtr<Path>& path)
std::string Bond::pathToStr(const SharedPtr<Path>& path)
{
#ifdef ZT_TRACE
char pathStr[64] = { 0 };
char fullPathStr[384] = { 0 };
path->address().toString(pathStr);
snprintf(fullPathStr, 384, "%.16llx-%s/%s", (unsigned long long)(path->localSocket()), getLink(path)->ifname().c_str(), pathStr);
return std::string(fullPathStr);
if (path) {
char pathStr[64] = { 0 };
char fullPathStr[384] = { 0 };
path->address().toString(pathStr);
snprintf(fullPathStr, 384, "%.16llx-%s/%s", (unsigned long long)(path->localSocket()), getLink(path)->ifname().c_str(), pathStr);
return std::string(fullPathStr);
}
return "";
#else
return "";
#endif
@ -1728,7 +1727,7 @@ void Bond::dumpInfo(int64_t now, bool force)
_lastSummaryDump = now;
float overhead = (_overheadBytes / (timeSinceLastDump / 1000.0f) / 1000.0f);
_overheadBytes = 0;
log("bond: bp=%d, fi=%d, mi=%d, ud=%d, dd=%d, flows=%lu, leaf=%d, overhead=%f KB/s",
log("bond: bp=%d, fi=%d, mi=%d, ud=%d, dd=%d, flows=%lu, leaf=%d, overhead=%f KB/s, links=(%d/%d)",
_policy,
_failoverInterval,
_monitorInterval,
@ -1736,7 +1735,9 @@ void Bond::dumpInfo(int64_t now, bool force)
_downDelay,
(unsigned long)_flows.size(),
_isLeaf,
overhead);
overhead,
_numAliveLinks,
_numTotalLinks);
for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) {
if (_paths[i].p) {
dumpPathStatus(now, i);

View File

@ -436,7 +436,14 @@ class Bond {
* @param peer Remote peer that this bond services
* @return A pointer to the newly created Bond
*/
static SharedPtr<Bond> createTransportTriggeredBond(const RuntimeEnvironment* renv, const SharedPtr<Peer>& peer);
static SharedPtr<Bond> createBond(const RuntimeEnvironment* renv, const SharedPtr<Peer>& peer);
/**
* Remove a bond from the bond controller.
*
* @param peerId Remote peer that this bond services
*/
static void destroyBond(uint64_t peerId);
/**
* Periodically perform maintenance tasks for the bonding layer.
@ -1020,14 +1027,6 @@ class Bond {
return _policy;
}
/**
* @return the health status of the bond
*/
inline bool isHealthy()
{
return _isHealthy;
}
/**
* @return the number of links comprising this bond which are considered alive
*/
@ -1344,7 +1343,7 @@ class Bond {
int packetsIn;
int packetsOut;
AtomicCounter __refCount;
//AtomicCounter __refCount;
SharedPtr<Path> p;
void set(uint64_t now, const SharedPtr<Path>& path)
@ -1490,7 +1489,6 @@ class Bond {
/**
* Link state reporting
*/
bool _isHealthy;
uint8_t _numAliveLinks;
uint8_t _numTotalLinks;

View File

@ -509,7 +509,6 @@ ZT_PeerList *Node::peers() const
if (pi->second->bond()) {
p->isBonded = pi->second->bond();
p->bondingPolicy = pi->second->bond()->policy();
p->isHealthy = pi->second->bond()->isHealthy();
p->numAliveLinks = pi->second->bond()->getNumAliveLinks();
p->numTotalLinks = pi->second->bond()->getNumTotalLinks();
}

View File

@ -503,7 +503,7 @@ void Peer::performMultipathStateCheck(void *tPtr, int64_t now)
_localMultipathSupported = ((numAlivePaths >= 1) && (RR->bc->inUse()) && (ZT_PROTO_VERSION > 9));
if (_localMultipathSupported && !_bond) {
if (RR->bc) {
_bond = RR->bc->createTransportTriggeredBond(RR, this);
_bond = RR->bc->createBond(RR, this);
/**
* Allow new bond to retroactively learn all paths known to this peer
*/

View File

@ -53,7 +53,10 @@ private:
Peer() {} // disabled to prevent bugs -- should not be constructed uninitialized
public:
~Peer() { Utils::burn(_key,sizeof(_key)); }
~Peer() {
Utils::burn(_key,sizeof(_key));
RR->bc->destroyBond(_id.address().toInt());
}
/**
* Construct a new peer

33
one.cpp
View File

@ -523,31 +523,23 @@ static int cli(int argc,char **argv)
printf("%s" ZT_EOL_S,OSUtils::jsonDump(j).c_str());
} else {
bool bFoundBond = false;
printf(" <peer> <bondtype> <status> <links>" ZT_EOL_S);
printf(" <peer> <bondtype> <links>" ZT_EOL_S);
if (j.is_array()) {
for(unsigned long k=0;k<j.size();++k) {
nlohmann::json &p = j[k];
bool isBonded = p["isBonded"];
if (isBonded) {
int8_t bondingPolicy = p["bondingPolicy"];
bool isHealthy = p["isHealthy"];
int8_t numAliveLinks = p["numAliveLinks"];
int8_t numTotalLinks = p["numTotalLinks"];
bFoundBond = true;
std::string healthStr;
if (isHealthy) {
healthStr = "HEALTHY";
} else {
healthStr = "DEGRADED";
}
std::string policyStr = "none";
if (bondingPolicy >= ZT_BOND_POLICY_NONE && bondingPolicy <= ZT_BOND_POLICY_BALANCE_AWARE) {
policyStr = Bond::getPolicyStrByCode(bondingPolicy);
}
printf("%10s %32s %8s %d/%d" ZT_EOL_S,
printf("%10s %32s %d/%d" ZT_EOL_S,
OSUtils::jsonString(p ["address"],"-").c_str(),
policyStr.c_str(),
healthStr.c_str(),
numAliveLinks,
numTotalLinks);
}
@ -617,12 +609,6 @@ static int cli(int argc,char **argv)
if (json) {
printf("%s" ZT_EOL_S,OSUtils::jsonDump(j).c_str());
} else {
std::string healthStr;
if (OSUtils::jsonInt(j["isHealthy"],0)) {
healthStr = "Healthy";
} else {
healthStr = "Degraded";
}
int numAliveLinks = OSUtils::jsonInt(j["numAliveLinks"],0);
int numTotalLinks = OSUtils::jsonInt(j["numTotalLinks"],0);
printf("Peer : %s\n", arg1.c_str());
@ -630,7 +616,6 @@ static int cli(int argc,char **argv)
//if (bondingPolicy == ZT_BOND_POLICY_ACTIVE_BACKUP) {
printf("Link Select Method : %d\n", (int)OSUtils::jsonInt(j["linkSelectMethod"],0));
//}
printf("Status : %s\n", healthStr.c_str());
printf("Links : %d/%d\n", numAliveLinks, numTotalLinks);
printf("Failover Interval : %d (ms)\n", (int)OSUtils::jsonInt(j["failoverInterval"],0));
printf("Up Delay : %d (ms)\n", (int)OSUtils::jsonInt(j["upDelay"],0));
@ -705,33 +690,23 @@ static int cli(int argc,char **argv)
printf("%s" ZT_EOL_S,OSUtils::jsonDump(j).c_str());
} else {
bool bFoundBond = false;
printf(" <peer> <bondtype> <status> <links>" ZT_EOL_S);
printf(" <peer> <bondtype> <links>" ZT_EOL_S);
if (j.is_array()) {
for(unsigned long k=0;k<j.size();++k) {
nlohmann::json &p = j[k];
bool isBonded = p["isBonded"];
if (isBonded) {
int8_t bondingPolicy = p["bondingPolicy"];
bool isHealthy = p["isHealthy"];
int8_t numAliveLinks = p["numAliveLinks"];
int8_t numTotalLinks = p["numTotalLinks"];
bFoundBond = true;
std::string healthStr;
if (isHealthy) {
healthStr = "Healthy";
} else {
healthStr = "Degraded";
}
std::string policyStr = "none";
if (bondingPolicy >= ZT_BOND_POLICY_NONE && bondingPolicy <= ZT_BOND_POLICY_BALANCE_AWARE) {
policyStr = Bond::getPolicyStrByCode(bondingPolicy);
}
printf("%10s %32s %8s %d/%d" ZT_EOL_S,
printf("%10s %32s %d/%d" ZT_EOL_S,
OSUtils::jsonString(p["address"],"-").c_str(),
policyStr.c_str(),
healthStr.c_str(),
numAliveLinks,
numTotalLinks);
}

View File

@ -510,7 +510,6 @@ static void _peerToJson(nlohmann::json &pj,const ZT_Peer *peer)
pj["isBonded"] = peer->isBonded;
if (peer->isBonded) {
pj["bondingPolicy"] = peer->bondingPolicy;
pj["isHealthy"] = peer->isHealthy;
pj["numAliveLinks"] = peer->numAliveLinks;
pj["numTotalLinks"] = peer->numTotalLinks;
}
@ -542,7 +541,6 @@ static void _bondToJson(nlohmann::json &pj, SharedPtr<Bond> &bond)
return;
}
pj["isHealthy"] = bond->isHealthy();
pj["numAliveLinks"] = bond->getNumAliveLinks();
pj["numTotalLinks"] = bond->getNumTotalLinks();
pj["failoverInterval"] = bond->getFailoverInterval();