Add basic bond health status reporting (listbonds)

This commit is contained in:
Joseph Henry 2020-07-27 23:01:12 -07:00
parent c92e030a4b
commit 9f4985b11a
6 changed files with 199 additions and 6 deletions

View File

@ -1475,17 +1475,40 @@ typedef struct
enum ZT_PeerRole role;
/**
* Number of paths (size of paths[])
*/
unsigned int pathCount;
/**
* Whether multiple paths to this peer are bonded
* Whether a multi-link bond has formed
*/
bool isBonded;
/**
* The bonding policy used to bond to this peer (one of the ZT_BONDING_POLICY_* codes)
*/
int bondingPolicy;
/**
* The health status of the bond to this peer; the criteria depend on the bonding policy
*/
bool isHealthy;
/**
* The number of links that comprise the bond to this peer that are considered alive
*/
int numAliveLinks;
/**
* The number of links that comprise the bond to this peer, alive or not
*/
int numTotalLinks;
/**
* The user-specified bond template name
*/
char customBondName[32];
/**
* Number of paths (size of paths[])
*/
unsigned int pathCount;
/**
* Known network paths to peer
*/

View File

@ -730,6 +730,9 @@ void Bond::curateBond(const int64_t now, bool rebuildBond)
{
//fprintf(stderr, "%lu curateBond (rebuildBond=%d), _numBondedPaths=%d\n", ((now - RR->bc->getBondStartTime())), rebuildBond, _numBondedPaths);
char pathStr[128];
// Per-pass tallies of alive and known links; copied into _numAliveLinks/_numTotalLinks once every path has been examined
uint8_t tmpNumAliveLinks = 0;
uint8_t tmpNumTotalLinks = 0;
/**
* Update path states
*/
@ -737,6 +740,10 @@ void Bond::curateBond(const int64_t now, bool rebuildBond)
if (!_paths[i]) {
continue;
}
// Every non-null path slot counts as a link of this bond; those passing alive() are tallied separately
tmpNumTotalLinks++;
if (_paths[i]->alive(now, true)) {
tmpNumAliveLinks++;
}
bool currEligibility = _paths[i]->eligible(now,_ackSendInterval);
//_paths[i]->address().toString(pathStr);
//fprintf(stderr, "\n\n%ld path eligibility (for %s, %s):\n", (RR->node->now() - RR->bc->getBondStartTime()), getLink(_paths[i])->ifname().c_str(), pathStr);
@ -764,6 +771,46 @@ void Bond::curateBond(const int64_t now, bool rebuildBond)
}
_paths[i]->_lastEligibilityState = currEligibility;
}
_numAliveLinks = tmpNumAliveLinks;
_numTotalLinks = tmpNumTotalLinks;
/* Determine health status to report to user */
bool tmpHealthStatus = true;
if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) {
if (_numAliveLinks < 2) {
// Considered healthy if there is at least one failover link
tmpHealthStatus = false;
}
}
if (_bondingPolicy == ZT_BONDING_POLICY_BROADCAST) {
if (_numAliveLinks < 1) {
// Considerd healthy if we're able to send frames at all
tmpHealthStatus = false;
}
}
if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) {
if (_numAliveLinks < _numTotalLinks) {
// Considerd healthy if all known paths are alive, this should be refined to account for user bond config settings
tmpHealthStatus = false;
}
}
if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR) {
if (_numAliveLinks < _numTotalLinks) {
// Considerd healthy if all known paths are alive, this should be refined to account for user bond config settings
tmpHealthStatus = false;
}
}
if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) {
if (_numAliveLinks < _numTotalLinks) {
// Considerd healthy if all known paths are alive, this should be refined to account for user bond config settings
tmpHealthStatus = false;
}
}
_isHealthy = tmpHealthStatus;
/**
* Curate the set of paths that are part of the bond proper. Selects a single path
* per logical link according to eligibility and user-specified constraints.
@ -1509,6 +1556,10 @@ void Bond::setReasonableDefaults(int policy, SharedPtr<Bond> templateBond, bool
_lastCheckUserPreferences = 0;
_lastBackgroundTaskCheck = 0;
// Status starts out as unhealthy with zero links; curateBond() overwrites these with computed values
_isHealthy = false;
_numAliveLinks = 0;
_numTotalLinks = 0;
_downDelay = 0;
_upDelay = 0;
_allowFlowHashing=false;

View File

@ -485,6 +485,21 @@ public:
*/
inline uint8_t getPolicy()
{
	return _bondingPolicy;
}
/**
 * Report the health verdict stored by the most recent curateBond() pass.
 *
 * @return true if the bond is currently considered healthy, false otherwise
 */
inline bool isHealthy()
{
	return _isHealthy;
}
/**
 * @return the number of links comprising this bond which are considered alive,
 *         as tallied by the most recent curateBond() pass
 */
inline uint8_t getNumAliveLinks() { return _numAliveLinks; }
/**
 * Report how many links (alive or not) were counted for this bond by the
 * most recent curateBond() pass.
 *
 * @return the number of links comprising this bond
 */
inline uint8_t getNumTotalLinks()
{
	return _numTotalLinks;
}
/**
*
* @param allowFlowHashing
@ -626,6 +641,10 @@ private:
uint16_t _maxAcceptablePacketDelayVariance;
uint8_t _minAcceptableAllocation;
// Health verdict and link tallies kept for status reporting; refreshed by curateBond()
bool _isHealthy;
uint8_t _numAliveLinks;
uint8_t _numTotalLinks;
/**
* Default initial punishment inflicted on misbehaving paths. Punishment slowly
* drains linearly. For each eligibility change the remaining punishment is doubled.

View File

@ -513,6 +513,9 @@ ZT_PeerList *Node::peers() const
if (pi->second->bond()) {
p->isBonded = pi->second->bond();
p->bondingPolicy = pi->second->bond()->getPolicy();
p->isHealthy = pi->second->bond()->isHealthy();
p->numAliveLinks = pi->second->bond()->getNumAliveLinks();
p->numTotalLinks = pi->second->bond()->getNumTotalLinks();
}
}

63
one.cpp
View File

@ -72,6 +72,8 @@
#include "osdep/Http.hpp"
#include "osdep/Thread.hpp"
#include "node/BondController.hpp"
#include "service/OneService.hpp"
#include "ext/json/json.hpp"
@ -467,6 +469,67 @@ static int cli(int argc,char **argv)
printf("%u %s %s" ZT_EOL_S,scode,command.c_str(),responseBody.c_str());
return 1;
}
} else if (command == "listbonds") {
const unsigned int scode = Http::GET(1024 * 1024 * 16,60000,(const struct sockaddr *)&addr,"/bonds",requestHeaders,responseHeaders,responseBody);
if (scode == 0) {
printf("Error connecting to the ZeroTier service: %s\n\nPlease check that the service is running and that TCP port 9993 can be contacted via 127.0.0.1." ZT_EOL_S, responseBody.c_str());
return 1;
}
nlohmann::json j;
try {
j = OSUtils::jsonParse(responseBody);
} catch (std::exception &exc) {
printf("%u %s invalid JSON response (%s)" ZT_EOL_S,scode,command.c_str(),exc.what());
return 1;
} catch ( ... ) {
printf("%u %s invalid JSON response (unknown exception)" ZT_EOL_S,scode,command.c_str());
return 1;
}
if (scode == 200) {
if (json) {
printf("%s" ZT_EOL_S,OSUtils::jsonDump(j).c_str());
} else {
printf(" <peer> <bondtype> <status> <links>" ZT_EOL_S);
if (j.is_array()) {
for(unsigned long k=0;k<j.size();++k) {
nlohmann::json &p = j[k];
bool isBonded = p["isBonded"];
int8_t bondingPolicy = p["bondingPolicy"];
bool isHealthy = p["isHealthy"];
int8_t numAliveLinks = p["numAliveLinks"];
int8_t numTotalLinks = p["numTotalLinks"];
if (isBonded) {
std::string healthStr;
if (isHealthy) {
healthStr = "HEALTHY";
} else {
healthStr = "DEGRADED";
}
std::string policyStr = "none";
if (bondingPolicy >= ZT_BONDING_POLICY_NONE && bondingPolicy <= ZT_BONDING_POLICY_BALANCE_AWARE) {
policyStr = BondController::getPolicyStrByCode(bondingPolicy);
}
printf("%10s %32s %8s %d/%d" ZT_EOL_S,
OSUtils::jsonString(p ["address"],"-").c_str(),
policyStr.c_str(),
healthStr.c_str(),
numAliveLinks,
numTotalLinks);
}
}
}
}
return 0;
} else {
printf("%u %s %s" ZT_EOL_S,scode,command.c_str(),responseBody.c_str());
return 1;
}
} else if (command == "listnetworks") {
const unsigned int scode = Http::GET(1024 * 1024 * 16,60000,(const struct sockaddr *)&addr,"/network",requestHeaders,responseHeaders,responseBody);

View File

@ -253,6 +253,11 @@ static void _peerToJson(nlohmann::json &pj,const ZT_Peer *peer)
pj["version"] = tmp;
pj["latency"] = peer->latency;
pj["role"] = prole;
// Bond status fields (read by the CLI's listbonds command)
pj["isBonded"] = peer->isBonded;
pj["bondingPolicy"] = peer->bondingPolicy;
pj["isHealthy"] = peer->isHealthy;
pj["numAliveLinks"] = peer->numAliveLinks;
pj["numTotalLinks"] = peer->numTotalLinks;
nlohmann::json pa = nlohmann::json::array();
for(unsigned int i=0;i<peer->pathCount;++i) {
@ -1348,6 +1353,35 @@ public:
} else scode = 404;
_node->freeQueryResult((void *)pl);
} else scode = 500;
} else if (ps[0] == "bonds") {
// Bond status is carried in the per-peer JSON produced by _peerToJson().
// GET /bonds       -> 200 with an array holding one object per peer
// GET /bonds/<id>  -> 200 with that peer's object, 404 if no peer matches
// Deeper paths yield 404; a failed peer query yields 500.
ZT_PeerList *pl = _node->peers();
if (!pl) {
	scode = 500;
} else {
	if (ps.size() == 1) {
		// Return [array] of all peers
		res = nlohmann::json::array();
		for(unsigned long i=0;i<pl->peerCount;++i) {
			nlohmann::json pj;
			_peerToJson(pj,&(pl->peers[i]));
			res.push_back(pj);
		}
		scode = 200;
	} else if (ps.size() == 2) {
		// Return a single peer by ID or 404 if not found
		const uint64_t wantp = Utils::hexStrToU64(ps[1].c_str());
		// Set the not-found status explicitly rather than relying on scode's earlier value
		scode = 404;
		for(unsigned long i=0;i<pl->peerCount;++i) {
			if (pl->peers[i].address == wantp) {
				_peerToJson(res,&(pl->peers[i]));
				scode = 200;
				break;
			}
		}
	} else {
		scode = 404;
	}
	// The peer list is owned by the node and must be released on every path
	_node->freeQueryResult((void *)pl);
}
} else {
if (_controller) {
scode = _controller->handleControlPlaneHttpGET(std::vector<std::string>(ps.begin()+1,ps.end()),urlArgs,headers,body,responseBody,responseContentType);