From 08d9dc5c6890d0483588d3a3d46d72bfa87be2e5 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Mon, 6 Aug 2018 10:29:37 -0700 Subject: [PATCH 01/35] Updated protocol version and versioning blurb --- node/Packet.hpp | 52 ++++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/node/Packet.hpp b/node/Packet.hpp index 8e82bd348..828f549a8 100644 --- a/node/Packet.hpp +++ b/node/Packet.hpp @@ -45,31 +45,35 @@ /** * Protocol version -- incremented only for major changes * - * 1 - 0.2.0 ... 0.2.5 - * 2 - 0.3.0 ... 0.4.5 - * + Added signature and originating peer to multicast frame - * + Double size of multicast frame bloom filter - * 3 - 0.5.0 ... 0.6.0 - * + Yet another multicast redesign - * + New crypto completely changes key agreement cipher - * 4 - 0.6.0 ... 1.0.6 - * + BREAKING CHANGE: New identity format based on hashcash design - * 5 - 1.1.0 ... 1.1.5 - * + Supports echo - * + Supports in-band world (root server definition) updates - * + Clustering! (Though this will work with protocol v4 clients.) - * + Otherwise backward compatible with protocol v4 - * 6 - 1.1.5 ... 1.1.10 - * + Network configuration format revisions including binary values - * 7 - 1.1.10 ... 1.1.17 - * + Introduce trusted paths for local SDN use - * 8 - 1.1.17 ... 1.2.0 - * + Multipart network configurations for large network configs - * + Tags and Capabilities - * + Inline push of CertificateOfMembership deprecated - * 9 - 1.2.0 ... CURRENT + * 1 - 0.2.0 ... 0.2.5 + * 2 - 0.3.0 ... 0.4.5 + * + Added signature and originating peer to multicast frame + * + Double size of multicast frame bloom filter + * 3 - 0.5.0 ... 0.6.0 + * + Yet another multicast redesign + * + New crypto completely changes key agreement cipher + * 4 - 0.6.0 ... 1.0.6 + * + BREAKING CHANGE: New identity format based on hashcash design + * 5 - 1.1.0 ... 1.1.5 + * + Supports echo + * + Supports in-band world (root server definition) updates + * + Clustering! 
(Though this will work with protocol v4 clients.) + * + Otherwise backward compatible with protocol v4 + * 6 - 1.1.5 ... 1.1.10 + * + Network configuration format revisions including binary values + * 7 - 1.1.10 ... 1.1.17 + * + Introduce trusted paths for local SDN use + * 8 - 1.1.17 ... 1.2.0 + * + Multipart network configurations for large network configs + * + Tags and Capabilities + * + Inline push of CertificateOfMembership deprecated + * 9 - 1.2.0 ... 1.4.0 + * + Trace for remote debugging or diagnostics + * 10 - 1.4.0 ... CURRENT + * + Multipath support + * + Measurement of QoS metrics */ -#define ZT_PROTO_VERSION 9 +#define ZT_PROTO_VERSION 10 /** * Minimum supported protocol version From 20a25a6a45fcb37b9508855cfa870f2a7c09790a Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Mon, 6 Aug 2018 14:31:12 -0700 Subject: [PATCH 02/35] Added debug traces --- node/Path.hpp | 10 ++++++++++ node/Peer.cpp | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/node/Path.hpp b/node/Path.hpp index cafff8cf3..c861114ba 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -44,6 +44,8 @@ #include "../osdep/Phy.hpp" +#include "../include/ZeroTierDebug.h" + /** * Maximum return value of preferenceRank() */ @@ -314,6 +316,7 @@ public: */ inline void recordOutgoingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb) { + DEBUG_INFO(""); Mutex::Lock _l(_statistics_m); if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) { if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) { @@ -337,6 +340,7 @@ public: */ inline void recordIncomingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb) { + DEBUG_INFO(""); Mutex::Lock _l(_statistics_m); if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) { if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) { @@ -357,6 +361,7 @@ public: */ inline void receivedAck(int64_t now, int32_t ackedBytes) { + DEBUG_INFO(""); _expectingAckAsOf = 0; 
_unackedBytes = (ackedBytes > _unackedBytes) ? 0 : _unackedBytes - ackedBytes; int64_t timeSinceThroughputEstimate = (now - _lastThroughputEstimation); @@ -401,6 +406,7 @@ public: */ inline void sentAck(int64_t now) { + DEBUG_INFO(""); Mutex::Lock _l(_statistics_m); _inACKRecords.clear(); _packetsReceivedSinceLastAck = 0; @@ -418,6 +424,7 @@ public: */ inline void receivedQoS(int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts) { + DEBUG_INFO(""); Mutex::Lock _l(_statistics_m); // Look up egress times and compute latency values for each record std::map::iterator it; @@ -442,6 +449,7 @@ public: */ inline int32_t generateQoSPacket(int64_t now, char *qosBuffer) { + DEBUG_INFO(""); Mutex::Lock _l(_statistics_m); int32_t len = 0; std::map::iterator it = _inQoSRecords.begin(); @@ -466,6 +474,7 @@ public: * @param Current time */ inline void sentQoS(int64_t now) { + DEBUG_INFO(""); _packetsReceivedSinceLastQoS = 0; _lastQoSMeasurement = now; } @@ -584,6 +593,7 @@ public: */ inline void processBackgroundPathMeasurements(int64_t now) { if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { + DEBUG_INFO(""); Mutex::Lock _l(_statistics_m); _lastPathQualityComputeTime = now; address().toString(_addrString); diff --git a/node/Peer.cpp b/node/Peer.cpp index 21bbfabe2..923b3b476 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -37,6 +37,8 @@ #include "RingBuffer.hpp" #include "Utils.hpp" +#include "../include/ZeroTierDebug.h" + namespace ZeroTier { Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Identity &peerIdentity) : @@ -681,6 +683,8 @@ inline void Peer::processBackgroundPeerTasks(int64_t now) _localMultipathSupported = ((RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) && (ZT_PROTO_VERSION > 9)); _remoteMultipathSupported = _vProto > 9; // If both peers support multipath and more than one path exist, we can use multipath logic + DEBUG_INFO("from=%llx, _localMultipathSupported=%d, _remoteMultipathSupported=%d, 
(_uniqueAlivePathCount > 1)=%d", + this->_id.address().toInt(), _localMultipathSupported, _remoteMultipathSupported, (_uniqueAlivePathCount > 1)); _canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1); } } From 12f2df55863943366cc8f15d7e0826cd7f3d7ac2 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 7 Aug 2018 12:39:06 -0700 Subject: [PATCH 03/35] uncommented status fields --- service/OneService.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/service/OneService.cpp b/service/OneService.cpp index 9b12f17b2..76a1be081 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -306,18 +306,18 @@ static void _peerAggregateLinkToJson(nlohmann::json &pj,const ZT_Peer *peer) nlohmann::json pa = nlohmann::json::array(); for(unsigned int i=0;ipathCount;++i) { - //int64_t lastSend = peer->paths[i].lastSend; - //int64_t lastReceive = peer->paths[i].lastReceive; + int64_t lastSend = peer->paths[i].lastSend; + int64_t lastReceive = peer->paths[i].lastReceive; nlohmann::json j; j["address"] = reinterpret_cast(&(peer->paths[i].address))->toString(tmp); - //j["lastSend"] = (lastSend < 0) ? 0 : lastSend; - //j["lastReceive"] = (lastReceive < 0) ? 0 : lastReceive; + j["lastSend"] = (lastSend < 0) ? 0 : lastSend; + j["lastReceive"] = (lastReceive < 0) ? 
0 : lastReceive; //j["trustedPathId"] = peer->paths[i].trustedPathId; //j["active"] = (bool)(peer->paths[i].expired == 0); //j["expired"] = (bool)(peer->paths[i].expired != 0); //j["preferred"] = (bool)(peer->paths[i].preferred != 0); j["latency"] = peer->paths[i].latency; - //j["packetDelayVariance"] = peer->paths[i].packetDelayVariance; + j["pdv"] = peer->paths[i].packetDelayVariance; //j["throughputDisturbCoeff"] = peer->paths[i].throughputDisturbCoeff; //j["packetErrorRatio"] = peer->paths[i].packetErrorRatio; //j["packetLossRatio"] = peer->paths[i].packetLossRatio; From 1e66854b59d831f05dac52e94d71ff07f1a4fe28 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 7 Aug 2018 12:57:40 -0700 Subject: [PATCH 04/35] Temporarily added SO_REUSEADDR to netlink binding code --- osdep/LinuxNetLink.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/osdep/LinuxNetLink.cpp b/osdep/LinuxNetLink.cpp index 634126e79..8c2ea7bf9 100644 --- a/osdep/LinuxNetLink.cpp +++ b/osdep/LinuxNetLink.cpp @@ -66,6 +66,8 @@ LinuxNetLink::LinuxNetLink() // set socket timeout to 1 sec so we're not permablocking recv() calls _setSocketTimeout(_fd, 1); + int yes=1; + setsockopt(_fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); _la.nl_family = AF_NETLINK; _la.nl_pid = getpid()+1; @@ -442,6 +444,8 @@ void LinuxNetLink::_linkDeleted(struct nlmsghdr *nlp) void LinuxNetLink::_requestIPv4Routes() { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + int yes=1; + setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -494,6 +498,8 @@ void LinuxNetLink::_requestIPv4Routes() void LinuxNetLink::_requestIPv6Routes() { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + int yes=1; + setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -546,6 +552,8 @@ void 
LinuxNetLink::_requestIPv6Routes() void LinuxNetLink::_requestInterfaceList() { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + int yes=1; + setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -596,6 +604,8 @@ void LinuxNetLink::_requestInterfaceList() void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, const InetAddress &src, const char *ifaceName) { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + int yes=1; + setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -718,6 +728,8 @@ void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, c void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, const InetAddress &src, const char *ifaceName) { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + int yes=1; + setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -838,6 +850,8 @@ void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, c void LinuxNetLink::addAddress(const InetAddress &addr, const char *iface) { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + int yes=1; + setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -946,6 +960,8 @@ void LinuxNetLink::addAddress(const InetAddress &addr, const char *iface) void LinuxNetLink::removeAddress(const InetAddress &addr, const char *iface) { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + int yes=1; + setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); 
return; From 3c7e25ed58f8b622146b9ebd82e1938f9df24d36 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Mon, 12 Aug 2019 17:04:27 -0700 Subject: [PATCH 05/35] Added call to computeAggregateAllocation() in multipath mode=1 to give realtime allocation output --- node/Peer.cpp | 14 +++++++++++--- node/Peer.hpp | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/node/Peer.cpp b/node/Peer.cpp index 8a5bf40c2..838136968 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -275,7 +275,7 @@ void Peer::recordIncomingPacket(void *tPtr, const SharedPtr &path, const u } } -void Peer::computeAggregateProportionalAllocation(int64_t now) +void Peer::computeAggregateAllocation(int64_t now) { float maxStability = 0; float totalRelativeQuality = 0; @@ -318,7 +318,13 @@ void Peer::computeAggregateProportionalAllocation(int64_t now) // Convert set of relative performances into an allocation set for(uint16_t i=0;iupdateComponentAllocationOfAggregateLink((unsigned char)((_paths[i].p->relativeQuality() / totalRelativeQuality) * 255)); + + if (RR->node->getMultipathMode() == ZT_MULTIPATH_RANDOM) { + _paths[i].p->updateComponentAllocationOfAggregateLink(((float)_pathChoiceHist.countValue(i) / (float)_pathChoiceHist.count()) * 255); + } + if (RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED) { + _paths[i].p->updateComponentAllocationOfAggregateLink((unsigned char)((_paths[i].p->relativeQuality() / totalRelativeQuality) * 255)); + } } } } @@ -415,6 +421,7 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) int numAlivePaths = 0; int numStalePaths = 0; if (RR->node->getMultipathMode() == ZT_MULTIPATH_RANDOM) { + computeAggregateAllocation(now); /* This call is algorithmically inert but gives us a value to show in the status output */ int alivePaths[ZT_MAX_PEER_NETWORK_PATHS]; int stalePaths[ZT_MAX_PEER_NETWORK_PATHS]; memset(&alivePaths, -1, sizeof(alivePaths)); @@ -434,6 +441,7 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool 
includeExpired) unsigned int r = _freeRandomByte; if (numAlivePaths > 0) { int rf = r % numAlivePaths; + _pathChoiceHist.push(alivePaths[rf]); // Record which path we chose return _paths[alivePaths[rf]].p; } else if(numStalePaths > 0) { @@ -449,7 +457,7 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) if (RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED) { if ((now - _lastAggregateAllocation) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) { _lastAggregateAllocation = now; - computeAggregateProportionalAllocation(now); + computeAggregateAllocation(now); } // Randomly choose path according to their allocations float rf = _freeRandomByte; diff --git a/node/Peer.hpp b/node/Peer.hpp index b4cbe0572..6d3ce553e 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -194,7 +194,7 @@ public: * * @param now Current time */ - void computeAggregateProportionalAllocation(int64_t now); + void computeAggregateAllocation(int64_t now); /** * @return The aggregate link Packet Delay Variance (PDV) From d8ce1f7914c462f4731ba009520d3cc526dfcc12 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 13 Aug 2019 12:41:30 -0700 Subject: [PATCH 06/35] Added ZT_DIRECT_PATH_PUSH_INTERVAL_MULTIPATH to decrease link aggregation time --- node/Constants.hpp | 9 +++++++++ node/Peer.cpp | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/node/Constants.hpp b/node/Constants.hpp index d58e408f3..16be0c206 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -546,6 +546,15 @@ */ #define ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH 120000 +/** + * Interval between direct path pushes in milliseconds if we are currently in multipath + * mode. In this mode the distinction between ZT_DIRECT_PATH_PUSH_INTERVAL and + * ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH does not exist since we want to inform other + * peers of this peer's new link/address as soon as possible so that both peers can + * begin forming an aggregated link. 
+ */ +#define ZT_DIRECT_PATH_PUSH_INTERVAL_MULTIPATH ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH / 16 + /** * Time horizon for push direct paths cutoff */ diff --git a/node/Peer.cpp b/node/Peer.cpp index 838136968..a7c8bf1af 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -210,7 +210,8 @@ void Peer::received( // is done less frequently. if (this->trustEstablished(now)) { const int64_t sinceLastPush = now - _lastDirectPathPushSent; - if (sinceLastPush >= ((hops == 0) ? ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH : ZT_DIRECT_PATH_PUSH_INTERVAL)) { + if (sinceLastPush >= ((hops == 0) ? ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH : ZT_DIRECT_PATH_PUSH_INTERVAL) + || (_canUseMultipath && (sinceLastPush >= (ZT_DIRECT_PATH_PUSH_INTERVAL_MULTIPATH)))) { _lastDirectPathPushSent = now; std::vector pathsToPush(RR->node->directPaths()); if (pathsToPush.size() > 0) { From 36d368cb78c4c352ec309dd9ba7c4cd8d434a725 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 13 Aug 2019 13:26:41 -0700 Subject: [PATCH 07/35] Check for (local multipath support only) during decision to decrease direct path push interval. This prevents the chicken-and-egg situation of not knowing if two peers can support multipath on both ends and thusly not sending eachother their direct paths quickly enough. --- node/Constants.hpp | 2 +- node/Peer.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/node/Constants.hpp b/node/Constants.hpp index 16be0c206..010b69684 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -553,7 +553,7 @@ * peers of this peer's new link/address as soon as possible so that both peers can * begin forming an aggregated link. 
*/ -#define ZT_DIRECT_PATH_PUSH_INTERVAL_MULTIPATH ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH / 16 +#define ZT_DIRECT_PATH_PUSH_INTERVAL_MULTIPATH (ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH / 16) /** * Time horizon for push direct paths cutoff diff --git a/node/Peer.cpp b/node/Peer.cpp index a7c8bf1af..c1bfc1707 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -211,7 +211,7 @@ void Peer::received( if (this->trustEstablished(now)) { const int64_t sinceLastPush = now - _lastDirectPathPushSent; if (sinceLastPush >= ((hops == 0) ? ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH : ZT_DIRECT_PATH_PUSH_INTERVAL) - || (_canUseMultipath && (sinceLastPush >= (ZT_DIRECT_PATH_PUSH_INTERVAL_MULTIPATH)))) { + || (_localMultipathSupported && (sinceLastPush >= (ZT_DIRECT_PATH_PUSH_INTERVAL_MULTIPATH)))) { _lastDirectPathPushSent = now; std::vector pathsToPush(RR->node->directPaths()); if (pathsToPush.size() > 0) { From 2593c6efee84f7bc1d6ca85aa19a3f2345b29ceb Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 13 Aug 2019 14:34:11 -0700 Subject: [PATCH 08/35] Adjusted multipath constants --- node/Constants.hpp | 4 ++-- node/Peer.cpp | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/node/Constants.hpp b/node/Constants.hpp index 010b69684..3f95ac29c 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -401,7 +401,7 @@ /** * How often an aggregate link statistics report is emitted into this tracing system */ -#define ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL 60000 +#define ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL 30000 /** * How much an aggregate link's component paths can vary from their target allocation @@ -467,7 +467,7 @@ * by default to avoid increasing idle bandwidth use for regular * links. 
*/ -#define ZT_MULTIPATH_PEER_PING_PERIOD 5000 +#define ZT_MULTIPATH_PEER_PING_PERIOD (ZT_PEER_PING_PERIOD / 10) /** * Paths are considered expired if they have not sent us a real packet in this long diff --git a/node/Peer.cpp b/node/Peer.cpp index c1bfc1707..2bba9bd9b 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -782,9 +782,6 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) unsigned int sent = 0; Mutex::Lock _l(_paths_m); - const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD); - _lastSentFullHello = now; - processBackgroundPeerTasks(now); // Emit traces regarding aggregate link status @@ -815,6 +812,9 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) else break; } + const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD); + _lastSentFullHello = now; + unsigned int j = 0; for(unsigned int i=0;i Date: Tue, 13 Aug 2019 14:34:47 -0700 Subject: [PATCH 09/35] More informative link aggregation trace outputs --- node/Trace.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/node/Trace.cpp b/node/Trace.cpp index cccab9c91..6e0b9f05d 100644 --- a/node/Trace.cpp +++ b/node/Trace.cpp @@ -109,17 +109,22 @@ void Trace::peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId, void Trace::peerLinkNowRedundant(void *const tPtr,Peer &peer) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is fully redundant",peer.address().toInt()); + if ((RR->node->getMultipathMode() != ZT_MULTIPATH_RANDOM)) { + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is now a randomly-distributed aggregate link",peer.address().toInt()); + } + if ((RR->node->getMultipathMode() != ZT_MULTIPATH_PROPORTIONALLY_BALANCED)) { + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is now a proportionally-balanced aggregate link",peer.address().toInt()); + } } void Trace::peerLinkNoLongerRedundant(void *const tPtr,Peer &peer) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is no longer 
redundant",peer.address().toInt()); + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx has degraded and is no longer an aggregate link",peer.address().toInt()); } void Trace::peerLinkAggregateStatistics(void *const tPtr,Peer &peer) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is composed of (%d) physical paths %s, has packet delay variance (%.0f ms), mean latency (%.0f ms)", + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is composed of (%d) physical paths %s, has PDV (%.0f ms), mean latency (%.0f ms)", peer.address().toInt(), peer.aggregateLinkPhysicalPathCount(), peer.interfaceListStr(), From 5b7d60f5cdd9157cb7d0beb3a82a2ceee7ee4c9c Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 13 Aug 2019 14:42:48 -0700 Subject: [PATCH 10/35] Whoops --- node/Trace.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node/Trace.cpp b/node/Trace.cpp index 6e0b9f05d..8e98cb072 100644 --- a/node/Trace.cpp +++ b/node/Trace.cpp @@ -109,10 +109,10 @@ void Trace::peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId, void Trace::peerLinkNowRedundant(void *const tPtr,Peer &peer) { - if ((RR->node->getMultipathMode() != ZT_MULTIPATH_RANDOM)) { + if ((RR->node->getMultipathMode() == ZT_MULTIPATH_RANDOM)) { ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is now a randomly-distributed aggregate link",peer.address().toInt()); } - if ((RR->node->getMultipathMode() != ZT_MULTIPATH_PROPORTIONALLY_BALANCED)) { + if ((RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED)) { ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is now a proportionally-balanced aggregate link",peer.address().toInt()); } } From b0e86d11c947d080b3fd13d7d6a8e2b3745150cb Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Wed, 14 Aug 2019 11:24:03 -0700 Subject: [PATCH 11/35] Minor. 
Name change for trace functions --- node/Peer.cpp | 4 ++-- node/Trace.cpp | 4 ++-- node/Trace.hpp | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/node/Peer.cpp b/node/Peer.cpp index 2bba9bd9b..ce3083cc1 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -794,10 +794,10 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) } } if (alivePathCount < 2 && _linkIsRedundant) { _linkIsRedundant = !_linkIsRedundant; - RR->t->peerLinkNoLongerRedundant(NULL,*this); + RR->t->peerLinkNoLongerAggregate(NULL,*this); } if (alivePathCount > 1 && !_linkIsRedundant) { _linkIsRedundant = !_linkIsRedundant; - RR->t->peerLinkNowRedundant(NULL,*this); + RR->t->peerLinkNoLongerAggregate(NULL,*this); } } diff --git a/node/Trace.cpp b/node/Trace.cpp index 8e98cb072..b7c002719 100644 --- a/node/Trace.cpp +++ b/node/Trace.cpp @@ -107,7 +107,7 @@ void Trace::peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId, } } -void Trace::peerLinkNowRedundant(void *const tPtr,Peer &peer) +void Trace::peerLinkNowAggregate(void *const tPtr,Peer &peer) { if ((RR->node->getMultipathMode() == ZT_MULTIPATH_RANDOM)) { ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is now a randomly-distributed aggregate link",peer.address().toInt()); @@ -117,7 +117,7 @@ void Trace::peerLinkNowRedundant(void *const tPtr,Peer &peer) } } -void Trace::peerLinkNoLongerRedundant(void *const tPtr,Peer &peer) +void Trace::peerLinkNoLongerAggregate(void *const tPtr,Peer &peer) { ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx has degraded and is no longer an aggregate link",peer.address().toInt()); } diff --git a/node/Trace.hpp b/node/Trace.hpp index 2effb7f0e..162df1542 100644 --- a/node/Trace.hpp +++ b/node/Trace.hpp @@ -122,8 +122,8 @@ public: void peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &path,const uint64_t packetId,const Packet::Verb verb); - void peerLinkNowRedundant(void *const tPtr,Peer &peer); - void peerLinkNoLongerRedundant(void 
*const tPtr,Peer &peer); + void peerLinkNowAggregate(void *const tPtr,Peer &peer); + void peerLinkNoLongerAggregate(void *const tPtr,Peer &peer); void peerLinkAggregateStatistics(void *const tPtr,Peer &peer); From 0634214f2ca1f16572b4313eefc05c879d300d18 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Mon, 19 Aug 2019 21:52:33 -0700 Subject: [PATCH 12/35] Added notion of Flows --- include/ZeroTierOne.h | 48 ++++++--- node/Constants.hpp | 6 ++ node/Path.hpp | 8 -- node/Peer.cpp | 215 +++++++++++++++++++++++++++++++------- node/Peer.hpp | 47 ++++++++- node/Switch.cpp | 232 +++++++++++++++++++++++++++++++----------- node/Switch.hpp | 12 ++- node/Trace.cpp | 4 +- 8 files changed, 449 insertions(+), 123 deletions(-) diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index b0be01056..5355cd3e2 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -434,27 +434,49 @@ enum ZT_ResultCode enum ZT_MultipathMode { /** - * No active multipath. - * - * Traffic is merely sent over the strongest path. That being - * said, this mode will automatically failover in the event that a link goes down. + * No fault tolerance or balancing. */ ZT_MULTIPATH_NONE = 0, /** - * Traffic is randomly distributed among all active paths. - * - * Will cease sending traffic over links that appear to be stale. + * Sends traffic out on all paths. */ - ZT_MULTIPATH_RANDOM = 1, + ZT_MULTIPATH_BROADCAST = 1, /** - * Traffic is allocated across all active paths in proportion to their strength and - * reliability. - * - * Will cease sending traffic over links that appear to be stale. + * Sends traffic out on only one path at a time. Immediate fail-over. */ - ZT_MULTIPATH_PROPORTIONALLY_BALANCED = 2, + ZT_MULTIPATH_ACTIVE_BACKUP= 2, + + /** + * Sends traffic out on all interfaces according to a uniform random distribution. + */ + ZT_MULTIPATH_BALANCE_RANDOM = 3, + + /** + * Stripes packets across all paths. 
+ */ + ZT_MULTIPATH_BALANCE_RR_OPAQUE = 4, + + /** + * Balances flows across all paths. + */ + ZT_MULTIPATH_BALANCE_RR_FLOW = 5, + + /** + * Hashes flows across all paths. + */ + ZT_MULTIPATH_BALANCE_XOR_FLOW = 6, + + /** + * Balances traffic across all paths according to observed performance. + */ + ZT_MULTIPATH_BALANCE_DYNAMIC_OPAQUE = 7, + + /** + * Balances flows across all paths. + */ + ZT_MULTIPATH_BALANCE_DYNAMIC_FLOW = 8, }; /** diff --git a/node/Constants.hpp b/node/Constants.hpp index 3f95ac29c..7f962851a 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -266,6 +266,12 @@ */ #define ZT_LOCAL_CONF_FILE_CHECK_INTERVAL 10000 +/** + * How long before we consider a flow to be dead and remove it from the balancing + * policy's list. + */ +#define ZT_MULTIPATH_FLOW_EXPIRATION 60000 + /** * How frequently to check for changes to the system's network interfaces. When * the service decides to use this constant it's because we want to react more diff --git a/node/Path.hpp b/node/Path.hpp index bc8d7dc5a..bc28c7341 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -308,7 +308,6 @@ public: */ inline void recordOutgoingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb) { - DEBUG_INFO(""); Mutex::Lock _l(_statistics_m); if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) { if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) { @@ -332,7 +331,6 @@ public: */ inline void recordIncomingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb) { - DEBUG_INFO(""); Mutex::Lock _l(_statistics_m); if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) { if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) { @@ -353,7 +351,6 @@ public: */ inline void receivedAck(int64_t now, int32_t ackedBytes) { - DEBUG_INFO(""); _expectingAckAsOf = 0; _unackedBytes = (ackedBytes > _unackedBytes) ? 
0 : _unackedBytes - ackedBytes; int64_t timeSinceThroughputEstimate = (now - _lastThroughputEstimation); @@ -398,7 +395,6 @@ public: */ inline void sentAck(int64_t now) { - DEBUG_INFO(""); Mutex::Lock _l(_statistics_m); _inACKRecords.clear(); _packetsReceivedSinceLastAck = 0; @@ -416,7 +412,6 @@ public: */ inline void receivedQoS(int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts) { - DEBUG_INFO(""); Mutex::Lock _l(_statistics_m); // Look up egress times and compute latency values for each record std::map::iterator it; @@ -441,7 +436,6 @@ public: */ inline int32_t generateQoSPacket(int64_t now, char *qosBuffer) { - DEBUG_INFO(""); Mutex::Lock _l(_statistics_m); int32_t len = 0; std::map::iterator it = _inQoSRecords.begin(); @@ -466,7 +460,6 @@ public: * @param Current time */ inline void sentQoS(int64_t now) { - DEBUG_INFO(""); _packetsReceivedSinceLastQoS = 0; _lastQoSMeasurement = now; } @@ -586,7 +579,6 @@ public: inline void processBackgroundPathMeasurements(const int64_t now) { if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { - DEBUG_INFO(""); Mutex::Lock _l(_statistics_m); _lastPathQualityComputeTime = now; address().toString(_addrString); diff --git a/node/Peer.cpp b/node/Peer.cpp index ce3083cc1..d1ef9ecf6 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -75,7 +75,9 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _linkIsRedundant(false), _remotePeerMultipathEnabled(false), _lastAggregateStatsReport(0), - _lastAggregateAllocation(0) + _lastAggregateAllocation(0), + _virtualPathCount(0), + _roundRobinPathAssignmentIdx(0) { if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) throw ZT_EXCEPTION_INVALID_ARGUMENT; @@ -195,6 +197,9 @@ void Peer::received( } else { attemptToContact = true; } + + // Every time we learn of new path, rebuild set of virtual paths + constructSetOfVirtualPaths(); } } @@ -256,6 +261,39 @@ void Peer::received( } } +void Peer::constructSetOfVirtualPaths() 
+{ + if (!_remoteMultipathSupported) { + return; + } + Mutex::Lock _l(_virtual_paths_m); + + int64_t now = RR->node->now(); + _virtualPathCount = 0; + for(unsigned int i=0;ialive(now)) { + for(unsigned int j=0;jalive(now)) { + int64_t localSocket = _paths[j].p->localSocket(); + bool foundVirtualPath = false; + for (int k=0; k<_virtualPaths.size(); k++) { + if (_virtualPaths[k]->localSocket == localSocket && _virtualPaths[k]->p == _paths[i].p) { + foundVirtualPath = true; + } + } + if (!foundVirtualPath) + { + VirtualPath *np = new VirtualPath; + np->p = _paths[i].p; + np->localSocket = localSocket; + _virtualPaths.push_back(np); + } + } + } + } + } +} + void Peer::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now) { @@ -320,10 +358,10 @@ void Peer::computeAggregateAllocation(int64_t now) for(uint16_t i=0;inode->getMultipathMode() == ZT_MULTIPATH_RANDOM) { + if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RANDOM) { _paths[i].p->updateComponentAllocationOfAggregateLink(((float)_pathChoiceHist.countValue(i) / (float)_pathChoiceHist.count()) * 255); } - if (RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED) { + if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_DYNAMIC_OPAQUE) { _paths[i].p->updateComponentAllocationOfAggregateLink((unsigned char)((_paths[i].p->relativeQuality() / totalRelativeQuality) * 255)); } } @@ -382,9 +420,22 @@ int Peer::aggregateLinkLogicalPathCount() return pathCount; } -SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) +std::vector> Peer::getAllPaths(int64_t now) +{ + Mutex::Lock _l(_virtual_paths_m); // FIXME: TX can now lock RX + std::vector> paths; + for (int i=0; i<_virtualPaths.size(); i++) { + if (_virtualPaths[i]->p) { + paths.push_back(_virtualPaths[i]->p); + } + } + return paths; +} + +SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired, int64_t flowId) { Mutex::Lock _l(_paths_m); + 
SharedPtr selectedPath; unsigned int bestPath = ZT_MAX_PEER_NETWORK_PATHS; /** @@ -410,52 +461,129 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) return SharedPtr(); } + // Update path measurements for(unsigned int i=0;iprocessBackgroundPathMeasurements(now); } } + // Detect new flows and update existing records + if (_flows.count(flowId)) { + _flows[flowId]->lastSend = now; + } + else { + fprintf(stderr, "new flow %llx detected between this node and %llx (%lu active flow(s))\n", + flowId, this->_id.address().toInt(), (_flows.size()+1)); + struct Flow *newFlow = new Flow(flowId, now); + _flows[flowId] = newFlow; + newFlow->assignedPath = nullptr; + } + // Construct set of virtual paths if needed + if (!_virtualPaths.size()) { + constructSetOfVirtualPaths(); + } + if (!_virtualPaths.size()) { + fprintf(stderr, "no paths to send packet out on\n"); + return SharedPtr(); + } /** - * Randomly distribute traffic across all paths + * Traffic is randomly distributed among all active paths. 
*/ int numAlivePaths = 0; int numStalePaths = 0; - if (RR->node->getMultipathMode() == ZT_MULTIPATH_RANDOM) { - computeAggregateAllocation(now); /* This call is algorithmically inert but gives us a value to show in the status output */ - int alivePaths[ZT_MAX_PEER_NETWORK_PATHS]; - int stalePaths[ZT_MAX_PEER_NETWORK_PATHS]; - memset(&alivePaths, -1, sizeof(alivePaths)); - memset(&stalePaths, -1, sizeof(stalePaths)); - for(unsigned int i=0;ialive(now)) { - alivePaths[numAlivePaths] = i; - numAlivePaths++; - } - else { - stalePaths[numStalePaths] = i; - numStalePaths++; + if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RANDOM) { + int sz = _virtualPaths.size(); + if (sz) { + int idx = _freeRandomByte % sz; + _pathChoiceHist.push(idx); + char pathStr[128]; + _virtualPaths[idx]->p->address().toString(pathStr); + fprintf(stderr, "sending out: (%llx), idx=%d: path=%s, localSocket=%lld\n", + this->_id.address().toInt(), idx, pathStr, _virtualPaths[idx]->localSocket); + return _virtualPaths[idx]->p; + } + // This call is algorithmically inert but gives us a value to show in the status output + computeAggregateAllocation(now); + } + + /** + * All traffic is sent on all paths. + */ + if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) { + // Not handled here. Handled in Switch.cpp + } + + /** + * Only one link is active. Fail-over is immediate. + */ + if (RR->node->getMultipathMode() == ZT_MULTIPATH_ACTIVE_BACKUP) { + // fprintf(stderr, "ZT_MULTIPATH_ACTIVE_BACKUP\n"); + } + + /** + * Packets are striped across all available paths. 
+ */ + if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RR_OPAQUE) { + // fprintf(stderr, "ZT_MULTIPATH_BALANCE_RR_OPAQUE\n"); + int16_t previousIdx = _roundRobinPathAssignmentIdx; + if (_roundRobinPathAssignmentIdx < (_virtualPaths.size()-1)) { + _roundRobinPathAssignmentIdx++; + } + else { + _roundRobinPathAssignmentIdx = 0; + } + selectedPath = _virtualPaths[previousIdx]->p; + char pathStr[128]; + selectedPath->address().toString(pathStr); + fprintf(stderr, "sending packet out on path %s at index %d\n", + pathStr, previousIdx); + return selectedPath; + } + + /** + * Flows are striped across all available paths. + */ + if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RR_FLOW) { + // fprintf(stderr, "ZT_MULTIPATH_BALANCE_RR_FLOW\n"); + } + + /** + * Flows are hashed across all available paths. + */ + if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_XOR_FLOW) { + // fprintf(stderr, "ZT_MULTIPATH_BALANCE_XOR_FLOW (%llx) \n", flowId); + char pathStr[128]; + struct Flow *currFlow = NULL; + if (_flows.count(flowId)) { + currFlow = _flows[flowId]; + if (!currFlow->assignedPath) { + int idx = abs((int)(currFlow->flowId % (_virtualPaths.size()-1))); + currFlow->assignedPath = _virtualPaths[idx]; + _virtualPaths[idx]->p->address().toString(pathStr); + fprintf(stderr, "assigning flow %llx between this node and peer %llx to path %s at index %d\n", + currFlow->flowId, this->_id.address().toInt(), pathStr, idx); + } + else { + if (!currFlow->assignedPath->p->alive(now)) { + char newPathStr[128]; + currFlow->assignedPath->p->address().toString(pathStr); + // Re-assign + int idx = abs((int)(currFlow->flowId % (_virtualPaths.size()-1))); + currFlow->assignedPath = _virtualPaths[idx]; + _virtualPaths[idx]->p->address().toString(newPathStr); + fprintf(stderr, "path %s assigned to flow %llx between this node and %llx appears to be dead, reassigning to path %s\n", + pathStr, currFlow->flowId, this->_id.address().toInt(), newPathStr); } } - } - unsigned int r = 
_freeRandomByte; - if (numAlivePaths > 0) { - int rf = r % numAlivePaths; - _pathChoiceHist.push(alivePaths[rf]); // Record which path we chose - return _paths[alivePaths[rf]].p; - } - else if(numStalePaths > 0) { - // Resort to trying any non-expired path - int rf = r % numStalePaths; - return _paths[stalePaths[rf]].p; + return currFlow->assignedPath->p; } } /** - * Proportionally allocate traffic according to dynamic path quality measurements + * Proportionally allocate traffic according to dynamic path quality measurements. */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED) { + if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_DYNAMIC_OPAQUE) { if ((now - _lastAggregateAllocation) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) { _lastAggregateAllocation = now; computeAggregateAllocation(now); @@ -476,6 +604,13 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired) return _paths[bestPath].p; } } + + /** + * Flows are dynamically allocated across paths in proportion to link strength and load. 
+ */ + if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_DYNAMIC_FLOW) { + } + return SharedPtr(); } @@ -676,10 +811,20 @@ inline void Peer::processBackgroundPeerTasks(const int64_t now) _localMultipathSupported = ((RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) && (ZT_PROTO_VERSION > 9)); _remoteMultipathSupported = _vProto > 9; // If both peers support multipath and more than one path exist, we can use multipath logic - DEBUG_INFO("from=%llx, _localMultipathSupported=%d, _remoteMultipathSupported=%d, (_uniqueAlivePathCount > 1)=%d", - this->_id.address().toInt(), _localMultipathSupported, _remoteMultipathSupported, (_uniqueAlivePathCount > 1)); _canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1); } + + // Remove old flows + std::map::iterator it = _flows.begin(); + while (it != _flows.end()) { + if ((now - it->second->lastSend) > ZT_MULTIPATH_FLOW_EXPIRATION) { + fprintf(stderr, "forgetting flow %llx between this node and %llx (%lu active flow(s))\n", + it->first, this->_id.address().toInt(), _flows.size()); + it = _flows.erase(it); + } else { + it++; + } + } } void Peer::sendACK(void *tPtr,const SharedPtr &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now) diff --git a/node/Peer.hpp b/node/Peer.hpp index 6d3ce553e..7633ad7d5 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -28,6 +28,8 @@ #define ZT_PEER_HPP #include +#include +#include #include "../include/ZeroTierOne.h" @@ -147,6 +149,8 @@ public: return false; } + void constructSetOfVirtualPaths(); + /** * Record statistics on outgoing packets * @@ -216,14 +220,17 @@ public: */ int aggregateLinkLogicalPathCount(); + std::vector> getAllPaths(int64_t now); + /** * Get the most appropriate direct path based on current multipath and QoS configuration * * @param now Current time + * @param flowId Session-specific protocol flow identifier used for path allocation * @param includeExpired If true, include even expired paths * @return 
Best current path or NULL if none */ - SharedPtr getAppropriatePath(int64_t now, bool includeExpired); + SharedPtr getAppropriatePath(int64_t now, bool includeExpired, int64_t flowId = -1); /** * Generate a human-readable string of interface names making up the aggregate link, also include @@ -680,6 +687,44 @@ private: int64_t _lastAggregateAllocation; char _interfaceListStr[256]; // 16 characters * 16 paths in a link + + // + struct LinkPerformanceEntry + { + int64_t packetId; + struct VirtualPath *egressVirtualPath; + struct VirtualPath *ingressVirtualPath; + }; + + // Virtual paths + int _virtualPathCount; + Mutex _virtual_paths_m; + struct VirtualPath + { + SharedPtr p; + int64_t localSocket; + std::queue performanceEntries; + }; + std::vector _virtualPaths; + + // Flows + struct Flow + { + Flow(int64_t fid, int64_t ls) : + flowId(fid), + lastSend(ls), + assignedPath(NULL) + {} + + int64_t flowId; + int64_t bytesPerSecond; + int64_t lastSend; + struct VirtualPath *assignedPath; + }; + + std::map _flows; + + int16_t _roundRobinPathAssignmentIdx; }; } // namespace ZeroTier diff --git a/node/Switch.cpp b/node/Switch.cpp index a6852d9f4..c2251f23d 100644 --- a/node/Switch.cpp +++ b/node/Switch.cpp @@ -255,6 +255,35 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre } catch ( ... ) {} // sanity check, should be caught elsewhere } +// Returns true if packet appears valid; pos and proto will be set +static bool _ipv6GetPayload(const uint8_t *frameData,unsigned int frameLen,unsigned int &pos,unsigned int &proto) +{ + if (frameLen < 40) + return false; + pos = 40; + proto = frameData[6]; + while (pos <= frameLen) { + switch(proto) { + case 0: // hop-by-hop options + case 43: // routing + case 60: // destination options + case 135: // mobility options + if ((pos + 8) > frameLen) + return false; // invalid! 
+ proto = frameData[pos]; + pos += ((unsigned int)frameData[pos + 1] * 8) + 8; + break; + + //case 44: // fragment -- we currently can't parse these and they are deprecated in IPv6 anyway + //case 50: + //case 51: // IPSec ESP and AH -- we have to stop here since this is encrypted stuff + default: + return true; + } + } + return false; // overflow == invalid +} + void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const MAC &from,const MAC &to,unsigned int etherType,unsigned int vlanId,const void *data,unsigned int len) { if (!network->hasConfig()) @@ -271,6 +300,73 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const uint8_t qosBucket = ZT_QOS_DEFAULT_BUCKET; + /* A pseudo-unique identifier used by the balancing and bonding policies to associate properties + * of a specific protocol flow over time and to determine which virtual path this packet + * shall be sent out on. This identifier consists of the source port and destination port + * of the encapsulated frame. + * + * A flowId of -1 will indicate that whatever packet we are about transmit has no + * preferred virtual path and will be sent out according to what the multipath logic + * deems appropriate. An example of this would be an ICMP packet. 
+ */ + int64_t flowId = -1; + + if (etherType == ZT_ETHERTYPE_IPV4 && (len >= 20)) { + uint16_t srcPort = 0; + uint16_t dstPort = 0; + int8_t proto = (reinterpret_cast(data)[9]); + const unsigned int headerLen = 4 * (reinterpret_cast(data)[0] & 0xf); + switch(proto) { + case 0x01: // ICMP + flowId = 0x01; + break; + // All these start with 16-bit source and destination port in that order + case 0x06: // TCP + case 0x11: // UDP + case 0x84: // SCTP + case 0x88: // UDPLite + if (len > (headerLen + 4)) { + unsigned int pos = headerLen + 0; + srcPort = (reinterpret_cast(data)[pos++]) << 8; + srcPort |= (reinterpret_cast(data)[pos]); + pos++; + dstPort = (reinterpret_cast(data)[pos++]) << 8; + dstPort |= (reinterpret_cast(data)[pos]); + flowId = ((int64_t)srcPort << 48) | ((int64_t)dstPort << 32) | proto; + } + break; + } + } + + if (etherType == ZT_ETHERTYPE_IPV6 && (len >= 40)) { + uint16_t srcPort = 0; + uint16_t dstPort = 0; + unsigned int pos; + unsigned int proto; + _ipv6GetPayload((const uint8_t *)data, len, pos, proto); + switch(proto) { + case 0x3A: // ICMPv6 + flowId = 0x3A; + break; + // All these start with 16-bit source and destination port in that order + case 0x06: // TCP + case 0x11: // UDP + case 0x84: // SCTP + case 0x88: // UDPLite + if (len > (pos + 4)) { + srcPort = (reinterpret_cast(data)[pos++]) << 8; + srcPort |= (reinterpret_cast(data)[pos]); + pos++; + dstPort = (reinterpret_cast(data)[pos++]) << 8; + dstPort |= (reinterpret_cast(data)[pos]); + flowId = ((int64_t)srcPort << 48) | ((int64_t)dstPort << 32) | proto; + } + break; + default: + break; + } + } + if (to.isMulticast()) { MulticastGroup multicastGroup(to,0); @@ -280,7 +376,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const * otherwise a straightforward Ethernet switch emulation. Vanilla ARP * is dumb old broadcast and simply doesn't scale. 
ZeroTier multicast * groups have an additional field called ADI (additional distinguishing - * information) which was added specifically for ARP though it could + * information) which was added specifically for ARP though it could * be used for other things too. We then take ARP broadcasts and turn * them into multicasts by stuffing the IP address being queried into * the 32-bit ADI field. In practice this uses our multicast pub/sub @@ -429,7 +525,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const outp.append(data,len); if (!network->config().disableCompression()) outp.compress(); - aqm_enqueue(tPtr,network,outp,true,qosBucket); + aqm_enqueue(tPtr,network,outp,true,qosBucket,flowId); } else { Packet outp(toZT,RR->identity.address(),Packet::VERB_FRAME); outp.append(network->id()); @@ -437,7 +533,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const outp.append(data,len); if (!network->config().disableCompression()) outp.compress(); - aqm_enqueue(tPtr,network,outp,true,qosBucket); + aqm_enqueue(tPtr,network,outp,true,qosBucket,flowId); } } else { // Destination is bridged behind a remote peer @@ -493,7 +589,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const outp.append(data,len); if (!network->config().disableCompression()) outp.compress(); - aqm_enqueue(tPtr,network,outp,true,qosBucket); + aqm_enqueue(tPtr,network,outp,true,qosBucket,flowId); } else { RR->t->outgoingNetworkFrameDropped(tPtr,network,from,to,etherType,vlanId,len,"filter blocked (bridge replication)"); } @@ -501,10 +597,10 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const } } -void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet &packet,bool encrypt,int qosBucket) +void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet &packet,bool encrypt,int qosBucket,int64_t flowId) { if(!network->qosEnabled()) { - send(tPtr, packet, encrypt); + send(tPtr, packet, encrypt, flowId); return; 
} NetworkQoSControlBlock *nqcb = _netQueueControlBlock[network->id()]; @@ -518,11 +614,9 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet & nqcb->inactiveQueues.push_back(new ManagedQueue(i)); } } - + // Don't apply QoS scheduling to ZT protocol traffic if (packet.verb() != Packet::VERB_FRAME && packet.verb() != Packet::VERB_EXT_FRAME) { - // DEBUG_INFO("skipping, no QoS for this packet, verb=%x", packet.verb()); - // just send packet normally, no QoS for ZT protocol traffic - send(tPtr, packet, encrypt); + send(tPtr, packet, encrypt, flowId); } _aqm_m.lock(); @@ -530,7 +624,7 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet & // Enqueue packet and move queue to appropriate list const Address dest(packet.destination()); - TXQueueEntry *txEntry = new TXQueueEntry(dest,RR->node->now(),packet,encrypt); + TXQueueEntry *txEntry = new TXQueueEntry(dest,RR->node->now(),packet,encrypt,flowId); ManagedQueue *selectedQueue = nullptr; for (size_t i=0; ibyteCredit -= len; // Send the packet! 
queueAtFrontOfList->q.pop_front(); - send(tPtr, entryToEmit->packet, entryToEmit->encrypt); + send(tPtr, entryToEmit->packet, entryToEmit->encrypt, entryToEmit->flowId); (*nqcb).second->_currEnqueuedPackets--; } if (queueAtFrontOfList) { @@ -734,7 +828,7 @@ void Switch::aqm_dequeue(void *tPtr) queueAtFrontOfList->byteLength -= len; queueAtFrontOfList->byteCredit -= len; queueAtFrontOfList->q.pop_front(); - send(tPtr, entryToEmit->packet, entryToEmit->encrypt); + send(tPtr, entryToEmit->packet, entryToEmit->encrypt, entryToEmit->flowId); (*nqcb).second->_currEnqueuedPackets--; } if (queueAtFrontOfList) { @@ -758,18 +852,18 @@ void Switch::removeNetworkQoSControlBlock(uint64_t nwid) } } -void Switch::send(void *tPtr,Packet &packet,bool encrypt) +void Switch::send(void *tPtr,Packet &packet,bool encrypt,int64_t flowId) { const Address dest(packet.destination()); if (dest == RR->identity.address()) return; - if (!_trySend(tPtr,packet,encrypt)) { + if (!_trySend(tPtr,packet,encrypt,flowId)) { { Mutex::Lock _l(_txQueue_m); if (_txQueue.size() >= ZT_TX_QUEUE_SIZE) { _txQueue.pop_front(); } - _txQueue.push_back(TXQueueEntry(dest,RR->node->now(),packet,encrypt)); + _txQueue.push_back(TXQueueEntry(dest,RR->node->now(),packet,encrypt,flowId)); } if (!RR->topology->getPeer(tPtr,dest)) requestWhois(tPtr,RR->node->now(),dest); @@ -791,10 +885,11 @@ void Switch::requestWhois(void *tPtr,const int64_t now,const Address &addr) const SharedPtr upstream(RR->topology->getUpstreamPeer()); if (upstream) { + int64_t flowId = -1; Packet outp(upstream->address(),RR->identity.address(),Packet::VERB_WHOIS); addr.appendTo(outp); RR->node->expectReplyTo(outp.packetId()); - send(tPtr,outp,true); + send(tPtr,outp,true,flowId); } } @@ -819,7 +914,7 @@ void Switch::doAnythingWaitingForPeer(void *tPtr,const SharedPtr &peer) Mutex::Lock _l(_txQueue_m); for(std::list< TXQueueEntry >::iterator txi(_txQueue.begin());txi!=_txQueue.end();) { if (txi->dest == peer->address()) { - if 
(_trySend(tPtr,txi->packet,txi->encrypt)) { + if (_trySend(tPtr,txi->packet,txi->encrypt,txi->flowId)) { _txQueue.erase(txi++); } else { ++txi; @@ -843,7 +938,7 @@ unsigned long Switch::doTimerTasks(void *tPtr,int64_t now) Mutex::Lock _l(_txQueue_m); for(std::list< TXQueueEntry >::iterator txi(_txQueue.begin());txi!=_txQueue.end();) { - if (_trySend(tPtr,txi->packet,txi->encrypt)) { + if (_trySend(tPtr,txi->packet,txi->encrypt,txi->flowId)) { _txQueue.erase(txi++); } else if ((now - txi->creationTime) > ZT_TRANSMIT_QUEUE_TIMEOUT) { _txQueue.erase(txi++); @@ -907,7 +1002,7 @@ bool Switch::_shouldUnite(const int64_t now,const Address &source,const Address return false; } -bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt) +bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int64_t flowId) { SharedPtr viaPath; const int64_t now = RR->node->now(); @@ -915,54 +1010,73 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt) const SharedPtr peer(RR->topology->getPeer(tPtr,destination)); if (peer) { - viaPath = peer->getAppropriatePath(now,false); - if (!viaPath) { - peer->tryMemorizedPath(tPtr,now); // periodically attempt memorized or statically defined paths, if any are known - const SharedPtr relay(RR->topology->getUpstreamPeer()); - if ( (!relay) || (!(viaPath = relay->getAppropriatePath(now,false))) ) { - if (!(viaPath = peer->getAppropriatePath(now,true))) - return false; + if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) { + // Nothing here, we'll grab an entire set of paths to send out on below + } + else { + viaPath = peer->getAppropriatePath(now,false,flowId); + if (!viaPath) { + peer->tryMemorizedPath(tPtr,now); // periodically attempt memorized or statically defined paths, if any are known + const SharedPtr relay(RR->topology->getUpstreamPeer()); + if ( (!relay) || (!(viaPath = relay->getAppropriatePath(now,false,flowId))) ) { + if (!(viaPath = peer->getAppropriatePath(now,true,flowId))) + return false; + } } } } else { 
return false; } - unsigned int mtu = ZT_DEFAULT_PHYSMTU; - uint64_t trustedPathId = 0; - RR->topology->getOutboundPathInfo(viaPath->address(),mtu,trustedPathId); - - unsigned int chunkSize = std::min(packet.size(),mtu); - packet.setFragmented(chunkSize < packet.size()); - - peer->recordOutgoingPacket(viaPath, packet.packetId(), packet.payloadLength(), packet.verb(), now); - - if (trustedPathId) { - packet.setTrusted(trustedPathId); - } else { - packet.armor(peer->key(),encrypt); - } - - if (viaPath->send(RR,tPtr,packet.data(),chunkSize,now)) { - if (chunkSize < packet.size()) { - // Too big for one packet, fragment the rest - unsigned int fragStart = chunkSize; - unsigned int remaining = packet.size() - chunkSize; - unsigned int fragsRemaining = (remaining / (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH)); - if ((fragsRemaining * (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH)) < remaining) - ++fragsRemaining; - const unsigned int totalFragments = fragsRemaining + 1; - - for(unsigned int fno=1;fnosend(RR,tPtr,frag.data(),frag.size(),now); - fragStart += chunkSize; - remaining -= chunkSize; - } + // If sending on all paths, set viaPath to first path + int nextPathIdx = 0; + std::vector> paths = peer->getAllPaths(now); + if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) { + if (paths.size()) { + viaPath = paths[nextPathIdx++]; } } + while (viaPath) { + unsigned int mtu = ZT_DEFAULT_PHYSMTU; + uint64_t trustedPathId = 0; + RR->topology->getOutboundPathInfo(viaPath->address(),mtu,trustedPathId); + unsigned int chunkSize = std::min(packet.size(),mtu); + packet.setFragmented(chunkSize < packet.size()); + peer->recordOutgoingPacket(viaPath, packet.packetId(), packet.payloadLength(), packet.verb(), now); + + if (trustedPathId) { + packet.setTrusted(trustedPathId); + } else { + packet.armor(peer->key(),encrypt); + } + + if (viaPath->send(RR,tPtr,packet.data(),chunkSize,now)) { + if (chunkSize < packet.size()) { + // Too big for one packet, fragment the rest + unsigned int fragStart = 
chunkSize; + unsigned int remaining = packet.size() - chunkSize; + unsigned int fragsRemaining = (remaining / (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH)); + if ((fragsRemaining * (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH)) < remaining) + ++fragsRemaining; + const unsigned int totalFragments = fragsRemaining + 1; + + for(unsigned int fno=1;fnosend(RR,tPtr,frag.data(),frag.size(),now); + fragStart += chunkSize; + remaining -= chunkSize; + } + } + } + viaPath.zero(); + if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) { + if (paths.size() > nextPathIdx) { + viaPath = paths[nextPathIdx++]; + } + } + } return true; } diff --git a/node/Switch.hpp b/node/Switch.hpp index a531b2686..388e1ccf1 100644 --- a/node/Switch.hpp +++ b/node/Switch.hpp @@ -131,7 +131,7 @@ public: * @param encrypt Encrypt packet payload? (always true except for HELLO) * @param qosBucket Which bucket the rule-system determined this packet should fall into */ - void aqm_enqueue(void *tPtr, const SharedPtr &network, Packet &packet,bool encrypt,int qosBucket); + void aqm_enqueue(void *tPtr, const SharedPtr &network, Packet &packet,bool encrypt,int qosBucket,int64_t flowId = -1); /** * Performs a single AQM cycle and dequeues and transmits all eligible packets on all networks @@ -177,7 +177,7 @@ public: * @param packet Packet to send (buffer may be modified) * @param encrypt Encrypt packet payload? 
(always true except for HELLO) */ - void send(void *tPtr,Packet &packet,bool encrypt); + void send(void *tPtr,Packet &packet,bool encrypt,int64_t flowId = -1); /** * Request WHOIS on a given address @@ -212,7 +212,7 @@ public: private: bool _shouldUnite(const int64_t now,const Address &source,const Address &destination); - bool _trySend(void *tPtr,Packet &packet,bool encrypt); // packet is modified if return is true + bool _trySend(void *tPtr,Packet &packet,bool encrypt,int64_t flowId = -1); // packet is modified if return is true const RuntimeEnvironment *const RR; int64_t _lastBeaconResponse; @@ -261,16 +261,18 @@ private: struct TXQueueEntry { TXQueueEntry() {} - TXQueueEntry(Address d,uint64_t ct,const Packet &p,bool enc) : + TXQueueEntry(Address d,uint64_t ct,const Packet &p,bool enc,int64_t fid) : dest(d), creationTime(ct), packet(p), - encrypt(enc) {} + encrypt(enc), + flowId(fid) {} Address dest; uint64_t creationTime; Packet packet; // unencrypted/unMAC'd packet -- this is done at send time bool encrypt; + int64_t flowId; }; std::list< TXQueueEntry > _txQueue; Mutex _txQueue_m; diff --git a/node/Trace.cpp b/node/Trace.cpp index b7c002719..e38aaa41e 100644 --- a/node/Trace.cpp +++ b/node/Trace.cpp @@ -109,10 +109,10 @@ void Trace::peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId, void Trace::peerLinkNowAggregate(void *const tPtr,Peer &peer) { - if ((RR->node->getMultipathMode() == ZT_MULTIPATH_RANDOM)) { + if ((RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RANDOM)) { ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is now a randomly-distributed aggregate link",peer.address().toInt()); } - if ((RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED)) { + if ((RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_DYNAMIC_OPAQUE)) { ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is now a proportionally-balanced aggregate link",peer.address().toInt()); } } From 963113b86dc0c1c214038333fd35ea0e014fafc7 Mon Sep 17 00:00:00 2001 
From: Joseph Henry Date: Tue, 20 Aug 2019 10:38:18 -0700 Subject: [PATCH 13/35] Minor adjustment to how _allowTcpFallbackRelay is disabled when _multipathMode is set --- service/OneService.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/service/OneService.cpp b/service/OneService.cpp index 93e97bdef..0d1dd337b 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -1572,18 +1572,15 @@ public: json &settings = lc["settings"]; _primaryPort = (unsigned int)OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff; - _allowTcpFallbackRelay = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],true); + _multipathMode = (unsigned int)OSUtils::jsonInt(settings["multipathMode"],0); + // multipathMode cannot be used with allowTcpFallbackRelay + _allowTcpFallbackRelay = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],true) && !_multipathMode; _allowSecondaryPort = OSUtils::jsonBool(settings["allowSecondaryPort"],true); _secondaryPort = (unsigned int)OSUtils::jsonInt(settings["secondaryPort"],0); _tertiaryPort = (unsigned int)OSUtils::jsonInt(settings["tertiaryPort"],0); if (_secondaryPort != 0 || _tertiaryPort != 0) { fprintf(stderr,"WARNING: using manually-specified ports. This can cause NAT issues." ZT_EOL_S); } - _multipathMode = (unsigned int)OSUtils::jsonInt(settings["multipathMode"],0); - if (_multipathMode != 0 && _allowTcpFallbackRelay) { - fprintf(stderr,"WARNING: multipathMode cannot be used with allowTcpFallbackRelay. 
Disabling allowTcpFallbackRelay" ZT_EOL_S); - _allowTcpFallbackRelay = false; - } _portMappingEnabled = OSUtils::jsonBool(settings["portMappingEnabled"],true); #ifndef ZT_SDK From b0a91c018727f20789ac10b70b766f7b6041feb5 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 20 Aug 2019 16:19:20 -0700 Subject: [PATCH 14/35] Partial implementation of ZT_MULTIPATH_ACTIVE_BACKUP --- node/Constants.hpp | 18 +++++++++++ node/Peer.cpp | 75 ++++++++++++++++++++++++++++++++++++++-------- node/Peer.hpp | 2 ++ 3 files changed, 83 insertions(+), 12 deletions(-) diff --git a/node/Constants.hpp b/node/Constants.hpp index 7f962851a..278c705d8 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -357,16 +357,29 @@ /** * How much each factor contributes to the "stability" score of a path */ + +#if 0 +#define ZT_PATH_CONTRIB_PDV (1.5 / 3.0) +#define ZT_PATH_CONTRIB_LATENCY (0.0 / 3.0) +#define ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE (1.5 / 3.0) +#else #define ZT_PATH_CONTRIB_PDV (1.0 / 3.0) #define ZT_PATH_CONTRIB_LATENCY (1.0 / 3.0) #define ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE (1.0 / 3.0) +#endif /** * How much each factor contributes to the "quality" score of a path */ +#if 0 +#define ZT_PATH_CONTRIB_STABILITY (2.00 / 3.0) +#define ZT_PATH_CONTRIB_THROUGHPUT (0.50 / 3.0) +#define ZT_PATH_CONTRIB_SCOPE (0.50 / 3.0) +#else #define ZT_PATH_CONTRIB_STABILITY (0.75 / 3.0) #define ZT_PATH_CONTRIB_THROUGHPUT (1.50 / 3.0) #define ZT_PATH_CONTRIB_SCOPE (0.75 / 3.0) +#endif /** * How often a QoS packet is sent @@ -475,6 +488,11 @@ */ #define ZT_MULTIPATH_PEER_PING_PERIOD (ZT_PEER_PING_PERIOD / 10) +/** + * How long before we consider a path to be dead in rapid fail-over scenarios + */ +#define ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD 1000 + /** * Paths are considered expired if they have not sent us a real packet in this long */ diff --git a/node/Peer.cpp b/node/Peer.cpp index d1ef9ecf6..7e96b5f06 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -347,7 +347,7 @@ void 
Peer::computeAggregateAllocation(int64_t now) + (fmaxf(1.0f, relThroughput[i]) * (float)ZT_PATH_CONTRIB_THROUGHPUT) + relScope * (float)ZT_PATH_CONTRIB_SCOPE; relQuality *= age_contrib; - // Arbitrary cutoffs + // Clamp values relQuality = relQuality > (1.00f / 100.0f) ? relQuality : 0.0f; relQuality = relQuality < (99.0f / 100.0f) ? relQuality : 1.0f; totalRelativeQuality += relQuality; @@ -357,7 +357,6 @@ void Peer::computeAggregateAllocation(int64_t now) // Convert set of relative performances into an allocation set for(uint16_t i=0;inode->getMultipathMode() == ZT_MULTIPATH_BALANCE_RANDOM) { _paths[i].p->updateComponentAllocationOfAggregateLink(((float)_pathChoiceHist.countValue(i) / (float)_pathChoiceHist.count()) * 255); } @@ -420,10 +419,10 @@ int Peer::aggregateLinkLogicalPathCount() return pathCount; } -std::vector> Peer::getAllPaths(int64_t now) +std::vector > Peer::getAllPaths(int64_t now) { Mutex::Lock _l(_virtual_paths_m); // FIXME: TX can now lock RX - std::vector> paths; + std::vector > paths; for (int i=0; i<_virtualPaths.size(); i++) { if (_virtualPaths[i]->p) { paths.push_back(_virtualPaths[i]->p); @@ -436,6 +435,8 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired, int64 { Mutex::Lock _l(_paths_m); SharedPtr selectedPath; + char curPathStr[128]; + char newPathStr[128]; unsigned int bestPath = ZT_MAX_PEER_NETWORK_PATHS; /** @@ -511,14 +512,66 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired, int64 * All traffic is sent on all paths. */ if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) { - // Not handled here. Handled in Switch.cpp + // Not handled here. Handled in Switch::_trySend() } /** * Only one link is active. Fail-over is immediate. */ if (RR->node->getMultipathMode() == ZT_MULTIPATH_ACTIVE_BACKUP) { - // fprintf(stderr, "ZT_MULTIPATH_ACTIVE_BACKUP\n"); + bool bFoundHotPath = false; + if (!_activeBackupPath) { + /* Select the fist path that appears to still be active. 
+ * This will eventually be user-configurable */ + for (int i=0; ilastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) { + bFoundHotPath = true; + _activeBackupPath = _paths[i].p; + _activeBackupPath->address().toString(curPathStr); + fprintf(stderr, "selected %s as the primary active-backup path to %llx\n", + curPathStr, this->_id.address().toInt()); + } + } + } + if (!_activeBackupPath) { + return SharedPtr(); + } + if (!bFoundHotPath) { + _activeBackupPath->address().toString(curPathStr); + fprintf(stderr, "no hot paths available to to use as active-backup primary to %llx, selected %s anyway\n", + this->_id.address().toInt(), curPathStr); + } + } + else { + if ((now - _activeBackupPath->lastIn()) > ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) { + _activeBackupPath->address().toString(curPathStr); + /* Fail-over to the fist path that appears to still be active. + * This will eventually be user-configurable */ + for (int i=0; ilastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) { + bFoundHotPath = true; + _activeBackupPath->address().toString(curPathStr); // Record path string for later debug trace + _activeBackupPath = _paths[i].p; + _activeBackupPath->address().toString(newPathStr); + } + } + } + if (bFoundHotPath) { + fprintf(stderr, "primary active-backup path %s to %llx appears to be dead, switched to path %s\n", + curPathStr, this->_id.address().toInt(), newPathStr); + } + } + } + return _activeBackupPath; } /** @@ -553,27 +606,25 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired, int64 */ if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_XOR_FLOW) { // fprintf(stderr, "ZT_MULTIPATH_BALANCE_XOR_FLOW (%llx) \n", flowId); - char pathStr[128]; struct Flow *currFlow = NULL; if (_flows.count(flowId)) { currFlow = _flows[flowId]; if (!currFlow->assignedPath) { int idx = abs((int)(currFlow->flowId % (_virtualPaths.size()-1))); currFlow->assignedPath = _virtualPaths[idx]; - 
_virtualPaths[idx]->p->address().toString(pathStr); + _virtualPaths[idx]->p->address().toString(curPathStr); fprintf(stderr, "assigning flow %llx between this node and peer %llx to path %s at index %d\n", - currFlow->flowId, this->_id.address().toInt(), pathStr, idx); + currFlow->flowId, this->_id.address().toInt(), curPathStr, idx); } else { if (!currFlow->assignedPath->p->alive(now)) { - char newPathStr[128]; - currFlow->assignedPath->p->address().toString(pathStr); + currFlow->assignedPath->p->address().toString(curPathStr); // Re-assign int idx = abs((int)(currFlow->flowId % (_virtualPaths.size()-1))); currFlow->assignedPath = _virtualPaths[idx]; _virtualPaths[idx]->p->address().toString(newPathStr); fprintf(stderr, "path %s assigned to flow %llx between this node and %llx appears to be dead, reassigning to path %s\n", - pathStr, currFlow->flowId, this->_id.address().toInt(), newPathStr); + curPathStr, currFlow->flowId, this->_id.address().toInt(), newPathStr); } } return currFlow->assignedPath->p; diff --git a/node/Peer.hpp b/node/Peer.hpp index 7633ad7d5..84d7d43a2 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -725,6 +725,8 @@ private: std::map _flows; int16_t _roundRobinPathAssignmentIdx; + + SharedPtr _activeBackupPath; }; } // namespace ZeroTier From 5453cab22b26b4caa38457dadf64e5f53920cb2d Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 20 Aug 2019 18:50:38 -0700 Subject: [PATCH 15/35] Added flow-awareness check for policies, more work on ZT_MULTIPATH_ACTIVE_BACKUP --- node/Constants.hpp | 2 +- node/Peer.cpp | 106 ++++++++++++++++++++++++++---------------- node/Peer.hpp | 1 + node/Switch.cpp | 113 +++++++++++++++++++++++++-------------------- node/Switch.hpp | 5 ++ 5 files changed, 134 insertions(+), 93 deletions(-) diff --git a/node/Constants.hpp b/node/Constants.hpp index 278c705d8..ee656aaec 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -491,7 +491,7 @@ /** * How long before we consider a path to be dead in rapid fail-over 
scenarios */ -#define ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD 1000 +#define ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD 250 /** * Paths are considered expired if they have not sent us a real packet in this long diff --git a/node/Peer.cpp b/node/Peer.cpp index 7e96b5f06..622095e29 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -77,7 +77,8 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _lastAggregateStatsReport(0), _lastAggregateAllocation(0), _virtualPathCount(0), - _roundRobinPathAssignmentIdx(0) + _roundRobinPathAssignmentIdx(0), + _pathAssignmentIdx(0) { if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) throw ZT_EXCEPTION_INVALID_ARGUMENT; @@ -468,16 +469,18 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired, int64 _paths[i].p->processBackgroundPathMeasurements(now); } } - // Detect new flows and update existing records - if (_flows.count(flowId)) { - _flows[flowId]->lastSend = now; - } - else { - fprintf(stderr, "new flow %llx detected between this node and %llx (%lu active flow(s))\n", - flowId, this->_id.address().toInt(), (_flows.size()+1)); - struct Flow *newFlow = new Flow(flowId, now); - _flows[flowId] = newFlow; - newFlow->assignedPath = nullptr; + if (RR->sw->isFlowAware()) { + // Detect new flows and update existing records + if (_flows.count(flowId)) { + _flows[flowId]->lastSend = now; + } + else { + fprintf(stderr, "new flow %llx detected between this node and %llx (%lu active flow(s))\n", + flowId, this->_id.address().toInt(), (_flows.size()+1)); + struct Flow *newFlow = new Flow(flowId, now); + _flows[flowId] = newFlow; + newFlow->assignedPath = nullptr; + } } // Construct set of virtual paths if needed if (!_virtualPaths.size()) { @@ -532,45 +535,64 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired, int64 if ((now - _paths[i].p->lastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) { bFoundHotPath = true; _activeBackupPath = 
_paths[i].p; + _pathAssignmentIdx = i; _activeBackupPath->address().toString(curPathStr); - fprintf(stderr, "selected %s as the primary active-backup path to %llx\n", - curPathStr, this->_id.address().toInt()); + fprintf(stderr, "selected %s as the primary active-backup path to %llx (idx=%d)\n", + curPathStr, this->_id.address().toInt(), _pathAssignmentIdx); + break; } } } - if (!_activeBackupPath) { - return SharedPtr(); - } - if (!bFoundHotPath) { - _activeBackupPath->address().toString(curPathStr); - fprintf(stderr, "no hot paths available to to use as active-backup primary to %llx, selected %s anyway\n", - this->_id.address().toInt(), curPathStr); - } } else { + char what[128]; if ((now - _activeBackupPath->lastIn()) > ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) { - _activeBackupPath->address().toString(curPathStr); - /* Fail-over to the fist path that appears to still be active. - * This will eventually be user-configurable */ - for (int i=0; iaddress().toString(curPathStr); // Record path string for later debug trace + int16_t previousIdx = _pathAssignmentIdx; + SharedPtr nextAlternativePath; + // Search for a hot path, at the same time find the next path in + // a RR sequence that seems viable to use as an alternative + int searchCount = 0; + while (searchCount < ZT_MAX_PEER_NETWORK_PATHS) { + _pathAssignmentIdx++; + if (_pathAssignmentIdx == ZT_MAX_PEER_NETWORK_PATHS) { + _pathAssignmentIdx = 0; + } + searchCount++; + if (_paths[_pathAssignmentIdx].p) { + _paths[_pathAssignmentIdx].p->address().toString(what); + if (_activeBackupPath.ptr() == _paths[_pathAssignmentIdx].p.ptr()) { continue; } - if ((now - _paths[i].p->lastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) { + if (!nextAlternativePath) { // Record the first viable alternative in the RR sequence + nextAlternativePath = _paths[_pathAssignmentIdx].p; + } + if ((now - _paths[_pathAssignmentIdx].p->lastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) { bFoundHotPath = true; - 
_activeBackupPath->address().toString(curPathStr); // Record path string for later debug trace - _activeBackupPath = _paths[i].p; + _activeBackupPath = _paths[_pathAssignmentIdx].p; _activeBackupPath->address().toString(newPathStr); + fprintf(stderr, "primary active-backup path %s to %llx appears to be dead, switched to %s\n", + curPathStr, this->_id.address().toInt(), newPathStr); + break; } } } - if (bFoundHotPath) { - fprintf(stderr, "primary active-backup path %s to %llx appears to be dead, switched to path %s\n", - curPathStr, this->_id.address().toInt(), newPathStr); + if (!bFoundHotPath) { + if (nextAlternativePath) { + _activeBackupPath = nextAlternativePath; + _activeBackupPath->address().toString(curPathStr); + //fprintf(stderr, "no hot paths found to use as active-backup primary to %llx, using next best: %s\n", + // this->_id.address().toInt(), curPathStr); + } + else { + // No change + } } } } + if (!_activeBackupPath) { + return SharedPtr(); + } return _activeBackupPath; } @@ -866,14 +888,16 @@ inline void Peer::processBackgroundPeerTasks(const int64_t now) } // Remove old flows - std::map::iterator it = _flows.begin(); - while (it != _flows.end()) { - if ((now - it->second->lastSend) > ZT_MULTIPATH_FLOW_EXPIRATION) { - fprintf(stderr, "forgetting flow %llx between this node and %llx (%lu active flow(s))\n", - it->first, this->_id.address().toInt(), _flows.size()); - it = _flows.erase(it); - } else { - it++; + if (RR->sw->isFlowAware()) { + std::map::iterator it = _flows.begin(); + while (it != _flows.end()) { + if ((now - it->second->lastSend) > ZT_MULTIPATH_FLOW_EXPIRATION) { + fprintf(stderr, "forgetting flow %llx between this node and %llx (%lu active flow(s))\n", + it->first, this->_id.address().toInt(), _flows.size()); + it = _flows.erase(it); + } else { + it++; + } } } } diff --git a/node/Peer.hpp b/node/Peer.hpp index 84d7d43a2..dddd8fc01 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -727,6 +727,7 @@ private: int16_t 
_roundRobinPathAssignmentIdx; SharedPtr _activeBackupPath; + int16_t _pathAssignmentIdx; }; } // namespace ZeroTier diff --git a/node/Switch.cpp b/node/Switch.cpp index c2251f23d..51f23f674 100644 --- a/node/Switch.cpp +++ b/node/Switch.cpp @@ -284,6 +284,14 @@ static bool _ipv6GetPayload(const uint8_t *frameData,unsigned int frameLen,unsig return false; // overflow == invalid } +bool Switch::isFlowAware() +{ + int mode = RR->node->getMultipathMode(); + return (( mode == ZT_MULTIPATH_BALANCE_RR_FLOW) + || (mode == ZT_MULTIPATH_BALANCE_XOR_FLOW) + || (mode == ZT_MULTIPATH_BALANCE_DYNAMIC_FLOW)); +} + void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const MAC &from,const MAC &to,unsigned int etherType,unsigned int vlanId,const void *data,unsigned int len) { if (!network->hasConfig()) @@ -309,61 +317,64 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const * preferred virtual path and will be sent out according to what the multipath logic * deems appropriate. An example of this would be an ICMP packet. 
*/ + int64_t flowId = -1; - if (etherType == ZT_ETHERTYPE_IPV4 && (len >= 20)) { - uint16_t srcPort = 0; - uint16_t dstPort = 0; - int8_t proto = (reinterpret_cast(data)[9]); - const unsigned int headerLen = 4 * (reinterpret_cast(data)[0] & 0xf); - switch(proto) { - case 0x01: // ICMP - flowId = 0x01; - break; - // All these start with 16-bit source and destination port in that order - case 0x06: // TCP - case 0x11: // UDP - case 0x84: // SCTP - case 0x88: // UDPLite - if (len > (headerLen + 4)) { - unsigned int pos = headerLen + 0; - srcPort = (reinterpret_cast(data)[pos++]) << 8; - srcPort |= (reinterpret_cast(data)[pos]); - pos++; - dstPort = (reinterpret_cast(data)[pos++]) << 8; - dstPort |= (reinterpret_cast(data)[pos]); - flowId = ((int64_t)srcPort << 48) | ((int64_t)dstPort << 32) | proto; - } - break; + if (isFlowAware()) { + if (etherType == ZT_ETHERTYPE_IPV4 && (len >= 20)) { + uint16_t srcPort = 0; + uint16_t dstPort = 0; + int8_t proto = (reinterpret_cast(data)[9]); + const unsigned int headerLen = 4 * (reinterpret_cast(data)[0] & 0xf); + switch(proto) { + case 0x01: // ICMP + flowId = 0x01; + break; + // All these start with 16-bit source and destination port in that order + case 0x06: // TCP + case 0x11: // UDP + case 0x84: // SCTP + case 0x88: // UDPLite + if (len > (headerLen + 4)) { + unsigned int pos = headerLen + 0; + srcPort = (reinterpret_cast(data)[pos++]) << 8; + srcPort |= (reinterpret_cast(data)[pos]); + pos++; + dstPort = (reinterpret_cast(data)[pos++]) << 8; + dstPort |= (reinterpret_cast(data)[pos]); + flowId = ((int64_t)srcPort << 48) | ((int64_t)dstPort << 32) | proto; + } + break; + } } - } - if (etherType == ZT_ETHERTYPE_IPV6 && (len >= 40)) { - uint16_t srcPort = 0; - uint16_t dstPort = 0; - unsigned int pos; - unsigned int proto; - _ipv6GetPayload((const uint8_t *)data, len, pos, proto); - switch(proto) { - case 0x3A: // ICMPv6 - flowId = 0x3A; - break; - // All these start with 16-bit source and destination port in that order - 
case 0x06: // TCP - case 0x11: // UDP - case 0x84: // SCTP - case 0x88: // UDPLite - if (len > (pos + 4)) { - srcPort = (reinterpret_cast(data)[pos++]) << 8; - srcPort |= (reinterpret_cast(data)[pos]); - pos++; - dstPort = (reinterpret_cast(data)[pos++]) << 8; - dstPort |= (reinterpret_cast(data)[pos]); - flowId = ((int64_t)srcPort << 48) | ((int64_t)dstPort << 32) | proto; - } - break; - default: - break; + if (etherType == ZT_ETHERTYPE_IPV6 && (len >= 40)) { + uint16_t srcPort = 0; + uint16_t dstPort = 0; + unsigned int pos; + unsigned int proto; + _ipv6GetPayload((const uint8_t *)data, len, pos, proto); + switch(proto) { + case 0x3A: // ICMPv6 + flowId = 0x3A; + break; + // All these start with 16-bit source and destination port in that order + case 0x06: // TCP + case 0x11: // UDP + case 0x84: // SCTP + case 0x88: // UDPLite + if (len > (pos + 4)) { + srcPort = (reinterpret_cast(data)[pos++]) << 8; + srcPort |= (reinterpret_cast(data)[pos]); + pos++; + dstPort = (reinterpret_cast(data)[pos++]) << 8; + dstPort |= (reinterpret_cast(data)[pos]); + flowId = ((int64_t)srcPort << 48) | ((int64_t)dstPort << 32) | proto; + } + break; + default: + break; + } } } diff --git a/node/Switch.hpp b/node/Switch.hpp index 388e1ccf1..666ee0531 100644 --- a/node/Switch.hpp +++ b/node/Switch.hpp @@ -91,6 +91,11 @@ public: */ void onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddress &fromAddr,const void *data,unsigned int len); + /** + * Returns whether our bonding or balancing policy is aware of flows. 
+ */ + bool isFlowAware(); + /** * Called when a packet comes from a local Ethernet tap * From afca5c25369aab1a3cb1144229ada1e9f5609030 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 20 Aug 2019 23:28:59 -0700 Subject: [PATCH 16/35] Partial implementation of ZT_MULTIPATH_BALANCE_RR_OPAQUE --- node/Peer.cpp | 79 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 29 deletions(-) diff --git a/node/Peer.cpp b/node/Peer.cpp index 622095e29..f9e452c12 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -491,26 +491,6 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired, int64 return SharedPtr(); } - /** - * Traffic is randomly distributed among all active paths. - */ - int numAlivePaths = 0; - int numStalePaths = 0; - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RANDOM) { - int sz = _virtualPaths.size(); - if (sz) { - int idx = _freeRandomByte % sz; - _pathChoiceHist.push(idx); - char pathStr[128]; - _virtualPaths[idx]->p->address().toString(pathStr); - fprintf(stderr, "sending out: (%llx), idx=%d: path=%s, localSocket=%lld\n", - this->_id.address().toInt(), idx, pathStr, _virtualPaths[idx]->localSocket); - return _virtualPaths[idx]->p; - } - // This call is algorithmically inert but gives us a value to show in the status output - computeAggregateAllocation(now); - } - /** * All traffic is sent on all paths. */ @@ -596,23 +576,64 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired, int64 return _activeBackupPath; } + /** + * Traffic is randomly distributed among all active paths. 
+ */ + if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RANDOM) { + int sz = _virtualPaths.size(); + if (sz) { + int idx = _freeRandomByte % sz; + _pathChoiceHist.push(idx); + _virtualPaths[idx]->p->address().toString(curPathStr); + fprintf(stderr, "sending out: (%llx), idx=%d: path=%s, localSocket=%lld\n", + this->_id.address().toInt(), idx, curPathStr, _virtualPaths[idx]->localSocket); + return _virtualPaths[idx]->p; + } + // This call is algorithmically inert but gives us a value to show in the status output + computeAggregateAllocation(now); + } + /** * Packets are striped across all available paths. */ if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RR_OPAQUE) { - // fprintf(stderr, "ZT_MULTIPATH_BALANCE_RR_OPAQUE\n"); int16_t previousIdx = _roundRobinPathAssignmentIdx; - if (_roundRobinPathAssignmentIdx < (_virtualPaths.size()-1)) { - _roundRobinPathAssignmentIdx++; + int cycleCount = 0; + int minLastIn = 0; + int bestAlternativeIdx = -1; + while (cycleCount < ZT_MAX_PEER_NETWORK_PATHS) { + if (_roundRobinPathAssignmentIdx < (_virtualPaths.size()-1)) { + _roundRobinPathAssignmentIdx++; + } + else { + _roundRobinPathAssignmentIdx = 0; + } + cycleCount++; + if (_virtualPaths[_roundRobinPathAssignmentIdx]->p) { + uint64_t lastIn = _virtualPaths[_roundRobinPathAssignmentIdx]->p->lastIn(); + if (bestAlternativeIdx == -1) { + minLastIn = lastIn; // Initialization + bestAlternativeIdx = 0; + } + if (lastIn < minLastIn) { + minLastIn = lastIn; + bestAlternativeIdx = _roundRobinPathAssignmentIdx; + } + if ((now - lastIn) < 5000) { + selectedPath = _virtualPaths[_roundRobinPathAssignmentIdx]->p; + } + } } - else { - _roundRobinPathAssignmentIdx = 0; + // If we can't find an appropriate path, try the most recently active one + if (!selectedPath) { + _roundRobinPathAssignmentIdx = bestAlternativeIdx; + selectedPath = _virtualPaths[bestAlternativeIdx]->p; + selectedPath->address().toString(curPathStr); + fprintf(stderr, "could not find good path, settling 
for next best %s\n",curPathStr); } - selectedPath = _virtualPaths[previousIdx]->p; - char pathStr[128]; - selectedPath->address().toString(pathStr); + selectedPath->address().toString(curPathStr); fprintf(stderr, "sending packet out on path %s at index %d\n", - pathStr, previousIdx); + curPathStr, _roundRobinPathAssignmentIdx); return selectedPath; } From a50e8e9878b8e451862952c335b735dacfef11aa Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 12 May 2020 01:35:48 -0700 Subject: [PATCH 17/35] Add Bonds, Slaves, and Flows --- include/ZeroTierOne.h | 164 +++- node/Bond.cpp | 1730 +++++++++++++++++++++++++++++++++++ node/Bond.hpp | 689 ++++++++++++++ node/BondController.cpp | 203 ++++ node/BondController.hpp | 231 +++++ node/Constants.hpp | 452 ++++----- node/Flow.hpp | 123 +++ node/IncomingPacket.cpp | 284 ++++-- node/IncomingPacket.hpp | 11 +- node/Node.cpp | 52 +- node/Node.hpp | 13 +- node/Packet.cpp | 4 +- node/Packet.hpp | 40 +- node/Path.hpp | 809 ++++++++-------- node/Peer.cpp | 782 +++------------- node/Peer.hpp | 297 ++---- node/RingBuffer.hpp | 23 +- node/RuntimeEnvironment.hpp | 2 + node/Switch.cpp | 337 ++++--- node/Switch.hpp | 20 +- node/Trace.cpp | 17 +- node/Trace.hpp | 4 +- node/Utils.hpp | 11 +- objects.mk | 4 +- osdep/Binder.hpp | 29 +- osdep/LinuxNetLink.cpp | 16 - osdep/OSUtils.cpp | 20 +- osdep/OSUtils.hpp | 5 +- osdep/Phy.hpp | 44 +- osdep/Slave.hpp | 238 +++++ service/OneService.cpp | 210 ++++- 31 files changed, 4898 insertions(+), 1966 deletions(-) create mode 100644 node/Bond.cpp create mode 100644 node/Bond.hpp create mode 100644 node/BondController.cpp create mode 100644 node/BondController.hpp create mode 100644 node/Flow.hpp create mode 100644 osdep/Slave.hpp diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index e5667acc0..890e56048 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -415,55 +415,128 @@ enum ZT_ResultCode */ #define ZT_ResultCode_isFatal(x) ((((int)(x)) >= 100)&&(((int)(x)) < 1000)) + /** - 
* The multipath algorithm in use by this node. + * Multipath bonding policy */ -enum ZT_MultipathMode +enum ZT_MultipathBondingPolicy { /** - * No fault tolerance or balancing. + * Normal operation. No fault tolerance, no load balancing */ - ZT_MULTIPATH_NONE = 0, + ZT_BONDING_POLICY_NONE = 0, /** - * Sends traffic out on all paths. + * Sends traffic out on only one path at a time. Configurable immediate + * fail-over. */ - ZT_MULTIPATH_BROADCAST = 1, + ZT_BONDING_POLICY_ACTIVE_BACKUP = 1, /** - * Sends traffic out on only one path at a time. Immediate fail-over. + * Sends traffic out on all paths */ - ZT_MULTIPATH_ACTIVE_BACKUP= 2, + ZT_BONDING_POLICY_BROADCAST = 2, /** - * Sends traffic out on all interfaces according to a uniform random distribution. + * Stripes packets across all paths */ - ZT_MULTIPATH_BALANCE_RANDOM = 3, + ZT_BONDING_POLICY_BALANCE_RR = 3, /** - * Stripes packets across all paths. + * Packets destined for specific peers will always be sent over the same + * path. */ - ZT_MULTIPATH_BALANCE_RR_OPAQUE = 4, + ZT_BONDING_POLICY_BALANCE_XOR = 4, /** - * Balances flows across all paths. + * Balances flows among all paths according to path performance */ - ZT_MULTIPATH_BALANCE_RR_FLOW = 5, + ZT_BONDING_POLICY_BALANCE_AWARE = 5 +}; + +/** + * Multipath active re-selection policy (slaveSelectMethod) + */ +enum ZT_MultipathSlaveSelectMethod +{ + /** + * Primary slave regains status as active slave whenever it comes back up + * (default when slaves are explicitly specified) + */ + ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS = 0, /** - * Hashes flows across all paths. + * Primary slave regains status as active slave when it comes back up and + * (if) it is better than the currently-active slave. */ - ZT_MULTIPATH_BALANCE_XOR_FLOW = 6, + ZT_MULTIPATH_RESELECTION_POLICY_BETTER = 1, /** - * Balances traffic across all paths according to observed performance. + * Primary slave regains status as active slave only if the currently-active + * slave fails. 
*/ - ZT_MULTIPATH_BALANCE_DYNAMIC_OPAQUE = 7, + ZT_MULTIPATH_RESELECTION_POLICY_FAILURE = 2, /** - * Balances flows across all paths. + * The primary slave can change if a superior path is detected. + * (default if user provides no fail-over guidance) */ - ZT_MULTIPATH_BALANCE_DYNAMIC_FLOW = 8, + ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE = 3 +}; + +/** + * Mode of multipath slave interface + */ +enum ZT_MultipathSlaveMode +{ + ZT_MULTIPATH_SLAVE_MODE_PRIMARY = 0, + ZT_MULTIPATH_SLAVE_MODE_SPARE = 1 +}; + +/** + * Strategy for path monitoring + */ +enum ZT_MultipathMonitorStrategy +{ + /** + * Use bonding policy's default strategy + */ + ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DEFAULT = 0, + + /** + * Does not actively send probes to judge aliveness, will rely + * on conventional traffic and summary statistics. + */ + ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE = 1, + + /** + * Sends probes at a constant rate to judge aliveness. + */ + ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_ACTIVE = 2, + + /** + * Sends probes at varying rates which correlate to native + * traffic loads to judge aliveness. 
+ */ + ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC = 3 +}; + +/** + * Indices for the path quality weight vector + */ +enum ZT_MultipathQualityWeightIndex +{ + ZT_QOS_LAT_IDX, + ZT_QOS_LTM_IDX, + ZT_QOS_PDV_IDX, + ZT_QOS_PLR_IDX, + ZT_QOS_PER_IDX, + ZT_QOS_THR_IDX, + ZT_QOS_THM_IDX, + ZT_QOS_THV_IDX, + ZT_QOS_AGE_IDX, + ZT_QOS_SCP_IDX, + ZT_QOS_WEIGHT_SIZE }; /** @@ -1272,44 +1345,49 @@ typedef struct uint64_t trustedPathId; /** - * One-way latency + * Mean latency */ - float latency; + float latencyMean; /** - * How much latency varies over time + * Maximum observed latency */ - float packetDelayVariance; + float latencyMax; /** - * How much observed throughput varies over time + * Variance of latency */ - float throughputDisturbCoeff; + float latencyVariance; /** - * Packet Error Ratio (PER) - */ - float packetErrorRatio; - - /** - * Packet Loss Ratio (PLR) + * Packet loss ratio */ float packetLossRatio; /** - * Stability of the path + * Packet error ratio */ - float stability; + float packetErrorRatio; /** - * Current throughput (moving average) + * Mean throughput */ - uint64_t throughput; + uint64_t throughputMean; /** - * Maximum observed throughput for this path + * Maximum observed throughput */ - uint64_t maxThroughput; + float throughputMax; + + /** + * Throughput variance + */ + float throughputVariance; + + /** + * Address scope + */ + uint8_t scope; /** * Percentage of traffic allocated to this path @@ -1319,7 +1397,9 @@ typedef struct /** * Name of physical interface (for monitoring) */ - char *ifname; + char ifname[32]; + + uint64_t localSocket; /** * Is path expired? 
@@ -1373,9 +1453,11 @@ typedef struct unsigned int pathCount; /** - * Whether this peer was ever reachable via an aggregate link + * Whether multiple paths to this peer are bonded */ - bool hadAggregateLink; + bool isBonded; + + int bondingPolicy; /** * Known network paths to peer diff --git a/node/Bond.cpp b/node/Bond.cpp new file mode 100644 index 000000000..9a5ab1df8 --- /dev/null +++ b/node/Bond.cpp @@ -0,0 +1,1730 @@ +/* + * Copyright (c)2013-2020 ZeroTier, Inc. + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file in the project's root directory. + * + * Change Date: 2024-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2.0 of the Apache License. + */ +/****/ + +#include + +#include "Peer.hpp" +#include "Bond.hpp" +#include "Switch.hpp" +#include "Flow.hpp" +#include "Path.hpp" + +namespace ZeroTier { + +Bond::Bond(const RuntimeEnvironment *renv, int policy, const SharedPtr& peer) : + RR(renv), + _peer(peer) +{ + setReasonableDefaults(policy); + _policyAlias = BondController::getPolicyStrByCode(policy); +} + +Bond::Bond(std::string& basePolicy, std::string& policyAlias, const SharedPtr& peer) : + _policyAlias(policyAlias), + _peer(peer) +{ + setReasonableDefaults(BondController::getPolicyCodeByStr(basePolicy)); +} + +Bond::Bond(const RuntimeEnvironment *renv, const Bond &originalBond, const SharedPtr& peer) : + RR(renv), + _peer(peer) +{ + // First, set everything to sane defaults + setReasonableDefaults(originalBond._bondingPolicy); + _policyAlias = originalBond._policyAlias; + // Second, apply user specified values (only if they make sense) + _downDelay = originalBond._downDelay; + _upDelay = originalBond._upDelay; + if (originalBond._bondMonitorInterval > 0 && originalBond._bondMonitorInterval < 65535) { + _bondMonitorInterval = originalBond._bondMonitorInterval; + } + else { + fprintf(stderr, "warning: 
bondMonitorInterval (%d) is out of range, using default (%d)\n", originalBond._bondMonitorInterval, _bondMonitorInterval); + } + if (originalBond._slaveMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE + && originalBond._failoverInterval != 0) { + fprintf(stderr, "warning: passive path monitoring was specified, this will prevent failovers from happening in a timely manner.\n"); + } + _abSlaveSelectMethod = originalBond._abSlaveSelectMethod; + memcpy(_qualityWeights, originalBond._qualityWeights, ZT_QOS_WEIGHT_SIZE * sizeof(float)); +} + +void Bond::nominatePath(const SharedPtr& path, int64_t now) +{ + char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "nominatePath: %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + Mutex::Lock _l(_paths_m); + if (!RR->bc->slaveAllowed(_policyAlias, getSlave(path))) { + return; + } + bool alreadyPresent = false; + for (int i=0; islave = RR->bc->getSlaveBySocket(_policyAlias, path->localSocket()); + _paths[i]->startTrial(now); + break; + } + } + } + curateBond(now, true); + estimatePathQuality(now); +} + +SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) +{ + Mutex::Lock _l(_paths_m); + /** + * active-backup + */ + if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + if (_abPath) { + return _abPath; + } + } + /** + * broadcast + */ + if (_bondingPolicy== ZT_BONDING_POLICY_BROADCAST) { + return SharedPtr(); // Handled in Switch::_trySend() + } + if (!_numBondedPaths) { + return SharedPtr(); // No paths assigned to bond yet, cannot balance traffic + } + /** + * balance-rr + */ + if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR) { + if (!_allowFlowHashing) { + //fprintf(stderr, "_rrPacketsSentOnCurrSlave=%d, _numBondedPaths=%d, _rrIdx=%d\n", _rrPacketsSentOnCurrSlave, _numBondedPaths, _rrIdx); + if (_packetsPerSlave == 0) { + // Randomly select a path + return _paths[_bondedIdx[_freeRandomByte % _numBondedPaths]]; // TODO: Optimize + } + if (_rrPacketsSentOnCurrSlave < 
_packetsPerSlave) { + // Continue to use this slave + ++_rrPacketsSentOnCurrSlave; + return _paths[_bondedIdx[_rrIdx]]; + } + // Reset striping counter + _rrPacketsSentOnCurrSlave = 0; + if (_numBondedPaths == 1) { + _rrIdx = 0; + } + else { + int _tempIdx = _rrIdx; + for (int searchCount = 0; searchCount < (_numBondedPaths-1); searchCount++) { + _tempIdx = (_tempIdx == (_numBondedPaths-1)) ? 0 : _tempIdx+1; + if (_paths[_bondedIdx[_tempIdx]] && _paths[_bondedIdx[_tempIdx]]->eligible(now,_ackSendInterval)) { + _rrIdx = _tempIdx; + break; + } + } + } + //fprintf(stderr, "resultant _rrIdx=%d\n", _rrIdx); + if (_paths[_bondedIdx[_rrIdx]]) { + return _paths[_bondedIdx[_rrIdx]]; + } + } + } + /** + * balance-xor + */ + if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE) { + if (!_allowFlowHashing || flowId == -1) { + // No specific path required for unclassified traffic, send on anything + return _paths[_bondedIdx[_freeRandomByte % _numBondedPaths]]; // TODO: Optimize + } + else if (_allowFlowHashing) { + // TODO: Optimize + Mutex::Lock _l(_flows_m); + SharedPtr flow; + if (_flows.count(flowId)) { + flow = _flows[flowId]; + flow->updateActivity(now); + } + else { + unsigned char entropy; + Utils::getSecureRandom(&entropy, 1); + flow = createFlow(SharedPtr(), flowId, entropy, now); + } + if (flow) { + return flow->assignedPath(); + } + } + } + return SharedPtr(); +} + +void Bond::recordIncomingInvalidPacket(const SharedPtr& path) +{ + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordIncomingInvalidPacket() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + Mutex::Lock _l(_paths_m); + for (int i=0; ipacketValiditySamples.push(false); + } + } +} + +void Bond::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, + uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) +{ + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, 
"recordOutgoingPacket() %s %s, packetId=%llx, payloadLength=%d, verb=%x, flowId=%lx\n", getSlave(path)->ifname().c_str(), pathStr, packetId, payloadLength, verb, flowId); + _freeRandomByte += (unsigned char)(packetId >> 8); // Grab entropy to use in path selection logic + if (!_shouldCollectPathStatistics) { + return; + } + bool isFrame = (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME); + bool shouldRecord = (packetId & (ZT_QOS_ACK_DIVISOR - 1) + && (verb != Packet::VERB_ACK) + && (verb != Packet::VERB_QOS_MEASUREMENT)); + if (isFrame || shouldRecord) { + Mutex::Lock _l(_paths_m); + if (isFrame) { + ++(path->_packetsOut); + _lastFrame=now; + } + if (shouldRecord) { + path->_unackedBytes += payloadLength; + // Take note that we're expecting a VERB_ACK on this path as of a specific time + if (path->qosStatsOut.size() < ZT_QOS_MAX_OUTSTANDING_RECORDS) { + path->qosStatsOut[packetId] = now; + } + } + } + if (_allowFlowHashing) { + if (_allowFlowHashing && (flowId != ZT_QOS_NO_FLOW)) { + Mutex::Lock _l(_flows_m); + if (_flows.count(flowId)) { + _flows[flowId]->recordOutgoingBytes(payloadLength); + } + } + } +} + +void Bond::recordIncomingPacket(const SharedPtr& path, uint64_t packetId, uint16_t payloadLength, + Packet::Verb verb, int32_t flowId, int64_t now) +{ + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordIncomingPacket() %s %s, packetId=%llx, payloadLength=%d, verb=%x, flowId=%lx\n", getSlave(path)->ifname().c_str(), pathStr, packetId, payloadLength, verb, flowId); + bool isFrame = (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME); + bool shouldRecord = (packetId & (ZT_QOS_ACK_DIVISOR - 1) + && (verb != Packet::VERB_ACK) + && (verb != Packet::VERB_QOS_MEASUREMENT)); + if (isFrame || shouldRecord) { + Mutex::Lock _l(_paths_m); + if (isFrame) { + ++(path->_packetsIn); + _lastFrame=now; + } + if (shouldRecord) { + path->ackStatsIn[packetId] = payloadLength; + ++(path->_packetsReceivedSinceLastAck); + 
path->qosStatsIn[packetId] = now; + ++(path->_packetsReceivedSinceLastQoS); + path->packetValiditySamples.push(true); + } + } + /** + * Learn new flows and pro-actively create entries for them in the bond so + * that the next time we send a packet out that is part of a flow we know + * which path to use. + */ + if ((flowId != ZT_QOS_NO_FLOW) + && (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR + || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR + || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE)) { + Mutex::Lock _l(_flows_m); + SharedPtr flow; + if (!_flows.count(flowId)) { + flow = createFlow(path, flowId, 0, now); + } else { + flow = _flows[flowId]; + } + if (flow) { + flow->recordIncomingBytes(payloadLength); + } + } +} + +void Bond::receivedQoS(const SharedPtr& path, int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts) +{ + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "receivedQoS() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + Mutex::Lock _l(_paths_m); + // Look up egress times and compute latency values for each record + std::map::iterator it; + for (int j=0; jqosStatsOut.find(rx_id[j]); + if (it != path->qosStatsOut.end()) { + path->latencySamples.push(((uint16_t)(now - it->second) - rx_ts[j]) / 2); + path->qosStatsOut.erase(it); + } + } + path->qosRecordSize.push(count); + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "receivedQoS() on path %s %s, count=%d, successful=%d, qosStatsOut.size()=%d\n", getSlave(path)->ifname().c_str(), pathStr, count, path->aknowledgedQoSRecordCountSinceLastCheck, path->qosStatsOut.size()); +} + +void Bond::receivedAck(const SharedPtr& path, int64_t now, int32_t ackedBytes) +{ + Mutex::Lock _l(_paths_m); + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "receivedAck() %s %s, (ackedBytes=%d, lastAckReceived=%lld, ackAge=%lld)\n", getSlave(path)->ifname().c_str(), pathStr, ackedBytes, path->lastAckReceived, path->ackAge(now)); + 
path->_lastAckReceived = now; + path->_unackedBytes = (ackedBytes > path->_unackedBytes) ? 0 : path->_unackedBytes - ackedBytes; + int64_t timeSinceThroughputEstimate = (now - path->_lastThroughputEstimation); + if (timeSinceThroughputEstimate >= throughputMeasurementInterval) { + // TODO: See if this floating point math can be reduced + uint64_t throughput = (uint64_t)((float)(path->_bytesAckedSinceLastThroughputEstimation) / ((float)timeSinceThroughputEstimate / (float)1000)); + throughput /= 1000; + if (throughput > 0.0) { + path->throughputSamples.push(throughput); + path->_throughputMax = throughput > path->_throughputMax ? throughput : path->_throughputMax; + } + path->_lastThroughputEstimation = now; + path->_bytesAckedSinceLastThroughputEstimation = 0; + } else { + path->_bytesAckedSinceLastThroughputEstimation += ackedBytes; + } +} + +int32_t Bond::generateQoSPacket(const SharedPtr& path, int64_t now, char *qosBuffer) +{ + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "generateQoSPacket() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + int32_t len = 0; + std::map::iterator it = path->qosStatsIn.begin(); + int i=0; + int numRecords = std::min(path->_packetsReceivedSinceLastQoS,ZT_QOS_TABLE_SIZE); + while (iqosStatsIn.end()) { + uint64_t id = it->first; + memcpy(qosBuffer, &id, sizeof(uint64_t)); + qosBuffer+=sizeof(uint64_t); + uint16_t holdingTime = (uint16_t)(now - it->second); + memcpy(qosBuffer, &holdingTime, sizeof(uint16_t)); + qosBuffer+=sizeof(uint16_t); + len+=sizeof(uint64_t)+sizeof(uint16_t); + path->qosStatsIn.erase(it++); + ++i; + } + return len; +} + +bool Bond::assignFlowToBondedPath(SharedPtr &flow, int64_t now) +{ + //fprintf(stderr, "assignFlowToBondedPath\n"); + char curPathStr[128]; + unsigned int idx = ZT_MAX_PEER_NETWORK_PATHS; + if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR) { + idx = abs((int)(flow->id() % (_numBondedPaths))); + flow->assignPath(_paths[_bondedIdx[idx]],now); + } + if 
(_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { + unsigned char entropy; + Utils::getSecureRandom(&entropy, 1); + if (_totalBondUnderload) { + entropy %= _totalBondUnderload; + } + if (!_numBondedPaths) { + fprintf(stderr, "no bonded paths for flow assignment\n"); + return false; + } + for(unsigned int i=0;ibonded()) { + SharedPtr slave = RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + _paths[i]->address().toString(curPathStr); + uint8_t probabilitySegment = (_totalBondUnderload > 0) ? _paths[i]->_affinity : _paths[i]->_allocation; + //fprintf(stderr, "i=%2d, entropy=%3d, alloc=%3d, byteload=%4d, segment=%3d, _totalBondUnderload=%3d, ifname=%s, path=%20s\n", i, entropy, _paths[i]->allocation, _paths[i]->relativeByteLoad, probabilitySegment, _totalBondUnderload, slave->ifname().c_str(), curPathStr); + if (entropy <= probabilitySegment) { + idx = i; + //fprintf(stderr, "\t is best path\n"); + break; + } + entropy -= probabilitySegment; + } + } + if (idx < ZT_MAX_PEER_NETWORK_PATHS) { + flow->assignPath(_paths[idx],now); + ++(_paths[idx]->_assignedFlowCount); + } + else { + fprintf(stderr, "could not assign flow?\n"); exit(0); // TODO: Remove + return false; + } + } + flow->assignedPath()->address().toString(curPathStr); + SharedPtr slave = RR->bc->getSlaveBySocket(_policyAlias, flow->assignedPath()->localSocket()); + fprintf(stderr, "assigned (tx) flow %x with peer %llx to path %s on %s (idx=%d)\n", flow->id(), _peer->_id.address().toInt(), curPathStr, slave->ifname().c_str(), idx); + return true; +} + +SharedPtr Bond::createFlow(const SharedPtr &path, int32_t flowId, unsigned char entropy, int64_t now) +{ + //fprintf(stderr, "createFlow\n"); + char curPathStr[128]; + // --- + if (!_numBondedPaths) { + fprintf(stderr, "there are no bonded paths, cannot assign flow\n"); + return SharedPtr(); + } + if (_flows.size() >= ZT_FLOW_MAX_COUNT) { + fprintf(stderr, "max number of flows reached (%d), forcibly forgetting oldest flow\n", 
ZT_FLOW_MAX_COUNT); + forgetFlowsWhenNecessary(0,true,now); + } + SharedPtr flow = new Flow(flowId, now); + _flows[flowId] = flow; + fprintf(stderr, "new flow %x detected with peer %llx, %lu active flow(s)\n", flowId, _peer->_id.address().toInt(), (_flows.size())); + /** + * Add a flow with a given Path already provided. This is the case when a packet + * is received on a path but no flow exists, in this case we simply assign the path + * that the remote peer chose for us. + */ + if (path) { + flow->assignPath(path,now); + path->address().toString(curPathStr); + SharedPtr slave = RR->bc->getSlaveBySocket(_policyAlias, flow->assignedPath()->localSocket()); + fprintf(stderr, "assigned (rx) flow %x with peer %llx to path %s on %s\n", flow->id(), _peer->_id.address().toInt(), curPathStr, slave->ifname().c_str()); + } + /** + * Add a flow when no path was provided. This means that it is an outgoing packet + * and that it is up to the local peer to decide how to load-balance its transmission. 
+ */ + else if (!path) { + assignFlowToBondedPath(flow, now); + } + return flow; +} + +void Bond::forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now) +{ + //fprintf(stderr, "forgetFlowsWhenNecessary\n"); + std::map >::iterator it = _flows.begin(); + std::map >::iterator oldestFlow = _flows.end(); + SharedPtr expiredFlow; + if (age) { // Remove by specific age + while (it != _flows.end()) { + if (it->second->age(now) > age) { + fprintf(stderr, "forgetting flow %x between this node and %llx, %lu active flow(s)\n", it->first, _peer->_id.address().toInt(), (_flows.size()-1)); + it = _flows.erase(it); + } else { + ++it; + } + } + } + else if (oldest) { // Remove single oldest by natural expiration + uint64_t maxAge = 0; + while (it != _flows.end()) { + if (it->second->age(now) > maxAge) { + maxAge = (now - it->second->age(now)); + oldestFlow = it; + } + ++it; + } + if (oldestFlow != _flows.end()) { + fprintf(stderr, "forgetting oldest flow %x (of age %llu) between this node and %llx, %lu active flow(s)\n", oldestFlow->first, oldestFlow->second->age(now), _peer->_id.address().toInt(), (_flows.size()-1)); + _flows.erase(oldestFlow); + } + } + fprintf(stderr, "000\n"); +} + +void Bond::processIncomingPathNegotiationRequest(uint64_t now, SharedPtr &path, int16_t remoteUtility) +{ + //fprintf(stderr, "processIncomingPathNegotiationRequest\n"); + if (_abSlaveSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { + return; + } + Mutex::Lock _l(_paths_m); + char pathStr[128]; + path->address().toString(pathStr); + if (!_lastPathNegotiationCheck) { + return; + } + SharedPtr slave = RR->bc->getSlaveBySocket(_policyAlias, path->localSocket()); + if (remoteUtility > _localUtility) { + fprintf(stderr, "peer suggests path, its utility (%d) is greater than ours (%d), we will switch to %s on %s (ls=%llx)\n", remoteUtility, _localUtility, pathStr, slave->ifname().c_str(), path->localSocket()); + negotiatedPath = path; + } + if (remoteUtility < _localUtility) { + 
fprintf(stderr, "peer suggests path, its utility (%d) is less than ours (%d), we will NOT switch to %s on %s (ls=%llx)\n", remoteUtility, _localUtility, pathStr, slave->ifname().c_str(), path->localSocket()); + } + if (remoteUtility == _localUtility) { + fprintf(stderr, "peer suggest path, but utility is equal, picking choice made by peer with greater identity.\n"); + if (_peer->_id.address().toInt() > RR->node->identity().address().toInt()) { + fprintf(stderr, "peer identity was greater, going with their choice of %s on %s (ls=%llx)\n", pathStr, slave->ifname().c_str(), path->localSocket()); + negotiatedPath = path; + } else { + fprintf(stderr, "our identity was greater, no change\n"); + } + } +} + +void Bond::pathNegotiationCheck(void *tPtr, const int64_t now) +{ + //fprintf(stderr, "pathNegotiationCheck\n"); + char pathStr[128]; + int maxInPathIdx = ZT_MAX_PEER_NETWORK_PATHS; + int maxOutPathIdx = ZT_MAX_PEER_NETWORK_PATHS; + uint64_t maxInCount = 0; + uint64_t maxOutCount = 0; + for(unsigned int i=0;i_packetsIn > maxInCount) { + maxInCount = _paths[i]->_packetsIn; + maxInPathIdx = i; + } + if (_paths[i]->_packetsOut > maxOutCount) { + maxOutCount = _paths[i]->_packetsOut; + maxOutPathIdx = i; + } + _paths[i]->resetPacketCounts(); + } + bool _peerLinksSynchronized = ((maxInPathIdx != ZT_MAX_PEER_NETWORK_PATHS) + && (maxOutPathIdx != ZT_MAX_PEER_NETWORK_PATHS) + && (maxInPathIdx != maxOutPathIdx)) ? 
false : true; + /** + * Determine utility and attempt to petition remote peer to switch to our chosen path + */ + if (!_peerLinksSynchronized) { + _localUtility = _paths[maxOutPathIdx]->_failoverScore - _paths[maxInPathIdx]->_failoverScore; + if (_paths[maxOutPathIdx]->_negotiated) { + _localUtility -= ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED; + } + if ((now - _lastSentPathNegotiationRequest) > ZT_PATH_NEGOTIATION_CUTOFF_TIME) { + fprintf(stderr, "BT: (sync) it's been long enough, sending more requests.\n"); + _numSentPathNegotiationRequests = 0; + } + if (_numSentPathNegotiationRequests < ZT_PATH_NEGOTIATION_TRY_COUNT) { + if (_localUtility >= 0) { + fprintf(stderr, "BT: (sync) paths appear to be out of sync (utility=%d)\n", _localUtility); + sendPATH_NEGOTIATION_REQUEST(tPtr, _paths[maxOutPathIdx]); + ++_numSentPathNegotiationRequests; + _lastSentPathNegotiationRequest = now; + _paths[maxOutPathIdx]->address().toString(pathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[maxOutPathIdx]->localSocket()); + fprintf(stderr, "sending request to use %s on %s, ls=%llx, utility=%d\n", pathStr, slave->ifname().c_str(), _paths[maxOutPathIdx]->localSocket(), _localUtility); + } + } + /** + * Give up negotiating and consider switching + */ + else if ((now - _lastSentPathNegotiationRequest) > (2 * ZT_PATH_NEGOTIATION_CHECK_INTERVAL)) { + if (_localUtility == 0) { + // There's no loss to us, just switch without sending a another request + fprintf(stderr, "BT: (sync) giving up, switching to remote peer's path.\n"); + negotiatedPath = _paths[maxInPathIdx]; + } + } + } +} + +void Bond::sendPATH_NEGOTIATION_REQUEST(void *tPtr, const SharedPtr &path) +{ + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "sendPATH_NEGOTIATION_REQUEST() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + if (_abSlaveSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { + return; + } + Packet 
outp(_peer->_id.address(),RR->identity.address(),Packet::VERB_PATH_NEGOTIATION_REQUEST); + outp.append(_localUtility); + if (path->address()) { + outp.armor(_peer->key(),false); + RR->node->putPacket(tPtr,path->localSocket(),path->address(),outp.data(),outp.size()); + } +} + +void Bond::sendACK(void *tPtr,const SharedPtr &path,const int64_t localSocket, + const InetAddress &atAddress,int64_t now) +{ + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "sendACK() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + Packet outp(_peer->_id.address(),RR->identity.address(),Packet::VERB_ACK); + int32_t bytesToAck = 0; + std::map::iterator it = path->ackStatsIn.begin(); + while (it != path->ackStatsIn.end()) { + bytesToAck += it->second; + ++it; + } + outp.append(bytesToAck); + if (atAddress) { + outp.armor(_peer->key(),false); + RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); + } else { + RR->sw->send(tPtr,outp,false); + } + path->ackStatsIn.clear(); + path->_packetsReceivedSinceLastAck = 0; + path->_lastAckSent = now; +} + +void Bond::sendQOS_MEASUREMENT(void *tPtr,const SharedPtr &path,const int64_t localSocket, + const InetAddress &atAddress,int64_t now) +{ + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "sendQOS() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + const int64_t _now = RR->node->now(); + Packet outp(_peer->_id.address(),RR->identity.address(),Packet::VERB_QOS_MEASUREMENT); + char qosData[ZT_QOS_MAX_PACKET_SIZE]; + int16_t len = generateQoSPacket(path, _now,qosData); + outp.append(qosData,len); + if (atAddress) { + outp.armor(_peer->key(),false); + RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); + } else { + RR->sw->send(tPtr,outp,false); + } + // Account for the fact that a VERB_QOS_MEASUREMENT was just sent. Reset timers. 
+ path->_packetsReceivedSinceLastQoS = 0; + path->_lastQoSMeasurement = now; +} + +void Bond::processBackgroundTasks(void *tPtr, const int64_t now) +{ + Mutex::Lock _l(_paths_m); + if (!_peer->_canUseMultipath || (now - _lastBackgroundTaskCheck) < ZT_BOND_BACKGROUND_TASK_MIN_INTERVAL) { + return; + } + _lastBackgroundTaskCheck = now; + + // Compute dynamic path monitor timer interval + if (_slaveMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC) { + int suggestedMonitorInterval = (now - _lastFrame) / 100; + _dynamicPathMonitorInterval = std::min(ZT_PATH_HEARTBEAT_PERIOD, ((suggestedMonitorInterval > _bondMonitorInterval) ? suggestedMonitorInterval : _bondMonitorInterval)); + //fprintf(stderr, "_lastFrame=%llu, suggestedMonitorInterval=%d, _dynamicPathMonitorInterval=%d\n", + // (now-_lastFrame), suggestedMonitorInterval, _dynamicPathMonitorInterval); + } + + if (_slaveMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC) { + _shouldCollectPathStatistics = true; + } + + // Memoize oft-used properties in the packet ingress/egress logic path + if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE) { + // Required for real-time balancing + _shouldCollectPathStatistics = true; + } + if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + if (_abSlaveSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_BETTER) { + // Required for judging suitability of primary slave after recovery + _shouldCollectPathStatistics = true; + } + if (_abSlaveSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { + // Required for judging suitability of new candidate primary + _shouldCollectPathStatistics = true; + } + } + if ((now - _lastCheckUserPreferences) > 1000) { + _lastCheckUserPreferences = now; + applyUserPrefs(); + } + + curateBond(now,false); + if ((now - _lastQualityEstimation) > _qualityEstimationInterval) { + _lastQualityEstimation = now; + estimatePathQuality(now); + } + dumpInfo(now); + + // Send QOS/ACK packets as needed + if 
(_shouldCollectPathStatistics) { + for(unsigned int i=0;iallowed()) { + if (_paths[i]->needsToSendQoS(now,_qosSendInterval)) { + sendQOS_MEASUREMENT(tPtr, _paths[i], _paths[i]->localSocket(), _paths[i]->address(), now); + } + if (_paths[i]->needsToSendAck(now,_ackSendInterval)) { + sendACK(tPtr, _paths[i], _paths[i]->localSocket(), _paths[i]->address(), now); + } + } + } + } + // Perform periodic background tasks unique to each bonding policy + switch (_bondingPolicy) + { + case ZT_BONDING_POLICY_ACTIVE_BACKUP: + processActiveBackupTasks(now); + break; + case ZT_BONDING_POLICY_BROADCAST: + break; + case ZT_BONDING_POLICY_BALANCE_RR: + case ZT_BONDING_POLICY_BALANCE_XOR: + case ZT_BONDING_POLICY_BALANCE_AWARE: + processBalanceTasks(now); + break; + default: + break; + } + // Check whether or not a path negotiation needs to be performed + if (((now - _lastPathNegotiationCheck) > ZT_PATH_NEGOTIATION_CHECK_INTERVAL) && _allowPathNegotiation) { + _lastPathNegotiationCheck = now; + pathNegotiationCheck(tPtr, now); + } +} + +void Bond::applyUserPrefs() +{ + fprintf(stderr, "applyUserPrefs, _minReqPathMonitorInterval=%d\n", RR->bc->minReqPathMonitorInterval()); + for(unsigned int i=0;i sl = getSlave(_paths[i]); + if (sl) { + if (sl->monitorInterval() == 0) { // If no interval was specified for this slave, use more generic bond-wide interval + sl->setMonitorInterval(_bondMonitorInterval); + } + RR->bc->setMinReqPathMonitorInterval((sl->monitorInterval() < RR->bc->minReqPathMonitorInterval()) ? sl->monitorInterval() : RR->bc->minReqPathMonitorInterval()); + bool bFoundCommonSlave = false; + SharedPtr commonSlave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + for(unsigned int j=0;jbc->getSlaveBySocket(_policyAlias, _paths[j]->localSocket()) == commonSlave) { + bFoundCommonSlave = true; + } + } + } + _paths[i]->_monitorInterval = sl->monitorInterval(); + _paths[i]->_upDelay = sl->upDelay() ? 
sl->upDelay() : _upDelay; + _paths[i]->_downDelay = sl->downDelay() ? sl->downDelay() : _downDelay; + _paths[i]->_ipvPref = sl->ipvPref(); + _paths[i]->_mode = sl->mode(); + _paths[i]->_enabled = sl->enabled(); + _paths[i]->_onlyPathOnSlave = !bFoundCommonSlave; + } + } + if (_peer) { + _peer->_shouldCollectPathStatistics = _shouldCollectPathStatistics; + _peer->_bondingPolicy = _bondingPolicy; + } +} + +void Bond::curateBond(const int64_t now, bool rebuildBond) +{ + //fprintf(stderr, "%lu curateBond (rebuildBond=%d)\n", ((now - RR->bc->getBondStartTime())), rebuildBond); + char pathStr[128]; + /** + * Update path states + */ + for(unsigned int i=0;ieligible(now,_ackSendInterval); + if (currEligibility != _paths[i]->_lastEligibilityState) { + _paths[i]->address().toString(pathStr); + //fprintf(stderr, "\n\n%ld path eligibility (for %s, %s) has changed (from %d to %d)\n", (RR->node->now() - RR->bc->getBondStartTime()), getSlave(_paths[i])->ifname().c_str(), pathStr, _paths[i]->lastCheckedEligibility, _paths[i]->eligible(now,_ackSendInterval)); + if (currEligibility) { + rebuildBond = true; + } + if (!currEligibility) { + _paths[i]->adjustRefractoryPeriod(now, _defaultPathRefractoryPeriod, !currEligibility); + if (_paths[i]->bonded()) { + //fprintf(stderr, "the path was bonded, reallocation of its flows will occur soon\n"); + rebuildBond = true; + _paths[i]->_shouldReallocateFlows = _paths[i]->bonded(); + _paths[i]->setBonded(false); + } else { + //fprintf(stderr, "the path was not bonded, no consequences\n"); + } + } + } + if (currEligibility) { + _paths[i]->adjustRefractoryPeriod(now, _defaultPathRefractoryPeriod, false); + } + _paths[i]->_lastEligibilityState = currEligibility; + } + /** + * Curate the set of paths that are part of the bond proper. Selects a single path + * per logical slave according to eligibility and user-specified constraints. 
+ */ + if ((_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR) + || (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR) + || (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE)) { + if (!_numBondedPaths) { + rebuildBond = true; + } + // TODO: Optimize + if (rebuildBond) { + int updatedBondedPathCount = 0; + std::map,int> slaveMap; + for (int i=0;iallowed() && (_paths[i]->eligible(now,_ackSendInterval) || !_numBondedPaths)) { + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + if (!slaveMap.count(slave)) { + slaveMap[slave] = i; + } + else { + bool overriden = false; + _paths[i]->address().toString(pathStr); + //fprintf(stderr, " slave representative path already exists! (%s %s)\n", getSlave(_paths[i])->ifname().c_str(), pathStr); + if (_paths[i]->preferred() && !_paths[slaveMap[slave]]->preferred()) { + // Override previous choice if preferred + //fprintf(stderr, "overriding since its preferred!\n"); + if (_paths[slaveMap[slave]]->_assignedFlowCount) { + _paths[slaveMap[slave]]->_deprecated = true; + } + else { + _paths[slaveMap[slave]]->_deprecated = true; + _paths[slaveMap[slave]]->setBonded(false); + } + slaveMap[slave] = i; + overriden = true; + } + if ((_paths[i]->preferred() && _paths[slaveMap[slave]]->preferred()) + || (!_paths[i]->preferred() && !_paths[slaveMap[slave]]->preferred())) { + if (_paths[i]->preferenceRank() > _paths[slaveMap[slave]]->preferenceRank()) { + // Override if higher preference + //fprintf(stderr, "overriding according to preference preferenceRank!\n"); + if (_paths[slaveMap[slave]]->_assignedFlowCount) { + _paths[slaveMap[slave]]->_deprecated = true; + } + else { + _paths[slaveMap[slave]]->_deprecated = true; + _paths[slaveMap[slave]]->setBonded(false); + } + slaveMap[slave] = i; + } + } + } + } + } + std::map,int>::iterator it = slaveMap.begin(); + for (int i=0; isecond; + _paths[_bondedIdx[i]]->setBonded(true); + ++it; + ++updatedBondedPathCount; + _paths[_bondedIdx[i]]->address().toString(pathStr); + 
fprintf(stderr, "setting i=%d, _bondedIdx[%d]=%d to bonded (%s %s)\n", i, i, _bondedIdx[i], getSlave(_paths[_bondedIdx[i]])->ifname().c_str(), pathStr); + } + } + _numBondedPaths = updatedBondedPathCount; + + if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR) { + // Cause a RR reset since the currently used index might no longer be valid + _rrPacketsSentOnCurrSlave = _packetsPerSlave; + } + } + } +} + +void Bond::estimatePathQuality(const int64_t now) +{ + char pathStr[128]; + //--- + + uint32_t totUserSpecifiedSlaveSpeed = 0; + if (_numBondedPaths) { // Compute relative user-specified speeds of slaves + for(unsigned int i=0;i<_numBondedPaths;++i) { + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + if (_paths[i] && _paths[i]->allowed()) { + totUserSpecifiedSlaveSpeed += slave->speed(); + } + } + for(unsigned int i=0;i<_numBondedPaths;++i) { + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + if (_paths[i] && _paths[i]->allowed()) { + slave->setRelativeSpeed(round( ((float)slave->speed() / (float)totUserSpecifiedSlaveSpeed) * 255)); + } + } + } + + float lat[ZT_MAX_PEER_NETWORK_PATHS]; + float pdv[ZT_MAX_PEER_NETWORK_PATHS]; + float plr[ZT_MAX_PEER_NETWORK_PATHS]; + float per[ZT_MAX_PEER_NETWORK_PATHS]; + float thr[ZT_MAX_PEER_NETWORK_PATHS]; + float thm[ZT_MAX_PEER_NETWORK_PATHS]; + float thv[ZT_MAX_PEER_NETWORK_PATHS]; + + float maxLAT = 0; + float maxPDV = 0; + float maxPLR = 0; + float maxPER = 0; + float maxTHR = 0; + float maxTHM = 0; + float maxTHV = 0; + + float quality[ZT_MAX_PEER_NETWORK_PATHS]; + uint8_t alloc[ZT_MAX_PEER_NETWORK_PATHS]; + + float totQuality = 0.0f; + + memset(&lat, 0, sizeof(lat)); + memset(&pdv, 0, sizeof(pdv)); + memset(&plr, 0, sizeof(plr)); + memset(&per, 0, sizeof(per)); + memset(&thr, 0, sizeof(thr)); + memset(&thm, 0, sizeof(thm)); + memset(&thv, 0, sizeof(thv)); + memset(&quality, 0, sizeof(quality)); + memset(&alloc, 0, sizeof(alloc)); + + // Compute initial 
summary statistics + for(unsigned int i=0;iallowed()) { + continue; + } + // Compute/Smooth average of real-world observations + _paths[i]->_latencyMean = _paths[i]->latencySamples.mean(); + _paths[i]->_latencyVariance = _paths[i]->latencySamples.stddev(); + _paths[i]->_packetErrorRatio = 1.0 - (_paths[i]->packetValiditySamples.count() ? _paths[i]->packetValiditySamples.mean() : 1.0); + + if (userHasSpecifiedSlaveSpeeds()) { + // Use user-reported metrics + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + if (slave) { + _paths[i]->_throughputMean = slave->speed(); + _paths[i]->_throughputVariance = 0; + } + } + /* + else { + // Use estimated metrics + if (_paths[i]->throughputSamples.count()) { + // If we have samples, use them + _paths[i]->throughputMean = (uint64_t)_paths[i]->throughputSamples.mean(); + if (_paths[i]->throughputMean > 0) { + _paths[i]->throughputVarianceSamples.push((float)_paths[i]->throughputSamples.stddev() / (float)_paths[i]->throughputMean); + _paths[i]->throughputVariance = _paths[i]->throughputVarianceSamples.mean(); + } + } + else { + // No samples have been collected yet, assume best case scenario + _paths[i]->throughputMean = ZT_QOS_THR_NORM_MAX; + _paths[i]->throughputVariance = 0; + } + } + */ + // Drain unacknowledged QoS records + std::map::iterator it = _paths[i]->qosStatsOut.begin(); + uint64_t currentLostRecords = 0; + while (it != _paths[i]->qosStatsOut.end()) { + int qosRecordTimeout = 5000; //_paths[i]->monitorInterval() * ZT_MULTIPATH_QOS_ACK_INTERVAL_MULTIPLIER * 8; + if ((now - it->second) >= qosRecordTimeout) { + //fprintf(stderr, "packetId=%llx was lost\n", it->first); + it = _paths[i]->qosStatsOut.erase(it); + ++currentLostRecords; + } else { ++it; } + } + + quality[i]=0; + totQuality=0; + // Normalize raw observations according to sane limits and/or user specified values + lat[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_latencyMean, 0, _maxAcceptableLatency, 0, 1)); + pdv[i] = 1.0 
/ expf(4*Utils::normalize(_paths[i]->_latencyVariance, 0, _maxAcceptablePacketDelayVariance, 0, 1)); + plr[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_packetLossRatio, 0, _maxAcceptablePacketLossRatio, 0, 1)); + per[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_packetErrorRatio, 0, _maxAcceptablePacketErrorRatio, 0, 1)); + //thr[i] = 1.0; //Utils::normalize(_paths[i]->throughputMean, 0, ZT_QOS_THR_NORM_MAX, 0, 1); + //thm[i] = 1.0; //Utils::normalize(_paths[i]->throughputMax, 0, ZT_QOS_THM_NORM_MAX, 0, 1); + //thv[i] = 1.0; //1.0 / expf(4*Utils::normalize(_paths[i]->throughputVariance, 0, ZT_QOS_THV_NORM_MAX, 0, 1)); + //scp[i] = _paths[i]->ipvPref != 0 ? 1.0 : Utils::normalize(_paths[i]->ipScope(), InetAddress::IP_SCOPE_NONE, InetAddress::IP_SCOPE_PRIVATE, 0, 1); + // Record bond-wide maximums to determine relative values + maxLAT = lat[i] > maxLAT ? lat[i] : maxLAT; + maxPDV = pdv[i] > maxPDV ? pdv[i] : maxPDV; + maxPLR = plr[i] > maxPLR ? plr[i] : maxPLR; + maxPER = per[i] > maxPER ? per[i] : maxPER; + //maxTHR = thr[i] > maxTHR ? thr[i] : maxTHR; + //maxTHM = thm[i] > maxTHM ? thm[i] : maxTHM; + //maxTHV = thv[i] > maxTHV ? thv[i] : maxTHV; + + //fprintf(stdout, "EH %d: lat=%8.3f, ltm=%8.3f, pdv=%8.3f, plr=%5.3f, per=%5.3f, thr=%8f, thm=%5.3f, thv=%5.3f, avl=%5.3f, age=%8.2f, scp=%4d, q=%5.3f, qtot=%5.3f, ac=%d if=%s, path=%s\n", + // i, lat[i], ltm[i], pdv[i], plr[i], per[i], thr[i], thm[i], thv[i], avl[i], age[i], scp[i], quality[i], totQuality, alloc[i], getSlave(_paths[i])->ifname().c_str(), pathStr); + + } + // Convert metrics to relative quantities and apply contribution weights + for(unsigned int i=0;ibonded()) { + quality[i] += ((maxLAT > 0.0f ? lat[i] / maxLAT : 0.0f) * _qualityWeights[ZT_QOS_LAT_IDX]); + quality[i] += ((maxPDV > 0.0f ? pdv[i] / maxPDV : 0.0f) * _qualityWeights[ZT_QOS_PDV_IDX]); + quality[i] += ((maxPLR > 0.0f ? plr[i] / maxPLR : 0.0f) * _qualityWeights[ZT_QOS_PLR_IDX]); + quality[i] += ((maxPER > 0.0f ? 
per[i] / maxPER : 0.0f) * _qualityWeights[ZT_QOS_PER_IDX]); + //quality[i] += ((maxTHR > 0.0f ? thr[i] / maxTHR : 0.0f) * _qualityWeights[ZT_QOS_THR_IDX]); + //quality[i] += ((maxTHM > 0.0f ? thm[i] / maxTHM : 0.0f) * _qualityWeights[ZT_QOS_THM_IDX]); + //quality[i] += ((maxTHV > 0.0f ? thv[i] / maxTHV : 0.0f) * _qualityWeights[ZT_QOS_THV_IDX]); + //quality[i] += (scp[i] * _qualityWeights[ZT_QOS_SCP_IDX]); + totQuality += quality[i]; + } + } + // + for(unsigned int i=0;ibonded()) { + alloc[i] = std::ceil((quality[i] / totQuality) * (float)255); + _paths[i]->_allocation = alloc[i]; + } + } + /* + if ((now - _lastLogTS) > 500) { + if (!relevant()) {return;} + //fprintf(stderr, "\n"); + _lastPrintTS = now; + _lastLogTS = now; + int numPlottablePaths=0; + for(unsigned int i=0;iaddress().toString(pathStr); + //fprintf(stderr, "%lu FIN [%d/%d]: pmi=%5d, lat=%4.3f, ltm=%4.3f, pdv=%4.3f, plr=%4.3f, per=%4.3f, thr=%4.3f, thm=%4.3f, thv=%4.3f, age=%4.3f, scp=%4d, q=%4.3f, qtot=%4.3f, ac=%4d, asf=%3d, if=%s, path=%20s, bond=%d, qosout=%d, plrraw=%d\n", + // ((now - RR->bc->getBondStartTime())), i, _numBondedPaths, _paths[i]->monitorInterval, + // lat[i], ltm[i], pdv[i], plr[i], per[i], thr[i], thm[i], thv[i], age[i], scp[i], + // quality[i], totQuality, alloc[i], _paths[i]->assignedFlowCount, getSlave(_paths[i])->ifname().c_str(), pathStr, _paths[i]->bonded(), _paths[i]->qosStatsOut.size(), _paths[i]->packetLossRatio); + } + } + if (numPlottablePaths < 2) { + return; + } + if (!_header) { + fprintf(stdout, "now, bonded, relativeUnderload, flows, "); + for(unsigned int i=0;iaddress().toString(pathStr); + std::string label = std::string((pathStr)) + " " + getSlave(_paths[i])->ifname(); + for (int i=0; i<19; ++i) { + fprintf(stdout, "%s, ", label.c_str()); + } + } + } + _header=true; + } + fprintf(stdout, "%ld, %d, %d, %d, ",((now - RR->bc->getBondStartTime())),_numBondedPaths,_totalBondUnderload, _flows.size()); + for(unsigned int i=0;iaddress().toString(pathStr); + 
fprintf(stdout, "%s, %s, %8.3f, %8.3f, %8.3f, %5.3f, %5.3f, %5.3f, %8f, %5.3f, %5.3f, %d, %5.3f, %d, %d, %d, %d, %d, %d, ", + getSlave(_paths[i])->ifname().c_str(), pathStr, _paths[i]->latencyMean, lat[i],pdv[i], _paths[i]->packetLossRatio, plr[i],per[i],thr[i],thm[i],thv[i],(now - _paths[i]->lastIn()),quality[i],alloc[i], + _paths[i]->relativeByteLoad, _paths[i]->assignedFlowCount, _paths[i]->alive(now, true), _paths[i]->eligible(now,_ackSendInterval), _paths[i]->qosStatsOut.size()); + } + } + fprintf(stdout, "\n"); + } + */ +} + +void Bond::processBalanceTasks(const int64_t now) +{ + // Omitted +} + +void Bond::dequeueNextActiveBackupPath(const uint64_t now) +{ + //fprintf(stderr, "dequeueNextActiveBackupPath\n"); + if (_abFailoverQueue.empty()) { + return; + } + _abPath = _abFailoverQueue.front(); + _abFailoverQueue.pop_front(); + _lastActiveBackupPathChange = now; + for (int i=0; iresetPacketCounts(); + } + } +} + +void Bond::processActiveBackupTasks(const int64_t now) +{ + //fprintf(stderr, "%llu processActiveBackupTasks\n", (now - RR->bc->getBondStartTime())); + char pathStr[128]; char prevPathStr[128]; char curPathStr[128]; + + SharedPtr prevActiveBackupPath = _abPath; + SharedPtr nonPreferredPath; + bool bFoundPrimarySlave = false; + + /** + * Select initial "active" active-backup slave + */ + if (!_abPath) { + fprintf(stderr, "%llu no active backup path yet...\n", ((now - RR->bc->getBondStartTime()))); + /** + * [Automatic mode] + * The user has not explicitly specified slaves or their failover schedule, + * the bonding policy will now select the first eligible path and set it as + * its active backup path, if a substantially better path is detected the bonding + * policy will assign it as the new active backup path. If the path fails it will + * simply find the next eligible path. + */ + if (!userHasSpecifiedSlaves()) { + fprintf(stderr, "%llu AB: (auto) user did not specify any slaves. 
waiting until we know more\n", ((now - RR->bc->getBondStartTime()))); + for (int i=0; ieligible(now,_ackSendInterval)) { + _paths[i]->address().toString(curPathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + if (slave) { + fprintf(stderr, "%llu AB: (initial) [%d] found eligible path %s on: %s\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, slave->ifname().c_str()); + } + _abPath = _paths[i]; + break; + } + } + } + /** + * [Manual mode] + * The user has specified slaves or failover rules that the bonding policy should adhere to. + */ + else if (userHasSpecifiedSlaves()) { + fprintf(stderr, "%llu AB: (manual) no active backup slave, checking local.conf\n", ((now - RR->bc->getBondStartTime()))); + if (userHasSpecifiedPrimarySlave()) { + fprintf(stderr, "%llu AB: (manual) user has specified primary slave, looking for it.\n", ((now - RR->bc->getBondStartTime()))); + for (int i=0; i slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + if (_paths[i]->eligible(now,_ackSendInterval) && slave->primary()) { + if (!_paths[i]->preferred()) { + _paths[i]->address().toString(curPathStr); + fprintf(stderr, "%llu AB: (initial) [%d] found path on primary slave, taking note in case we don't find a preferred path\n", ((now - RR->bc->getBondStartTime())), i); + nonPreferredPath = _paths[i]; + bFoundPrimarySlave = true; + } + if (_paths[i]->preferred()) { + _abPath = _paths[i]; + _abPath->address().toString(curPathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + if (slave) { + fprintf(stderr, "%llu AB: (initial) [%d] found preferred path %s on primary slave: %s\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, slave->ifname().c_str()); + } + bFoundPrimarySlave = true; + break; + } + } + } + if (_abPath) { + _abPath->address().toString(curPathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _abPath->localSocket()); + if (slave) { + fprintf(stderr, 
"%llu AB: (initial) found preferred primary path: %s on %s\n", ((now - RR->bc->getBondStartTime())), curPathStr, slave->ifname().c_str()); + } + } + else { + if (bFoundPrimarySlave && nonPreferredPath) { + fprintf(stderr, "%llu AB: (initial) found a non-preferred primary path\n", ((now - RR->bc->getBondStartTime()))); + _abPath = nonPreferredPath; + } + } + if (!_abPath) { + fprintf(stderr, "%llu AB: (initial) designated primary slave is not yet ready\n", ((now - RR->bc->getBondStartTime()))); + // TODO: Should fail-over to specified backup or just wait? + } + } + else if (!userHasSpecifiedPrimarySlave()) { + int _abIdx = ZT_MAX_PEER_NETWORK_PATHS; + fprintf(stderr, "%llu AB: (initial) user did not specify primary slave, just picking something\n", ((now - RR->bc->getBondStartTime()))); + for (int i=0; ieligible(now,_ackSendInterval)) { + _abIdx = i; + break; + } + } + if (_abIdx == ZT_MAX_PEER_NETWORK_PATHS) { + fprintf(stderr, "%llu AB: (initial) unable to find a candidate next-best, no change\n", ((now - RR->bc->getBondStartTime()))); + } + else { + _abPath = _paths[_abIdx]; + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _abPath->localSocket()); + if (slave) { + fprintf(stderr, "%llu AB: (initial) selected non-primary slave idx=%d, %s on %s\n", ((now - RR->bc->getBondStartTime())), _abIdx, pathStr, slave->ifname().c_str()); + } + } + } + } + } + /** + * Update and maintain the active-backup failover queue + */ + if (_abPath) { + // Don't worry about the failover queue until we have an active slave + // Remove ineligible paths from the failover slave queue + for (std::list >::iterator it(_abFailoverQueue.begin()); it!=_abFailoverQueue.end();) { + if ((*it) && !(*it)->eligible(now,_ackSendInterval)) { + (*it)->address().toString(curPathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, (*it)->localSocket()); + if (slave) { + fprintf(stderr, "%llu AB: (fq) %s on %s is now ineligible, removing from failover queue\n", ((now - 
RR->bc->getBondStartTime())), curPathStr, slave->ifname().c_str()); + } + it = _abFailoverQueue.erase(it); + } else { + ++it; + } + } + /** + * Failover instructions were provided by user, build queue according those as well as IPv + * preference, disregarding performance. + */ + if (userHasSpecifiedFailoverInstructions()) { + /** + * Clear failover scores + */ + for (int i=0; i_failoverScore = 0; + } + } + //fprintf(stderr, "AB: (fq) user has specified specific failover instructions, will follow them.\n"); + for (int i=0; iallowed() || !_paths[i]->eligible(now,_ackSendInterval)) { + continue; + } + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + _paths[i]->address().toString(pathStr); + + int failoverScoreHandicap = _paths[i]->_failoverScore; + if (_paths[i]->preferred()) + { + failoverScoreHandicap += ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED; + //fprintf(stderr, "%s on %s ----> %d for preferred\n", pathStr, _paths[i]->ifname().c_str(), failoverScoreHandicap); + } + if (slave->primary()) { + // If using "optimize" primary reselect mode, ignore user slave designations + failoverScoreHandicap += ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY; + //fprintf(stderr, "%s on %s ----> %d for primary\n", pathStr, _paths[i]->ifname().c_str(), failoverScoreHandicap); + } + if (!_paths[i]->_failoverScore) { + // If we didn't inherit a failover score from a "parent" that wants to use this path as a failover + int newHandicap = failoverScoreHandicap ? 
failoverScoreHandicap : _paths[i]->_allocation; + _paths[i]->_failoverScore = newHandicap; + //fprintf(stderr, "%s on %s ----> %d for allocation\n", pathStr, _paths[i]->ifname().c_str(), newHandicap); + } + SharedPtr failoverSlave; + if (slave->failoverToSlave().length()) { + failoverSlave = RR->bc->getSlaveByName(_policyAlias, slave->failoverToSlave()); + } + if (failoverSlave) { + for (int j=0; jaddress().toString(pathStr); + int inheritedHandicap = failoverScoreHandicap - 10; + int newHandicap = _paths[j]->_failoverScore > inheritedHandicap ? _paths[j]->_failoverScore : inheritedHandicap; + //fprintf(stderr, "\thanding down %s on %s ----> %d\n", pathStr, getSlave(_paths[j])->ifname().c_str(), newHandicap); + if (!_paths[j]->preferred()) { + newHandicap--; + } + _paths[j]->_failoverScore = newHandicap; + } + } + } + if (_paths[i].ptr() != _abPath.ptr()) { + bool bFoundPathInQueue = false; + for (std::list >::iterator it(_abFailoverQueue.begin()); it!=_abFailoverQueue.end();++it) { + if (_paths[i].ptr() == (*it).ptr()) { + bFoundPathInQueue = true; + } + } + if (!bFoundPathInQueue) { + _paths[i]->address().toString(curPathStr); + fprintf(stderr, "%llu AB: (fq) [%d] added %s on %s to queue\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, getSlave(_paths[i])->ifname().c_str()); + _abFailoverQueue.push_front(_paths[i]); + } + } + } + } + /** + * No failover instructions provided by user, build queue according to performance + * and IPv preference. 
+ */ + else if (!userHasSpecifiedFailoverInstructions()) { + for (int i=0; iallowed() + || !_paths[i]->eligible(now,_ackSendInterval)) { + continue; + } + int failoverScoreHandicap = 0; + if (_paths[i]->preferred()) { + failoverScoreHandicap = ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED; + } + bool includeRefractoryPeriod = true; + if (!_paths[i]->eligible(now,includeRefractoryPeriod)) { + failoverScoreHandicap = -10000; + } + if (getSlave(_paths[i])->primary() && _abSlaveSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { + // If using "optimize" primary reselect mode, ignore user slave designations + failoverScoreHandicap = ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY; + } + if (_paths[i].ptr() == negotiatedPath.ptr()) { + _paths[i]->_negotiated = true; + failoverScoreHandicap = ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED; + } else { + _paths[i]->_negotiated = false; + } + _paths[i]->_failoverScore = _paths[i]->_allocation + failoverScoreHandicap; + if (_paths[i].ptr() != _abPath.ptr()) { + bool bFoundPathInQueue = false; + for (std::list >::iterator it(_abFailoverQueue.begin()); it!=_abFailoverQueue.end();++it) { + if (_paths[i].ptr() == (*it).ptr()) { + bFoundPathInQueue = true; + } + } + if (!bFoundPathInQueue) { + _paths[i]->address().toString(curPathStr); + fprintf(stderr, "%llu AB: (fq) [%d] added %s on %s to queue\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, getSlave(_paths[i])->ifname().c_str()); + _abFailoverQueue.push_front(_paths[i]); + } + } + } + } + _abFailoverQueue.sort(PathQualityComparator()); + if (_abFailoverQueue.empty()) { + fprintf(stderr, "%llu AB: (fq) the failover queue is empty, the active-backup bond is no longer fault-tolerant\n", ((now - RR->bc->getBondStartTime()))); + } + } + /** + * Short-circuit if we have no queued paths + */ + if (_abFailoverQueue.empty()) { + return; + } + /** + * Fulfill primary reselect obligations + */ + if (_abPath && !_abPath->eligible(now,_ackSendInterval)) { // Implicit 
ZT_MULTIPATH_RESELECTION_POLICY_FAILURE + _abPath->address().toString(curPathStr); fprintf(stderr, "%llu AB: (failure) failover event!, active backup path (%s) is no-longer eligible\n", ((now - RR->bc->getBondStartTime())), curPathStr); + if (!_abFailoverQueue.empty()) { + fprintf(stderr, "%llu AB: (failure) there are (%lu) slaves in queue to choose from...\n", ((now - RR->bc->getBondStartTime())), _abFailoverQueue.size()); + dequeueNextActiveBackupPath(now); + _abPath->address().toString(curPathStr); fprintf(stderr, "%llu sAB: (failure) switched to %s on %s\n", ((now - RR->bc->getBondStartTime())), curPathStr, getSlave(_abPath)->ifname().c_str()); + } else { + fprintf(stderr, "%llu AB: (failure) nothing available in the slave queue, doing nothing.\n", ((now - RR->bc->getBondStartTime()))); + } + } + /** + * Detect change to prevent flopping during later optimization step. + */ + if (prevActiveBackupPath != _abPath) { + _lastActiveBackupPathChange = now; + } + if (_abSlaveSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS) { + if (_abPath && !getSlave(_abPath)->primary() + && getSlave(_abFailoverQueue.front())->primary()) { + fprintf(stderr, "%llu AB: (always) switching to available primary\n", ((now - RR->bc->getBondStartTime()))); + dequeueNextActiveBackupPath(now); + } + } + if (_abSlaveSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_BETTER) { + if (_abPath && !getSlave(_abPath)->primary()) { + fprintf(stderr, "%llu AB: (better) active backup has switched to \"better\" primary slave according to re-select policy.\n", ((now - RR->bc->getBondStartTime()))); + if (getSlave(_abFailoverQueue.front())->primary() + && (_abFailoverQueue.front()->_failoverScore > _abPath->_failoverScore)) { + dequeueNextActiveBackupPath(now); + fprintf(stderr, "%llu AB: (better) switched back to user-defined primary\n", ((now - RR->bc->getBondStartTime()))); + } + } + } + if (_abSlaveSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE && !_abFailoverQueue.empty()) { + /** + * 
Implement link negotiation that was previously-decided + */ + if (_abFailoverQueue.front()->_negotiated) { + dequeueNextActiveBackupPath(now); + _abPath->address().toString(prevPathStr); + fprintf(stderr, "%llu AB: (optimize) switched to negotiated path %s on %s\n", ((now - RR->bc->getBondStartTime())), prevPathStr, getSlave(_abPath)->ifname().c_str()); + _lastPathNegotiationCheck = now; + } + else { + // Try to find a better path and automatically switch to it -- not too often, though. + if ((now - _lastActiveBackupPathChange) > ZT_MULTIPATH_MIN_ACTIVE_BACKUP_AUTOFLOP_INTERVAL) { + if (!_abFailoverQueue.empty()) { + //fprintf(stderr, "AB: (optimize) there are (%d) slaves in queue to choose from...\n", _abFailoverQueue.size()); + int newFScore = _abFailoverQueue.front()->_failoverScore; + int prevFScore = _abPath->_failoverScore; + // Establish a minimum switch threshold to prevent flapping + int failoverScoreDifference = _abFailoverQueue.front()->_failoverScore - _abPath->_failoverScore; + int thresholdQuantity = (ZT_MULTIPATH_ACTIVE_BACKUP_OPTIMIZE_MIN_THRESHOLD * (float)_abPath->_allocation); + if ((failoverScoreDifference > 0) && (failoverScoreDifference > thresholdQuantity)) { + SharedPtr oldPath = _abPath; + _abPath->address().toString(prevPathStr); + dequeueNextActiveBackupPath(now); + _abPath->address().toString(curPathStr); + fprintf(stderr, "%llu AB: (optimize) switched from %s on %s (fs=%d) to %s on %s (fs=%d)\n", ((now - RR->bc->getBondStartTime())), prevPathStr, getSlave(oldPath)->ifname().c_str(), prevFScore, curPathStr, getSlave(_abPath)->ifname().c_str(), newFScore); + } + } + } + } + } +} + +void Bond::setReasonableDefaults(int policy) +{ + // If invalid bonding policy, try default + int _defaultBondingPolicy = BondController::defaultBondingPolicy(); + if (policy <= ZT_BONDING_POLICY_NONE || policy > ZT_BONDING_POLICY_BALANCE_AWARE) { + // If no default set, use NONE (effectively disabling this bond) + if (_defaultBondingPolicy < 
ZT_BONDING_POLICY_NONE || _defaultBondingPolicy > ZT_BONDING_POLICY_BALANCE_AWARE) { + _bondingPolicy= ZT_BONDING_POLICY_NONE; + } + _bondingPolicy= _defaultBondingPolicy; + } else { + _bondingPolicy= policy; + } + + _downDelay = 0; + _upDelay = 0; + _allowFlowHashing=false; + _bondMonitorInterval=0; + _allowPathNegotiation=false; + _shouldCollectPathStatistics=false; + _lastPathNegotiationReceived=0; + _lastBackgroundTaskCheck=0; + _lastPathNegotiationCheck=0; + + _lastFlowStatReset=0; + _lastFlowExpirationCheck=0; + _localUtility=0; + _numBondedPaths=0; + _rrPacketsSentOnCurrSlave=0; + _rrIdx=0; + _lastPathNegotiationReceived=0; + _pathNegotiationCutoffCount=0; + _lastFlowRebalance=0; + _totalBondUnderload = 0; + + //_maxAcceptableLatency + _maxAcceptablePacketDelayVariance = 50; + _maxAcceptablePacketLossRatio = 0.10; + _maxAcceptablePacketErrorRatio = 0.10; + _userHasSpecifiedSlaveSpeeds=0; + + _lastFrame=0; + + // TODO: Remove + _header=false; + _lastLogTS = 0; + _lastPrintTS = 0; + + + + + /** + * Paths are actively monitored to provide a real-time quality/preference-ordered rapid failover queue. + */ + switch (policy) { + case ZT_BONDING_POLICY_ACTIVE_BACKUP: + _failoverInterval = 5000; + _abSlaveSelectMethod = ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE; + _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; + _qualityWeights[ZT_QOS_LAT_IDX] = 0.2f; + _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; + _qualityWeights[ZT_QOS_PDV_IDX] = 0.2f; + _qualityWeights[ZT_QOS_PLR_IDX] = 0.2f; + _qualityWeights[ZT_QOS_PER_IDX] = 0.2f; + _qualityWeights[ZT_QOS_THR_IDX] = 0.2f; + _qualityWeights[ZT_QOS_THM_IDX] = 0.0f; + _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; + _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; + break; + /** + * All seemingly-alive paths are used. Paths are not actively monitored. 
+ */ + case ZT_BONDING_POLICY_BROADCAST: + _downDelay = 30000; + _upDelay = 0; + break; + /** + * Paths are monitored to determine when/if one needs to be added or removed from the rotation + */ + case ZT_BONDING_POLICY_BALANCE_RR: + _failoverInterval = 5000; + _allowFlowHashing = false; + _packetsPerSlave = 8; + _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; + _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; + _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; + _qualityWeights[ZT_QOS_PDV_IDX] = 0.2f; + _qualityWeights[ZT_QOS_PLR_IDX] = 0.1f; + _qualityWeights[ZT_QOS_PER_IDX] = 0.1f; + _qualityWeights[ZT_QOS_THR_IDX] = 0.1f; + _qualityWeights[ZT_QOS_THM_IDX] = 0.0f; + _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; + _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; + break; + /** + * Path monitoring is used to determine the capacity of each + * path and where to place the next flow. + */ + case ZT_BONDING_POLICY_BALANCE_XOR: + _failoverInterval = 5000;; + _upDelay=_bondMonitorInterval*2; + _allowFlowHashing = true; + _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; + _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; + _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; + _qualityWeights[ZT_QOS_PDV_IDX] = 0.2f; + _qualityWeights[ZT_QOS_PLR_IDX] = 0.1f; + _qualityWeights[ZT_QOS_PER_IDX] = 0.1f; + _qualityWeights[ZT_QOS_THR_IDX] = 0.1f; + _qualityWeights[ZT_QOS_THM_IDX] = 0.0f; + _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; + _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; + break; + /** + * Path monitoring is used to determine the capacity of each + * path and where to place the next flow. Additionally, re-shuffling + * of flows may take place. 
+ */ + case ZT_BONDING_POLICY_BALANCE_AWARE: + _failoverInterval = 3000; + _allowFlowHashing = true; + _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; + _qualityWeights[ZT_QOS_LAT_IDX] = 0.3f; + _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; + _qualityWeights[ZT_QOS_PDV_IDX] = 0.1f; + _qualityWeights[ZT_QOS_PLR_IDX] = 0.1f; + _qualityWeights[ZT_QOS_PER_IDX] = 0.1f; + _qualityWeights[ZT_QOS_THR_IDX] = 0.0f; + _qualityWeights[ZT_QOS_THM_IDX] = 0.4f; + _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; + _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; + break; + default: + break; + } + + /** + * Timer geometries and counters + */ + _bondMonitorInterval = _failoverInterval / 3; + _ackSendInterval = _failoverInterval; + _qualityEstimationInterval = _failoverInterval * 2; + + _dynamicPathMonitorInterval = 0; + + _downDelay=0; + _upDelay=0; + + _ackCutoffCount = 0; + _lastAckRateCheck = 0; + _qosSendInterval = _bondMonitorInterval * 4; + _qosCutoffCount = 0; + _lastQoSRateCheck = 0; + throughputMeasurementInterval = _ackSendInterval * 2; + BondController::setMinReqPathMonitorInterval(_bondMonitorInterval); + + _defaultPathRefractoryPeriod = 8000; + + fprintf(stderr, "TIMERS: strat=%d, fi= %d, bmi= %d, qos= %d, ack= %d, estimateInt= %d, refractory= %d, ud= %d, dd= %d\n", + _slaveMonitorStrategy, + _failoverInterval, + _bondMonitorInterval, + _qosSendInterval, + _ackSendInterval, + _qualityEstimationInterval, + _defaultPathRefractoryPeriod, + _upDelay, + _downDelay); + + _lastQualityEstimation=0; +} + +void Bond::setUserQualityWeights(float weights[], int len) +{ + if (len == ZT_QOS_WEIGHT_SIZE) { + float weightTotal = 0.0; + for (unsigned int i=0; i 0.99 && weightTotal < 1.01) { + memcpy(_qualityWeights, weights, len * sizeof(float)); + } + } +} + + +bool Bond::relevant() { + return _peer->identity().address().toInt() == 0x16a03a3d03 + || _peer->identity().address().toInt() == 0x4410300d03 + || _peer->identity().address().toInt() == 0x795cbf86fa; +} + +SharedPtr 
Bond::getSlave(const SharedPtr& path) +{ + return RR->bc->getSlaveBySocket(_policyAlias, path->localSocket()); +} + +void Bond::dumpInfo(const int64_t now) +{ + char pathStr[128]; + //char oldPathStr[128]; + char currPathStr[128]; + + if (!relevant()) { + return; + } + /* + fprintf(stderr, "---[ bp=%d, id=%llx, dd=%d, up=%d, pmi=%d, specifiedSlaves=%d, _specifiedPrimarySlave=%d, _specifiedFailInst=%d ]\n", + _policy, _peer->identity().address().toInt(), _downDelay, _upDelay, _monitorInterval, _userHasSpecifiedSlaves, _userHasSpecifiedPrimarySlave, _userHasSpecifiedFailoverInstructions); + + if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + fprintf(stderr, "Paths (bp=%d, stats=%d, primaryReselect=%d) :\n", + _policy, _shouldCollectPathStatistics, _abSlaveSelectMethod); + } + if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR + || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR + || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE) { + fprintf(stderr, "Paths (bp=%d, stats=%d, fh=%d) :\n", + _policy, _shouldCollectPathStatistics, _allowFlowHashing); + }*/ + + if ((now - _lastLogTS) < 1000) { + return; + } + _lastPrintTS = now; + _lastLogTS = now; + + fprintf(stderr, "\n\n"); + + for(int i=0; i slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + _paths[i]->address().toString(pathStr); + fprintf(stderr, " %2d: lat=%8.3f, ac=%3d, fail%5s, fscore=%6d, in=%7d, out=%7d, age=%7ld, ack=%7ld, ref=%6d, ls=%llx", + i, + _paths[i]->_latencyMean, + _paths[i]->_allocation, + slave->failoverToSlave().c_str(), + _paths[i]->_failoverScore, + _paths[i]->_packetsIn, + _paths[i]->_packetsOut, + (long)_paths[i]->age(now), + (long)_paths[i]->ackAge(now), + _paths[i]->_refractoryPeriod, + _paths[i]->localSocket() + ); + if (slave->spare()) { + fprintf(stderr, " SPR."); + } else { + fprintf(stderr, " "); + } + if (slave->primary()) { + fprintf(stderr, " PRIM."); + } else { + fprintf(stderr, " "); + } + if (_paths[i]->allowed()) { + fprintf(stderr, " ALL."); + } 
else { + fprintf(stderr, " "); + } + if (_paths[i]->eligible(now,_ackSendInterval)) { + fprintf(stderr, " ELI."); + } else { + fprintf(stderr, " "); + } + if (_paths[i]->preferred()) { + fprintf(stderr, " PREF."); + } else { + fprintf(stderr, " "); + } + if (_paths[i]->_negotiated) { + fprintf(stderr, " NEG."); + } else { + fprintf(stderr, " "); + } + if (_paths[i]->bonded()) { + fprintf(stderr, " BOND "); + } else { + fprintf(stderr, " "); + } + if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP && _abPath && (_abPath == _paths[i].ptr())) { + fprintf(stderr, " ACTIVE "); + } else if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + fprintf(stderr, " "); + } + if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP && _abFailoverQueue.size() && (_abFailoverQueue.front().ptr() == _paths[i].ptr())) { + fprintf(stderr, " NEXT "); + } else if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + fprintf(stderr, " "); + } + fprintf(stderr, "%5s %s\n", slave->ifname().c_str(), pathStr); + } + } + + if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + if (!_abFailoverQueue.empty()) { + fprintf(stderr, "\nFailover Queue:\n"); + for (std::list >::iterator it(_abFailoverQueue.begin()); it!=_abFailoverQueue.end();++it) { + (*it)->address().toString(currPathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, (*it)->localSocket()); + fprintf(stderr, "\t%8s\tspeed=%7d\trelSpeed=%3d\tipvPref=%3d\tfscore=%9d\t\t%s\n", + slave->ifname().c_str(), + slave->speed(), + slave->relativeSpeed(), + slave->ipvPref(), + (*it)->_failoverScore, + currPathStr); + } + } + else + { + fprintf(stderr, "\nFailover Queue size = %lu\n", _abFailoverQueue.size()); + } + } + + if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR + || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR + || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE) { + /* + if (_numBondedPaths) { + fprintf(stderr, "\nBonded Paths:\n"); + for (int i=0; i<_numBondedPaths; ++i) { + 
_paths[_bondedIdx[i]].p->address().toString(currPathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[_bondedIdx[i]].p->localSocket()); + fprintf(stderr, " [%d]\t%8s\tflows=%3d\tspeed=%7d\trelSpeed=%3d\tipvPref=%3d\tfscore=%9d\t\t%s\n", i, + //fprintf(stderr, " [%d]\t%8s\tspeed=%7d\trelSpeed=%3d\tflowCount=%2d\tipvPref=%3d\tfscore=%9d\t\t%s\n", i, + slave->ifname().c_str(), + numberOfAssignedFlows(_paths[_bondedIdx[i]].p), + slave->speed(), + slave->relativeSpeed(), + //_paths[_bondedIdx[i]].p->assignedFlows.size(), + slave->ipvPref(), + _paths[_bondedIdx[i]].p->failoverScore(), + currPathStr); + } + } + */ + /* + if (_allowFlowHashing) { + //Mutex::Lock _l(_flows_m); + if (_flows.size()) { + fprintf(stderr, "\nFlows:\n"); + std::map >::iterator it = _flows.begin(); + while (it != _flows.end()) { + it->second->assignedPath()->address().toString(currPathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, it->second->assignedPath()->localSocket()); + fprintf(stderr, " [%4x] in=%16llu, out=%16llu, bytes=%16llu, last=%16llu, if=%8s\t\t%s\n", + it->second->id(), + it->second->bytesInPerUnitTime(), + it->second->bytesOutPerUnitTime(), + it->second->totalBytes(), + it->second->age(now), + slave->ifname().c_str(), + currPathStr); + ++it; + } + } + } + */ + } + //fprintf(stderr, "\n\n\n\n\n"); +} + +} // namespace ZeroTier \ No newline at end of file diff --git a/node/Bond.hpp b/node/Bond.hpp new file mode 100644 index 000000000..6318f3936 --- /dev/null +++ b/node/Bond.hpp @@ -0,0 +1,689 @@ +/* + * Copyright (c)2013-2020 ZeroTier, Inc. + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file in the project's root directory. + * + * Change Date: 2024-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2.0 of the Apache License. 
+ */ +/****/ + +#ifndef ZT_BOND_HPP +#define ZT_BOND_HPP + +#include + +#include "Path.hpp" +#include "Peer.hpp" +#include "../osdep/Slave.hpp" +#include "Flow.hpp" + +namespace ZeroTier { + +class RuntimeEnvironment; +class Slave; + +class Bond +{ + friend class SharedPtr; + friend class Peer; + friend class BondController; + + struct PathQualityComparator + { + bool operator ()(const SharedPtr & a, const SharedPtr & b) + { + if(a->_failoverScore == b->_failoverScore) { + return a < b; + } + return a->_failoverScore > b->_failoverScore; + } + }; + +public: + + // TODO: Remove + bool _header; + int64_t _lastLogTS; + int64_t _lastPrintTS; + void dumpInfo(const int64_t now); + bool relevant(); + + SharedPtr getSlave(const SharedPtr& path); + + /** + * Constructor. For use only in first initialization in Node + * + * @param renv Runtime environment + */ + Bond(const RuntimeEnvironment *renv); + + /** + * Constructor. Creates a bond based off of ZT defaults + * + * @param renv Runtime environment + * @param policy Bonding policy + * @param peer + */ + Bond(const RuntimeEnvironment *renv, int policy, const SharedPtr& peer); + + /** + * Constructor. For use when user intends to manually specify parameters + * + * @param basePolicy + * @param policyAlias + * @param peer + */ + Bond(std::string& basePolicy, std::string& policyAlias, const SharedPtr& peer); + + /** + * Constructor. 
Creates a bond based off of a user-defined bond template + * + * @param renv Runtime environment + * @param original + * @param peer + */ + Bond(const RuntimeEnvironment *renv, const Bond &original, const SharedPtr& peer); + + /** + * + * @return + */ + std::string policyAlias() { return _policyAlias; } + + /** + * Inform the bond about the path that its peer just learned about + * + * @param path Newly-learned Path which should now be handled by the Bond + * @param now Current time + */ + void nominatePath(const SharedPtr& path, int64_t now); + + /** + * Propagate and memoize often-used bonding preferences for each path + */ + void applyUserPrefs(); + + /** + * Check path states and perform bond rebuilds if needed. + * + * @param now Current time + * @param rebuild Whether or not the bond should be reconstructed. + */ + void curateBond(const int64_t now, bool rebuild); + + /** + * Periodically perform statistical summaries of quality metrics for all paths. + * + * @param now Current time + */ + void estimatePathQuality(int64_t now); + + /** + * Record an invalid incoming packet. This packet failed + * MAC/compression/cipher checks and will now contribute to a + * Packet Error Ratio (PER). + * + * @param path Path over which packet was received + */ + void recordIncomingInvalidPacket(const SharedPtr& path); + + /** + * Record statistics on outbound an packet. + * + * @param path Path over which packet is being sent + * @param packetId Packet ID + * @param payloadLength Packet data length + * @param verb Packet verb + * @param flowId Flow ID + * @param now Current time + */ + void recordOutgoingPacket(const SharedPtr &path, uint64_t packetId, + uint16_t payloadLength, Packet::Verb verb, int32_t flowId, int64_t now); + + /** + * Process the contents of an inbound VERB_QOS_MEASUREMENT to gather path quality observations. 
+ * + * @param now Current time + * @param count Number of records + * @param rx_id table of packet IDs + * @param rx_ts table of holding times + */ + void receivedQoS(const SharedPtr& path, int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts); + + /** + * Process the contents of an inbound VERB_ACK to gather path quality observations. + * + * @param path Path over which packet was received + * @param now Current time + * @param ackedBytes Number of bytes ACKed by this VERB_ACK + */ + void receivedAck(const SharedPtr& path, int64_t now, int32_t ackedBytes); + + /** + * Generate the contents of a VERB_QOS_MEASUREMENT packet. + * + * @param now Current time + * @param qosBuffer destination buffer + * @return Size of payload + */ + int32_t generateQoSPacket(const SharedPtr& path, int64_t now, char *qosBuffer); + + /** + * Record statistics for an inbound packet. + * + * @param path Path over which packet was received + * @param packetId Packet ID + * @param payloadLength Packet data length + * @param verb Packet verb + * @param flowId Flow ID + * @param now Current time + */ + void recordIncomingPacket(const SharedPtr& path, uint64_t packetId, uint16_t payloadLength, + Packet::Verb verb, int32_t flowId, int64_t now); + + /** + * Determines the most appropriate path for packet and flow egress. This decision is made by + * the underlying bonding policy as well as QoS-related statistical observations of path quality. 
+ * + * @param now Current time + * @param flowId Flow ID + * @return Pointer to suggested Path + */ + SharedPtr getAppropriatePath(int64_t now, int32_t flowId); + + /** + * Creates a new flow record + * + * @param path Path over which flow shall be handled + * @param flowId Flow ID + * @param entropy A byte of entropy to be used by the bonding algorithm + * @param now Current time + * @return Pointer to newly-created Flow + */ + SharedPtr createFlow(const SharedPtr &path, int32_t flowId, unsigned char entropy, int64_t now); + + /** + * Removes flow records that are past a certain age limit. + * + * @param age Age threshold to be forgotten + * @param oldest Whether only the oldest shall be forgotten + * @param now Current time + */ + void forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now); + + /** + * Assigns a new flow to a bonded path + * + * @param flow Flow to be assigned + * @param now Current time + */ + bool assignFlowToBondedPath(SharedPtr &flow, int64_t now); + + /** + * Determine whether a path change should occur given the remote peer's reported utility and our + * local peer's known utility. This has the effect of assigning inbound and outbound traffic to + * the same path. + * + * @param now Current time + * @param path Path over which the negotiation request was received + * @param remoteUtility How much utility the remote peer claims to gain by using the declared path + */ + void processIncomingPathNegotiationRequest(uint64_t now, SharedPtr &path, int16_t remoteUtility); + + /** + * Determine state of path synchronization and whether a negotiation request + * shall be sent to the peer. + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param now Current time + */ + void pathNegotiationCheck(void *tPtr, const int64_t now); + + /** + * Sends a VERB_ACK to the remote peer. 
+ * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param path Path over which packet should be sent + * @param localSocket Local source socket + * @param atAddress + * @param now Current time + */ + void sendACK(void *tPtr,const SharedPtr &path,int64_t localSocket, + const InetAddress &atAddress,int64_t now); + + /** + * Sends a VERB_QOS_MEASUREMENT to the remote peer. + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param path Path over which packet should be sent + * @param localSocket Local source socket + * @param atAddress + * @param now Current time + */ + void sendQOS_MEASUREMENT(void *tPtr,const SharedPtr &path,int64_t localSocket, + const InetAddress &atAddress,int64_t now); + + /** + * Sends a VERB_PATH_NEGOTIATION_REQUEST to the remote peer. + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param path Path over which packet should be sent + */ + void sendPATH_NEGOTIATION_REQUEST(void *tPtr, const SharedPtr &path); + + /** + * + * @param now Current time + */ + void processBalanceTasks(int64_t now); + + /** + * Perform periodic tasks unique to active-backup + * + * @param now Current time + */ + void processActiveBackupTasks(int64_t now); + + /** + * Switches the active slave in an active-backup scenario to the next best during + * a failover event. + * + * @param now Current time + */ + void dequeueNextActiveBackupPath(uint64_t now); + + /** + * Set bond parameters to reasonable defaults, these may later be overwritten by + * user-specified parameters. + * + * @param policy Bonding policy + */ + void setReasonableDefaults(int policy); + + /** + * Check and assign user-specified quality weights to this bond. 
+ * + * @param weights Set of user-specified weights + * @param len Length of weight vector + */ + void setUserQualityWeights(float weights[], int len); + + /** + * @param latencyInMilliseconds Maximum acceptable latency. + */ + void setMaxAcceptableLatency(int16_t latencyInMilliseconds) { + _maxAcceptableLatency = latencyInMilliseconds; + } + + /** + * @param latencyInMilliseconds Maximum acceptable (mean) latency. + */ + void setMaxAcceptableMeanLatency(int16_t latencyInMilliseconds) { + _maxAcceptableMeanLatency = latencyInMilliseconds; + } + + /** + * @param latencyVarianceInMilliseconds Maximum acceptable packet delay variance (jitter). + */ + void setMaxAcceptablePacketDelayVariance(int16_t latencyVarianceInMilliseconds) { + _maxAcceptablePacketDelayVariance = latencyVarianceInMilliseconds; + } + + /** + * @param lossRatio Maximum acceptable packet loss ratio (PLR). + */ + void setMaxAcceptablePacketLossRatio(float lossRatio) { + _maxAcceptablePacketLossRatio = lossRatio; + } + + /** + * @param errorRatio Maximum acceptable packet error ratio (PER). + */ + void setMaxAcceptablePacketErrorRatio(float errorRatio) { + _maxAcceptablePacketErrorRatio = errorRatio; + } + + /** + * @param errorRatio Maximum acceptable packet error ratio (PER). 
+ */ + void setMinAcceptableAllocation(float minAlloc) { + _minAcceptableAllocation = minAlloc * 255; + } + + /** + * @return Whether the user has defined slaves for use on this bond + */ + inline bool userHasSpecifiedSlaves() { return _userHasSpecifiedSlaves; } + + /** + * @return Whether the user has defined a set of failover slave(s) for this bond + */ + inline bool userHasSpecifiedFailoverInstructions() { return _userHasSpecifiedFailoverInstructions; }; + + /** + * @return Whether the user has specified a primary slave + */ + inline bool userHasSpecifiedPrimarySlave() { return _userHasSpecifiedPrimarySlave; } + + /** + * @return Whether the user has specified slave speeds + */ + inline bool userHasSpecifiedSlaveSpeeds() { return _userHasSpecifiedSlaveSpeeds; } + + /** + * Periodically perform maintenance tasks for each active bond. + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param now Current time + */ + void processBackgroundTasks(void *tPtr, int64_t now); + + /** + * Rate limit gate for VERB_ACK + * + * @param now Current time + * @return Whether the incoming packet should be rate-gated + */ + inline bool rateGateACK(const int64_t now) + { + _ackCutoffCount++; + int numToDrain = _lastAckRateCheck ? 
(now - _lastAckRateCheck) / ZT_ACK_DRAINAGE_DIVISOR : _ackCutoffCount; + _lastAckRateCheck = now; + if (_ackCutoffCount > numToDrain) { + _ackCutoffCount-=numToDrain; + } else { + _ackCutoffCount = 0; + } + return (_ackCutoffCount < ZT_ACK_CUTOFF_LIMIT); + } + + /** + * Rate limit gate for VERB_QOS_MEASUREMENT + * + * @param now Current time + * @return Whether the incoming packet should be rate-gated + */ + inline bool rateGateQoS(const int64_t now) + { + _qosCutoffCount++; + int numToDrain = (now - _lastQoSRateCheck) / ZT_QOS_DRAINAGE_DIVISOR; + _lastQoSRateCheck = now; + if (_qosCutoffCount > numToDrain) { + _qosCutoffCount-=numToDrain; + } else { + _qosCutoffCount = 0; + } + return (_qosCutoffCount < ZT_QOS_CUTOFF_LIMIT); + } + + /** + * Rate limit gate for VERB_PATH_NEGOTIATION_REQUEST + * + * @param now Current time + * @return Whether the incoming packet should be rate-gated + */ + inline bool rateGatePathNegotiation(const int64_t now) + { + if ((now - _lastPathNegotiationReceived) <= ZT_PATH_NEGOTIATION_CUTOFF_TIME) + ++_pathNegotiationCutoffCount; + else _pathNegotiationCutoffCount = 0; + _lastPathNegotiationReceived = now; + return (_pathNegotiationCutoffCount < ZT_PATH_NEGOTIATION_CUTOFF_LIMIT); + } + + /** + * @param interval Maximum amount of time user expects a failover to take on this bond. 
+ */ + inline void setFailoverInterval(uint32_t interval) { _failoverInterval = interval; } + + /** + * @param strategy The strategy that the bond uses to probe for path aliveness and quality + */ + inline void setSlaveMonitorStrategy(uint8_t strategy) { _slaveMonitorStrategy = strategy; } + + /** + * @return the current up delay parameter + */ + inline uint16_t getUpDelay() { return _upDelay; } + + /** + * @param upDelay Length of time before a newly-discovered path is admitted to the bond + */ + inline void setUpDelay(int upDelay) { if (upDelay >= 0) { _upDelay = upDelay; } } + + /** + * @return Length of time before a newly-failed path is removed from the bond + */ + inline uint16_t getDownDelay() { return _downDelay; } + + /** + * @param downDelay Length of time before a newly-failed path is removed from the bond + */ + inline void setDownDelay(int downDelay) { if (downDelay >= 0) { _downDelay = downDelay; } } + + /** + * @return the current monitoring interval for the bond (can be overridden with intervals specific to certain slaves.) + */ + inline uint16_t getBondMonitorInterval() { return _bondMonitorInterval; } + + /** + * Set the current monitoring interval for the bond (can be overridden with intervals specific to certain slaves.) + * + * @param interval How often gratuitous VERB_HELLO(s) are sent to remote peer. + */ + inline void setBondMonitorInterval(uint16_t interval) { _bondMonitorInterval = interval; } + + /** + * @param policy Bonding policy for this bond + */ + inline void setPolicy(uint8_t policy) { _bondingPolicy = policy; } + + /** + * @return the current bonding policy + */ + inline uint8_t getPolicy() { return _bondingPolicy; } + + /** + * + * @param allowFlowHashing Whether flow-hashing should be allowed for this bond + */ + inline void setFlowHashing(bool allowFlowHashing) { _allowFlowHashing = allowFlowHashing; } + + /** + * @return Whether flow-hashing is currently enabled for this bond.
+ */ + bool flowHashingEnabled() { return _allowFlowHashing; } + + /** + * + * @param packetsPerSlave + */ + inline void setPacketsPerSlave(int packetsPerSlave) { _packetsPerSlave = packetsPerSlave; } + + /** + * + * @param slaveSelectMethod + */ + inline void setSlaveSelectMethod(uint8_t method) { _abSlaveSelectMethod = method; } + + /** + * + * @return + */ + inline uint8_t getSlaveSelectMethod() { return _abSlaveSelectMethod; } + + /** + * + * @param allowPathNegotiation + */ + inline void setAllowPathNegotiation(bool allowPathNegotiation) { _allowPathNegotiation = allowPathNegotiation; } + + /** + * + * @return + */ + inline bool allowPathNegotiation() { return _allowPathNegotiation; } + +private: + + const RuntimeEnvironment *RR; + AtomicCounter __refCount; + + /** + * Custom name given by the user to this bond type. + */ + std::string _policyAlias; + + /** + * Paths that this bond has been made aware of but that are not necessarily + * part of the bond proper. + */ + SharedPtr _paths[ZT_MAX_PEER_NETWORK_PATHS]; + + /** + * Set of indices corresponding to paths currently included in the bond proper. This + * may only be updated during a call to curateBond(). The reason for this is so that + * we can simplify the high frequency packet egress logic. + */ + int _bondedIdx[ZT_MAX_PEER_NETWORK_PATHS]; + + /** + * Number of paths currently included in the _bondedIdx set. + */ + int _numBondedPaths; + + /** + * Flows hashed according to port and protocol + */ + std::map > _flows; + + float _qualityWeights[ZT_QOS_WEIGHT_SIZE]; // How much each factor contributes to the "quality" score of a path. 
+ + uint8_t _bondingPolicy; + uint32_t _upDelay; + uint32_t _downDelay; + + // active-backup + SharedPtr _abPath; // current active path + std::list > _abFailoverQueue; + uint8_t _abSlaveSelectMethod; // slave re-selection policy for the primary slave in active-backup + uint64_t _lastActiveBackupPathChange; + + // balance-rr + uint8_t _rrIdx; // index to path currently in use during Round Robin operation + uint16_t _rrPacketsSentOnCurrSlave; // number of packets sent on this slave since the most recent path switch. + /** + * How many packets will be sent on a path before moving to the next path + * in the round-robin sequence. A value of zero will cause a random path + * selection for each outgoing packet. + */ + int _packetsPerSlave; + + // balance-aware + uint64_t _totalBondUnderload; + + // dynamic slave monitoring + uint8_t _slaveMonitorStrategy; + uint64_t _lastFrame; + uint32_t _dynamicPathMonitorInterval; + + // path negotiation + int16_t _localUtility; + SharedPtr negotiatedPath; + uint8_t _numSentPathNegotiationRequests; + unsigned int _pathNegotiationCutoffCount; + bool _allowPathNegotiation; + uint64_t _lastPathNegotiationReceived; + uint64_t _lastSentPathNegotiationRequest; + + // timers + uint32_t _failoverInterval; + uint32_t _qosSendInterval; + uint32_t _ackSendInterval; + uint16_t _ackCutoffCount; + uint64_t _lastAckRateCheck; + uint16_t _qosCutoffCount; + uint64_t _lastQoSRateCheck; + uint32_t throughputMeasurementInterval; + uint32_t _qualityEstimationInterval; + + // timestamps + uint64_t _lastCheckUserPreferences; + uint64_t _lastQualityEstimation; + uint64_t _lastFlowStatReset; + uint64_t _lastFlowExpirationCheck; + uint64_t _lastFlowRebalance; + uint64_t _lastPathNegotiationCheck; + uint64_t _lastBackgroundTaskCheck; + + float _maxAcceptablePacketLossRatio; + float _maxAcceptablePacketErrorRatio; + uint16_t _maxAcceptableLatency; + uint16_t _maxAcceptableMeanLatency; + uint16_t _maxAcceptablePacketDelayVariance; + uint8_t 
_minAcceptableAllocation; + + /** + * Default initial punishment inflicted on misbehaving paths. Punishment slowly + * drains linearly. For each eligibility change the remaining punishment is doubled. + */ + uint32_t _defaultPathRefractoryPeriod; + + /** + * Whether the current bonding policy requires computation of path statistics + */ + bool _shouldCollectPathStatistics; + + /** + * Free byte of entropy that is updated on every packet egress event. + */ + unsigned char _freeRandomByte; + + /** + * Remote peer that this bond services + */ + SharedPtr _peer; + + Mutex _paths_m; + Mutex _flows_m; + + /** + * Whether the user has specified slaves for this bond. + */ + bool _userHasSpecifiedSlaves; + + /** + * Whether the user has specified a primary slave for this bond. + */ + bool _userHasSpecifiedPrimarySlave; + + /** + * Whether the user has specified failover instructions for this bond. + */ + bool _userHasSpecifiedFailoverInstructions; + + /** + * Whether the user has specified slaves speeds for this bond. + */ + bool _userHasSpecifiedSlaveSpeeds; + + /** + * How frequently (in ms) a VERB_ECHO is sent to a peer to verify that a + * path is still active. A value of zero (0) will disable active path + * monitoring; as result, all monitoring will be a function of traffic. + */ + uint16_t _bondMonitorInterval; + + /** + * Whether or not flow hashing is allowed. + */ + bool _allowFlowHashing; +}; + +} // namespace ZeroTier + +#endif \ No newline at end of file diff --git a/node/BondController.cpp b/node/BondController.cpp new file mode 100644 index 000000000..4bc8d2261 --- /dev/null +++ b/node/BondController.cpp @@ -0,0 +1,203 @@ +/* + * Copyright (c)2013-2020 ZeroTier, Inc. + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file in the project's root directory. 
+ * + * Change Date: 2024-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2.0 of the Apache License. + */ +/****/ + +#include "BondController.hpp" +#include "Peer.hpp" + +namespace ZeroTier { + +int BondController::_minReqPathMonitorInterval; +uint8_t BondController::_defaultBondingPolicy; + +BondController::BondController(const RuntimeEnvironment *renv) : + RR(renv) +{ + bondStartTime = RR->node->now(); +} + +bool BondController::slaveAllowed(std::string &policyAlias, SharedPtr slave) +{ + bool foundInDefinitions = false; + if (_slaveDefinitions.count(policyAlias)) { + auto it = _slaveDefinitions[policyAlias].begin(); + while (it != _slaveDefinitions[policyAlias].end()) { + if (slave->ifname() == (*it)->ifname()) { + foundInDefinitions = true; + break; + } + ++it; + } + } + return _slaveDefinitions[policyAlias].empty() || foundInDefinitions; +} + +void BondController::addCustomSlave(std::string& policyAlias, SharedPtr slave) +{ + Mutex::Lock _l(_slaves_m); + _slaveDefinitions[policyAlias].push_back(slave); + auto search = _interfaceToSlaveMap[policyAlias].find(slave->ifname()); + if (search == _interfaceToSlaveMap[policyAlias].end()) { + slave->setAsUserSpecified(true); + _interfaceToSlaveMap[policyAlias].insert(std::pair>(slave->ifname(), slave)); + } else { + fprintf(stderr, "slave already exists=%s\n", slave->ifname().c_str()); + // Slave is already defined, overlay user settings + } +} + +bool BondController::addCustomPolicy(const SharedPtr& newBond) +{ + Mutex::Lock _l(_bonds_m); + if (!_bondPolicyTemplates.count(newBond->policyAlias())) { + _bondPolicyTemplates[newBond->policyAlias()] = newBond; + return true; + } + return false; +} + +bool BondController::assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias) +{ + Mutex::Lock _l(_bonds_m); + if (!_policyTemplateAssignments.count(identity)) { + _policyTemplateAssignments[identity] = policyAlias; + 
return true; + } + return false; +} + +SharedPtr BondController::createTransportTriggeredBond(const RuntimeEnvironment *renv, const SharedPtr& peer) +{ + fprintf(stderr, "createTransportTriggeredBond\n"); + Mutex::Lock _l(_bonds_m); + int64_t identity = peer->identity().address().toInt(); + Bond *bond = nullptr; + if (!_bonds.count(identity)) { + std::string policyAlias; + int _defaultBondingPolicy = defaultBondingPolicy(); + fprintf(stderr, "new bond, registering for %llx\n", identity); + if (!_policyTemplateAssignments.count(identity)) { + if (defaultBondingPolicy()) { + fprintf(stderr, " no assignment, using default (%d)\n", _defaultBondingPolicy); + bond = new Bond(renv, _defaultBondingPolicy, peer); + } + if (!_defaultBondingPolicy && _defaultBondingPolicyStr.length()) { + fprintf(stderr, " no assignment, using default custom (%s)\n", _defaultBondingPolicyStr.c_str()); + bond = new Bond(renv, *(_bondPolicyTemplates[_defaultBondingPolicyStr].ptr()), peer); + } + } + else { + fprintf(stderr, " assignment found for %llx, using it as a template (%s)\n", identity,_policyTemplateAssignments[identity].c_str()); + if (!_bondPolicyTemplates[_policyTemplateAssignments[identity]]) { + fprintf(stderr, "unable to locate template (%s), ignoring assignment for (%llx), using defaults\n", _policyTemplateAssignments[identity].c_str(), identity); + bond = new Bond(renv, _defaultBondingPolicy, peer); + } + else { + bond = new Bond(renv, *(_bondPolicyTemplates[_policyTemplateAssignments[identity]].ptr()), peer); + } + } + } + else { + fprintf(stderr, "bond already exists for %llx, cannot re-register. 
exiting\n", identity); exit(0); // TODO: Remove + } + if (bond) { + _bonds[identity] = bond; + /** + * Determine if user has specified anything that could affect the bonding policy's decisions + */ + if (_interfaceToSlaveMap.count(bond->policyAlias())) { + std::map >::iterator it = _interfaceToSlaveMap[bond->policyAlias()].begin(); + while (it != _interfaceToSlaveMap[bond->policyAlias()].end()) { + if (it->second->isUserSpecified()) { + bond->_userHasSpecifiedSlaves = true; + } + if (it->second->isUserSpecified() && it->second->primary()) { + bond->_userHasSpecifiedPrimarySlave = true; + } + if (it->second->isUserSpecified() && it->second->userHasSpecifiedFailoverInstructions()) { + bond->_userHasSpecifiedFailoverInstructions = true; + } + if (it->second->isUserSpecified() && (it->second->speed() > 0)) { + bond->_userHasSpecifiedSlaveSpeeds = true; + } + ++it; + } + } + return bond; + } + return SharedPtr(); +} + +SharedPtr BondController::getSlaveBySocket(const std::string& policyAlias, uint64_t localSocket) +{ + Mutex::Lock _l(_slaves_m); + char ifname[16]; + _phy->getIfName((PhySocket *) ((uintptr_t)localSocket), ifname, 16); + std::string ifnameStr(ifname); + auto search = _interfaceToSlaveMap[policyAlias].find(ifnameStr); + if (search == _interfaceToSlaveMap[policyAlias].end()) { + SharedPtr s = new Slave(ifnameStr, 0, 0, 0, 0, 0, true, ZT_MULTIPATH_SLAVE_MODE_SPARE, "", 0.0); + _interfaceToSlaveMap[policyAlias].insert(std::pair >(ifnameStr, s)); + return s; + } + else { + return search->second; + } +} + +SharedPtr BondController::getSlaveByName(const std::string& policyAlias, const std::string& ifname) +{ + Mutex::Lock _l(_slaves_m); + auto search = _interfaceToSlaveMap[policyAlias].find(ifname); + if (search != _interfaceToSlaveMap[policyAlias].end()) { + return search->second; + } + return SharedPtr(); +} + +bool BondController::allowedToBind(const std::string& ifname) +{ + return true; + /* + if (!_defaultBondingPolicy) { + return true; // no restrictions 
+ } + Mutex::Lock _l(_slaves_m); + if (_interfaceToSlaveMap.empty()) { + return true; // no restrictions + } + std::map > >::iterator policyItr = _interfaceToSlaveMap.begin(); + while (policyItr != _interfaceToSlaveMap.end()) { + std::map >::iterator slaveItr = policyItr->second.begin(); + while (slaveItr != policyItr->second.end()) { + if (slaveItr->first == ifname) { + return true; + } + ++slaveItr; + } + ++policyItr; + } + return false; + */ +} + +void BondController::processBackgroundTasks(void *tPtr, const int64_t now) +{ + Mutex::Lock _l(_bonds_m); + std::map >::iterator bondItr = _bonds.begin(); + while (bondItr != _bonds.end()) { + bondItr->second->processBackgroundTasks(tPtr, now); + ++bondItr; + } +} + +} // namespace ZeroTier \ No newline at end of file diff --git a/node/BondController.hpp b/node/BondController.hpp new file mode 100644 index 000000000..c8fa660b0 --- /dev/null +++ b/node/BondController.hpp @@ -0,0 +1,231 @@ +/* + * Copyright (c)2013-2020 ZeroTier, Inc. + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file in the project's root directory. + * + * Change Date: 2024-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2.0 of the Apache License. + */ +/****/ + +#ifndef ZT_BONDCONTROLLER_HPP +#define ZT_BONDCONTROLLER_HPP + +#include +#include + +#include "SharedPtr.hpp" +#include "../osdep/Phy.hpp" +#include "../osdep/Slave.hpp" + +namespace ZeroTier { + +class RuntimeEnvironment; +class Bond; +class Peer; + +class BondController +{ + friend class Bond; + +public: + + BondController(const RuntimeEnvironment *renv); + + /** + * @return The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements. 
+ */ + bool slaveAllowed(std::string &policyAlias, SharedPtr slave); + + /** + * @return The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements. + */ + int minReqPathMonitorInterval() { return _minReqPathMonitorInterval; } + + /** + * @param minReqPathMonitorInterval The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements. + */ + static void setMinReqPathMonitorInterval(int minReqPathMonitorInterval) { _minReqPathMonitorInterval = minReqPathMonitorInterval; } + + /** + * @return Whether the bonding layer is currently set up to be used. + */ + bool inUse() { return !_bondPolicyTemplates.empty() || _defaultBondingPolicy; } + + /** + * @param basePolicyName Bonding policy name (See ZeroTierOne.h) + * @return The bonding policy code for a given human-readable bonding policy name + */ + static int getPolicyCodeByStr(const std::string& basePolicyName) + { + if (basePolicyName == "active-backup") { return 1; } + if (basePolicyName == "broadcast") { return 2; } + if (basePolicyName == "balance-rr") { return 3; } + if (basePolicyName == "balance-xor") { return 4; } + if (basePolicyName == "balance-aware") { return 5; } + return 0; // "none" + } + + /** + * @param policy Bonding policy code (See ZeroTierOne.h) + * @return The human-readable name for the given bonding policy code + */ + static std::string getPolicyStrByCode(int policy) + { + if (policy == 1) { return "active-backup"; } + if (policy == 2) { return "broadcast"; } + if (policy == 3) { return "balance-rr"; } + if (policy == 4) { return "balance-xor"; } + if (policy == 5) { return "balance-aware"; } + return "none"; + } + + /** + * Sets the default bonding policy for new or undefined bonds. + * + * @param bp Bonding policy + */ + void setBondingLayerDefaultPolicy(uint8_t bp) { _defaultBondingPolicy = bp; } + + /** + * Sets the default (custom) bonding policy for new or undefined bonds.
+ * + * @param alias Human-readable string alias for bonding policy + */ + void setBondingLayerDefaultPolicyStr(std::string alias) { _defaultBondingPolicyStr = alias; } + + /** + * @return The default bonding policy + */ + static int defaultBondingPolicy() { return _defaultBondingPolicy; } + + /** + * Add a user-defined slave to a given bonding policy. + * + * @param policyAlias User-defined custom name for variant of bonding policy + * @param slave Pointer to new slave definition + */ + void addCustomSlave(std::string& policyAlias, SharedPtr slave); + + /** + * Add a user-defined bonding policy that is based on one of the standard types. + * + * @param newBond Pointer to custom Bond object + * @return Whether a uniquely-named custom policy was successfully added + */ + bool addCustomPolicy(const SharedPtr& newBond); + + /** + * Assigns a specific bonding policy + * + * @param identity + * @param policyAlias + * @return + */ + bool assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias); + + /** + * Add a new bond to the bond controller. + * + * @param renv Runtime environment + * @param peer Remote peer that this bond services + * @return A pointer to the newly created Bond + */ + SharedPtr createTransportTriggeredBond(const RuntimeEnvironment *renv, const SharedPtr& peer); + + /** + * Periodically perform maintenance tasks for the bonding layer. + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param now Current time + */ + void processBackgroundTasks(void *tPtr, int64_t now); + + /** + * Gets a reference to a physical slave definition given a policy alias and a local socket. + * + * @param policyAlias Policy in use + * @param localSocket Local source socket + * @return Physical slave definition + */ + SharedPtr getSlaveBySocket(const std::string& policyAlias, uint64_t localSocket); + + /** + * Gets a reference to a physical slave definition given its human-readable system name. 
+ * + * @param policyAlias Policy in use + * @param ifname Alphanumeric human-readable name + * @return Physical slave definition + */ + SharedPtr getSlaveByName(const std::string& policyAlias, const std::string& ifname); + + /** + * @param ifname Name of interface that we want to know if we can bind to + */ + bool allowedToBind(const std::string& ifname); + + uint64_t getBondStartTime() { return bondStartTime; } + +private: + + Phy *_phy; + const RuntimeEnvironment *RR; + + Mutex _bonds_m; + Mutex _slaves_m; + + /** + * The last time that the bond controller updated the set of bonds. + */ + uint64_t _lastBackgroundBondControlTaskCheck; + + /** + * The minimum monitoring interval among all paths in this bond. + */ + static int _minReqPathMonitorInterval; + + /** + * The default bonding policy used for new bonds unless otherwise specified. + */ + static uint8_t _defaultBondingPolicy; + + /** + * The default bonding policy used for new bonds unless otherwise specified. + */ + std::string _defaultBondingPolicyStr; + + /** + * All currently active bonds. + */ + std::map > _bonds; + + /** + * Map of peers to custom bonding policies + */ + std::map _policyTemplateAssignments; + + /** + * User-defined bonding policies (can be assigned to a peer) + */ + std::map > _bondPolicyTemplates; + + /** + * Set of slaves defined for a given bonding policy + */ + std::map > > _slaveDefinitions; + + /** + * Set of slave objects mapped to their physical interfaces + */ + std::map > > _interfaceToSlaveMap; + + // TODO: Remove + uint64_t bondStartTime; +}; + +} // namespace ZeroTier + +#endif \ No newline at end of file diff --git a/node/Constants.hpp b/node/Constants.hpp index 4b88798df..c27e02319 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. 
* - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -192,7 +192,7 @@ /** * Minimum delay between timer task checks to prevent thrashing */ -#define ZT_CORE_TIMER_TASK_GRANULARITY 500 +#define ZT_CORE_TIMER_TASK_GRANULARITY 60 /** * How often Topology::clean() and Network::clean() and similar are called, in ms @@ -253,203 +253,6 @@ */ #define ZT_LOCAL_CONF_FILE_CHECK_INTERVAL 10000 -/** - * How long before we consider a flow to be dead and remove it from the balancing - * policy's list. - */ -#define ZT_MULTIPATH_FLOW_EXPIRATION 60000 - -/** - * How frequently to check for changes to the system's network interfaces. When - * the service decides to use this constant it's because we want to react more - * quickly to new interfaces that pop up or go down. - */ -#define ZT_MULTIPATH_BINDER_REFRESH_PERIOD 5000 - -/** - * Packets are only used for QoS/ACK statistical sampling if their packet ID is divisible by - * this integer. This is to provide a mechanism for both peers to agree on which packets need - * special treatment without having to exchange information. Changing this value would be - * a breaking change and would necessitate a protocol version upgrade. Since each incoming and - * outgoing packet ID is checked against this value its evaluation is of the form: - * (id & (divisor - 1)) == 0, thus the divisor must be a power of 2. - * - * This value is set at (16) so that given a normally-distributed RNG output we will sample - * 1/16th (or ~6.25%) of packets. - */ -#define ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR 0x10 - -/** - * Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processing cutoff - */ -#define ZT_PATH_QOS_ACK_CUTOFF_TIME 30000 - -/** - * Maximum number of VERB_QOS_MEASUREMENT and VERB_ACK packets allowed to be - * processed within cutoff time. 
Separate totals are kept for each type but - * the limit is the same for both. - * - * This limits how often this peer will compute statistical estimates - * of various QoS measures from a VERB_QOS_MEASUREMENT or VERB_ACK packets to - * CUTOFF_LIMIT times per CUTOFF_TIME milliseconds per peer to prevent - * this from being useful for DOS amplification attacks. - */ -#define ZT_PATH_QOS_ACK_CUTOFF_LIMIT 128 - -/** - * Path choice history window size. This is used to keep track of which paths were - * previously selected so that we can maintain a target allocation over time. - */ -#define ZT_MULTIPATH_PROPORTION_WIN_SZ 128 - -/** - * How often we will sample packet latency. Should be at least greater than ZT_PING_CHECK_INVERVAL - * since we will record a 0 bit/s measurement if no valid latency measurement was made within this - * window of time. - */ -#define ZT_PATH_LATENCY_SAMPLE_INTERVAL (ZT_MULTIPATH_PEER_PING_PERIOD * 2) - -/** - * Interval used for rate-limiting the computation of path quality estimates. - */ -#define ZT_PATH_QUALITY_COMPUTE_INTERVAL 1000 - -/** - * Number of samples to consider when computing real-time path statistics - */ -#define ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ 128 - -/** - * Number of samples to consider when computing performing long-term path quality analysis. - * By default this value is set to ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ but can - * be set to any value greater than that to observe longer-term path quality behavior. 
- */ -#define ZT_PATH_QUALITY_METRIC_WIN_SZ ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ - -/** - * Maximum acceptable Packet Delay Variance (PDV) over a path - */ -#define ZT_PATH_MAX_PDV 1000 - -/** - * Maximum acceptable time interval between expectation and receipt of at least one ACK over a path - */ -#define ZT_PATH_MAX_AGE 30000 - -/** - * Maximum acceptable mean latency over a path - */ -#define ZT_PATH_MAX_MEAN_LATENCY 1000 - -/** - * How much each factor contributes to the "stability" score of a path - */ - -#if 0 -#define ZT_PATH_CONTRIB_PDV (1.5 / 3.0) -#define ZT_PATH_CONTRIB_LATENCY (0.0 / 3.0) -#define ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE (1.5 / 3.0) -#else -#define ZT_PATH_CONTRIB_PDV (1.0 / 3.0) -#define ZT_PATH_CONTRIB_LATENCY (1.0 / 3.0) -#define ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE (1.0 / 3.0) -#endif - -/** - * How much each factor contributes to the "quality" score of a path - */ -#if 0 -#define ZT_PATH_CONTRIB_STABILITY (2.00 / 3.0) -#define ZT_PATH_CONTRIB_THROUGHPUT (0.50 / 3.0) -#define ZT_PATH_CONTRIB_SCOPE (0.50 / 3.0) -#else -#define ZT_PATH_CONTRIB_STABILITY (0.75 / 3.0) -#define ZT_PATH_CONTRIB_THROUGHPUT (1.50 / 3.0) -#define ZT_PATH_CONTRIB_SCOPE (0.75 / 3.0) -#endif - -/** - * How often a QoS packet is sent - */ -#define ZT_PATH_QOS_INTERVAL 3000 - -/** - * Min and max acceptable sizes for a VERB_QOS_MEASUREMENT packet - */ -#define ZT_PATH_MIN_QOS_PACKET_SZ 8 + 1 -#define ZT_PATH_MAX_QOS_PACKET_SZ 1400 - -/** - * How many ID:sojourn time pairs in a single QoS packet - */ -#define ZT_PATH_QOS_TABLE_SIZE ((ZT_PATH_MAX_QOS_PACKET_SZ * 8) / (64 + 16)) - -/** - * Maximum number of outgoing packets we monitor for QoS information - */ -#define ZT_PATH_MAX_OUTSTANDING_QOS_RECORDS 128 - -/** - * Timeout for QoS records - */ -#define ZT_PATH_QOS_TIMEOUT (ZT_PATH_QOS_INTERVAL * 2) - -/** - * How often the service tests the path throughput - */ -#define ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL (ZT_PATH_ACK_INTERVAL * 8) - -/** - * 
Minimum amount of time between each ACK packet - */ -#define ZT_PATH_ACK_INTERVAL 1000 - -/** - * How often an aggregate link statistics report is emitted into this tracing system - */ -#define ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL 30000 - -/** - * How much an aggregate link's component paths can vary from their target allocation - * before the link is considered to be in a state of imbalance. - */ -#define ZT_PATH_IMBALANCE_THRESHOLD 0.20 - -/** - * Max allowable time spent in any queue - */ -#define ZT_QOS_TARGET 5 // ms - -/** - * Time period where the time spent in the queue by a packet should fall below - * target at least once - */ -#define ZT_QOS_INTERVAL 100 // ms - -/** - * The number of bytes that each queue is allowed to send during each DRR cycle. - * This approximates a single-byte-based fairness queuing scheme - */ -#define ZT_QOS_QUANTUM ZT_DEFAULT_MTU - -/** - * The maximum total number of packets that can be queued among all - * active/inactive, old/new queues - */ -#define ZT_QOS_MAX_ENQUEUED_PACKETS 1024 - -/** - * Number of QoS queues (buckets) - */ -#define ZT_QOS_NUM_BUCKETS 9 - -/** - * All unspecified traffic is put in this bucket. Anything in a bucket with a smaller - * value is de-prioritized. Anything in a bucket with a higher value is prioritized over - * other traffic. - */ -#define ZT_QOS_DEFAULT_BUCKET 0 - /** * How frequently to send heartbeats over in-use paths */ @@ -465,21 +268,6 @@ */ #define ZT_PEER_PING_PERIOD 60000 -/** - * Delay between full-fledge pings of directly connected peers. - * - * With multipath bonding enabled ping peers more often to measure - * packet loss and latency. This uses more bandwidth so is disabled - * by default to avoid increasing idle bandwidth use for regular - * links. 
- */ -#define ZT_MULTIPATH_PEER_PING_PERIOD (ZT_PEER_PING_PERIOD / 10) - -/** - * How long before we consider a path to be dead in rapid fail-over scenarios - */ -#define ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD 250 - /** * Paths are considered expired if they have not sent us a real packet in this long */ @@ -490,6 +278,210 @@ */ #define ZT_PEER_EXPIRED_PATH_TRIAL_PERIOD (ZT_PEER_PING_PERIOD * 10) +/** + * Outgoing packets are only used for QoS/ACK statistical sampling if their + * packet ID is divisible by this integer. This is to provide a mechanism for + * both peers to agree on which packets need special treatment without having + * to exchange information. Changing this value would be a breaking change and + * would necessitate a protocol version upgrade. Since each incoming and + * outgoing packet ID is checked against this value its evaluation is of the + * form: + * + * (id & (divisor - 1)) == 0, thus the divisor must be a power of 2. + * + * This value is set at (16) so that given a normally-distributed RNG output + * we will sample 1/16th (or ~6.25%) of packets. + */ +#define ZT_QOS_ACK_DIVISOR 0x2 + +/** + * Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processing cutoff + */ +#define ZT_QOS_ACK_CUTOFF_TIME 30000 + +/** + * Maximum number of VERB_QOS_MEASUREMENT and VERB_ACK packets allowed to be + * processed within cutoff time. Separate totals are kept for each type but + * the limit is the same for both. + * + * This limits how often this peer will compute statistical estimates + * of various QoS measures from a VERB_QOS_MEASUREMENT or VERB_ACK packets to + * CUTOFF_LIMIT times per CUTOFF_TIME milliseconds per peer to prevent + * this from being useful for DOS amplification attacks. 
+ */ +#define ZT_QOS_ACK_CUTOFF_LIMIT 128 + +/** + * Minimum acceptable size for a VERB_QOS_MEASUREMENT packet + */ +#define ZT_QOS_MIN_PACKET_SIZE (8 + 1) + +/** + * Maximum acceptable size for a VERB_QOS_MEASUREMENT packet + */ +#define ZT_QOS_MAX_PACKET_SIZE 1400 + +/** + * How many ID:sojourn time pairs are in a single QoS packet + */ +#define ZT_QOS_TABLE_SIZE ((ZT_QOS_MAX_PACKET_SIZE * 8) / (64 + 16)) + +/** + * Maximum number of outgoing packets we monitor for QoS information + */ +#define ZT_QOS_MAX_OUTSTANDING_RECORDS (1024*16) + +/** + * Interval used for rate-limiting the computation of path quality estimates. + */ +#define ZT_QOS_COMPUTE_INTERVAL 1000 + +/** + * Number of samples to consider when processing real-time path statistics + */ +#define ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE 32 + +/** + * Number of samples to consider when processing long-term trends + */ +#define ZT_QOS_LONGTERM_SAMPLE_WIN_SIZE (ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE * 4) + +/** + * Max allowable time spent in any queue (in ms) + */ +#define ZT_AQM_TARGET 5 + +/** + * Time period where the time spent in the queue by a packet should fall below. + * target at least once. (in ms) + */ +#define ZT_AQM_INTERVAL 100 + +/** + * The number of bytes that each queue is allowed to send during each DRR cycle. + * This approximates a single-byte-based fairness queuing scheme. + */ +#define ZT_AQM_QUANTUM ZT_DEFAULT_MTU + +/** + * The maximum total number of packets that can be queued among all + * active/inactive, old/new queues. + */ +#define ZT_AQM_MAX_ENQUEUED_PACKETS 1024 + +/** + * Number of QoS queues (buckets) + */ +#define ZT_AQM_NUM_BUCKETS 9 + +/** + * All unspecified traffic is put in this bucket. Anything in a bucket with a + * smaller value is deprioritized. Anything in a bucket with a higher value is + prioritized over other traffic. + */ +#define ZT_AQM_DEFAULT_BUCKET 0 + +/** + * How long before we consider a path to be dead in the general sense. 
This is + * used while searching for default or alternative paths to try in the absence + * of direct guidance from the user or a selection policy. + */ +#define ZT_MULTIPATH_DEFAULT_FAILOVER_INTERVAL 10000 + +/** + * How often flows are evaluated + */ +#define ZT_MULTIPATH_FLOW_CHECK_INTERVAL 10000 + +/** + * How long before we consider a flow to be dead and remove it from the + * policy's list. + */ +#define ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL 30000 + +/** + * How often a flow's statistical counters are reset + */ +#define ZT_FLOW_STATS_RESET_INTERVAL ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL + +/** + * Maximum number of flows allowed before we start forcibly forgetting old ones + */ +#define ZT_FLOW_MAX_COUNT (1024*64) + +/** + * How often flows are rebalanced across slave interfaces (if at all) + */ +#define ZT_FLOW_MIN_REBALANCE_INTERVAL 5000 + +/** + * How often flows are rebalanced across slave interfaces (if at all) + */ +#define ZT_FLOW_REBALANCE_INTERVAL 5000 + +/** + * A defensive timer to prevent path quality metrics from being + * processed too often. + */ +#define ZT_BOND_BACKGROUND_TASK_MIN_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY + +/** + * How often a bonding policy's background tasks are processed, + * some need more frequent attention than others. + */ +#define ZT_MULTIPATH_ACTIVE_BACKUP_CHECK_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY + +/** + * Minimum amount of time (since a previous transition) before the active-backup bonding + * policy is allowed to transition to a different slave. Only valid for active-backup. + */ +#define ZT_MULTIPATH_MIN_ACTIVE_BACKUP_AUTOFLOP_INTERVAL 10000 + +/** + * How often a peer checks that incoming (and outgoing) traffic on a bonded link is + * appropriately paired. 
+ */ +#define ZT_PATH_NEGOTIATION_CHECK_INTERVAL 15000 + +/** + * Time horizon for path negotiation paths cutoff + */ +#define ZT_PATH_NEGOTIATION_CUTOFF_TIME 60000 + +/** + * Maximum number of path negotiations within cutoff time + * + * This limits response to PATH_NEGOTIATION to CUTOFF_LIMIT responses + * per CUTOFF_TIME milliseconds per peer to prevent this from being + * useful for DOS amplification attacks. + */ +#define ZT_PATH_NEGOTIATION_CUTOFF_LIMIT 8 + +/** + * How many times a peer will attempt to petition another peer to synchronize its + * traffic to the same path before giving up and surrendering to the other peer's preference. + */ +#define ZT_PATH_NEGOTIATION_TRY_COUNT 3 + +/** + * How much greater the quality of a path should be before an + * optimization procedure triggers a switch. + */ +#define ZT_MULTIPATH_ACTIVE_BACKUP_OPTIMIZE_MIN_THRESHOLD 0.10 + +/** + * Artificially inflates the failover score for paths which meet + * certain non-performance-related policy ranking criteria. + */ +#define ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED 500 +#define ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY 1000 +#define ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED 5000 + +/** + * An indicator that no flow is to be associated with the given packet + */ +#define ZT_QOS_NO_FLOW -1 + /** * Timeout for overall peer activity (measured from last receive) */ @@ -557,20 +549,32 @@ */ #define ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH 120000 -/** - * Interval between direct path pushes in milliseconds if we are currently in multipath - * mode. In this mode the distinction between ZT_DIRECT_PATH_PUSH_INTERVAL and - * ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH does not exist since we want to inform other - * peers of this peer's new link/address as soon as possible so that both peers can - * begin forming an aggregated link. 
- */ -#define ZT_DIRECT_PATH_PUSH_INTERVAL_MULTIPATH (ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH / 16) - /** * Time horizon for push direct paths cutoff */ #define ZT_PUSH_DIRECT_PATHS_CUTOFF_TIME 30000 +/** + * Drainage constants for VERB_ECHO rate-limiters + */ +#define ZT_ECHO_CUTOFF_LIMIT ((1000 / ZT_CORE_TIMER_TASK_GRANULARITY) * ZT_MAX_PEER_NETWORK_PATHS) +#define ZT_ECHO_DRAINAGE_DIVISOR (1000 / ZT_ECHO_CUTOFF_LIMIT) + +/** + * Drainage constants for VERB_QOS rate-limiters + */ +#define ZT_QOS_CUTOFF_LIMIT ((1000 / ZT_CORE_TIMER_TASK_GRANULARITY) * ZT_MAX_PEER_NETWORK_PATHS) +#define ZT_QOS_DRAINAGE_DIVISOR (1000 / ZT_QOS_CUTOFF_LIMIT) + +/** + * Drainage constants for VERB_ACK rate-limiters + */ +#define ZT_ACK_CUTOFF_LIMIT 128 +#define ZT_ACK_DRAINAGE_DIVISOR (1000 / ZT_ACK_CUTOFF_LIMIT) + +#define ZT_MULTIPATH_DEFAULT_REFRCTORY_PERIOD 8000 +#define ZT_MULTIPATH_MAX_REFRACTORY_PERIOD 600000 + /** * Maximum number of direct path pushes within cutoff time * diff --git a/node/Flow.hpp b/node/Flow.hpp new file mode 100644 index 000000000..cb8c3e4aa --- /dev/null +++ b/node/Flow.hpp @@ -0,0 +1,123 @@ +/* + * Copyright (c)2013-2020 ZeroTier, Inc. + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file in the project's root directory. + * + * Change Date: 2024-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2.0 of the Apache License. + */ +/****/ + +#ifndef ZT_FLOW_HPP +#define ZT_FLOW_HPP + +#include "Path.hpp" +#include "SharedPtr.hpp" + +namespace ZeroTier { + +/** + * A protocol flow that is identified by the origin and destination port. 
+ */ +struct Flow +{ + /** + * @param flowId Given flow ID + * @param now Current time + */ + Flow(int32_t flowId, int64_t now) : + _flowId(flowId), + _bytesInPerUnitTime(0), + _bytesOutPerUnitTime(0), + _lastActivity(now), + _lastPathReassignment(0), + _assignedPath(SharedPtr()) + {} + + /** + * Reset flow statistics + */ + void resetByteCounts() + { + _bytesInPerUnitTime = 0; + _bytesOutPerUnitTime = 0; + } + + /** + * @return The Flow's ID + */ + int32_t id() { return _flowId; } + + /** + * @return Number of incoming bytes processed on this flow per unit time + */ + int64_t bytesInPerUnitTime() { return _bytesInPerUnitTime; } + + /** + * Record number of incoming bytes on this flow + * + * @param bytes Number of incoming bytes + */ + void recordIncomingBytes(uint64_t bytes) { _bytesInPerUnitTime += bytes; } + + /** + * @return Number of outgoing bytes processed on this flow per unit time + */ + int64_t bytesOutPerUnitTime() { return _bytesOutPerUnitTime; } + + /** + * Record number of outgoing bytes on this flow + * + * @param bytes + */ + void recordOutgoingBytes(uint64_t bytes) { _bytesOutPerUnitTime += bytes; } + + /** + * @return The total number of bytes processed on this flow + */ + uint64_t totalBytes() { return _bytesInPerUnitTime + _bytesOutPerUnitTime; } + + /** + * How long since a packet was sent or received in this flow + * + * @param now Current time + * @return The age of the flow in terms of last recorded activity + */ + int64_t age(int64_t now) { return now - _lastActivity; } + + /** + * Record that traffic was processed on this flow at the given time. 
+ * + * @param now Current time + */ + void updateActivity(int64_t now) { _lastActivity = now; } + + /** + * @return Path assigned to this flow + */ + SharedPtr assignedPath() { return _assignedPath; } + + /** + * @param path Assigned path over which this flow should be handled + */ + void assignPath(const SharedPtr &path, int64_t now) { + _assignedPath = path; + _lastPathReassignment = now; + } + + AtomicCounter __refCount; + + int32_t _flowId; + uint64_t _bytesInPerUnitTime; + uint64_t _bytesOutPerUnitTime; + int64_t _lastActivity; + int64_t _lastPathReassignment; + SharedPtr _assignedPath; +}; + +} // namespace ZeroTier + +#endif \ No newline at end of file diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index 331446ced..702c08090 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. 
@@ -35,10 +35,12 @@ #include "Tag.hpp" #include "Revocation.hpp" #include "Trace.hpp" +#include "Path.hpp" +#include "Bond.hpp" namespace ZeroTier { -bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) +bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr,int32_t flowId) { const Address sourceAddress(source()); @@ -67,7 +69,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) if (!trusted) { if (!dearmor(peer->key())) { RR->t->incomingPacketMessageAuthenticationFailure(tPtr,_path,packetId(),sourceAddress,hops(),"invalid MAC"); - _path->recordInvalidPacket(); + peer->recordIncomingInvalidPacket(_path); return true; } } @@ -78,11 +80,12 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) } const Packet::Verb v = verb(); + bool r = true; switch(v) { //case Packet::VERB_NOP: default: // ignore unknown verbs, but if they pass auth check they are "received" - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),v,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),v,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); break; case Packet::VERB_HELLO: r = _doHELLO(RR,tPtr,true); break; case Packet::VERB_ACK: r = _doACK(RR,tPtr,peer); break; @@ -91,8 +94,8 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) case Packet::VERB_OK: r = _doOK(RR,tPtr,peer); break; case Packet::VERB_WHOIS: r = _doWHOIS(RR,tPtr,peer); break; case Packet::VERB_RENDEZVOUS: r = _doRENDEZVOUS(RR,tPtr,peer); break; - case Packet::VERB_FRAME: r = _doFRAME(RR,tPtr,peer); break; - case Packet::VERB_EXT_FRAME: r = _doEXT_FRAME(RR,tPtr,peer); break; + case Packet::VERB_FRAME: r = _doFRAME(RR,tPtr,peer,flowId); break; + case Packet::VERB_EXT_FRAME: r = _doEXT_FRAME(RR,tPtr,peer,flowId); break; case Packet::VERB_ECHO: r = _doECHO(RR,tPtr,peer); break; case Packet::VERB_MULTICAST_LIKE: r = _doMULTICAST_LIKE(RR,tPtr,peer); break; case Packet::VERB_NETWORK_CREDENTIALS: 
r = _doNETWORK_CREDENTIALS(RR,tPtr,peer); break; @@ -103,6 +106,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) case Packet::VERB_PUSH_DIRECT_PATHS: r = _doPUSH_DIRECT_PATHS(RR,tPtr,peer); break; case Packet::VERB_USER_MESSAGE: r = _doUSER_MESSAGE(RR,tPtr,peer); break; case Packet::VERB_REMOTE_TRACE: r = _doREMOTE_TRACE(RR,tPtr,peer); break; + case Packet::VERB_PATH_NEGOTIATION_REQUEST: r = _doPATH_NEGOTIATION_REQUEST(RR,tPtr,peer); break; } if (r) { RR->node->statsLogVerb((unsigned int)v,(unsigned int)size()); @@ -113,9 +117,6 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) RR->sw->requestWhois(tPtr,RR->node->now(),sourceAddress); return false; } - } catch (int ztExcCode) { - RR->t->incomingPacketInvalid(tPtr,_path,packetId(),sourceAddress,hops(),verb(),"unexpected exception in tryDecode()"); - return true; } catch ( ... ) { RR->t->incomingPacketInvalid(tPtr,_path,packetId(),sourceAddress,hops(),verb(),"unexpected exception in tryDecode()"); return true; @@ -193,59 +194,59 @@ bool IncomingPacket::_doERROR(const RuntimeEnvironment *RR,void *tPtr,const Shar default: break; } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_ERROR,inRePacketId,inReVerb,false,networkId); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_ERROR,inRePacketId,inReVerb,false,networkId,ZT_QOS_NO_FLOW); return true; } bool IncomingPacket::_doACK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) { - if (!peer->rateGateACK(RR->node->now())) + SharedPtr bond = peer->bond(); + if (!bond || !bond->rateGateACK(RR->node->now())) { return true; + } /* Dissect incoming ACK packet. From this we can estimate current throughput of the path, establish known * maximums and detect packet loss. 
*/ - if (peer->localMultipathSupport()) { - int32_t ackedBytes; - if (payloadLength() != sizeof(ackedBytes)) { - return true; // ignore - } - memcpy(&ackedBytes, payload(), sizeof(ackedBytes)); - _path->receivedAck(RR->node->now(), Utils::ntoh(ackedBytes)); - peer->inferRemoteMultipathEnabled(); + int32_t ackedBytes; + if (payloadLength() != sizeof(ackedBytes)) { + return true; // ignore + } + memcpy(&ackedBytes, payload(), sizeof(ackedBytes)); + if (bond) { + bond->receivedAck(_path, RR->node->now(), Utils::ntoh(ackedBytes)); } - return true; } bool IncomingPacket::_doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) { - if (!peer->rateGateQoS(RR->node->now())) + SharedPtr bond = peer->bond(); + if (!bond || !bond->rateGateQoS(RR->node->now())) { return true; + } /* Dissect incoming QoS packet. From this we can compute latency values and their variance. * The latency variance is used as a measure of "jitter". */ - if (peer->localMultipathSupport()) { - if (payloadLength() > ZT_PATH_MAX_QOS_PACKET_SZ || payloadLength() < ZT_PATH_MIN_QOS_PACKET_SZ) { - return true; // ignore - } - const int64_t now = RR->node->now(); - uint64_t rx_id[ZT_PATH_QOS_TABLE_SIZE]; - uint16_t rx_ts[ZT_PATH_QOS_TABLE_SIZE]; - char *begin = (char *)payload(); - char *ptr = begin; - int count = 0; - int len = payloadLength(); - // Read packet IDs and latency compensation intervals for each packet tracked by this QoS packet - while (ptr < (begin + len) && (count < ZT_PATH_QOS_TABLE_SIZE)) { - memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t)); - ptr+=sizeof(uint64_t); - memcpy((void*)&rx_ts[count], ptr, sizeof(uint16_t)); - ptr+=sizeof(uint16_t); - count++; - } - _path->receivedQoS(now, count, rx_id, rx_ts); - peer->inferRemoteMultipathEnabled(); + if (payloadLength() > ZT_QOS_MAX_PACKET_SIZE || payloadLength() < ZT_QOS_MIN_PACKET_SIZE) { + return true; // ignore + } + const int64_t now = RR->node->now(); + uint64_t rx_id[ZT_QOS_TABLE_SIZE]; + uint16_t 
rx_ts[ZT_QOS_TABLE_SIZE]; + char *begin = (char *)payload(); + char *ptr = begin; + int count = 0; + unsigned int len = payloadLength(); + // Read packet IDs and latency compensation intervals for each packet tracked by this QoS packet + while (ptr < (begin + len) && (count < ZT_QOS_TABLE_SIZE)) { + memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t)); + ptr+=sizeof(uint64_t); + memcpy((void*)&rx_ts[count], ptr, sizeof(uint16_t)); + ptr+=sizeof(uint16_t); + count++; + } + if (bond) { + bond->receivedQoS(_path, now, count, rx_id, rx_ts); } - return true; } @@ -441,11 +442,12 @@ bool IncomingPacket::_doHELLO(const RuntimeEnvironment *RR,void *tPtr,const bool } outp.setAt(worldUpdateSizeAt,(uint16_t)(outp.size() - (worldUpdateSizeAt + 2))); + peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now); outp.armor(peer->key(),true); _path->send(RR,tPtr,outp.data(),outp.size(),now); peer->setRemoteVersion(protoVersion,vMajor,vMinor,vRevision); // important for this to go first so received() knows the version - peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_HELLO,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_HELLO,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } @@ -493,7 +495,10 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP } if (!hops()) { - _path->updateLatency((unsigned int)latency,RR->node->now()); + SharedPtr bond = peer->bond(); + if (!bond) { + _path->updateLatency((unsigned int)latency,RR->node->now()); + } } peer->setRemoteVersion(vProto,vMajor,vMinor,vRevision); @@ -522,8 +527,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP if (network) { const MulticastGroup mg(MAC(field(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_MAC,6),6),at(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_ADI)); const unsigned int count = at(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 4); - if 
(((ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 6) + (count * 5)) <= size()) - RR->mc->addMultiple(tPtr,RR->node->now(),networkId,mg,field(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 6,count * 5),count,at(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS)); + RR->mc->addMultiple(tPtr,RR->node->now(),networkId,mg,field(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 6,count * 5),count,at(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS)); } } break; @@ -556,7 +560,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP default: break; } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_OK,inRePacketId,inReVerb,false,networkId); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_OK,inRePacketId,inReVerb,false,networkId,ZT_QOS_NO_FLOW); return true; } @@ -591,7 +595,7 @@ bool IncomingPacket::_doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const Shar _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_WHOIS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_WHOIS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } @@ -615,13 +619,108 @@ bool IncomingPacket::_doRENDEZVOUS(const RuntimeEnvironment *RR,void *tPtr,const } } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } -bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) +// Returns true if packet appears valid; pos and proto will be set +static bool _ipv6GetPayload(const uint8_t *frameData,unsigned int frameLen,unsigned int &pos,unsigned int &proto) { + if (frameLen < 40) + return false; + pos = 40; + 
proto = frameData[6]; + while (pos <= frameLen) { + switch(proto) { + case 0: // hop-by-hop options + case 43: // routing + case 60: // destination options + case 135: // mobility options + if ((pos + 8) > frameLen) + return false; // invalid! + proto = frameData[pos]; + pos += ((unsigned int)frameData[pos + 1] * 8) + 8; + break; + + //case 44: // fragment -- we currently can't parse these and they are deprecated in IPv6 anyway + //case 50: + //case 51: // IPSec ESP and AH -- we have to stop here since this is encrypted stuff + default: + return true; + } + } + return false; // overflow == invalid +} + +bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer,int32_t flowId) +{ + int32_t _flowId = ZT_QOS_NO_FLOW; + SharedPtr bond = peer->bond(); + if (bond && bond->flowHashingEnabled()) { + if (size() > ZT_PROTO_VERB_EXT_FRAME_IDX_PAYLOAD) { + const unsigned int etherType = at(ZT_PROTO_VERB_FRAME_IDX_ETHERTYPE); + const unsigned int frameLen = size() - ZT_PROTO_VERB_FRAME_IDX_PAYLOAD; + const uint8_t *const frameData = reinterpret_cast(data()) + ZT_PROTO_VERB_FRAME_IDX_PAYLOAD; + + if (etherType == ZT_ETHERTYPE_IPV4 && (frameLen >= 20)) { + uint16_t srcPort = 0; + uint16_t dstPort = 0; + uint8_t proto = (reinterpret_cast(frameData)[9]); + const unsigned int headerLen = 4 * (reinterpret_cast(frameData)[0] & 0xf); + switch(proto) { + case 0x01: // ICMP + //flowId = 0x01; + break; + // All these start with 16-bit source and destination port in that order + case 0x06: // TCP + case 0x11: // UDP + case 0x84: // SCTP + case 0x88: // UDPLite + if (frameLen > (headerLen + 4)) { + unsigned int pos = headerLen + 0; + srcPort = (reinterpret_cast(frameData)[pos++]) << 8; + srcPort |= (reinterpret_cast(frameData)[pos]); + pos++; + dstPort = (reinterpret_cast(frameData)[pos++]) << 8; + dstPort |= (reinterpret_cast(frameData)[pos]); + _flowId = dstPort ^ srcPort ^ proto; + } + break; + } + } + + if (etherType == ZT_ETHERTYPE_IPV6 && (frameLen >= 
40)) { + uint16_t srcPort = 0; + uint16_t dstPort = 0; + unsigned int pos; + unsigned int proto; + _ipv6GetPayload((const uint8_t *)frameData, frameLen, pos, proto); + switch(proto) { + case 0x3A: // ICMPv6 + //flowId = 0x3A; + break; + // All these start with 16-bit source and destination port in that order + case 0x06: // TCP + case 0x11: // UDP + case 0x84: // SCTP + case 0x88: // UDPLite + if (frameLen > (pos + 4)) { + srcPort = (reinterpret_cast(frameData)[pos++]) << 8; + srcPort |= (reinterpret_cast(frameData)[pos]); + pos++; + dstPort = (reinterpret_cast(frameData)[pos++]) << 8; + dstPort |= (reinterpret_cast(frameData)[pos]); + _flowId = dstPort ^ srcPort ^ proto; + } + break; + default: + break; + } + } + } + } + const uint64_t nwid = at(ZT_PROTO_VERB_FRAME_IDX_NETWORK_ID); const SharedPtr network(RR->node->network(nwid)); bool trustEstablished = false; @@ -641,13 +740,12 @@ bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const Shar return false; } } - - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_FRAME,0,Packet::VERB_NOP,trustEstablished,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_FRAME,0,Packet::VERB_NOP,trustEstablished,nwid,_flowId); return true; } -bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) +bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer,int32_t flowId) { const uint64_t nwid = at(ZT_PROTO_VERB_EXT_FRAME_IDX_NETWORK_ID); const SharedPtr network(RR->node->network(nwid)); @@ -676,7 +774,7 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const const uint8_t *const frameData = (const uint8_t *)field(comLen + ZT_PROTO_VERB_EXT_FRAME_IDX_PAYLOAD,frameLen); if ((!from)||(from == network->mac())) { - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM 
is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay return true; } @@ -687,19 +785,19 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const network->learnBridgeRoute(from,peer->address()); } else { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"bridging not allowed (remote)"); - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay return true; } } else if (to != network->mac()) { if (to.isMulticast()) { if (network->config().multicastLimit == 0) { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"multicast disabled"); - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay return true; } } else if (!network->config().permitsBridging(RR->identity.address())) { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"bridging not allowed (local)"); - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay return true; } } @@ -715,13 +813,15 @@ 
bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const outp.append((uint8_t)Packet::VERB_EXT_FRAME); outp.append((uint64_t)packetId()); outp.append((uint64_t)nwid); + const int64_t now = RR->node->now(); + peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now); outp.armor(peer->key(),true); _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); } else { - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid,flowId); } return true; @@ -729,8 +829,10 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const bool IncomingPacket::_doECHO(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) { - if (!peer->rateGateEchoRequest(RR->node->now())) + uint64_t now = RR->node->now(); + if (!peer->rateGateEchoRequest(now)) { return true; + } const uint64_t pid = packetId(); Packet outp(peer->address(),RR->identity.address(),Packet::VERB_OK); @@ -738,10 +840,11 @@ bool IncomingPacket::_doECHO(const RuntimeEnvironment *RR,void *tPtr,const Share outp.append((uint64_t)pid); if (size() > ZT_PACKET_IDX_PAYLOAD) outp.append(reinterpret_cast(data()) + ZT_PACKET_IDX_PAYLOAD,size() - ZT_PACKET_IDX_PAYLOAD); + peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now); outp.armor(peer->key(),true); _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); - peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_ECHO,0,Packet::VERB_NOP,false,0); + 
peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_ECHO,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } @@ -767,7 +870,7 @@ bool IncomingPacket::_doMULTICAST_LIKE(const RuntimeEnvironment *RR,void *tPtr,c RR->mc->add(tPtr,now,nwid,MulticastGroup(MAC(field(ptr + 8,6),6),at(ptr + 14)),peer->address()); } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } @@ -889,7 +992,7 @@ bool IncomingPacket::_doNETWORK_CREDENTIALS(const RuntimeEnvironment *RR,void *t } } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CREDENTIALS,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CREDENTIALS,0,Packet::VERB_NOP,trustEstablished,(network) ? 
network->id() : 0,ZT_QOS_NO_FLOW); return true; } @@ -915,7 +1018,7 @@ bool IncomingPacket::_doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,void _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } - peer->received(tPtr,_path,hopCount,requestPacketId,payloadLength(),Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hopCount,requestPacketId,payloadLength(),Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP,false,nwid,ZT_QOS_NO_FLOW); return true; } @@ -931,12 +1034,14 @@ bool IncomingPacket::_doNETWORK_CONFIG(const RuntimeEnvironment *RR,void *tPtr,c outp.append((uint64_t)packetId()); outp.append((uint64_t)network->id()); outp.append((uint64_t)configUpdateId); + const int64_t now = RR->node->now(); + peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now); outp.armor(peer->key(),true); _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CONFIG,0,Packet::VERB_NOP,false,(network) ? network->id() : 0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CONFIG,0,Packet::VERB_NOP,false,(network) ? 
network->id() : 0,ZT_QOS_NO_FLOW); return true; } @@ -979,12 +1084,13 @@ bool IncomingPacket::_doMULTICAST_GATHER(const RuntimeEnvironment *RR,void *tPtr outp.append((uint32_t)mg.adi()); const unsigned int gatheredLocally = RR->mc->gather(peer->address(),nwid,mg,outp,gatherLimit); if (gatheredLocally > 0) { + peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now); outp.armor(peer->key(),true); _path->send(RR,tPtr,outp.data(),outp.size(),now); } } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP,trustEstablished,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP,trustEstablished,nwid,ZT_QOS_NO_FLOW); return true; } @@ -1032,19 +1138,19 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, if (network->config().multicastLimit == 0) { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,from,to.mac(),"multicast disabled"); - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid,ZT_QOS_NO_FLOW); return true; } if ((frameLen > 0)&&(frameLen <= ZT_MAX_MTU)) { if (!to.mac().isMulticast()) { RR->t->incomingPacketInvalid(tPtr,_path,packetId(),source(),hops(),Packet::VERB_MULTICAST_FRAME,"destination not multicast"); - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); // trustEstablished because COM is okay return true; } if ((!from)||(from.isMulticast())||(from == network->mac())) { 
RR->t->incomingPacketInvalid(tPtr,_path,packetId(),source(),hops(),Packet::VERB_MULTICAST_FRAME,"invalid source MAC"); - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); // trustEstablished because COM is okay return true; } @@ -1058,7 +1164,7 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, network->learnBridgeRoute(from,peer->address()); } else { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,from,to.mac(),"bridging not allowed (remote)"); - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); // trustEstablished because COM is okay return true; } } @@ -1076,12 +1182,14 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, outp.append((uint32_t)to.adi()); outp.append((unsigned char)0x02); // flag 0x02 = contains gather results if (RR->mc->gather(peer->address(),nwid,to,outp,gatherLimit)) { + const int64_t now = RR->node->now(); + peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now); outp.armor(peer->key(),true); _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); } else { _sendErrorNeedCredentials(RR,tPtr,peer,nwid); return false; 
@@ -1094,9 +1202,8 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt { const int64_t now = RR->node->now(); - // First, subject this to a rate limit if (!peer->rateGatePushDirectPaths(now)) { - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } @@ -1108,8 +1215,6 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt unsigned int ptr = ZT_PACKET_IDX_PAYLOAD + 2; while (count--) { // if ptr overflows Buffer will throw - // TODO: some flags are not yet implemented - unsigned int flags = (*this)[ptr++]; unsigned int extLen = at(ptr); ptr += 2; ptr += extLen; // unused right now @@ -1132,6 +1237,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt } } break; case 6: { + const InetAddress a(field(ptr,16),16,at(ptr + 16)); if ( ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget @@ -1149,7 +1255,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt ptr += addrLen; } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } @@ -1165,7 +1271,7 @@ bool IncomingPacket::_doUSER_MESSAGE(const RuntimeEnvironment *RR,void *tPtr,con RR->node->postEvent(tPtr,ZT_EVENT_USER_MESSAGE,reinterpret_cast(&um)); } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_USER_MESSAGE,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_USER_MESSAGE,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } @@ -1189,11 +1295,29 @@ bool 
IncomingPacket::_doREMOTE_TRACE(const RuntimeEnvironment *RR,void *tPtr,con } } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_REMOTE_TRACE,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_REMOTE_TRACE,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } +bool IncomingPacket::_doPATH_NEGOTIATION_REQUEST(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) +{ + uint64_t now = RR->node->now(); + SharedPtr bond = peer->bond(); + if (!bond || !bond->rateGatePathNegotiation(now)) { + return true; + } + if (payloadLength() != sizeof(int16_t)) { + return true; + } + int16_t remoteUtility = 0; + memcpy(&remoteUtility, payload(), sizeof(int16_t)); + if (peer->bond()) { + peer->bond()->processIncomingPathNegotiationRequest(now, _path, Utils::ntoh(remoteUtility)); + } + return true; +} + void IncomingPacket::_sendErrorNeedCredentials(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer,const uint64_t nwid) { Packet outp(source(),RR->identity.address(),Packet::VERB_ERROR); diff --git a/node/IncomingPacket.hpp b/node/IncomingPacket.hpp index cf9a6474f..b1032d99d 100644 --- a/node/IncomingPacket.hpp +++ b/node/IncomingPacket.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. 
@@ -100,7 +100,7 @@ public: * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call * @return True if decoding and processing is complete, false if caller should try again */ - bool tryDecode(const RuntimeEnvironment *RR,void *tPtr); + bool tryDecode(const RuntimeEnvironment *RR,void *tPtr,int32_t flowId); /** * @return Time of packet receipt / start of decode @@ -117,8 +117,8 @@ private: bool _doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doRENDEZVOUS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); - bool _doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); - bool _doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); + bool _doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer,int32_t flowId); + bool _doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer,int32_t flowId); bool _doECHO(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doMULTICAST_LIKE(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doNETWORK_CREDENTIALS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); @@ -129,6 +129,7 @@ private: bool _doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doUSER_MESSAGE(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doREMOTE_TRACE(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); + bool _doPATH_NEGOTIATION_REQUEST(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); void _sendErrorNeedCredentials(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer,const uint64_t nwid); diff --git a/node/Node.cpp b/node/Node.cpp index 5330b74c2..e71c1424c 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. 
+ * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -48,6 +48,7 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64 _networks(8), _now(now), _lastPingCheck(0), + _lastGratuitousPingCheck(0), _lastHousekeepingRun(0), _lastMemoizedTraceSettings(0) { @@ -102,8 +103,9 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64 const unsigned long mcs = sizeof(Multicaster) + (((sizeof(Multicaster) & 0xf) != 0) ? (16 - (sizeof(Multicaster) & 0xf)) : 0); const unsigned long topologys = sizeof(Topology) + (((sizeof(Topology) & 0xf) != 0) ? (16 - (sizeof(Topology) & 0xf)) : 0); const unsigned long sas = sizeof(SelfAwareness) + (((sizeof(SelfAwareness) & 0xf) != 0) ? (16 - (sizeof(SelfAwareness) & 0xf)) : 0); + const unsigned long bc = sizeof(BondController) + (((sizeof(BondController) & 0xf) != 0) ? (16 - (sizeof(BondController) & 0xf)) : 0); - m = reinterpret_cast(::malloc(16 + ts + sws + mcs + topologys + sas)); + m = reinterpret_cast(::malloc(16 + ts + sws + mcs + topologys + sas + bc)); if (!m) throw std::bad_alloc(); RR->rtmem = m; @@ -118,12 +120,15 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64 RR->topology = new (m) Topology(RR,tptr); m += topologys; RR->sa = new (m) SelfAwareness(RR); + m += sas; + RR->bc = new (m) BondController(RR); } catch ( ... 
) { if (RR->sa) RR->sa->~SelfAwareness(); if (RR->topology) RR->topology->~Topology(); if (RR->mc) RR->mc->~Multicaster(); if (RR->sw) RR->sw->~Switch(); if (RR->t) RR->t->~Trace(); + if (RR->bc) RR->bc->~BondController(); ::free(m); throw; } @@ -142,6 +147,7 @@ Node::~Node() if (RR->mc) RR->mc->~Multicaster(); if (RR->sw) RR->sw->~Switch(); if (RR->t) RR->t->~Trace(); + if (RR->bc) RR->bc->~BondController(); ::free(RR->rtmem); } @@ -246,9 +252,23 @@ ZT_ResultCode Node::processBackgroundTasks(void *tptr,int64_t now,volatile int64 _now = now; Mutex::Lock bl(_backgroundTasksLock); + + unsigned long bondCheckInterval = ZT_CORE_TIMER_TASK_GRANULARITY; + if (RR->bc->inUse()) { + // Gratuitously ping active peers so that QoS metrics have enough data to work with (if active path monitoring is enabled) + bondCheckInterval = std::min(std::max(RR->bc->minReqPathMonitorInterval(), ZT_CORE_TIMER_TASK_GRANULARITY), ZT_PING_CHECK_INVERVAL); + if ((now - _lastGratuitousPingCheck) >= bondCheckInterval) { + Hashtable< Address,std::vector > alwaysContact; + _PingPeersThatNeedPing pfunc(RR,tptr,alwaysContact,now); + RR->topology->eachPeer<_PingPeersThatNeedPing &>(pfunc); + _lastGratuitousPingCheck = now; + } + RR->bc->processBackgroundTasks(tptr, now); + } + unsigned long timeUntilNextPingCheck = ZT_PING_CHECK_INVERVAL; const int64_t timeSinceLastPingCheck = now - _lastPingCheck; - if (timeSinceLastPingCheck >= ZT_PING_CHECK_INVERVAL) { + if (timeSinceLastPingCheck >= timeUntilNextPingCheck) { try { _lastPingCheck = now; @@ -354,7 +374,7 @@ ZT_ResultCode Node::processBackgroundTasks(void *tptr,int64_t now,volatile int64 } try { - *nextBackgroundTaskDeadline = now + (int64_t)std::max(std::min(timeUntilNextPingCheck,RR->sw->doTimerTasks(tptr,now)),(unsigned long)ZT_CORE_TIMER_TASK_GRANULARITY); + *nextBackgroundTaskDeadline = now + (int64_t)std::max(std::min(bondCheckInterval,std::min(timeUntilNextPingCheck,RR->sw->doTimerTasks(tptr,now))),(unsigned 
long)ZT_CORE_TIMER_TASK_GRANULARITY); } catch ( ... ) { return ZT_RESULT_FATAL_ERROR_INTERNAL; } @@ -461,7 +481,7 @@ ZT_PeerList *Node::peers() const for(std::vector< std::pair< Address,SharedPtr > >::iterator pi(peers.begin());pi!=peers.end();++pi) { ZT_Peer *p = &(pl->peers[pl->peerCount++]); p->address = pi->second->address().toInt(); - p->hadAggregateLink = 0; + p->isBonded = 0; if (pi->second->remoteVersionKnown()) { p->versionMajor = pi->second->remoteVersionMajor(); p->versionMinor = pi->second->remoteVersionMinor(); @@ -478,28 +498,24 @@ ZT_PeerList *Node::peers() const std::vector< SharedPtr > paths(pi->second->paths(_now)); SharedPtr bestp(pi->second->getAppropriatePath(_now,false)); - p->hadAggregateLink |= pi->second->hasAggregateLink(); p->pathCount = 0; for(std::vector< SharedPtr >::iterator path(paths.begin());path!=paths.end();++path) { memcpy(&(p->paths[p->pathCount].address),&((*path)->address()),sizeof(struct sockaddr_storage)); + //memcpy(&(p->paths[p->pathCount].ifname,&((*path)->slave()),32);) + p->paths[p->pathCount].localSocket = (*path)->localSocket(); p->paths[p->pathCount].lastSend = (*path)->lastOut(); p->paths[p->pathCount].lastReceive = (*path)->lastIn(); p->paths[p->pathCount].trustedPathId = RR->topology->getOutboundPathTrust((*path)->address()); p->paths[p->pathCount].expired = 0; p->paths[p->pathCount].preferred = ((*path) == bestp) ? 
1 : 0; - p->paths[p->pathCount].latency = (float)(*path)->latency(); - p->paths[p->pathCount].packetDelayVariance = (*path)->packetDelayVariance(); - p->paths[p->pathCount].throughputDisturbCoeff = (*path)->throughputDisturbanceCoefficient(); - p->paths[p->pathCount].packetErrorRatio = (*path)->packetErrorRatio(); - p->paths[p->pathCount].packetLossRatio = (*path)->packetLossRatio(); - p->paths[p->pathCount].stability = (*path)->lastComputedStability(); - p->paths[p->pathCount].throughput = (*path)->meanThroughput(); - p->paths[p->pathCount].maxThroughput = (*path)->maxLifetimeThroughput(); - p->paths[p->pathCount].allocation = (float)(*path)->allocation() / (float)255; - p->paths[p->pathCount].ifname = (*path)->getName(); - + //p->paths[p->pathCount].age = (*path)->age(_now); + p->paths[p->pathCount].scope = (*path)->ipScope(); ++p->pathCount; } + if (pi->second->bond()) { + p->isBonded = pi->second->bond(); + p->bondingPolicy = pi->second->bond()->getPolicy(); + } } return pl; diff --git a/node/Node.hpp b/node/Node.hpp index 21d49f515..6461e4cd6 100644 --- a/node/Node.hpp +++ b/node/Node.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. 
@@ -34,6 +34,7 @@ #include "Salsa20.hpp" #include "NetworkController.hpp" #include "Hashtable.hpp" +#include "BondController.hpp" // Bit mask for "expecting reply" hash #define ZT_EXPECTING_REPLIES_BUCKET_MASK1 255 @@ -186,6 +187,8 @@ public: inline const Identity &identity() const { return _RR.identity; } + inline BondController *bondController() const { return _RR.bc; } + /** * Register that we are expecting a reply to a packet ID * @@ -247,9 +250,6 @@ public: inline const Address &remoteTraceTarget() const { return _remoteTraceTarget; } inline Trace::Level remoteTraceLevel() const { return _remoteTraceLevel; } - inline void setMultipathMode(uint8_t mode) { _multipathMode = mode; } - inline uint8_t getMultipathMode() { return _multipathMode; } - inline bool localControllerHasAuthorized(const int64_t now,const uint64_t nwid,const Address &addr) const { _localControllerAuthorizations_m.lock(); @@ -306,10 +306,9 @@ private: Address _remoteTraceTarget; enum Trace::Level _remoteTraceLevel; - uint8_t _multipathMode; - volatile int64_t _now; int64_t _lastPingCheck; + int64_t _lastGratuitousPingCheck; int64_t _lastHousekeepingRun; int64_t _lastMemoizedTraceSettings; volatile int64_t _prngState[2]; diff --git a/node/Packet.cpp b/node/Packet.cpp index 25006416a..381864a45 100644 --- a/node/Packet.cpp +++ b/node/Packet.cpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. diff --git a/node/Packet.hpp b/node/Packet.hpp index 53a1883ce..ca789db81 100644 --- a/node/Packet.hpp +++ b/node/Packet.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. 
+ * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -931,13 +931,13 @@ public: * * Upon receipt of this packet, the local peer will verify that the correct * number of bytes were received by the remote peer. If these values do - * not agree that could be an indicator of packet loss. + * not agree that could be an indication of packet loss. * * Additionally, the local peer knows the interval of time that has * elapsed since the last received ACK. With this information it can compute * a rough estimate of the current throughput. * - * This is sent at a maximum rate of once per every ZT_PATH_ACK_INTERVAL + * This is sent at a maximum rate of once per every ZT_QOS_ACK_INTERVAL */ VERB_ACK = 0x12, @@ -963,7 +963,8 @@ public: * measure of the amount of time between when a packet was received and the * egress time of its tracking QoS packet. * - * This is sent at a maximum rate of once per every ZT_PATH_QOS_INTERVAL + * This is sent at a maximum rate of once per every + * ZT_QOS_MEASUREMENT_INTERVAL */ VERB_QOS_MEASUREMENT = 0x13, @@ -996,7 +997,34 @@ public: * node on startup. This is helpful in identifying traces from different * members of a cluster. */ - VERB_REMOTE_TRACE = 0x15 + VERB_REMOTE_TRACE = 0x15, + + /** + * A request to a peer to use a specific path in a multi-path scenario: + * <[2] 16-bit unsigned integer that encodes a path choice utility> + * + * This is sent when a node operating in multipath mode observes that + * its inbound and outbound traffic aren't going over the same path. 
The + * node will compute its perceived utility for using its chosen outbound + * path and send this to a peer in an attempt to petition it to send + * its traffic over this same path. + * + * Scenarios: + * + * (1) Remote peer utility is GREATER than ours: + * - Remote peer will refuse the petition and continue using current path + * (2) Remote peer utility is LESS than ours: + * - Remote peer will accept the petition and switch to our chosen path + * (3) Remote peer utility is EQUAL to our own: + * - To prevent confusion and flapping, both sides will agree to use the + * numerical values of their identities to determine which path to use. + * The peer with the greatest identity will win. + * + * If a node petitions a peer repeatedly with no effect it will regard + * that as a refusal by the remote peer, in this case if the utility is + * negligible it will voluntarily switch to the remote peer's chosen path. + */ + VERB_PATH_NEGOTIATION_REQUEST = 0x16 }; /** diff --git a/node/Path.hpp b/node/Path.hpp index fc5dbff16..9c54f718f 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. 
@@ -26,12 +26,11 @@ #include "SharedPtr.hpp" #include "AtomicCounter.hpp" #include "Utils.hpp" -#include "RingBuffer.hpp" #include "Packet.hpp" +#include "RingBuffer.hpp" +//#include "Bond.hpp" -#include "../osdep/Phy.hpp" - -#include "../include/ZeroTierDebug.h" +#include "../osdep/Slave.hpp" /** * Maximum return value of preferenceRank() @@ -48,7 +47,8 @@ class RuntimeEnvironment; class Path { friend class SharedPtr; - Phy *_phy; + friend class Bond; + //friend class SharedPtr; public: /** @@ -87,77 +87,113 @@ public: _lastOut(0), _lastIn(0), _lastTrustEstablishedPacketReceived(0), - _lastPathQualityComputeTime(0), _localSocket(-1), _latency(0xffff), _addr(), _ipScope(InetAddress::IP_SCOPE_NONE), - _lastAck(0), - _lastThroughputEstimation(0), + _lastAckReceived(0), + _lastAckSent(0), _lastQoSMeasurement(0), - _lastQoSRecordPurge(0), + _lastThroughputEstimation(0), + _lastRefractoryUpdate(0), + _lastAliveToggle(0), + _lastEligibilityState(false), + _lastTrialBegin(0), + _refractoryPeriod(0), + _monitorInterval(0), + _upDelay(0), + _downDelay(0), + _ipvPref(0), + _mode(0), + _onlyPathOnSlave(false), + _enabled(false), + _bonded(false), + _negotiated(false), + _deprecated(false), + _shouldReallocateFlows(false), + _assignedFlowCount(0), + _latencyMean(0), + _latencyVariance(0), + _packetLossRatio(0), + _packetErrorRatio(0), + _throughputMean(0), + _throughputMax(0), + _throughputVariance(0), + _allocation(0), + _byteLoad(0), + _relativeByteLoad(0), + _affinity(0), + _failoverScore(0), _unackedBytes(0), - _expectingAckAsOf(0), _packetsReceivedSinceLastAck(0), _packetsReceivedSinceLastQoS(0), - _maxLifetimeThroughput(0), - _lastComputedMeanThroughput(0), _bytesAckedSinceLastThroughputEstimation(0), - _lastComputedMeanLatency(0.0), - _lastComputedPacketDelayVariance(0.0), - _lastComputedPacketErrorRatio(0.0), - _lastComputedPacketLossRatio(0), - _lastComputedStability(0.0), - _lastComputedRelativeQuality(0), - _lastComputedThroughputDistCoeff(0.0), - _lastAllocation(0) 
- { - memset(_ifname, 0, 16); - memset(_addrString, 0, sizeof(_addrString)); - } + _packetsIn(0), + _packetsOut(0), + _prevEligibility(false) + {} Path(const int64_t localSocket,const InetAddress &addr) : _lastOut(0), _lastIn(0), _lastTrustEstablishedPacketReceived(0), - _lastPathQualityComputeTime(0), _localSocket(localSocket), _latency(0xffff), _addr(addr), _ipScope(addr.ipScope()), - _lastAck(0), - _lastThroughputEstimation(0), + _lastAckReceived(0), + _lastAckSent(0), _lastQoSMeasurement(0), - _lastQoSRecordPurge(0), + _lastThroughputEstimation(0), + _lastRefractoryUpdate(0), + _lastAliveToggle(0), + _lastEligibilityState(false), + _lastTrialBegin(0), + _refractoryPeriod(0), + _monitorInterval(0), + _upDelay(0), + _downDelay(0), + _ipvPref(0), + _mode(0), + _onlyPathOnSlave(false), + _enabled(false), + _bonded(false), + _negotiated(false), + _deprecated(false), + _shouldReallocateFlows(false), + _assignedFlowCount(0), + _latencyMean(0), + _latencyVariance(0), + _packetLossRatio(0), + _packetErrorRatio(0), + _throughputMean(0), + _throughputMax(0), + _throughputVariance(0), + _allocation(0), + _byteLoad(0), + _relativeByteLoad(0), + _affinity(0), + _failoverScore(0), _unackedBytes(0), - _expectingAckAsOf(0), _packetsReceivedSinceLastAck(0), _packetsReceivedSinceLastQoS(0), - _maxLifetimeThroughput(0), - _lastComputedMeanThroughput(0), _bytesAckedSinceLastThroughputEstimation(0), - _lastComputedMeanLatency(0.0), - _lastComputedPacketDelayVariance(0.0), - _lastComputedPacketErrorRatio(0.0), - _lastComputedPacketLossRatio(0), - _lastComputedStability(0.0), - _lastComputedRelativeQuality(0), - _lastComputedThroughputDistCoeff(0.0), - _lastAllocation(0) - { - memset(_ifname, 0, 16); - memset(_addrString, 0, sizeof(_addrString)); - if (_localSocket != -1) { - _phy->getIfName((PhySocket *) ((uintptr_t) _localSocket), _ifname, 16); - } - } + _packetsIn(0), + _packetsOut(0), + _prevEligibility(false) + {} /** * Called when a packet is received from this remote path, 
regardless of content * * @param t Time of receive */ - inline void received(const uint64_t t) { _lastIn = t; } + inline void received(const uint64_t t) { + _lastIn = t; + if (!_prevEligibility) { + _lastAliveToggle = _lastIn; + } + } /** * Set time last trusted packet was received (done in Peer::received()) @@ -197,7 +233,6 @@ public: else { _latency = l; } - _latencySamples.push(l); } /** @@ -286,341 +321,32 @@ public: } /** - * Record statistics on outgoing packets. Used later to estimate QoS metrics. - * - * @param now Current time - * @param packetId ID of packet - * @param payloadLength Length of payload - * @param verb Packet verb + * @param bonded Whether this path is part of a bond. */ - inline void recordOutgoingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb) - { - Mutex::Lock _l(_statistics_m); - if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) { - if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) { - _unackedBytes += payloadLength; - // Take note that we're expecting a VERB_ACK on this path as of a specific time - _expectingAckAsOf = ackAge(now) > ZT_PATH_ACK_INTERVAL ? _expectingAckAsOf : now; - if (_outQoSRecords.size() < ZT_PATH_MAX_OUTSTANDING_QOS_RECORDS) { - _outQoSRecords[packetId] = now; - } - } - } - } + inline void setBonded(bool bonded) { _bonded = bonded; } /** - * Record statistics on incoming packets. Used later to estimate QoS metrics. - * - * @param now Current time - * @param packetId ID of packet - * @param payloadLength Length of payload - * @param verb Packet verb + * @return True if this path is currently part of a bond. 
*/ - inline void recordIncomingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb) - { - Mutex::Lock _l(_statistics_m); - if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) { - if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) { - _inACKRecords[packetId] = payloadLength; - _packetsReceivedSinceLastAck++; - _inQoSRecords[packetId] = now; - _packetsReceivedSinceLastQoS++; - } - _packetValiditySamples.push(true); - } - } - - /** - * Record that we've received a VERB_ACK on this path, also compute throughput if required. - * - * @param now Current time - * @param ackedBytes Number of bytes acknowledged by other peer - */ - inline void receivedAck(int64_t now, int32_t ackedBytes) - { - _expectingAckAsOf = 0; - _unackedBytes = (ackedBytes > _unackedBytes) ? 0 : _unackedBytes - ackedBytes; - int64_t timeSinceThroughputEstimate = (now - _lastThroughputEstimation); - if (timeSinceThroughputEstimate >= ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL) { - uint64_t throughput = (uint64_t)((float)(_bytesAckedSinceLastThroughputEstimation * 8) / ((float)timeSinceThroughputEstimate / (float)1000)); - _throughputSamples.push(throughput); - _maxLifetimeThroughput = throughput > _maxLifetimeThroughput ? 
throughput : _maxLifetimeThroughput; - _lastThroughputEstimation = now; - _bytesAckedSinceLastThroughputEstimation = 0; - } else { - _bytesAckedSinceLastThroughputEstimation += ackedBytes; - } - } - - /** - * @return Number of bytes this peer is responsible for ACKing since last ACK - */ - inline int32_t bytesToAck() - { - Mutex::Lock _l(_statistics_m); - int32_t bytesToAck = 0; - std::map::iterator it = _inACKRecords.begin(); - while (it != _inACKRecords.end()) { - bytesToAck += it->second; - it++; - } - return bytesToAck; - } - - /** - * @return Number of bytes thus far sent that have not been acknowledged by the remote peer - */ - inline int64_t unackedSentBytes() - { - return _unackedBytes; - } - - /** - * Account for the fact that an ACK was just sent. Reset counters, timers, and clear statistics buffers - * - * @param Current time - */ - inline void sentAck(int64_t now) - { - Mutex::Lock _l(_statistics_m); - _inACKRecords.clear(); - _packetsReceivedSinceLastAck = 0; - _lastAck = now; - } - - /** - * Receive QoS data, match with recorded egress times from this peer, compute latency - * estimates. - * - * @param now Current time - * @param count Number of records - * @param rx_id table of packet IDs - * @param rx_ts table of holding times - */ - inline void receivedQoS(int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts) - { - Mutex::Lock _l(_statistics_m); - // Look up egress times and compute latency values for each record - std::map::iterator it; - for (int j=0; jsecond); - uint16_t rtt_compensated = rtt - rx_ts[j]; - uint16_t latency = rtt_compensated / 2; - updateLatency(latency, now); - _outQoSRecords.erase(it); - } - } - } - - /** - * Generate the contents of a VERB_QOS_MEASUREMENT packet. 
- * - * @param now Current time - * @param qosBuffer destination buffer - * @return Size of payload - */ - inline int32_t generateQoSPacket(int64_t now, char *qosBuffer) - { - Mutex::Lock _l(_statistics_m); - int32_t len = 0; - std::map::iterator it = _inQoSRecords.begin(); - int i=0; - while (i<_packetsReceivedSinceLastQoS && it != _inQoSRecords.end()) { - uint64_t id = it->first; - memcpy(qosBuffer, &id, sizeof(uint64_t)); - qosBuffer+=sizeof(uint64_t); - uint16_t holdingTime = (uint16_t)(now - it->second); - memcpy(qosBuffer, &holdingTime, sizeof(uint16_t)); - qosBuffer+=sizeof(uint16_t); - len+=sizeof(uint64_t)+sizeof(uint16_t); - _inQoSRecords.erase(it++); - i++; - } - return len; - } - - /** - * Account for the fact that a VERB_QOS_MEASUREMENT was just sent. Reset timers. - * - * @param Current time - */ - inline void sentQoS(int64_t now) { - _packetsReceivedSinceLastQoS = 0; - _lastQoSMeasurement = now; - } - - /** - * @param now Current time - * @return Whether an ACK (VERB_ACK) packet needs to be emitted at this time - */ - inline bool needsToSendAck(int64_t now) { - return ((now - _lastAck) >= ZT_PATH_ACK_INTERVAL || - (_packetsReceivedSinceLastAck == ZT_PATH_QOS_TABLE_SIZE)) && _packetsReceivedSinceLastAck; - } - - /** - * @param now Current time - * @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time - */ - inline bool needsToSendQoS(int64_t now) { - return ((_packetsReceivedSinceLastQoS >= ZT_PATH_QOS_TABLE_SIZE) || - ((now - _lastQoSMeasurement) > ZT_PATH_QOS_INTERVAL)) && _packetsReceivedSinceLastQoS; - } - - /** - * How much time has elapsed since we've been expecting a VERB_ACK on this path. This value - * is used to determine a more relevant path "age". This lets us penalize paths which are no - * longer ACKing, but not those that simple aren't being used to carry traffic at the - * current time. - */ - inline int64_t ackAge(int64_t now) { return _expectingAckAsOf ? 
now - _expectingAckAsOf : 0; } - - /** - * The maximum observed throughput (in bits/s) for this path - */ - inline uint64_t maxLifetimeThroughput() { return _maxLifetimeThroughput; } - - /** - * @return The mean throughput (in bits/s) of this link - */ - inline uint64_t meanThroughput() { return _lastComputedMeanThroughput; } - - /** - * Assign a new relative quality value for this path in the aggregate link - * - * @param rq Quality of this path in comparison to other paths available to this peer - */ - inline void updateRelativeQuality(float rq) { _lastComputedRelativeQuality = rq; } - - /** - * @return Quality of this path compared to others in the aggregate link - */ - inline float relativeQuality() { return _lastComputedRelativeQuality; } - - /** - * Assign a new allocation value for this path in the aggregate link - * - * @param allocation Percentage of traffic to be sent over this path to a peer - */ - inline void updateComponentAllocationOfAggregateLink(unsigned char allocation) { _lastAllocation = allocation; } - - /** - * @return Percentage of traffic allocated to this path in the aggregate link - */ - inline unsigned char allocation() { return _lastAllocation; } - - /** - * @return Stability estimates can become expensive to compute, we cache the most recent result. 
- */ - inline float lastComputedStability() { return _lastComputedStability; } - - /** - * @return A pointer to a cached copy of the human-readable name of the interface this Path's localSocket is bound to - */ - inline char *getName() { return _ifname; } - - /** - * @return Packet delay variance - */ - inline float packetDelayVariance() { return _lastComputedPacketDelayVariance; } - - /** - * @return Previously-computed mean latency - */ - inline float meanLatency() { return _lastComputedMeanLatency; } - - /** - * @return Packet loss rate (PLR) - */ - inline float packetLossRatio() { return _lastComputedPacketLossRatio; } - - /** - * @return Packet error ratio (PER) - */ - inline float packetErrorRatio() { return _lastComputedPacketErrorRatio; } - - /** - * Record an invalid incoming packet. This packet failed MAC/compression/cipher checks and will now - * contribute to a Packet Error Ratio (PER). - */ - inline void recordInvalidPacket() { _packetValiditySamples.push(false); } - - /** - * @return A pointer to a cached copy of the address string for this Path (For debugging only) - */ - inline char *getAddressString() { return _addrString; } - - /** - * @return The current throughput disturbance coefficient - */ - inline float throughputDisturbanceCoefficient() { return _lastComputedThroughputDistCoeff; } - - /** - * Compute and cache stability and performance metrics. The resultant stability coefficient is a measure of how "well behaved" - * this path is. This figure is substantially different from (but required for the estimation of the path's overall "quality". 
- * - * @param now Current time - */ - inline void processBackgroundPathMeasurements(const int64_t now) - { - if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { - Mutex::Lock _l(_statistics_m); - _lastPathQualityComputeTime = now; - address().toString(_addrString); - _lastComputedMeanLatency = _latencySamples.mean(); - _lastComputedPacketDelayVariance = _latencySamples.stddev(); // Similar to "jitter" (SEE: RFC 3393, RFC 4689) - _lastComputedMeanThroughput = (uint64_t)_throughputSamples.mean(); - - // If no packet validity samples, assume PER==0 - _lastComputedPacketErrorRatio = 1 - (_packetValiditySamples.count() ? _packetValiditySamples.mean() : 1); - - // Compute path stability - // Normalize measurements with wildly different ranges into a reasonable range - float normalized_pdv = Utils::normalize(_lastComputedPacketDelayVariance, 0, ZT_PATH_MAX_PDV, 0, 10); - float normalized_la = Utils::normalize(_lastComputedMeanLatency, 0, ZT_PATH_MAX_MEAN_LATENCY, 0, 10); - float throughput_cv = _throughputSamples.mean() > 0 ? _throughputSamples.stddev() / _throughputSamples.mean() : 1; - - // Form an exponential cutoff and apply contribution weights - float pdv_contrib = expf((-1.0f)*normalized_pdv) * (float)ZT_PATH_CONTRIB_PDV; - float latency_contrib = expf((-1.0f)*normalized_la) * (float)ZT_PATH_CONTRIB_LATENCY; - - // Throughput Disturbance Coefficient - float throughput_disturbance_contrib = expf((-1.0f)*throughput_cv) * (float)ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE; - _throughputDisturbanceSamples.push(throughput_cv); - _lastComputedThroughputDistCoeff = _throughputDisturbanceSamples.mean(); - - // Obey user-defined ignored contributions - pdv_contrib = ZT_PATH_CONTRIB_PDV > 0.0 ? pdv_contrib : 1; - latency_contrib = ZT_PATH_CONTRIB_LATENCY > 0.0 ? latency_contrib : 1; - throughput_disturbance_contrib = ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE > 0.0 ? 
throughput_disturbance_contrib : 1; - - // Stability - _lastComputedStability = pdv_contrib + latency_contrib + throughput_disturbance_contrib; - _lastComputedStability *= 1 - _lastComputedPacketErrorRatio; - - // Prevent QoS records from sticking around for too long - std::map::iterator it = _outQoSRecords.begin(); - while (it != _outQoSRecords.end()) { - // Time since egress of tracked packet - if ((now - it->second) >= ZT_PATH_QOS_TIMEOUT) { - _outQoSRecords.erase(it++); - } else { it++; } - } - } - } + inline bool bonded() { return _bonded; } /** * @return True if this path is alive (receiving heartbeats) */ - inline bool alive(const int64_t now) const { return ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); } + inline bool alive(const int64_t now, bool bondingEnabled = false) const { + return (bondingEnabled && _monitorInterval) ? ((now - _lastIn) < (_monitorInterval * 3)) : ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); + } /** * @return True if this path needs a heartbeat */ inline bool needsHeartbeat(const int64_t now) const { return ((now - _lastOut) >= ZT_PATH_HEARTBEAT_PERIOD); } + /** + * @return True if this path needs a heartbeat in accordance to the user-specified path monitor frequency + */ + inline bool needsGratuitousHeartbeat(const int64_t now) { return allowed() && (_monitorInterval > 0) && ((now - _lastOut) >= _monitorInterval); } + /** * @return Last time we sent something */ @@ -631,62 +357,339 @@ public: */ inline int64_t lastIn() const { return _lastIn; } + /** + * @return the age of the path in terms of receiving packets + */ + inline int64_t age(int64_t now) { return (now - _lastIn); } + /** * @return Time last trust-established packet was received */ inline int64_t lastTrustEstablishedPacketReceived() const { return _lastTrustEstablishedPacketReceived; } + /** + * @return Time since last VERB_ACK was received + */ + inline int64_t ackAge(int64_t now) { return _lastAckReceived ? 
now - _lastAckReceived : 0; } + + /** + * Set or update a refractory period for the path. + * + * @param punishment How much a path should be punished + * @param pathFailure Whether this call is the result of a recent path failure + */ + inline void adjustRefractoryPeriod(int64_t now, uint32_t punishment, bool pathFailure) { + if (pathFailure) { + unsigned int suggestedRefractoryPeriod = _refractoryPeriod ? punishment + (_refractoryPeriod * 2) : punishment; + _refractoryPeriod = std::min(suggestedRefractoryPeriod, (unsigned int)ZT_MULTIPATH_MAX_REFRACTORY_PERIOD); + _lastRefractoryUpdate = 0; + } else { + uint32_t drainRefractory = 0; + if (_lastRefractoryUpdate) { + drainRefractory = (now - _lastRefractoryUpdate); + } else { + drainRefractory = (now - _lastAliveToggle); + } + _lastRefractoryUpdate = now; + if (_refractoryPeriod > drainRefractory) { + _refractoryPeriod -= drainRefractory; + } else { + _refractoryPeriod = 0; + _lastRefractoryUpdate = 0; + } + } + } + + /** + * Determine the current state of eligibility of the path. 
+ * + * @param includeRefractoryPeriod Whether current punishment should be taken into consideration + * @return True if this path can be used in a bond at the current time + */ + inline bool eligible(uint64_t now, int ackSendInterval, bool includeRefractoryPeriod = false) { + if (includeRefractoryPeriod && _refractoryPeriod) { + return false; + } + bool acceptableAge = age(now) < ((_monitorInterval * 4) + _downDelay); // Simple RX age (driven by packets of any type and gratuitous VERB_HELLOs) + bool acceptableAckAge = ackAge(now) < (ackSendInterval); // Whether the remote peer is actually responding to our outgoing traffic or simply sending stuff to us + bool notTooEarly = (now - _lastAliveToggle) >= _upDelay; // Whether we've waited long enough since the link last came online + bool inTrial = (now - _lastTrialBegin) < _upDelay; // Whether this path is still in its trial period + bool currEligibility = allowed() && (((acceptableAge || acceptableAckAge) && notTooEarly) || inTrial); + return currEligibility; + } + + /** + * Record when this path first entered the bond. Each path is given a trial period where it is admitted + * to the bond without requiring observations to prove its performance or reliability. + */ + inline void startTrial(uint64_t now) { _lastTrialBegin = now; } + + /** + * @return True if a path is permitted to be used in a bond (according to user pref.) + */ + inline bool allowed() { + return _enabled + && (!_ipvPref + || ((_addr.isV4() && (_ipvPref == 4 || _ipvPref == 46 || _ipvPref == 64)) + || ((_addr.isV6() && (_ipvPref == 6 || _ipvPref == 46 || _ipvPref == 64))))); + } + + /** + * @return True if a path is preferred over another on the same physical slave (according to user pref.) 
+ */ + inline bool preferred() { + return _onlyPathOnSlave + || (_addr.isV4() && (_ipvPref == 4 || _ipvPref == 46)) + || (_addr.isV6() && (_ipvPref == 6 || _ipvPref == 64)); + } + + /** + * @param now Current time + * @return Whether an ACK (VERB_ACK) packet needs to be emitted at this time + */ + inline bool needsToSendAck(int64_t now, int ackSendInterval) { + return ((now - _lastAckSent) >= ackSendInterval || + (_packetsReceivedSinceLastAck == ZT_QOS_TABLE_SIZE)) && _packetsReceivedSinceLastAck; + } + + /** + * @param now Current time + * @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time + */ + inline bool needsToSendQoS(int64_t now, int qosSendInterval) { + return ((_packetsReceivedSinceLastQoS >= ZT_QOS_TABLE_SIZE) || + ((now - _lastQoSMeasurement) > qosSendInterval)) && _packetsReceivedSinceLastQoS; + } + + /** + * Reset packet counters + */ + inline void resetPacketCounts() + { + _packetsIn = 0; + _packetsOut = 0; + } + private: - Mutex _statistics_m; volatile int64_t _lastOut; volatile int64_t _lastIn; volatile int64_t _lastTrustEstablishedPacketReceived; - volatile int64_t _lastPathQualityComputeTime; int64_t _localSocket; volatile unsigned int _latency; InetAddress _addr; InetAddress::IpScope _ipScope; // memoize this since it's a computed value checked often AtomicCounter __refCount; - std::map _outQoSRecords; // id:egress_time - std::map _inQoSRecords; // id:now - std::map _inACKRecords; // id:len + std::map qosStatsOut; // id:egress_time + std::map qosStatsIn; // id:now + std::map ackStatsIn; // id:len - int64_t _lastAck; - int64_t _lastThroughputEstimation; - int64_t _lastQoSMeasurement; - int64_t _lastQoSRecordPurge; + RingBuffer qosRecordSize; + RingBuffer qosRecordLossSamples; + RingBuffer throughputSamples; + RingBuffer packetValiditySamples; + RingBuffer _throughputVarianceSamples; + RingBuffer latencySamples; + /** + * Last time that a VERB_ACK was received on this path. 
+ */ + uint64_t _lastAckReceived; + + /** + * Last time that a VERB_ACK was sent out on this path. + */ + uint64_t _lastAckSent; + + /** + * Last time that a VERB_QOS_MEASUREMENT was sent out on this path. + */ + uint64_t _lastQoSMeasurement; + + /** + * Last time that a the path's throughput was estimated. + */ + uint64_t _lastThroughputEstimation; + + /** + * The last time that the refractory period was updated. + */ + uint64_t _lastRefractoryUpdate; + + /** + * The last time that the path was marked as "alive". + */ + uint64_t _lastAliveToggle; + + /** + * State of eligibility at last check. Used for determining state changes. + */ + bool _lastEligibilityState; + + /** + * Timestamp indicating when this path's trial period began. + */ + uint64_t _lastTrialBegin; + + /** + * Amount of time that this path is prevented from becoming a member of a bond. + */ + uint32_t _refractoryPeriod; + + /** + * Monitor interval specific to this path or that was inherited from the bond controller. + */ + int32_t _monitorInterval; + + /** + * Up delay interval specific to this path or that was inherited from the bond controller. + */ + uint32_t _upDelay; + + /** + * Down delay interval specific to this path or that was inherited from the bond controller. + */ + uint32_t _downDelay; + + /** + * IP version preference inherited from the physical slave. + */ + uint8_t _ipvPref; + + /** + * Mode inherited from the physical slave. + */ + uint8_t _mode; + + /** + * IP version preference inherited from the physical slave. + */ + bool _onlyPathOnSlave; + + /** + * Enabled state inherited from the physical slave. + */ + bool _enabled; + + /** + * Whether this path is currently part of a bond. + */ + bool _bonded; + + /** + * Whether this path was intentionally _negotiated by either peer. + */ + bool _negotiated; + + /** + * Whether this path has been deprecated due to performance issues. 
Current traffic flows + * will be re-allocated to other paths in the most non-disruptive manner (if possible), + * and new traffic will not be allocated to this path. + */ + bool _deprecated; + + /** + * Whether flows should be moved from this path. Current traffic flows will be re-allocated + * immediately. + */ + bool _shouldReallocateFlows; + + /** + * The number of flows currently assigned to this path. + */ + uint16_t _assignedFlowCount; + + /** + * The mean latency (computed from a sliding window.) + */ + float _latencyMean; + + /** + * Packet delay variance (computed from a sliding window.) + */ + float _latencyVariance; + + /** + * The ratio of lost packets to received packets. + */ + float _packetLossRatio; + + /** + * The ratio of packets that failed their MAC/CRC checks to those that did not. + */ + float _packetErrorRatio; + + /** + * The estimated mean throughput of this path. + */ + uint64_t _throughputMean; + + /** + * The maximum observed throughput of this path. + */ + uint64_t _throughputMax; + + /** + * The variance in the estimated throughput of this path. + */ + float _throughputVariance; + + /** + * The relative quality of this path to all others in the bond, [0-255]. + */ + uint8_t _allocation; + + /** + * How much load this path is under. + */ + uint64_t _byteLoad; + + /** + * How much load this path is under (relative to other paths in the bond.) + */ + uint8_t _relativeByteLoad; + + /** + * Relative value expressing how "deserving" this path is of new traffic. + */ + uint8_t _affinity; + + /** + * Score that indicates to what degree this path is preferred over others that + * are available to the bonding policy. (specifically for active-backup) + */ + uint32_t _failoverScore; + + /** + * Number of bytes thus far sent that have not been acknowledged by the remote peer. 
+ */ int64_t _unackedBytes; - int64_t _expectingAckAsOf; - int16_t _packetsReceivedSinceLastAck; - int16_t _packetsReceivedSinceLastQoS; - uint64_t _maxLifetimeThroughput; - uint64_t _lastComputedMeanThroughput; + /** + * Number of packets received since the last VERB_ACK was sent to the remote peer. + */ + int32_t _packetsReceivedSinceLastAck; + + /** + * Number of packets received since the last VERB_QOS_MEASUREMENT was sent to the remote peer. + */ + int32_t _packetsReceivedSinceLastQoS; + + /** + * Bytes acknowledged via incoming VERB_ACK since the last estimation of throughput. + */ uint64_t _bytesAckedSinceLastThroughputEstimation; - float _lastComputedMeanLatency; - float _lastComputedPacketDelayVariance; + /** + * Counters used for tracking path load. + */ + int _packetsIn; + int _packetsOut; - float _lastComputedPacketErrorRatio; - float _lastComputedPacketLossRatio; + // TODO: Remove - // cached estimates - float _lastComputedStability; - float _lastComputedRelativeQuality; - float _lastComputedThroughputDistCoeff; - unsigned char _lastAllocation; - - // cached human-readable strings for tracing purposes - char _ifname[16]; - char _addrString[256]; - - RingBuffer _throughputSamples; - RingBuffer _latencySamples; - RingBuffer _packetValiditySamples; - RingBuffer _throughputDisturbanceSamples; + bool _prevEligibility; }; } // namespace ZeroTier diff --git a/node/Peer.cpp b/node/Peer.cpp index 3c45d53fb..1ee0c1240 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. 
@@ -14,7 +14,6 @@ #include "../version.h" #include "Constants.hpp" #include "Peer.hpp" -#include "Node.hpp" #include "Switch.hpp" #include "Network.hpp" #include "SelfAwareness.hpp" @@ -24,8 +23,6 @@ #include "RingBuffer.hpp" #include "Utils.hpp" -#include "../include/ZeroTierDebug.h" - namespace ZeroTier { static unsigned char s_freeRandomByteCounter = 0; @@ -37,20 +34,14 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _lastTriedMemorizedPath(0), _lastDirectPathPushSent(0), _lastDirectPathPushReceive(0), + _lastEchoRequestReceived(0), _lastCredentialRequestSent(0), _lastWhoisRequestReceived(0), - _lastEchoRequestReceived(0), _lastCredentialsReceived(0), _lastTrustEstablishedPacketReceived(0), _lastSentFullHello(0), - _lastACKWindowReset(0), - _lastQoSWindowReset(0), - _lastMultipathCompatibilityCheck(0), + _lastEchoCheck(0), _freeRandomByte((unsigned char)((uintptr_t)this >> 4) ^ ++s_freeRandomByteCounter), - _uniqueAlivePathCount(0), - _localMultipathSupported(false), - _remoteMultipathSupported(false), - _canUseMultipath(false), _vProto(0), _vMajor(0), _vMinor(0), @@ -58,17 +49,17 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _id(peerIdentity), _directPathPushCutoffCount(0), _credentialsCutoffCount(0), - _linkIsBalanced(false), - _linkIsRedundant(false), - _remotePeerMultipathEnabled(false), - _lastAggregateStatsReport(0), - _lastAggregateAllocation(0), - _virtualPathCount(0), - _roundRobinPathAssignmentIdx(0), - _pathAssignmentIdx(0) + _echoRequestCutoffCount(0), + _uniqueAlivePathCount(0), + _localMultipathSupported(false), + _remoteMultipathSupported(false), + _canUseMultipath(false), + _shouldCollectPathStatistics(0), + _lastComputedAggregateMeanLatency(0) { - if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) + if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) { throw ZT_EXCEPTION_INVALID_ARGUMENT; + } } void Peer::received( @@ -81,7 +72,8 @@ void 
Peer::received( const uint64_t inRePacketId, const Packet::Verb inReVerb, const bool trustEstablished, - const uint64_t networkId) + const uint64_t networkId, + const int32_t flowId) { const int64_t now = RR->node->now(); @@ -98,28 +90,13 @@ void Peer::received( break; } + recordIncomingPacket(tPtr, path, packetId, payloadLength, verb, flowId, now); + if (trustEstablished) { _lastTrustEstablishedPacketReceived = now; path->trustedPacketReceived(now); } - { - Mutex::Lock _l(_paths_m); - - recordIncomingPacket(tPtr, path, packetId, payloadLength, verb, now); - - if (_canUseMultipath) { - if (path->needsToSendQoS(now)) { - sendQOS_MEASUREMENT(tPtr, path, path->localSocket(), path->address(), now); - } - for(unsigned int i=0;iprocessBackgroundPathMeasurements(now); - } - } - } - } - if (hops == 0) { // If this is a direct packet (no hops), update existing paths or learn new ones bool havePath = false; @@ -137,60 +114,45 @@ void Peer::received( } bool attemptToContact = false; + + int replaceIdx = ZT_MAX_PEER_NETWORK_PATHS; if ((!havePath)&&(RR->node->shouldUsePathForZeroTierTraffic(tPtr,_id.address(),path->localSocket(),path->address()))) { Mutex::Lock _l(_paths_m); - - // Paths are redundant if they duplicate an alive path to the same IP or - // with the same local socket and address family. - bool redundant = false; - unsigned int replacePath = ZT_MAX_PEER_NETWORK_PATHS; for(unsigned int i=0;ialive(now)) && ( ((_paths[i].p->localSocket() == path->localSocket())&&(_paths[i].p->address().ss_family == path->address().ss_family)) || (_paths[i].p->address().ipsEqual2(path->address())) ) ) { - redundant = true; - break; - } - // If the path is the same address and port, simply assume this is a replacement - if ( (_paths[i].p->address().ipsEqual2(path->address()))) { - replacePath = i; - break; - } - } else break; - } - - // If the path isn't a duplicate of the same localSocket AND we haven't already determined a replacePath, - // then find the worst path and replace it. 
- if (!redundant && replacePath == ZT_MAX_PEER_NETWORK_PATHS) { - int replacePathQuality = 0; - for(unsigned int i=0;iquality(now); - if (q > replacePathQuality) { - replacePathQuality = q; - replacePath = i; + // match addr + if ( (_paths[i].p->alive(now)) && ( ((_paths[i].p->localSocket() == path->localSocket())&&(_paths[i].p->address().ss_family == path->address().ss_family)) && (_paths[i].p->address().ipsEqual2(path->address())) ) ) { + // port + if (_paths[i].p->address().port() == path->address().port()) { + replaceIdx = i; + break; } - } else { - replacePath = i; + } + } + } + if (replaceIdx == ZT_MAX_PEER_NETWORK_PATHS) { + for(unsigned int i=0;it->peerLearnedNewPath(tPtr,networkId,*this,path,packetId); - _paths[replacePath].lr = now; - _paths[replacePath].p = path; - _paths[replacePath].priority = 1; + performMultipathStateCheck(now); + if (_bondToPeer) { + _bondToPeer->nominatePath(path, now); + } + _paths[replaceIdx].lr = now; + _paths[replaceIdx].p = path; + _paths[replaceIdx].priority = 1; } else { attemptToContact = true; } - - // Every time we learn of new path, rebuild set of virtual paths - constructSetOfVirtualPaths(); } } - if (attemptToContact) { attemptToContactAt(tPtr,path->localSocket(),path->address(),now,true); path->sent(now); @@ -203,8 +165,7 @@ void Peer::received( // is done less frequently. if (this->trustEstablished(now)) { const int64_t sinceLastPush = now - _lastDirectPathPushSent; - if (sinceLastPush >= ((hops == 0) ? ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH : ZT_DIRECT_PATH_PUSH_INTERVAL) - || (_localMultipathSupported && (sinceLastPush >= (ZT_DIRECT_PATH_PUSH_INTERVAL_MULTIPATH)))) { + if (sinceLastPush >= ((hops == 0) ? 
ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH : ZT_DIRECT_PATH_PUSH_INTERVAL)) { _lastDirectPathPushSent = now; std::vector pathsToPush(RR->node->directPaths()); if (pathsToPush.size() > 0) { @@ -249,189 +210,15 @@ void Peer::received( } } -void Peer::constructSetOfVirtualPaths() +SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired, int32_t flowId) { - if (!_remoteMultipathSupported) { - return; - } - Mutex::Lock _l(_virtual_paths_m); - - int64_t now = RR->node->now(); - _virtualPathCount = 0; - for(unsigned int i=0;ialive(now)) { - for(unsigned int j=0;jalive(now)) { - int64_t localSocket = _paths[j].p->localSocket(); - bool foundVirtualPath = false; - for (int k=0; k<_virtualPaths.size(); k++) { - if (_virtualPaths[k]->localSocket == localSocket && _virtualPaths[k]->p == _paths[i].p) { - foundVirtualPath = true; - } - } - if (!foundVirtualPath) - { - VirtualPath *np = new VirtualPath; - np->p = _paths[i].p; - np->localSocket = localSocket; - _virtualPaths.push_back(np); - } - } - } - } - } -} - -void Peer::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, - uint16_t payloadLength, const Packet::Verb verb, int64_t now) -{ - _freeRandomByte += (unsigned char)(packetId >> 8); // grab entropy to use in path selection logic for multipath - if (_canUseMultipath) { - path->recordOutgoingPacket(now, packetId, payloadLength, verb); - } -} - -void Peer::recordIncomingPacket(void *tPtr, const SharedPtr &path, const uint64_t packetId, - uint16_t payloadLength, const Packet::Verb verb, int64_t now) -{ - if (_canUseMultipath) { - if (path->needsToSendAck(now)) { - sendACK(tPtr, path, path->localSocket(), path->address(), now); - } - path->recordIncomingPacket(now, packetId, payloadLength, verb); - } -} - -void Peer::computeAggregateAllocation(int64_t now) -{ - float maxStability = 0; - float totalRelativeQuality = 0; - float maxThroughput = 1; - float maxScope = 0; - float relStability[ZT_MAX_PEER_NETWORK_PATHS]; - float 
relThroughput[ZT_MAX_PEER_NETWORK_PATHS]; - memset(&relStability, 0, sizeof(relStability)); - memset(&relThroughput, 0, sizeof(relThroughput)); - // Survey all paths - for(unsigned int i=0;ilastComputedStability(); - relThroughput[i] = (float)_paths[i].p->maxLifetimeThroughput(); - maxStability = relStability[i] > maxStability ? relStability[i] : maxStability; - maxThroughput = relThroughput[i] > maxThroughput ? relThroughput[i] : maxThroughput; - maxScope = _paths[i].p->ipScope() > maxScope ? _paths[i].p->ipScope() : maxScope; - } - } - // Convert to relative values - for(unsigned int i=0;iackAge(now), 0, ZT_PATH_MAX_AGE, 0, 10); - float age_contrib = exp((-1)*normalized_ma); - float relScope = ((float)(_paths[i].p->ipScope()+1) / (maxScope + 1)); - float relQuality = - (relStability[i] * (float)ZT_PATH_CONTRIB_STABILITY) - + (fmaxf(1.0f, relThroughput[i]) * (float)ZT_PATH_CONTRIB_THROUGHPUT) - + relScope * (float)ZT_PATH_CONTRIB_SCOPE; - relQuality *= age_contrib; - // Clamp values - relQuality = relQuality > (1.00f / 100.0f) ? relQuality : 0.0f; - relQuality = relQuality < (99.0f / 100.0f) ? 
relQuality : 1.0f; - totalRelativeQuality += relQuality; - _paths[i].p->updateRelativeQuality(relQuality); - } - } - // Convert set of relative performances into an allocation set - for(uint16_t i=0;inode->getMultipathMode() == ZT_MULTIPATH_BALANCE_RANDOM) { - _paths[i].p->updateComponentAllocationOfAggregateLink(((float)_pathChoiceHist.countValue(i) / (float)_pathChoiceHist.count()) * 255); - } - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_DYNAMIC_OPAQUE) { - _paths[i].p->updateComponentAllocationOfAggregateLink((unsigned char)((_paths[i].p->relativeQuality() / totalRelativeQuality) * 255)); - } - } - } -} - -int Peer::computeAggregateLinkPacketDelayVariance() -{ - float pdv = 0.0; - for(unsigned int i=0;irelativeQuality() * _paths[i].p->packetDelayVariance(); - } - } - return (int)pdv; -} - -int Peer::computeAggregateLinkMeanLatency() -{ - int ml = 0; - int pathCount = 0; - for(unsigned int i=0;irelativeQuality() * _paths[i].p->meanLatency()); - } - } - return ml / pathCount; -} - -int Peer::aggregateLinkPhysicalPathCount() -{ - std::map ifnamemap; - int pathCount = 0; - int64_t now = RR->node->now(); - for(unsigned int i=0;ialive(now)) { - if (!ifnamemap[_paths[i].p->getName()]) { - ifnamemap[_paths[i].p->getName()] = true; - pathCount++; - } - } - } - return pathCount; -} - -int Peer::aggregateLinkLogicalPathCount() -{ - int pathCount = 0; - int64_t now = RR->node->now(); - for(unsigned int i=0;ialive(now)) { - pathCount++; - } - } - return pathCount; -} - -std::vector > Peer::getAllPaths(int64_t now) -{ - Mutex::Lock _l(_virtual_paths_m); // FIXME: TX can now lock RX - std::vector > paths; - for (int i=0; i<_virtualPaths.size(); i++) { - if (_virtualPaths[i]->p) { - paths.push_back(_virtualPaths[i]->p); - } - } - return paths; -} - -SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired, int64_t flowId) -{ - Mutex::Lock _l(_paths_m); - SharedPtr selectedPath; - char curPathStr[128]; - char newPathStr[128]; - unsigned int bestPath = 
ZT_MAX_PEER_NETWORK_PATHS; - - /** - * Send traffic across the highest quality path only. This algorithm will still - * use the old path quality metric from protocol version 9. - */ - if (!_canUseMultipath) { + if (!_bondToPeer) { + Mutex::Lock _l(_paths_m); + unsigned int bestPath = ZT_MAX_PEER_NETWORK_PATHS; + /** + * Send traffic across the highest quality path only. This algorithm will still + * use the old path quality metric from protocol version 9. + */ long bestPathQuality = 2147483647; for(unsigned int i=0;i Peer::getAppropriatePath(int64_t now, bool includeExpired, int64 } return SharedPtr(); } - - // Update path measurements - for(unsigned int i=0;iprocessBackgroundPathMeasurements(now); - } - } - if (RR->sw->isFlowAware()) { - // Detect new flows and update existing records - if (_flows.count(flowId)) { - _flows[flowId]->lastSend = now; - } - else { - fprintf(stderr, "new flow %llx detected between this node and %llx (%lu active flow(s))\n", - flowId, this->_id.address().toInt(), (_flows.size()+1)); - struct Flow *newFlow = new Flow(flowId, now); - _flows[flowId] = newFlow; - newFlow->assignedPath = nullptr; - } - } - // Construct set of virtual paths if needed - if (!_virtualPaths.size()) { - constructSetOfVirtualPaths(); - } - if (!_virtualPaths.size()) { - fprintf(stderr, "no paths to send packet out on\n"); - return SharedPtr(); - } - - /** - * All traffic is sent on all paths. - */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) { - // Not handled here. Handled in Switch::_trySend() - } - - /** - * Only one link is active. Fail-over is immediate. - */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_ACTIVE_BACKUP) { - bool bFoundHotPath = false; - if (!_activeBackupPath) { - /* Select the fist path that appears to still be active. 
- * This will eventually be user-configurable */ - for (int i=0; ilastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) { - bFoundHotPath = true; - _activeBackupPath = _paths[i].p; - _pathAssignmentIdx = i; - _activeBackupPath->address().toString(curPathStr); - fprintf(stderr, "selected %s as the primary active-backup path to %llx (idx=%d)\n", - curPathStr, this->_id.address().toInt(), _pathAssignmentIdx); - break; - } - } - } - } - else { - char what[128]; - if ((now - _activeBackupPath->lastIn()) > ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) { - _activeBackupPath->address().toString(curPathStr); // Record path string for later debug trace - int16_t previousIdx = _pathAssignmentIdx; - SharedPtr nextAlternativePath; - // Search for a hot path, at the same time find the next path in - // a RR sequence that seems viable to use as an alternative - int searchCount = 0; - while (searchCount < ZT_MAX_PEER_NETWORK_PATHS) { - _pathAssignmentIdx++; - if (_pathAssignmentIdx == ZT_MAX_PEER_NETWORK_PATHS) { - _pathAssignmentIdx = 0; - } - searchCount++; - if (_paths[_pathAssignmentIdx].p) { - _paths[_pathAssignmentIdx].p->address().toString(what); - if (_activeBackupPath.ptr() == _paths[_pathAssignmentIdx].p.ptr()) { - continue; - } - if (!nextAlternativePath) { // Record the first viable alternative in the RR sequence - nextAlternativePath = _paths[_pathAssignmentIdx].p; - } - if ((now - _paths[_pathAssignmentIdx].p->lastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) { - bFoundHotPath = true; - _activeBackupPath = _paths[_pathAssignmentIdx].p; - _activeBackupPath->address().toString(newPathStr); - fprintf(stderr, "primary active-backup path %s to %llx appears to be dead, switched to %s\n", - curPathStr, this->_id.address().toInt(), newPathStr); - break; - } - } - } - if (!bFoundHotPath) { - if (nextAlternativePath) { - _activeBackupPath = nextAlternativePath; - _activeBackupPath->address().toString(curPathStr); - //fprintf(stderr, "no hot paths found 
to use as active-backup primary to %llx, using next best: %s\n", - // this->_id.address().toInt(), curPathStr); - } - else { - // No change - } - } - } - } - if (!_activeBackupPath) { - return SharedPtr(); - } - return _activeBackupPath; - } - - /** - * Traffic is randomly distributed among all active paths. - */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RANDOM) { - int sz = _virtualPaths.size(); - if (sz) { - int idx = _freeRandomByte % sz; - _pathChoiceHist.push(idx); - _virtualPaths[idx]->p->address().toString(curPathStr); - fprintf(stderr, "sending out: (%llx), idx=%d: path=%s, localSocket=%lld\n", - this->_id.address().toInt(), idx, curPathStr, _virtualPaths[idx]->localSocket); - return _virtualPaths[idx]->p; - } - // This call is algorithmically inert but gives us a value to show in the status output - computeAggregateAllocation(now); - } - - /** - * Packets are striped across all available paths. - */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RR_OPAQUE) { - int16_t previousIdx = _roundRobinPathAssignmentIdx; - int cycleCount = 0; - int minLastIn = 0; - int bestAlternativeIdx = -1; - while (cycleCount < ZT_MAX_PEER_NETWORK_PATHS) { - if (_roundRobinPathAssignmentIdx < (_virtualPaths.size()-1)) { - _roundRobinPathAssignmentIdx++; - } - else { - _roundRobinPathAssignmentIdx = 0; - } - cycleCount++; - if (_virtualPaths[_roundRobinPathAssignmentIdx]->p) { - uint64_t lastIn = _virtualPaths[_roundRobinPathAssignmentIdx]->p->lastIn(); - if (bestAlternativeIdx == -1) { - minLastIn = lastIn; // Initialization - bestAlternativeIdx = 0; - } - if (lastIn < minLastIn) { - minLastIn = lastIn; - bestAlternativeIdx = _roundRobinPathAssignmentIdx; - } - if ((now - lastIn) < 5000) { - selectedPath = _virtualPaths[_roundRobinPathAssignmentIdx]->p; - } - } - } - // If we can't find an appropriate path, try the most recently active one - if (!selectedPath) { - _roundRobinPathAssignmentIdx = bestAlternativeIdx; - selectedPath = 
_virtualPaths[bestAlternativeIdx]->p; - selectedPath->address().toString(curPathStr); - fprintf(stderr, "could not find good path, settling for next best %s\n",curPathStr); - } - selectedPath->address().toString(curPathStr); - fprintf(stderr, "sending packet out on path %s at index %d\n", - curPathStr, _roundRobinPathAssignmentIdx); - return selectedPath; - } - - /** - * Flows are striped across all available paths. - */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RR_FLOW) { - // fprintf(stderr, "ZT_MULTIPATH_BALANCE_RR_FLOW\n"); - } - - /** - * Flows are hashed across all available paths. - */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_XOR_FLOW) { - // fprintf(stderr, "ZT_MULTIPATH_BALANCE_XOR_FLOW (%llx) \n", flowId); - struct Flow *currFlow = NULL; - if (_flows.count(flowId)) { - currFlow = _flows[flowId]; - if (!currFlow->assignedPath) { - int idx = abs((int)(currFlow->flowId % (_virtualPaths.size()-1))); - currFlow->assignedPath = _virtualPaths[idx]; - _virtualPaths[idx]->p->address().toString(curPathStr); - fprintf(stderr, "assigning flow %llx between this node and peer %llx to path %s at index %d\n", - currFlow->flowId, this->_id.address().toInt(), curPathStr, idx); - } - else { - if (!currFlow->assignedPath->p->alive(now)) { - currFlow->assignedPath->p->address().toString(curPathStr); - // Re-assign - int idx = abs((int)(currFlow->flowId % (_virtualPaths.size()-1))); - currFlow->assignedPath = _virtualPaths[idx]; - _virtualPaths[idx]->p->address().toString(newPathStr); - fprintf(stderr, "path %s assigned to flow %llx between this node and %llx appears to be dead, reassigning to path %s\n", - curPathStr, currFlow->flowId, this->_id.address().toInt(), newPathStr); - } - } - return currFlow->assignedPath->p; - } - } - - /** - * Proportionally allocate traffic according to dynamic path quality measurements. 
- */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_DYNAMIC_OPAQUE) { - if ((now - _lastAggregateAllocation) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) { - _lastAggregateAllocation = now; - computeAggregateAllocation(now); - } - // Randomly choose path according to their allocations - float rf = _freeRandomByte; - for(int i=0;iallocation()) { - bestPath = i; - _pathChoiceHist.push(bestPath); // Record which path we chose - break; - } - rf -= _paths[i].p->allocation(); - } - } - if (bestPath < ZT_MAX_PEER_NETWORK_PATHS) { - return _paths[bestPath].p; - } - } - - /** - * Flows are dynamically allocated across paths in proportion to link strength and load. - */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_DYNAMIC_FLOW) { - } - - return SharedPtr(); -} - -char *Peer::interfaceListStr() -{ - std::map ifnamemap; - char tmp[32]; - const int64_t now = RR->node->now(); - char *ptr = _interfaceListStr; - bool imbalanced = false; - memset(_interfaceListStr, 0, sizeof(_interfaceListStr)); - int alivePathCount = aggregateLinkLogicalPathCount(); - for(unsigned int i=0;ialive(now)) { - int ipv = _paths[i].p->address().isV4(); - // If this is acting as an aggregate link, check allocations - float targetAllocation = 1.0f / (float)alivePathCount; - float currentAllocation = 1.0f; - if (alivePathCount > 1) { - currentAllocation = (float)_pathChoiceHist.countValue(i) / (float)_pathChoiceHist.count(); - if (fabs(targetAllocation - currentAllocation) > ZT_PATH_IMBALANCE_THRESHOLD) { - imbalanced = true; - } - } - char *ipvStr = ipv ? 
(char*)"ipv4" : (char*)"ipv6"; - sprintf(tmp, "(%s, %s, %.3f)", _paths[i].p->getName(), ipvStr, currentAllocation); - // Prevent duplicates - if(ifnamemap[_paths[i].p->getName()] != ipv) { - memcpy(ptr, tmp, strlen(tmp)); - ptr += strlen(tmp); - *ptr = ' '; - ptr++; - ifnamemap[_paths[i].p->getName()] = ipv; - } - } - } - ptr--; // Overwrite trailing space - if (imbalanced) { - sprintf(tmp, ", is asymmetrical"); - memcpy(ptr, tmp, sizeof(tmp)); - } else { - *ptr = '\0'; - } - return _interfaceListStr; + return _bondToPeer->getAppropriatePath(now, flowId); } void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr &other) const @@ -859,87 +360,6 @@ void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr &o } } -inline void Peer::processBackgroundPeerTasks(const int64_t now) -{ - // Determine current multipath compatibility with other peer - if ((now - _lastMultipathCompatibilityCheck) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) { - // - // Cache number of available paths so that we can short-circuit multipath logic elsewhere - // - // We also take notice of duplicate paths (same IP only) because we may have - // recently received a direct path push from a peer and our list might contain - // a dead path which hasn't been fully recognized as such. In this case we - // don't want the duplicate to trigger execution of multipath code prematurely. - // - // This is done to support the behavior of auto multipath enable/disable - // without user intervention. 
- // - int currAlivePathCount = 0; - int duplicatePathsFound = 0; - for (unsigned int i=0;iaddress().ipsEqual2(_paths[j].p->address()) && i != j) { - duplicatePathsFound+=1; - break; - } - } - } - } - _uniqueAlivePathCount = (currAlivePathCount - (duplicatePathsFound / 2)); - _lastMultipathCompatibilityCheck = now; - _localMultipathSupported = ((RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) && (ZT_PROTO_VERSION > 9)); - _remoteMultipathSupported = _vProto > 9; - // If both peers support multipath and more than one path exist, we can use multipath logic - _canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1); - } - - // Remove old flows - if (RR->sw->isFlowAware()) { - std::map::iterator it = _flows.begin(); - while (it != _flows.end()) { - if ((now - it->second->lastSend) > ZT_MULTIPATH_FLOW_EXPIRATION) { - fprintf(stderr, "forgetting flow %llx between this node and %llx (%lu active flow(s))\n", - it->first, this->_id.address().toInt(), _flows.size()); - it = _flows.erase(it); - } else { - it++; - } - } - } -} - -void Peer::sendACK(void *tPtr,const SharedPtr &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now) -{ - Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ACK); - uint32_t bytesToAck = path->bytesToAck(); - outp.append(bytesToAck); - if (atAddress) { - outp.armor(_key,false); - RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); - } else { - RR->sw->send(tPtr,outp,false); - } - path->sentAck(now); -} - -void Peer::sendQOS_MEASUREMENT(void *tPtr,const SharedPtr &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now) -{ - const int64_t _now = RR->node->now(); - Packet outp(_id.address(),RR->identity.address(),Packet::VERB_QOS_MEASUREMENT); - char qosData[ZT_PATH_MAX_QOS_PACKET_SZ]; - int16_t len = path->generateQoSPacket(_now,qosData); - outp.append(qosData,len); - if (atAddress) { - outp.armor(_key,false); - 
RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); - } else { - RR->sw->send(tPtr,outp,false); - } - path->sentQoS(now); -} - void Peer::sendHELLO(void *tPtr,const int64_t localSocket,const InetAddress &atAddress,int64_t now) { Packet outp(_id.address(),RR->identity.address(),Packet::VERB_HELLO); @@ -1005,29 +425,57 @@ void Peer::tryMemorizedPath(void *tPtr,int64_t now) } } +void Peer::performMultipathStateCheck(int64_t now) +{ + /** + * Check for conditions required for multipath bonding and create a bond + * if allowed. + */ + _localMultipathSupported = ((RR->bc->inUse()) && (ZT_PROTO_VERSION > 9)); + if (_localMultipathSupported) { + int currAlivePathCount = 0; + int duplicatePathsFound = 0; + for (unsigned int i=0;iaddress().ipsEqual2(_paths[j].p->address()) && i != j) { + duplicatePathsFound+=1; + break; + } + } + } + } + _uniqueAlivePathCount = (currAlivePathCount - (duplicatePathsFound / 2)); + _remoteMultipathSupported = _vProto > 9; + _canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1); + } + if (_canUseMultipath && !_bondToPeer) { + if (RR->bc) { + _bondToPeer = RR->bc->createTransportTriggeredBond(RR, this); + /** + * Allow new bond to retroactively learn all paths known to this peer + */ + if (_bondToPeer) { + for (unsigned int i=0;inominatePath(_paths[i].p, now); + } + } + } + } + } +} + unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) { unsigned int sent = 0; Mutex::Lock _l(_paths_m); - processBackgroundPeerTasks(now); + performMultipathStateCheck(now); - // Emit traces regarding aggregate link status - if (_canUseMultipath) { - int alivePathCount = aggregateLinkPhysicalPathCount(); - if ((now - _lastAggregateStatsReport) > ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL) { - _lastAggregateStatsReport = now; - if (alivePathCount) { - RR->t->peerLinkAggregateStatistics(NULL,*this); - } - } if (alivePathCount < 2 && _linkIsRedundant) { - _linkIsRedundant = !_linkIsRedundant; 
- RR->t->peerLinkNoLongerAggregate(NULL,*this); - } if (alivePathCount > 1 && !_linkIsRedundant) { - _linkIsRedundant = !_linkIsRedundant; - RR->t->peerLinkNoLongerAggregate(NULL,*this); - } - } + const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD); + _lastSentFullHello = now; // Right now we only keep pinging links that have the maximum priority. The // priority is used to track cluster redirections, meaning that when a cluster @@ -1040,15 +488,13 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) else break; } - const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD); - _lastSentFullHello = now; - unsigned int j = 0; for(unsigned int i=0;ineedsHeartbeat(now))) { + if ((sendFullHello)||(_paths[i].p->needsHeartbeat(now)) + || (_canUseMultipath && _paths[i].p->needsGratuitousHeartbeat(now))) { attemptToContactAt(tPtr,_paths[i].p->localSocket(),_paths[i].p->address(),now,sendFullHello); _paths[i].p->sent(now); sent |= (_paths[i].p->address().ss_family == AF_INET) ? 
0x1 : 0x2; @@ -1059,14 +505,6 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) } } else break; } - if (canUseMultipath()) { - while(j < ZT_MAX_PEER_NETWORK_PATHS) { - _paths[j].lr = 0; - _paths[j].p.zero(); - _paths[j].priority = 1; - ++j; - } - } return sent; } @@ -1133,4 +571,30 @@ void Peer::resetWithinScope(void *tPtr,InetAddress::IpScope scope,int inetAddres } } +void Peer::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, + uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) +{ + if (!_shouldCollectPathStatistics || !_bondToPeer) { + return; + } + _bondToPeer->recordOutgoingPacket(path, packetId, payloadLength, verb, flowId, now); +} + +void Peer::recordIncomingInvalidPacket(const SharedPtr& path) +{ + if (!_shouldCollectPathStatistics || !_bondToPeer) { + return; + } + _bondToPeer->recordIncomingInvalidPacket(path); +} + +void Peer::recordIncomingPacket(void *tPtr, const SharedPtr &path, const uint64_t packetId, + uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) +{ + if (!_shouldCollectPathStatistics || !_bondToPeer) { + return; + } + _bondToPeer->recordIncomingPacket(path, packetId, payloadLength, verb, flowId, now); +} + } // namespace ZeroTier diff --git a/node/Peer.hpp b/node/Peer.hpp index ef4645e9a..1a2b6abc1 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. 
@@ -15,8 +15,6 @@ #define ZT_PEER_HPP #include -#include -#include #include "../include/ZeroTierOne.h" @@ -33,6 +31,8 @@ #include "AtomicCounter.hpp" #include "Hashtable.hpp" #include "Mutex.hpp" +#include "Bond.hpp" +#include "BondController.hpp" #define ZT_PEER_MAX_SERIALIZED_STATE_SIZE (sizeof(Peer) + 32 + (sizeof(Path) * 2)) @@ -44,6 +44,9 @@ namespace ZeroTier { class Peer { friend class SharedPtr; + friend class SharedPtr; + friend class Switch; + friend class Bond; private: Peer() {} // disabled to prevent bugs -- should not be constructed uninitialized @@ -97,7 +100,8 @@ public: const uint64_t inRePacketId, const Packet::Verb inReVerb, const bool trustEstablished, - const uint64_t networkId); + const uint64_t networkId, + const int32_t flowId); /** * Check whether we have an active path to this peer via the given address @@ -136,94 +140,49 @@ public: return false; } - void constructSetOfVirtualPaths(); - /** - * Record statistics on outgoing packets - * - * @param path Path over which packet was sent - * @param id Packet ID - * @param len Length of packet payload - * @param verb Packet verb - * @param now Current time - */ - void recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now); - - /** - * Record statistics on incoming packets - * - * @param path Path over which packet was sent - * @param id Packet ID - * @param len Length of packet payload - * @param verb Packet verb - * @param now Current time - */ - void recordIncomingPacket(void *tPtr, const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now); - - /** - * Send an ACK to peer for the most recent packets received + * Record incoming packets to * * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call - * @param localSocket Raw socket the ACK packet will be sent over - * @param atAddress Destination for the ACK packet + * @param 
path Path over which packet was received + * @param packetId Packet ID + * @param payloadLength Length of packet data payload + * @param verb Packet verb + * @param flowId Flow ID * @param now Current time */ - void sendACK(void *tPtr, const SharedPtr &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now); + void recordIncomingPacket(void *tPtr, const SharedPtr &path, const uint64_t packetId, + uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now); /** - * Send a QoS packet to peer so that it can evaluate the quality of this link * - * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call - * @param localSocket Raw socket the QoS packet will be sent over - * @param atAddress Destination for the QoS packet + * @param path Path over which packet is being sent + * @param packetId Packet ID + * @param payloadLength Length of packet data payload + * @param verb Packet verb + * @param flowId Flow ID * @param now Current time */ - void sendQOS_MEASUREMENT(void *tPtr, const SharedPtr &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now); + void recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, + uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now); /** - * Compute relative quality values and allocations for the components of the aggregate link + * Record an invalid incoming packet. This packet failed + * MAC/compression/cipher checks and will now contribute to a + * Packet Error Ratio (PER). 
* - * @param now Current time + * @param path Path over which packet was received */ - void computeAggregateAllocation(int64_t now); - - /** - * @return The aggregate link Packet Delay Variance (PDV) - */ - int computeAggregateLinkPacketDelayVariance(); - - /** - * @return The aggregate link mean latency - */ - int computeAggregateLinkMeanLatency(); - - /** - * @return The number of currently alive "physical" paths in the aggregate link - */ - int aggregateLinkPhysicalPathCount(); - - /** - * @return The number of currently alive "logical" paths in the aggregate link - */ - int aggregateLinkLogicalPathCount(); - - std::vector> getAllPaths(int64_t now); + void recordIncomingInvalidPacket(const SharedPtr& path); /** * Get the most appropriate direct path based on current multipath and QoS configuration * * @param now Current time - * @param flowId Session-specific protocol flow identifier used for path allocation * @param includeExpired If true, include even expired paths * @return Best current path or NULL if none */ - SharedPtr getAppropriatePath(int64_t now, bool includeExpired, int64_t flowId = -1); - - /** - * Generate a human-readable string of interface names making up the aggregate link, also include - * moving allocation and IP version number for each (for tracing) - */ - char *interfaceListStr(); + SharedPtr getAppropriatePath(int64_t now, bool includeExpired, int32_t flowId = -1); /** * Send VERB_RENDEZVOUS to this and another peer via the best common IP scope and path @@ -265,6 +224,13 @@ public: */ void tryMemorizedPath(void *tPtr,int64_t now); + /** + * A check to be performed periodically which determines whether multipath communication is + * possible with this peer. This check should be performed early in the life-cycle of the peer + * as well as during the process of learning new paths. 
+ */ + void performMultipathStateCheck(int64_t now); + /** * Send pings or keepalives depending on configured timeouts * @@ -277,16 +243,6 @@ public: */ unsigned int doPingAndKeepalive(void *tPtr,int64_t now); - /** - * Clear paths whose localSocket(s) are in a CLOSED state or have an otherwise INVALID state. - * This should be called frequently so that we can detect and remove unproductive or invalid paths. - * - * Under the hood this is done periodically based on ZT_CLOSED_PATH_PRUNING_INTERVAL. - * - * @return Number of paths that were pruned this round - */ - unsigned int prunePaths(); - /** * Process a cluster redirect sent by this peer * @@ -348,7 +304,7 @@ public: inline unsigned int latency(const int64_t now) { if (_canUseMultipath) { - return (int)computeAggregateLinkMeanLatency(); + return (int)_lastComputedAggregateMeanLatency; } else { SharedPtr bp(getAppropriatePath(now,false)); if (bp) @@ -407,37 +363,6 @@ public: inline bool remoteVersionKnown() const { return ((_vMajor > 0)||(_vMinor > 0)||(_vRevision > 0)); } - /** - * Periodically update known multipath activation constraints. This is done so that we know when and when - * not to use multipath logic. Doing this once every few seconds is sufficient. - * - * @param now Current time - */ - inline void processBackgroundPeerTasks(const int64_t now); - - /** - * Record that the remote peer does have multipath enabled. As is evident by the receipt of a VERB_ACK - * or a VERB_QOS_MEASUREMENT packet at some point in the past. Until this flag is set, the local client - * shall assume that multipath is not enabled and should only use classical Protocol 9 logic. 
- */ - inline void inferRemoteMultipathEnabled() { _remotePeerMultipathEnabled = true; } - - /** - * @return Whether the local client supports and is configured to use multipath - */ - inline bool localMultipathSupport() { return _localMultipathSupported; } - - /** - * @return Whether the remote peer supports and is configured to use multipath - */ - inline bool remoteMultipathSupport() { return _remoteMultipathSupported; } - - /** - * @return Whether this client can use multipath to communicate with this peer. True if both peers are using - * the correct protocol and if both peers have multipath enabled. False if otherwise. - */ - inline bool canUseMultipath() { return _canUseMultipath; } - /** * @return True if peer has received a trust established packet (e.g. common network membership) in the past ZT_TRUST_EXPIRATION ms */ @@ -492,50 +417,35 @@ public: } /** - * Rate limit gate for inbound ECHO requests + * Rate limit gate for inbound ECHO requests. This rate limiter works + * by draining a certain number of requests per unit time. Each peer may + * theoretically receive up to ZT_ECHO_CUTOFF_LIMIT requests per second. 
*/ inline bool rateGateEchoRequest(const int64_t now) { - if ((now - _lastEchoRequestReceived) >= ZT_PEER_GENERAL_RATE_LIMIT) { - _lastEchoRequestReceived = now; - return true; - } - return false; - } - - /** - * Rate limit gate for VERB_ACK - */ - inline bool rateGateACK(const int64_t now) - { - if ((now - _lastACKWindowReset) >= ZT_PATH_QOS_ACK_CUTOFF_TIME) { - _lastACKWindowReset = now; - _ACKCutoffCount = 0; + /* + // TODO: Rethink this + if (_canUseMultipath) { + _echoRequestCutoffCount++; + int numToDrain = (now - _lastEchoCheck) / ZT_ECHO_DRAINAGE_DIVISOR; + _lastEchoCheck = now; + fprintf(stderr, "ZT_ECHO_CUTOFF_LIMIT=%d, (now - _lastEchoCheck)=%d, numToDrain=%d, ZT_ECHO_DRAINAGE_DIVISOR=%d\n", ZT_ECHO_CUTOFF_LIMIT, (now - _lastEchoCheck), numToDrain, ZT_ECHO_DRAINAGE_DIVISOR); + if (_echoRequestCutoffCount > numToDrain) { + _echoRequestCutoffCount-=numToDrain; + } + else { + _echoRequestCutoffCount = 0; + } + return (_echoRequestCutoffCount < ZT_ECHO_CUTOFF_LIMIT); } else { - ++_ACKCutoffCount; + if ((now - _lastEchoRequestReceived) >= (ZT_PEER_GENERAL_RATE_LIMIT)) { + _lastEchoRequestReceived = now; + return true; + } + return false; } - return (_ACKCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT); - } - - /** - * Rate limit gate for VERB_QOS_MEASUREMENT - */ - inline bool rateGateQoS(const int64_t now) - { - if ((now - _lastQoSWindowReset) >= ZT_PATH_QOS_ACK_CUTOFF_TIME) { - _lastQoSWindowReset = now; - _QoSCutoffCount = 0; - } else { - ++_QoSCutoffCount; - } - return (_QoSCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT); - } - - /** - * @return Whether this peer is reachable via an aggregate link - */ - inline bool hasAggregateLink() { - return _localMultipathSupported && _remoteMultipathSupported && _remotePeerMultipathEnabled; + */ + return true; } /** @@ -610,6 +520,18 @@ public: } } + /** + * + * @return + */ + SharedPtr bond() { return _bondToPeer; } + + /** + * + * @return + */ + inline int8_t bondingPolicy() { return _bondingPolicy; } + private: struct 
_PeerPath { @@ -628,25 +550,16 @@ private: int64_t _lastTriedMemorizedPath; int64_t _lastDirectPathPushSent; int64_t _lastDirectPathPushReceive; + int64_t _lastEchoRequestReceived; int64_t _lastCredentialRequestSent; int64_t _lastWhoisRequestReceived; - int64_t _lastEchoRequestReceived; int64_t _lastCredentialsReceived; int64_t _lastTrustEstablishedPacketReceived; int64_t _lastSentFullHello; - int64_t _lastPathPrune; - int64_t _lastACKWindowReset; - int64_t _lastQoSWindowReset; - int64_t _lastMultipathCompatibilityCheck; + int64_t _lastEchoCheck; unsigned char _freeRandomByte; - int _uniqueAlivePathCount; - - bool _localMultipathSupported; - bool _remoteMultipathSupported; - bool _canUseMultipath; - uint16_t _vProto; uint16_t _vMajor; uint16_t _vMinor; @@ -659,62 +572,22 @@ private: unsigned int _directPathPushCutoffCount; unsigned int _credentialsCutoffCount; - unsigned int _QoSCutoffCount; - unsigned int _ACKCutoffCount; + unsigned int _echoRequestCutoffCount; AtomicCounter __refCount; - RingBuffer _pathChoiceHist; - - bool _linkIsBalanced; - bool _linkIsRedundant; bool _remotePeerMultipathEnabled; + int _uniqueAlivePathCount; + bool _localMultipathSupported; + bool _remoteMultipathSupported; + bool _canUseMultipath; - int64_t _lastAggregateStatsReport; - int64_t _lastAggregateAllocation; + volatile bool _shouldCollectPathStatistics; + volatile int8_t _bondingPolicy; - char _interfaceListStr[256]; // 16 characters * 16 paths in a link + int32_t _lastComputedAggregateMeanLatency; - // - struct LinkPerformanceEntry - { - int64_t packetId; - struct VirtualPath *egressVirtualPath; - struct VirtualPath *ingressVirtualPath; - }; - - // Virtual paths - int _virtualPathCount; - Mutex _virtual_paths_m; - struct VirtualPath - { - SharedPtr p; - int64_t localSocket; - std::queue performanceEntries; - }; - std::vector _virtualPaths; - - // Flows - struct Flow - { - Flow(int64_t fid, int64_t ls) : - flowId(fid), - lastSend(ls), - assignedPath(NULL) - {} - - int64_t flowId; - 
int64_t bytesPerSecond; - int64_t lastSend; - struct VirtualPath *assignedPath; - }; - - std::map _flows; - - int16_t _roundRobinPathAssignmentIdx; - - SharedPtr _activeBackupPath; - int16_t _pathAssignmentIdx; + SharedPtr _bondToPeer; }; } // namespace ZeroTier diff --git a/node/RingBuffer.hpp b/node/RingBuffer.hpp index 2d6cd1949..42047a873 100644 --- a/node/RingBuffer.hpp +++ b/node/RingBuffer.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -238,6 +238,21 @@ public: return curr_cnt ? subtotal / (float)curr_cnt : 0; } + /** + * @return The sum of the contents of the buffer + */ + inline float sum() + { + size_t iterator = begin; + float total = 0; + size_t curr_cnt = count(); + for (size_t i=0; i frameLen) + return false; // invalid! + proto = frameData[pos]; + pos += ((unsigned int)frameData[pos + 1] * 8) + 8; + break; + + //case 44: // fragment -- we currently can't parse these and they are deprecated in IPv6 anyway + //case 50: + //case 51: // IPSec ESP and AH -- we have to stop here since this is encrypted stuff + default: + return true; + } + } + return false; // overflow == invalid +} + void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddress &fromAddr,const void *data,unsigned int len) { + int32_t flowId = ZT_QOS_NO_FLOW; try { const int64_t now = RR->node->now(); @@ -112,6 +142,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre if (rq->packetId != fragmentPacketId) { // No packet found, so we received a fragment without its head. 
+ rq->flowId = flowId; rq->timestamp = now; rq->packetId = fragmentPacketId; rq->frags[fragmentNumber - 1] = fragment; @@ -130,7 +161,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre for(unsigned int f=1;ffrag0.append(rq->frags[f - 1].payload(),rq->frags[f - 1].payloadLength()); - if (rq->frag0.tryDecode(RR,tPtr)) { + if (rq->frag0.tryDecode(RR,tPtr,flowId)) { rq->timestamp = 0; // packet decoded, free entry } else { rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something @@ -195,6 +226,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre if (rq->packetId != packetId) { // If we have no other fragments yet, create an entry and save the head + rq->flowId = flowId; rq->timestamp = now; rq->packetId = packetId; rq->frag0.init(data,len,path,now); @@ -211,7 +243,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre for(unsigned int f=1;ftotalFragments;++f) rq->frag0.append(rq->frags[f - 1].payload(),rq->frags[f - 1].payloadLength()); - if (rq->frag0.tryDecode(RR,tPtr)) { + if (rq->frag0.tryDecode(RR,tPtr,flowId)) { rq->timestamp = 0; // packet decoded, free entry } else { rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something @@ -224,9 +256,10 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre } else { // Packet is unfragmented, so just process it IncomingPacket packet(data,len,path,now); - if (!packet.tryDecode(RR,tPtr)) { + if (!packet.tryDecode(RR,tPtr,flowId)) { RXQueueEntry *const rq = _nextRXQueueEntry(); Mutex::Lock rql(rq->lock); + rq->flowId = flowId; rq->timestamp = now; rq->packetId = packet.packetId(); rq->frag0 = packet; @@ -242,43 +275,6 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre } catch ( ... 
) {} // sanity check, should be caught elsewhere } -// Returns true if packet appears valid; pos and proto will be set -static bool _ipv6GetPayload(const uint8_t *frameData,unsigned int frameLen,unsigned int &pos,unsigned int &proto) -{ - if (frameLen < 40) - return false; - pos = 40; - proto = frameData[6]; - while (pos <= frameLen) { - switch(proto) { - case 0: // hop-by-hop options - case 43: // routing - case 60: // destination options - case 135: // mobility options - if ((pos + 8) > frameLen) - return false; // invalid! - proto = frameData[pos]; - pos += ((unsigned int)frameData[pos + 1] * 8) + 8; - break; - - //case 44: // fragment -- we currently can't parse these and they are deprecated in IPv6 anyway - //case 50: - //case 51: // IPSec ESP and AH -- we have to stop here since this is encrypted stuff - default: - return true; - } - } - return false; // overflow == invalid -} - -bool Switch::isFlowAware() -{ - int mode = RR->node->getMultipathMode(); - return (( mode == ZT_MULTIPATH_BALANCE_RR_FLOW) - || (mode == ZT_MULTIPATH_BALANCE_XOR_FLOW) - || (mode == ZT_MULTIPATH_BALANCE_DYNAMIC_FLOW)); -} - void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const MAC &from,const MAC &to,unsigned int etherType,unsigned int vlanId,const void *data,unsigned int len) { if (!network->hasConfig()) @@ -293,75 +289,73 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const } } - uint8_t qosBucket = ZT_QOS_DEFAULT_BUCKET; + uint8_t qosBucket = ZT_AQM_DEFAULT_BUCKET; - /* A pseudo-unique identifier used by the balancing and bonding policies to associate properties - * of a specific protocol flow over time and to determine which virtual path this packet - * shall be sent out on. This identifier consists of the source port and destination port - * of the encapsulated frame. + /** + * A pseudo-unique identifier used by balancing and bonding policies to + * categorize individual flows/conversations for assignment to a specific + * physical path. 
This identifier consists of the source port and + * destination port of the encapsulated frame. * - * A flowId of -1 will indicate that whatever packet we are about transmit has no - * preferred virtual path and will be sent out according to what the multipath logic - * deems appropriate. An example of this would be an ICMP packet. + * A flowId of -1 will indicate that there is no preference for how this + * packet shall be sent. An example of this would be an ICMP packet. */ - int64_t flowId = -1; + int32_t flowId = ZT_QOS_NO_FLOW; - if (isFlowAware()) { - if (etherType == ZT_ETHERTYPE_IPV4 && (len >= 20)) { - uint16_t srcPort = 0; - uint16_t dstPort = 0; - int8_t proto = (reinterpret_cast(data)[9]); - const unsigned int headerLen = 4 * (reinterpret_cast(data)[0] & 0xf); - switch(proto) { - case 0x01: // ICMP - flowId = 0x01; - break; - // All these start with 16-bit source and destination port in that order - case 0x06: // TCP - case 0x11: // UDP - case 0x84: // SCTP - case 0x88: // UDPLite - if (len > (headerLen + 4)) { - unsigned int pos = headerLen + 0; - srcPort = (reinterpret_cast(data)[pos++]) << 8; - srcPort |= (reinterpret_cast(data)[pos]); - pos++; - dstPort = (reinterpret_cast(data)[pos++]) << 8; - dstPort |= (reinterpret_cast(data)[pos]); - flowId = ((int64_t)srcPort << 48) | ((int64_t)dstPort << 32) | proto; - } - break; - } + if (etherType == ZT_ETHERTYPE_IPV4 && (len >= 20)) { + uint16_t srcPort = 0; + uint16_t dstPort = 0; + uint8_t proto = (reinterpret_cast(data)[9]); + const unsigned int headerLen = 4 * (reinterpret_cast(data)[0] & 0xf); + switch(proto) { + case 0x01: // ICMP + //flowId = 0x01; + break; + // All these start with 16-bit source and destination port in that order + case 0x06: // TCP + case 0x11: // UDP + case 0x84: // SCTP + case 0x88: // UDPLite + if (len > (headerLen + 4)) { + unsigned int pos = headerLen + 0; + srcPort = (reinterpret_cast(data)[pos++]) << 8; + srcPort |= (reinterpret_cast(data)[pos]); + pos++; + dstPort = 
(reinterpret_cast(data)[pos++]) << 8; + dstPort |= (reinterpret_cast(data)[pos]); + flowId = dstPort ^ srcPort ^ proto; + } + break; } + } - if (etherType == ZT_ETHERTYPE_IPV6 && (len >= 40)) { - uint16_t srcPort = 0; - uint16_t dstPort = 0; - unsigned int pos; - unsigned int proto; - _ipv6GetPayload((const uint8_t *)data, len, pos, proto); - switch(proto) { - case 0x3A: // ICMPv6 - flowId = 0x3A; - break; - // All these start with 16-bit source and destination port in that order - case 0x06: // TCP - case 0x11: // UDP - case 0x84: // SCTP - case 0x88: // UDPLite - if (len > (pos + 4)) { - srcPort = (reinterpret_cast(data)[pos++]) << 8; - srcPort |= (reinterpret_cast(data)[pos]); - pos++; - dstPort = (reinterpret_cast(data)[pos++]) << 8; - dstPort |= (reinterpret_cast(data)[pos]); - flowId = ((int64_t)srcPort << 48) | ((int64_t)dstPort << 32) | proto; - } - break; - default: - break; - } + if (etherType == ZT_ETHERTYPE_IPV6 && (len >= 40)) { + uint16_t srcPort = 0; + uint16_t dstPort = 0; + unsigned int pos; + unsigned int proto; + _ipv6GetPayload((const uint8_t *)data, len, pos, proto); + switch(proto) { + case 0x3A: // ICMPv6 + //flowId = 0x3A; + break; + // All these start with 16-bit source and destination port in that order + case 0x06: // TCP + case 0x11: // UDP + case 0x84: // SCTP + case 0x88: // UDPLite + if (len > (pos + 4)) { + srcPort = (reinterpret_cast(data)[pos++]) << 8; + srcPort |= (reinterpret_cast(data)[pos]); + pos++; + dstPort = (reinterpret_cast(data)[pos++]) << 8; + dstPort |= (reinterpret_cast(data)[pos]); + flowId = dstPort ^ srcPort ^ proto; + } + break; + default: + break; } } @@ -595,7 +589,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const } } -void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet &packet,bool encrypt,int qosBucket,int64_t flowId) +void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet &packet,bool encrypt,int qosBucket,int32_t flowId) { if(!network->qosEnabled()) 
{ send(tPtr, packet, encrypt, flowId); @@ -603,18 +597,16 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet & } NetworkQoSControlBlock *nqcb = _netQueueControlBlock[network->id()]; if (!nqcb) { - // DEBUG_INFO("creating network QoS control block (NQCB) for network %llx", network->id()); nqcb = new NetworkQoSControlBlock(); _netQueueControlBlock[network->id()] = nqcb; // Initialize ZT_QOS_NUM_BUCKETS queues and place them in the INACTIVE list // These queues will be shuffled between the new/old/inactive lists by the enqueue/dequeue algorithm - for (int i=0; iinactiveQueues.push_back(new ManagedQueue(i)); } } // Don't apply QoS scheduling to ZT protocol traffic if (packet.verb() != Packet::VERB_FRAME && packet.verb() != Packet::VERB_EXT_FRAME) { - // just send packet normally, no QoS for ZT protocol traffic send(tPtr, packet, encrypt, flowId); } @@ -624,8 +616,9 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet & const Address dest(packet.destination()); TXQueueEntry *txEntry = new TXQueueEntry(dest,RR->node->now(),packet,encrypt,flowId); + ManagedQueue *selectedQueue = nullptr; - for (size_t i=0; ioldQueues.size()) { // search old queues first (I think this is best since old would imply most recent usage of the queue) if (nqcb->oldQueues[i]->id == qosBucket) { selectedQueue = nqcb->oldQueues[i]; @@ -638,7 +631,7 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet & if (nqcb->inactiveQueues[i]->id == qosBucket) { selectedQueue = nqcb->inactiveQueues[i]; // move queue to end of NEW queue list - selectedQueue->byteCredit = ZT_QOS_QUANTUM; + selectedQueue->byteCredit = ZT_AQM_QUANTUM; // DEBUG_INFO("moving q=%p from INACTIVE to NEW list", selectedQueue); nqcb->newQueues.push_back(selectedQueue); nqcb->inactiveQueues.erase(nqcb->inactiveQueues.begin() + i); @@ -657,11 +650,11 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet & // Drop a packet if necessary ManagedQueue 
*selectedQueueToDropFrom = nullptr; - if (nqcb->_currEnqueuedPackets > ZT_QOS_MAX_ENQUEUED_PACKETS) + if (nqcb->_currEnqueuedPackets > ZT_AQM_MAX_ENQUEUED_PACKETS) { // DEBUG_INFO("too many enqueued packets (%d), finding packet to drop", nqcb->_currEnqueuedPackets); int maxQueueLength = 0; - for (size_t i=0; ioldQueues.size()) { if (nqcb->oldQueues[i]->byteLength > maxQueueLength) { maxQueueLength = nqcb->oldQueues[i]->byteLength; @@ -694,7 +687,7 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet & uint64_t Switch::control_law(uint64_t t, int count) { - return (uint64_t)(t + ZT_QOS_INTERVAL / sqrt(count)); + return (uint64_t)(t + ZT_AQM_INTERVAL / sqrt(count)); } Switch::dqr Switch::dodequeue(ManagedQueue *q, uint64_t now) @@ -708,14 +701,14 @@ Switch::dqr Switch::dodequeue(ManagedQueue *q, uint64_t now) return r; } uint64_t sojourn_time = now - r.p->creationTime; - if (sojourn_time < ZT_QOS_TARGET || q->byteLength <= ZT_DEFAULT_MTU) { + if (sojourn_time < ZT_AQM_TARGET || q->byteLength <= ZT_DEFAULT_MTU) { // went below - stay below for at least interval q->first_above_time = 0; } else { if (q->first_above_time == 0) { // just went above from below. if still above at // first_above_time, will say it's ok to drop. - q->first_above_time = now + ZT_QOS_INTERVAL; + q->first_above_time = now + ZT_AQM_INTERVAL; } else if (now >= q->first_above_time) { r.ok_to_drop = true; } @@ -747,7 +740,7 @@ Switch::TXQueueEntry * Switch::CoDelDequeue(ManagedQueue *q, bool isNew, uint64_ q->q.pop_front(); // drop r = dodequeue(q, now); q->dropping = true; - q->count = (q->count > 2 && now - q->drop_next < 8*ZT_QOS_INTERVAL)? + q->count = (q->count > 2 && now - q->drop_next < 8*ZT_AQM_INTERVAL)? 
q->count - 2 : 1; q->drop_next = control_law(now, q->count); } @@ -775,7 +768,7 @@ void Switch::aqm_dequeue(void *tPtr) while (currQueues->size()) { ManagedQueue *queueAtFrontOfList = currQueues->front(); if (queueAtFrontOfList->byteCredit < 0) { - queueAtFrontOfList->byteCredit += ZT_QOS_QUANTUM; + queueAtFrontOfList->byteCredit += ZT_AQM_QUANTUM; // Move to list of OLD queues // DEBUG_INFO("moving q=%p from NEW to OLD list", queueAtFrontOfList); oldQueues->push_back(queueAtFrontOfList); @@ -810,7 +803,7 @@ void Switch::aqm_dequeue(void *tPtr) while (currQueues->size()) { ManagedQueue *queueAtFrontOfList = currQueues->front(); if (queueAtFrontOfList->byteCredit < 0) { - queueAtFrontOfList->byteCredit += ZT_QOS_QUANTUM; + queueAtFrontOfList->byteCredit += ZT_AQM_QUANTUM; oldQueues->push_back(queueAtFrontOfList); currQueues->erase(currQueues->begin()); } else { @@ -850,7 +843,7 @@ void Switch::removeNetworkQoSControlBlock(uint64_t nwid) } } -void Switch::send(void *tPtr,Packet &packet,bool encrypt,int64_t flowId) +void Switch::send(void *tPtr,Packet &packet,bool encrypt,int32_t flowId) { const Address dest(packet.destination()); if (dest == RR->identity.address()) @@ -883,7 +876,7 @@ void Switch::requestWhois(void *tPtr,const int64_t now,const Address &addr) const SharedPtr upstream(RR->topology->getUpstreamPeer()); if (upstream) { - int64_t flowId = -1; + int32_t flowId = ZT_QOS_NO_FLOW; Packet outp(upstream->address(),RR->identity.address(),Packet::VERB_WHOIS); addr.appendTo(outp); RR->node->expectReplyTo(outp.packetId()); @@ -903,7 +896,7 @@ void Switch::doAnythingWaitingForPeer(void *tPtr,const SharedPtr &peer) RXQueueEntry *const rq = &(_rxQueue[ptr]); Mutex::Lock rql(rq->lock); if ((rq->timestamp)&&(rq->complete)) { - if ((rq->frag0.tryDecode(RR,tPtr))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) + if ((rq->frag0.tryDecode(RR,tPtr,rq->flowId))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) rq->timestamp = 0; } } @@ -954,7 +947,7 @@ unsigned long 
Switch::doTimerTasks(void *tPtr,int64_t now) RXQueueEntry *const rq = &(_rxQueue[ptr]); Mutex::Lock rql(rq->lock); if ((rq->timestamp)&&(rq->complete)) { - if ((rq->frag0.tryDecode(RR,tPtr))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) { + if ((rq->frag0.tryDecode(RR,tPtr,rq->flowId))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) { rq->timestamp = 0; } else { const Address src(rq->frag0.source()); @@ -1000,7 +993,7 @@ bool Switch::_shouldUnite(const int64_t now,const Address &source,const Address return false; } -bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int64_t flowId) +bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId) { SharedPtr viaPath; const int64_t now = RR->node->now(); @@ -1008,8 +1001,18 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int64_t flowId) const SharedPtr peer(RR->topology->getPeer(tPtr,destination)); if (peer) { - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) { - // Nothing here, we'll grab an entire set of paths to send out on below + if ((peer->bondingPolicy() == ZT_BONDING_POLICY_BROADCAST) + && (packet.verb() == Packet::VERB_FRAME || packet.verb() == Packet::VERB_EXT_FRAME)) { + const SharedPtr relay(RR->topology->getUpstreamPeer()); + Mutex::Lock _l(peer->_paths_m); + for(int i=0;i_paths[i].p && peer->_paths[i].p->alive(now)) { + char pathStr[128]; + peer->_paths[i].p->address().toString(pathStr); + _sendViaSpecificPath(tPtr,peer,peer->_paths[i].p,now,packet,encrypt,flowId); + } + } + return true; } else { viaPath = peer->getAppropriatePath(now,false,flowId); @@ -1021,61 +1024,51 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int64_t flowId) return false; } } + if (viaPath) { + _sendViaSpecificPath(tPtr,peer,viaPath,now,packet,encrypt,flowId); + return true; + } } + } + return false; +} + +void Switch::_sendViaSpecificPath(void *tPtr,SharedPtr peer,SharedPtr viaPath,int64_t now,Packet &packet,bool encrypt,int32_t flowId) +{ + 
unsigned int mtu = ZT_DEFAULT_PHYSMTU; + uint64_t trustedPathId = 0; + RR->topology->getOutboundPathInfo(viaPath->address(),mtu,trustedPathId); + + unsigned int chunkSize = std::min(packet.size(),mtu); + packet.setFragmented(chunkSize < packet.size()); + + peer->recordOutgoingPacket(viaPath, packet.packetId(), packet.payloadLength(), packet.verb(), flowId, now); + + if (trustedPathId) { + packet.setTrusted(trustedPathId); } else { - return false; + packet.armor(peer->key(),encrypt); } - // If sending on all paths, set viaPath to first path - int nextPathIdx = 0; - std::vector> paths = peer->getAllPaths(now); - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) { - if (paths.size()) { - viaPath = paths[nextPathIdx++]; - } - } + if (viaPath->send(RR,tPtr,packet.data(),chunkSize,now)) { + if (chunkSize < packet.size()) { + // Too big for one packet, fragment the rest + unsigned int fragStart = chunkSize; + unsigned int remaining = packet.size() - chunkSize; + unsigned int fragsRemaining = (remaining / (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH)); + if ((fragsRemaining * (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH)) < remaining) + ++fragsRemaining; + const unsigned int totalFragments = fragsRemaining + 1; - while (viaPath) { - unsigned int mtu = ZT_DEFAULT_PHYSMTU; - uint64_t trustedPathId = 0; - RR->topology->getOutboundPathInfo(viaPath->address(),mtu,trustedPathId); - unsigned int chunkSize = std::min(packet.size(),mtu); - packet.setFragmented(chunkSize < packet.size()); - peer->recordOutgoingPacket(viaPath, packet.packetId(), packet.payloadLength(), packet.verb(), now); - - if (trustedPathId) { - packet.setTrusted(trustedPathId); - } else { - packet.armor(peer->key(),encrypt); - } - - if (viaPath->send(RR,tPtr,packet.data(),chunkSize,now)) { - if (chunkSize < packet.size()) { - // Too big for one packet, fragment the rest - unsigned int fragStart = chunkSize; - unsigned int remaining = packet.size() - chunkSize; - unsigned int fragsRemaining = (remaining / (mtu - 
ZT_PROTO_MIN_FRAGMENT_LENGTH)); - if ((fragsRemaining * (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH)) < remaining) - ++fragsRemaining; - const unsigned int totalFragments = fragsRemaining + 1; - - for(unsigned int fno=1;fnosend(RR,tPtr,frag.data(),frag.size(),now); - fragStart += chunkSize; - remaining -= chunkSize; - } - } - } - viaPath.zero(); - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) { - if (paths.size() > nextPathIdx) { - viaPath = paths[nextPathIdx++]; + for(unsigned int fno=1;fnosend(RR,tPtr,frag.data(),frag.size(),now); + fragStart += chunkSize; + remaining -= chunkSize; } } } - return true; } } // namespace ZeroTier diff --git a/node/Switch.hpp b/node/Switch.hpp index f535cb8eb..f1436c7cf 100644 --- a/node/Switch.hpp +++ b/node/Switch.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -59,6 +59,8 @@ class Switch struct ManagedQueue; struct TXQueueEntry; + friend class SharedPtr; + typedef struct { TXQueueEntry *p; bool ok_to_drop; @@ -123,7 +125,7 @@ public: * @param encrypt Encrypt packet payload? 
(always true except for HELLO) * @param qosBucket Which bucket the rule-system determined this packet should fall into */ - void aqm_enqueue(void *tPtr, const SharedPtr &network, Packet &packet,bool encrypt,int qosBucket,int64_t flowId = -1); + void aqm_enqueue(void *tPtr, const SharedPtr &network, Packet &packet,bool encrypt,int qosBucket,int32_t flowId = ZT_QOS_NO_FLOW); /** * Performs a single AQM cycle and dequeues and transmits all eligible packets on all networks @@ -169,7 +171,7 @@ public: * @param packet Packet to send (buffer may be modified) * @param encrypt Encrypt packet payload? (always true except for HELLO) */ - void send(void *tPtr,Packet &packet,bool encrypt,int64_t flowId = -1); + void send(void *tPtr,Packet &packet,bool encrypt,int32_t flowId = ZT_QOS_NO_FLOW); /** * Request WHOIS on a given address @@ -204,7 +206,8 @@ public: private: bool _shouldUnite(const int64_t now,const Address &source,const Address &destination); - bool _trySend(void *tPtr,Packet &packet,bool encrypt,int64_t flowId = -1); // packet is modified if return is true + bool _trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId = ZT_QOS_NO_FLOW); // packet is modified if return is true + void _sendViaSpecificPath(void *tPtr,SharedPtr peer,SharedPtr viaPath,int64_t now,Packet &packet,bool encrypt,int32_t flowId); const RuntimeEnvironment *const RR; int64_t _lastBeaconResponse; @@ -225,6 +228,7 @@ private: unsigned int totalFragments; // 0 if only frag0 received, waiting for frags uint32_t haveFragments; // bit mask, LSB to MSB volatile bool complete; // if true, packet is complete + volatile int32_t flowId; Mutex lock; }; RXQueueEntry _rxQueue[ZT_RX_QUEUE_SIZE]; @@ -253,7 +257,7 @@ private: struct TXQueueEntry { TXQueueEntry() {} - TXQueueEntry(Address d,uint64_t ct,const Packet &p,bool enc,int64_t fid) : + TXQueueEntry(Address d,uint64_t ct,const Packet &p,bool enc,int32_t fid) : dest(d), creationTime(ct), packet(p), @@ -264,7 +268,7 @@ private: uint64_t creationTime; 
Packet packet; // unencrypted/unMAC'd packet -- this is done at send time bool encrypt; - int64_t flowId; + int32_t flowId; }; std::list< TXQueueEntry > _txQueue; Mutex _txQueue_m; @@ -296,7 +300,7 @@ private: { ManagedQueue(int id) : id(id), - byteCredit(ZT_QOS_QUANTUM), + byteCredit(ZT_AQM_QUANTUM), byteLength(0), dropping(false) {} diff --git a/node/Trace.cpp b/node/Trace.cpp index 96abf5c72..f7175c4c0 100644 --- a/node/Trace.cpp +++ b/node/Trace.cpp @@ -94,29 +94,26 @@ void Trace::peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId, } } -void Trace::peerLinkNowAggregate(void *const tPtr,Peer &peer) +void Trace::peerLinkNowRedundant(void *const tPtr,Peer &peer) { - if ((RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RANDOM)) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is now a randomly-distributed aggregate link",peer.address().toInt()); - } - if ((RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_DYNAMIC_OPAQUE)) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is now a proportionally-balanced aggregate link",peer.address().toInt()); - } + //ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is fully redundant",peer.address().toInt()); } -void Trace::peerLinkNoLongerAggregate(void *const tPtr,Peer &peer) +void Trace::peerLinkNoLongerRedundant(void *const tPtr,Peer &peer) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx has degraded and is no longer an aggregate link",peer.address().toInt()); + //ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is no longer redundant",peer.address().toInt()); } void Trace::peerLinkAggregateStatistics(void *const tPtr,Peer &peer) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is composed of (%d) physical paths %s, has PDV (%.0f ms), mean latency (%.0f ms)", + /* + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is composed of (%d) physical paths %s, has packet delay variance (%.0f ms), mean latency (%.0f ms)", peer.address().toInt(), peer.aggregateLinkPhysicalPathCount(), peer.interfaceListStr(), 
peer.computeAggregateLinkPacketDelayVariance(), peer.computeAggregateLinkMeanLatency()); + */ } void Trace::peerLearnedNewPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath,const uint64_t packetId) diff --git a/node/Trace.hpp b/node/Trace.hpp index b2a77161f..71169ebbb 100644 --- a/node/Trace.hpp +++ b/node/Trace.hpp @@ -109,8 +109,8 @@ public: void peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &path,const uint64_t packetId,const Packet::Verb verb); - void peerLinkNowAggregate(void *const tPtr,Peer &peer); - void peerLinkNoLongerAggregate(void *const tPtr,Peer &peer); + void peerLinkNowRedundant(void *const tPtr,Peer &peer); + void peerLinkNoLongerRedundant(void *const tPtr,Peer &peer); void peerLinkAggregateStatistics(void *const tPtr,Peer &peer); diff --git a/node/Utils.hpp b/node/Utils.hpp index 5ba5b035f..b80a7528d 100644 --- a/node/Utils.hpp +++ b/node/Utils.hpp @@ -214,12 +214,12 @@ public: return l; } - static inline float normalize(float value, int64_t bigMin, int64_t bigMax, int32_t targetMin, int32_t targetMax) + static inline float normalize(float value, float bigMin, float bigMax, float targetMin, float targetMax) { - int64_t bigSpan = bigMax - bigMin; - int64_t smallSpan = targetMax - targetMin; - float valueScaled = (value - (float)bigMin) / (float)bigSpan; - return (float)targetMin + valueScaled * (float)smallSpan; + float bigSpan = bigMax - bigMin; + float smallSpan = targetMax - targetMin; + float valueScaled = (value - bigMin) / bigSpan; + return targetMin + valueScaled * smallSpan; } /** @@ -253,6 +253,7 @@ public: static inline int strToInt(const char *s) { return (int)strtol(s,(char **)0,10); } static inline unsigned long strToULong(const char *s) { return strtoul(s,(char **)0,10); } static inline long strToLong(const char *s) { return strtol(s,(char **)0,10); } + static inline double strToDouble(const char *s) { return strtod(s,NULL); } static inline unsigned long 
long strToU64(const char *s) { #ifdef __WINDOWS__ diff --git a/objects.mk b/objects.mk index efa2f3c0f..b55ba3044 100644 --- a/objects.mk +++ b/objects.mk @@ -24,7 +24,9 @@ CORE_OBJS=\ node/Tag.o \ node/Topology.o \ node/Trace.o \ - node/Utils.o + node/Utils.o \ + node/Bond.o \ + node/BondController.o ONE_OBJS=\ controller/EmbeddedNetworkController.o \ diff --git a/osdep/Binder.hpp b/osdep/Binder.hpp index 660e6f0c3..0fde33452 100644 --- a/osdep/Binder.hpp +++ b/osdep/Binder.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -347,6 +347,23 @@ public: } } + // Generate set of unique interface names (used for formation of logical slave set in multipath code) + for(std::map::const_iterator ii(localIfAddrs.begin());ii!=localIfAddrs.end();++ii) { + slaveIfNames.insert(ii->second); + } + for (std::set::iterator si(slaveIfNames.begin());si!=slaveIfNames.end();si++) { + bool bFoundMatch = false; + for(std::map::const_iterator ii(localIfAddrs.begin());ii!=localIfAddrs.end();++ii) { + if (ii->second == *si) { + bFoundMatch = true; + break; + } + } + if (!bFoundMatch) { + slaveIfNames.erase(si); + } + } + // Create new bindings for those not already bound for(std::map::const_iterator ii(localIfAddrs.begin());ii!=localIfAddrs.end();++ii) { unsigned int bi = 0; @@ -444,7 +461,15 @@ public: return false; } + inline std::set getSlaveInterfaceNames() + { + Mutex::Lock _l(_lock); + return slaveIfNames; + } + private: + + std::set slaveIfNames; _Binding _bindings[ZT_BINDER_MAX_BINDINGS]; std::atomic _bindingCount; Mutex _lock; diff --git a/osdep/LinuxNetLink.cpp b/osdep/LinuxNetLink.cpp 
index 8d4ce2482..13e7176e4 100644 --- a/osdep/LinuxNetLink.cpp +++ b/osdep/LinuxNetLink.cpp @@ -55,8 +55,6 @@ LinuxNetLink::LinuxNetLink() { // set socket timeout to 1 sec so we're not permablocking recv() calls _setSocketTimeout(_fd, 1); - int yes=1; - setsockopt(_fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); _la.nl_family = AF_NETLINK; _la.nl_pid = 0; //getpid()+1; @@ -430,8 +428,6 @@ void LinuxNetLink::_linkDeleted(struct nlmsghdr *nlp) void LinuxNetLink::_requestIPv4Routes() { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - int yes=1; - setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -485,8 +481,6 @@ void LinuxNetLink::_requestIPv4Routes() void LinuxNetLink::_requestIPv6Routes() { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - int yes=1; - setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -540,8 +534,6 @@ void LinuxNetLink::_requestIPv6Routes() void LinuxNetLink::_requestInterfaceList() { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - int yes=1; - setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -595,8 +587,6 @@ void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, c if (!target) return; int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - int yes=1; - setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -713,8 +703,6 @@ void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, c if (!target) return; int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - int yes=1; - 
setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -828,8 +816,6 @@ void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, c void LinuxNetLink::addAddress(const InetAddress &addr, const char *iface) { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - int yes=1; - setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -948,8 +934,6 @@ void LinuxNetLink::addAddress(const InetAddress &addr, const char *iface) void LinuxNetLink::removeAddress(const InetAddress &addr, const char *iface) { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - int yes=1; - setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; diff --git a/osdep/OSUtils.cpp b/osdep/OSUtils.cpp index 3770f0217..537e14966 100644 --- a/osdep/OSUtils.cpp +++ b/osdep/OSUtils.cpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -459,6 +459,22 @@ uint64_t OSUtils::jsonInt(const nlohmann::json &jv,const uint64_t dfl) return dfl; } +double OSUtils::jsonDouble(const nlohmann::json &jv,const double dfl) +{ + try { + if (jv.is_number()) { + return (double)jv; + } + else if (jv.is_string()) { + std::string s = jv; + return Utils::strToDouble(s.c_str()); + } else if (jv.is_boolean()) { + return (double)jv; + } + } catch ( ... 
) {} + return dfl; +} + uint64_t OSUtils::jsonIntHex(const nlohmann::json &jv,const uint64_t dfl) { try { diff --git a/osdep/OSUtils.hpp b/osdep/OSUtils.hpp index 172575a09..70a5daccc 100644 --- a/osdep/OSUtils.hpp +++ b/osdep/OSUtils.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -277,6 +277,7 @@ public: static nlohmann::json jsonParse(const std::string &buf); static std::string jsonDump(const nlohmann::json &j,int indentation = 1); static uint64_t jsonInt(const nlohmann::json &jv,const uint64_t dfl); + static double jsonDouble(const nlohmann::json &jv,const double dfl); static uint64_t jsonIntHex(const nlohmann::json &jv,const uint64_t dfl); static bool jsonBool(const nlohmann::json &jv,const bool dfl); static std::string jsonString(const nlohmann::json &jv,const char *dfl); diff --git a/osdep/Phy.hpp b/osdep/Phy.hpp index b65a520eb..30da8b395 100644 --- a/osdep/Phy.hpp +++ b/osdep/Phy.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. 
@@ -261,46 +261,6 @@ public: } } - /** - * Whether or not the socket object is in a closed state - * - * @param s Socket object - * @return true if socket is closed, false if otherwise - */ - inline bool isClosed(PhySocket *s) - { - PhySocketImpl *sws = (reinterpret_cast(s)); - return sws->type == ZT_PHY_SOCKET_CLOSED; - } - - /** - * Get state of socket object - * - * @param s Socket object - * @return State of socket - */ - inline int getState(PhySocket *s) - { - PhySocketImpl *sws = (reinterpret_cast(s)); - return sws->type; - } - - /** - * In the event that this socket is erased, we need a way to convey to the multipath logic - * that this path is no longer valid. - * - * @param s Socket object - * @return Whether the state of this socket is within an acceptable range of values - */ - inline bool isValidState(PhySocket *s) - { - if (s) { - PhySocketImpl *sws = (reinterpret_cast(s)); - return sws->type >= ZT_PHY_SOCKET_CLOSED && sws->type <= ZT_PHY_SOCKET_UNIX_LISTEN; - } - return false; - } - /** * Cause poll() to stop waiting immediately * diff --git a/osdep/Slave.hpp b/osdep/Slave.hpp new file mode 100644 index 000000000..b1ae326ea --- /dev/null +++ b/osdep/Slave.hpp @@ -0,0 +1,238 @@ +/* + * Copyright (c)2013-2020 ZeroTier, Inc. + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file in the project's root directory. + * + * Change Date: 2024-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2.0 of the Apache License. 
+ */ +/****/ + +#ifndef ZT_SLAVE_HPP +#define ZT_SLAVE_HPP + +#include + +#include "../node/AtomicCounter.hpp" + +namespace ZeroTier { + +class Slave +{ + friend class SharedPtr; + +public: + + Slave() {} + + /** + * + * @param ifnameStr + * @param ipvPref + * @param speed + * @param enabled + * @param mode + * @param failoverToSlaveStr + * @param userSpecifiedAlloc + */ + Slave(std::string& ifnameStr, + uint8_t ipvPref, + uint32_t speed, + uint32_t slaveMonitorInterval, + uint32_t upDelay, + uint32_t downDelay, + bool enabled, + uint8_t mode, + std::string failoverToSlaveStr, + float userSpecifiedAlloc) : + _ifnameStr(ifnameStr), + _ipvPref(ipvPref), + _speed(speed), + _relativeSpeed(0), + _slaveMonitorInterval(slaveMonitorInterval), + _upDelay(upDelay), + _downDelay(downDelay), + _enabled(enabled), + _mode(mode), + _failoverToSlaveStr(failoverToSlaveStr), + _userSpecifiedAlloc(userSpecifiedAlloc), + _isUserSpecified(false) + {} + + /** + * @return The string representation of this slave's underlying interface's system name. + */ + inline std::string ifname() { return _ifnameStr; } + + /** + * @return Whether this slave is designated as a primary. + */ + inline bool primary() { return _mode == ZT_MULTIPATH_SLAVE_MODE_PRIMARY; } + + /** + * @return Whether this slave is designated as a spare. + */ + inline bool spare() { return _mode == ZT_MULTIPATH_SLAVE_MODE_SPARE; } + + /** + * @return The name of the slave interface that should be used in the event of a failure. + */ + inline std::string failoverToSlave() { return _failoverToSlaveStr; } + + /** + * @return Whether this slave interface was specified by the user or auto-detected. + */ + inline bool isUserSpecified() { return _isUserSpecified; } + + /** + * Signify that this slave was specified by the user and not the result of auto-detection. 
+ * + * @param isUserSpecified + */ + inline void setAsUserSpecified(bool isUserSpecified) { _isUserSpecified = isUserSpecified; } + + /** + * @return Whether or not the user has specified failover instructions. + */ + inline bool userHasSpecifiedFailoverInstructions() { return _failoverToSlaveStr.length(); } + + /** + * @return The speed of the slave relative to others in the bond. + */ + inline uint8_t relativeSpeed() { return _relativeSpeed; } + + /** + * Sets the speed of the slave relative to others in the bond. + * + * @param relativeSpeed The speed relative to the rest of the slave interfaces. + */ + inline void setRelativeSpeed(uint8_t relativeSpeed) { _relativeSpeed = relativeSpeed; } + + /** + * Sets the speed of the slave relative to others in the bond. + * + * @param relativeSpeed + */ + inline void setMonitorInterval(uint32_t interval) { _slaveMonitorInterval = interval; } + + /** + * @return The absolute speed of the slave interface (as specified by the user.) + */ + inline uint32_t monitorInterval() { return _slaveMonitorInterval; } + + /** + * @return The absolute speed of the slave interface (as specified by the user.) + */ + inline uint32_t speed() { return _speed; } + + /** + * @return The address preference for this slave interface (as specified by the user.) + */ + inline uint8_t ipvPref() { return _ipvPref; } + + /** + * @return The mode (e.g. primary/spare) for this slave interface (as specified by the user.) + */ + inline uint8_t mode() { return _mode; } + + /** + * @return The upDelay parameter for all paths on this slave interface. + */ + inline uint32_t upDelay() { return _upDelay; } + + /** + * @return The downDelay parameter for all paths on this slave interface. 
+ */ + inline uint32_t downDelay() { return _downDelay; } + + /** + * @return Whether this slave is enabled or disabled + */ + inline uint8_t enabled() { return _enabled; } + +private: + + /** + * String representation of underlying interface's system name + */ + std::string _ifnameStr; + + /** + * What preference (if any) a user has for IP protocol version used in + * path aggregations. Preference is expressed in the order of the digits: + * + * 0: no preference + * 4: IPv4 only + * 6: IPv6 only + * 46: IPv4 over IPv6 + * 64: IPv6 over IPv4 + */ + uint8_t _ipvPref; + + /** + * User-specified speed of this slave/link + */ + uint32_t _speed; + + /** + * Speed relative to other specified slaves/links (computed by Bond) + */ + uint8_t _relativeSpeed; + + /** + * User-specified interval for monitoring paths on this specific slave + * instead of using the more generic interval specified for the entire + * bond. + */ + uint32_t _slaveMonitorInterval; + + /** + * How long before a path is considered to be usable after coming online. (when using policies that + * support fail-over events). + */ + uint32_t _upDelay; + + /** + * How long before a path is considered to be dead (when using policies that + * support fail-over events). + */ + uint32_t _downDelay; + + /** + * Whether this slave is enabled, or (disabled (possibly bad config)) + */ + uint8_t _enabled; + + /** + * Whether this slave is designated as a primary, a spare, or no preference. + */ + uint8_t _mode; + + /** + * The specific name of the interface to be used in the event that this + * slave fails. + */ + std::string _failoverToSlaveStr; + + /** + * User-specified allocation + */ + float _userSpecifiedAlloc; + + /** + * Whether or not this slave was created as a result of manual user specification. This is + * important to know because certain policy decisions are dependent on whether the user + * intents to use a specific set of interfaces. 
+ */ + bool _isUserSpecified; + + AtomicCounter __refCount; + +}; + +} // namespace ZeroTier + +#endif \ No newline at end of file diff --git a/service/OneService.cpp b/service/OneService.cpp index 22c4f82e9..2b1cb631f 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -39,6 +39,8 @@ #include "../node/Salsa20.hpp" #include "../node/Poly1305.hpp" #include "../node/SHA512.hpp" +#include "../node/Bond.hpp" +#include "../node/Peer.hpp" #include "../osdep/Phy.hpp" #include "../osdep/Thread.hpp" @@ -48,6 +50,7 @@ #include "../osdep/Binder.hpp" #include "../osdep/ManagedRoute.hpp" #include "../osdep/BlockingQueue.hpp" +#include "../osdep/Slave.hpp" #include "OneService.hpp" #include "SoftwareUpdater.hpp" @@ -266,37 +269,43 @@ static void _peerToJson(nlohmann::json &pj,const ZT_Peer *peer) pj["paths"] = pa; } -static void _peerAggregateLinkToJson(nlohmann::json &pj,const ZT_Peer *peer) +static void _peerBondToJson(nlohmann::json &pj,const ZT_Peer *peer) { char tmp[256]; OSUtils::ztsnprintf(tmp,sizeof(tmp),"%.10llx",peer->address); - pj["aggregateLinkLatency"] = peer->latency; + //pj["aggregateLinkLatency"] = peer->latency; + std::string policyStr = BondController::getPolicyStrByCode(peer->bondingPolicy); + pj["policy"] = policyStr; nlohmann::json pa = nlohmann::json::array(); for(unsigned int i=0;ipathCount;++i) { int64_t lastSend = peer->paths[i].lastSend; int64_t lastReceive = peer->paths[i].lastReceive; nlohmann::json j; - j["address"] = reinterpret_cast(&(peer->paths[i].address))->toString(tmp); - j["lastSend"] = 
(lastSend < 0) ? 0 : lastSend; - j["lastReceive"] = (lastReceive < 0) ? 0 : lastReceive; + j["ifname"] = std::string(peer->paths[i].ifname); + j["path"] = reinterpret_cast(&(peer->paths[i].address))->toString(tmp); + j["lastTX"] = (lastSend < 0) ? 0 : lastSend; + j["lastRX"] = (lastReceive < 0) ? 0 : lastReceive; + j["lat"] = peer->paths[i].latencyMean; + j["pdv"] = peer->paths[i].latencyVariance; + //j["trustedPathId"] = peer->paths[i].trustedPathId; //j["active"] = (bool)(peer->paths[i].expired == 0); //j["expired"] = (bool)(peer->paths[i].expired != 0); //j["preferred"] = (bool)(peer->paths[i].preferred != 0); - j["latency"] = peer->paths[i].latency; - j["pdv"] = peer->paths[i].packetDelayVariance; - //j["throughputDisturbCoeff"] = peer->paths[i].throughputDisturbCoeff; - //j["packetErrorRatio"] = peer->paths[i].packetErrorRatio; - //j["packetLossRatio"] = peer->paths[i].packetLossRatio; - j["stability"] = peer->paths[i].stability; - j["throughput"] = peer->paths[i].throughput; - //j["maxThroughput"] = peer->paths[i].maxThroughput; - j["allocation"] = peer->paths[i].allocation; - j["ifname"] = peer->paths[i].ifname; + //j["ltm"] = peer->paths[i].latencyMax; + //j["plr"] = peer->paths[i].packetLossRatio; + //j["per"] = peer->paths[i].packetErrorRatio; + //j["thr"] = peer->paths[i].throughputMean; + //j["thm"] = peer->paths[i].throughputMax; + //j["thv"] = peer->paths[i].throughputVariance; + //j["avl"] = peer->paths[i].availability; + //j["age"] = peer->paths[i].age; + //j["alloc"] = peer->paths[i].allocation; + //j["ifname"] = peer->paths[i].ifname; pa.push_back(j); } - pj["paths"] = pa; + pj["slaves"] = pa; } static void _moonToJson(nlohmann::json &mj,const World &world) @@ -429,7 +438,7 @@ public: bool _updateAutoApply; bool _allowTcpFallbackRelay; bool _allowSecondaryPort; - unsigned int _multipathMode; + unsigned int _primaryPort; unsigned int _secondaryPort; unsigned int _tertiaryPort; @@ -718,6 +727,7 @@ public: } } #endif + // Delete legacy iddb.d if 
present (cleanup) OSUtils::rmDashRf((_homePath + ZT_PATH_SEPARATOR_S "iddb.d").c_str()); @@ -752,7 +762,6 @@ public: int64_t lastTapMulticastGroupCheck = 0; int64_t lastBindRefresh = 0; int64_t lastUpdateCheck = clockShouldBe; - int64_t lastMultipathModeUpdate = 0; int64_t lastCleanedPeersDb = 0; int64_t lastLocalInterfaceAddressCheck = (clockShouldBe - ZT_LOCAL_INTERFACE_CHECK_INTERVAL) + 15000; // do this in 15s to give portmapper time to configure and other things time to settle int64_t lastLocalConfFileCheck = OSUtils::now(); @@ -798,7 +807,7 @@ public: } // Refresh bindings in case device's interfaces have changed, and also sync routes to update any shadow routes (e.g. shadow default) - if (((now - lastBindRefresh) >= (_multipathMode ? ZT_BINDER_REFRESH_PERIOD / 8 : ZT_BINDER_REFRESH_PERIOD))||(restarted)) { + if (((now - lastBindRefresh) >= (_node->bondController()->inUse() ? ZT_BINDER_REFRESH_PERIOD / 4 : ZT_BINDER_REFRESH_PERIOD))||(restarted)) { lastBindRefresh = now; unsigned int p[3]; unsigned int pc = 0; @@ -815,11 +824,6 @@ public: } } } - // Update multipath mode (if needed) - if (((now - lastMultipathModeUpdate) >= ZT_BINDER_REFRESH_PERIOD / 8)||(restarted)) { - lastMultipathModeUpdate = now; - _node->setMultipathMode(_multipathMode); - } // Run background task processor in core if it's time to do so int64_t dl = _nextBackgroundTaskDeadline; @@ -855,7 +859,7 @@ public: } // Sync information about physical network interfaces - if ((now - lastLocalInterfaceAddressCheck) >= (_multipathMode ? ZT_LOCAL_INTERFACE_CHECK_INTERVAL / 8 : ZT_LOCAL_INTERFACE_CHECK_INTERVAL)) { + if ((now - lastLocalInterfaceAddressCheck) >= (_node->bondController()->inUse() ? 
ZT_LOCAL_INTERFACE_CHECK_INTERVAL / 8 : ZT_LOCAL_INTERFACE_CHECK_INTERVAL)) { lastLocalInterfaceAddressCheck = now; _node->clearLocalInterfaceAddresses(); @@ -869,8 +873,9 @@ public: #endif std::vector boundAddrs(_binder.allBoundLocalInterfaceAddresses()); - for(std::vector::const_iterator i(boundAddrs.begin());i!=boundAddrs.end();++i) + for(std::vector::const_iterator i(boundAddrs.begin());i!=boundAddrs.end();++i) { _node->addLocalInterfaceAddress(reinterpret_cast(&(*i))); + } } // Clean peers.d periodically @@ -1209,15 +1214,15 @@ public: settings["primaryPort"] = OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff; settings["allowTcpFallbackRelay"] = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],_allowTcpFallbackRelay); - if (_multipathMode) { - json &multipathConfig = res["multipath"]; + if (_node->bondController()->inUse()) { + json &multipathConfig = res["bonds"]; ZT_PeerList *pl = _node->peers(); char peerAddrStr[256]; if (pl) { for(unsigned long i=0;ipeerCount;++i) { - if (pl->peers[i].hadAggregateLink) { + if (pl->peers[i].isBonded) { nlohmann::json pj; - _peerAggregateLinkToJson(pj,&(pl->peers[i])); + _peerBondToJson(pj,&(pl->peers[i])); OSUtils::ztsnprintf(peerAddrStr,sizeof(peerAddrStr),"%.10llx",pl->peers[i].address); multipathConfig[peerAddrStr] = (pj); } @@ -1346,8 +1351,8 @@ public: if (j.is_object()) { seed = Utils::hexStrToU64(OSUtils::jsonString(j["seed"],"0").c_str()); } - } catch (std::exception &exc) { } catch ( ... ) { + // discard invalid JSON } std::vector moons(_node->moons()); @@ -1396,8 +1401,8 @@ public: json &allowDefault = j["allowDefault"]; if (allowDefault.is_boolean()) localSettings.allowDefault = (bool)allowDefault; } - } catch (std::exception &exc) { } catch ( ... 
) { + // discard invalid JSON } setNetworkSettings(nws->networks[i].nwid,localSettings); @@ -1551,10 +1556,133 @@ public: json &settings = lc["settings"]; + if (!_node->bondController()->inUse()) { + // defaultBondingPolicy + std::string defaultBondingPolicyStr(OSUtils::jsonString(settings["defaultBondingPolicy"],"")); + int defaultBondingPolicy = _node->bondController()->getPolicyCodeByStr(defaultBondingPolicyStr); + _node->bondController()->setBondingLayerDefaultPolicy(defaultBondingPolicy); + _node->bondController()->setBondingLayerDefaultPolicyStr(defaultBondingPolicyStr); // Used if custom policy + // Custom Policies + json &customBondingPolicies = settings["policies"]; + for (json::iterator policyItr = customBondingPolicies.begin(); policyItr != customBondingPolicies.end();++policyItr) { + fprintf(stderr, "\n\n--- (%s)\n", policyItr.key().c_str()); + // Custom Policy + std::string customPolicyStr(policyItr.key()); + json &customPolicy = policyItr.value(); + std::string basePolicyStr(OSUtils::jsonString(customPolicy["basePolicy"],"")); + if (_node->bondController()->getPolicyCodeByStr(basePolicyStr) == ZT_BONDING_POLICY_NONE) { + fprintf(stderr, "error: custom policy (%s) is invalid, unknown base policy (%s).\n", + customPolicyStr.c_str(), basePolicyStr.c_str()); + continue; + } if (_node->bondController()->getPolicyCodeByStr(customPolicyStr) != ZT_BONDING_POLICY_NONE) { + fprintf(stderr, "error: custom policy (%s) will be ignored, cannot use standard policy names for custom policies.\n", + customPolicyStr.c_str()); + continue; + } + // New bond, used as a copy template for new instances + SharedPtr newTemplateBond = new Bond(basePolicyStr, customPolicyStr, SharedPtr()); + // Acceptable ranges + newTemplateBond->setMaxAcceptableLatency(OSUtils::jsonInt(customPolicy["maxAcceptableLatency"],-1)); + newTemplateBond->setMaxAcceptableMeanLatency(OSUtils::jsonInt(customPolicy["maxAcceptableMeanLatency"],-1)); + 
newTemplateBond->setMaxAcceptablePacketDelayVariance(OSUtils::jsonInt(customPolicy["maxAcceptablePacketDelayVariance"],-1)); + newTemplateBond->setMaxAcceptablePacketLossRatio((float)OSUtils::jsonDouble(customPolicy["maxAcceptablePacketLossRatio"],-1)); + newTemplateBond->setMaxAcceptablePacketErrorRatio((float)OSUtils::jsonDouble(customPolicy["maxAcceptablePacketErrorRatio"],-1)); + newTemplateBond->setMinAcceptableAllocation((float)OSUtils::jsonDouble(customPolicy["minAcceptableAllocation"],0)); + // Quality weights + json &qualityWeights = customPolicy["qualityWeights"]; + if (qualityWeights.size() == ZT_QOS_WEIGHT_SIZE) { // TODO: Generalize this + float weights[ZT_QOS_WEIGHT_SIZE]; + weights[ZT_QOS_LAT_IDX] = (float)OSUtils::jsonDouble(qualityWeights["lat"],0.0); + weights[ZT_QOS_LTM_IDX] = (float)OSUtils::jsonDouble(qualityWeights["ltm"],0.0); + weights[ZT_QOS_PDV_IDX] = (float)OSUtils::jsonDouble(qualityWeights["pdv"],0.0); + weights[ZT_QOS_PLR_IDX] = (float)OSUtils::jsonDouble(qualityWeights["plr"],0.0); + weights[ZT_QOS_PER_IDX] = (float)OSUtils::jsonDouble(qualityWeights["per"],0.0); + weights[ZT_QOS_THR_IDX] = (float)OSUtils::jsonDouble(qualityWeights["thr"],0.0); + weights[ZT_QOS_THM_IDX] = (float)OSUtils::jsonDouble(qualityWeights["thm"],0.0); + weights[ZT_QOS_THV_IDX] = (float)OSUtils::jsonDouble(qualityWeights["thv"],0.0); + newTemplateBond->setUserQualityWeights(weights,ZT_QOS_WEIGHT_SIZE); + } + // Bond-specific properties + newTemplateBond->setUpDelay(OSUtils::jsonInt(customPolicy["upDelay"],-1)); + newTemplateBond->setDownDelay(OSUtils::jsonInt(customPolicy["downDelay"],-1)); + newTemplateBond->setFailoverInterval(OSUtils::jsonInt(customPolicy["failoverInterval"],(uint64_t)0)); + newTemplateBond->setPacketsPerSlave(OSUtils::jsonInt(customPolicy["packetsPerSlave"],-1)); + std::string slaveMonitorStrategyStr(OSUtils::jsonString(customPolicy["slaveMonitorStrategy"],"")); + uint8_t slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DEFAULT; + 
if (slaveMonitorStrategyStr == "passive") { newTemplateBond->setSlaveMonitorStrategy(ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE); } + if (slaveMonitorStrategyStr == "active") { newTemplateBond->setSlaveMonitorStrategy(ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_ACTIVE); } + if (slaveMonitorStrategyStr == "dynamic") { newTemplateBond->setSlaveMonitorStrategy(ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC); } + // Policy-Specific slave set + json &slaves = customPolicy["slaves"]; + for (json::iterator slaveItr = slaves.begin(); slaveItr != slaves.end();++slaveItr) { + fprintf(stderr, "\t--- slave (%s)\n", slaveItr.key().c_str()); + std::string slaveNameStr(slaveItr.key()); + json &slave = slaveItr.value(); + + bool enabled = OSUtils::jsonInt(slave["enabled"],true); + uint32_t speed = OSUtils::jsonInt(slave["speed"],0); + float alloc = (float)OSUtils::jsonDouble(slave["alloc"],0); + + if (speed && alloc) { + fprintf(stderr, "error: cannot specify both speed (%d) and alloc (%f) for slave (%s), pick one, slave disabled.\n", + speed, alloc, slaveNameStr.c_str()); + enabled = false; + } + uint32_t upDelay = OSUtils::jsonInt(slave["upDelay"],-1); + uint32_t downDelay = OSUtils::jsonInt(slave["downDelay"],-1); + uint8_t ipvPref = OSUtils::jsonInt(slave["ipvPref"],0); + uint32_t slaveMonitorInterval = OSUtils::jsonInt(slave["monitorInterval"],(uint64_t)0); + std::string failoverToStr(OSUtils::jsonString(slave["failoverTo"],"")); + // Mode + std::string slaveModeStr(OSUtils::jsonString(slave["mode"],"spare")); + uint8_t slaveMode = ZT_MULTIPATH_SLAVE_MODE_SPARE; + if (slaveModeStr == "primary") { slaveMode = ZT_MULTIPATH_SLAVE_MODE_PRIMARY; } + if (slaveModeStr == "spare") { slaveMode = ZT_MULTIPATH_SLAVE_MODE_SPARE; } + // ipvPref + if ((ipvPref != 0) && (ipvPref != 4) && (ipvPref != 6) && (ipvPref != 46) && (ipvPref != 64)) { + fprintf(stderr, "error: invalid ipvPref value (%d), slave disabled.\n", ipvPref); + enabled = false; + } + if (slaveMode == ZT_MULTIPATH_SLAVE_MODE_SPARE && 
failoverToStr.length()) { + fprintf(stderr, "error: cannot specify failover slaves for spares, slave disabled.\n"); + failoverToStr = ""; + enabled = false; + } + _node->bondController()->addCustomSlave(customPolicyStr, new Slave(slaveNameStr,ipvPref,speed,slaveMonitorInterval,upDelay,downDelay,enabled,slaveMode,failoverToStr,alloc)); + } + // TODO: This is dumb + std::string slaveSelectMethodStr(OSUtils::jsonString(customPolicy["activeReselect"],"optimize")); + if (slaveSelectMethodStr == "always") { newTemplateBond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS); } + if (slaveSelectMethodStr == "better") { newTemplateBond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_BETTER); } + if (slaveSelectMethodStr == "failure") { newTemplateBond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_FAILURE); } + if (slaveSelectMethodStr == "optimize") { newTemplateBond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE); } + if (newTemplateBond->getSlaveSelectMethod() < 0 || newTemplateBond->getSlaveSelectMethod() > 3) { + fprintf(stderr, "warning: invalid value (%s) for slaveSelectMethod, assuming mode: always\n", slaveSelectMethodStr.c_str()); + } + /* + newBond->setPolicy(_node->bondController()->getPolicyCodeByStr(basePolicyStr)); + newBond->setFlowHashing((bool)OSUtils::jsonInt(userSpecifiedBondingPolicies[i]["allowFlowHashing"],(bool)allowFlowHashing)); + newBond->setBondMonitorInterval((unsigned int)OSUtils::jsonInt(userSpecifiedBondingPolicies[i]["monitorInterval"],(uint64_t)0)); + newBond->setAllowPathNegotiation((bool)OSUtils::jsonInt(userSpecifiedBondingPolicies[i]["allowPathNegotiation"],(bool)false)); + */ + if (!_node->bondController()->addCustomPolicy(newTemplateBond)) { + fprintf(stderr, "error: a custom policy of this name (%s) already exists.\n", customPolicyStr.c_str()); + } + } + // Peer-specific bonding + json &peerSpecificBonds = settings["peerSpecificBonds"]; + for (json::iterator peerItr = peerSpecificBonds.begin(); 
peerItr != peerSpecificBonds.end();++peerItr) { + _node->bondController()->assignBondingPolicyToPeer(std::stoull(peerItr.key(),0,16), peerItr.value()); + } + // Check settings + if (defaultBondingPolicyStr.length() && !defaultBondingPolicy && !_node->bondController()->inUse()) { + fprintf(stderr, "error: unknown policy (%s) specified by defaultBondingPolicy, slave disabled.\n", defaultBondingPolicyStr.c_str()); + } + } + + // bondingPolicy cannot be used with allowTcpFallbackRelay + _allowTcpFallbackRelay = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],true) && !(_node->bondController()->inUse()); _primaryPort = (unsigned int)OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff; - _multipathMode = (unsigned int)OSUtils::jsonInt(settings["multipathMode"],0); - // multipathMode cannot be used with allowTcpFallbackRelay - _allowTcpFallbackRelay = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],true) && !_multipathMode; _allowSecondaryPort = OSUtils::jsonBool(settings["allowSecondaryPort"],true); _secondaryPort = (unsigned int)OSUtils::jsonInt(settings["secondaryPort"],0); _tertiaryPort = (unsigned int)OSUtils::jsonInt(settings["tertiaryPort"],0); @@ -1705,9 +1833,8 @@ public: } } #ifdef __SYNOLOGY__ - if (!n.tap->addIps(newManagedIps)) { + if (!n.tap->addIpSyn(newManagedIps)) fprintf(stderr,"ERROR: unable to add ip addresses to ifcfg" ZT_EOL_S); - } #else for(std::vector::iterator ip(newManagedIps.begin());ip!=newManagedIps.end();++ip) { if (std::find(n.managedIps.begin(),n.managedIps.end(),*ip) == n.managedIps.end()) { @@ -2025,8 +2152,6 @@ public: return; } - } catch (std::exception &exc) { - _phy.close(sock); } catch ( ... ) { _phy.close(sock); } @@ -2135,8 +2260,6 @@ public: #endif _nets.erase(nwid); return -999; - } catch (int exc) { - return -999; } catch ( ... 
) { return -999; // tap init failed } @@ -2743,6 +2866,7 @@ public: if (!strncmp(p->c_str(),ifname,p->length())) return false; } + return _node->bondController()->allowedToBind(std::string(ifname)); } { // Check global blacklists From 701960def5e6066ba11097e1288fbeed02ec91cd Mon Sep 17 00:00:00 2001 From: Grant Limberg Date: Wed, 13 May 2020 17:23:27 -0700 Subject: [PATCH 18/35] Track member status in Redis --- controller/PostgreSQL.cpp | 138 +++++++++++++++++++++++++++++++++----- controller/PostgreSQL.hpp | 4 ++ 2 files changed, 124 insertions(+), 18 deletions(-) diff --git a/controller/PostgreSQL.cpp b/controller/PostgreSQL.cpp index 286a734e0..05d2de7b1 100644 --- a/controller/PostgreSQL.cpp +++ b/controller/PostgreSQL.cpp @@ -229,12 +229,14 @@ void PostgreSQL::eraseNetwork(const uint64_t networkId) tmp.first["objtype"] = "_delete_network"; tmp.second = true; _commitQueue.post(tmp); + nlohmann::json nullJson; + _networkChanged(tmp.first, nullJson, true); } void PostgreSQL::eraseMember(const uint64_t networkId, const uint64_t memberId) { char tmp2[24]; - std::pair tmp; + std::pair tmp, nw; Utils::hex(networkId, tmp2); tmp.first["nwid"] = tmp2; Utils::hex(memberId, tmp2); @@ -242,6 +244,8 @@ void PostgreSQL::eraseMember(const uint64_t networkId, const uint64_t memberId) tmp.first["objtype"] = "_delete_member"; tmp.second = true; _commitQueue.post(tmp); + nlohmann::json nullJson; + _memberChanged(tmp.first, nullJson, true); } void PostgreSQL::nodeIsOnline(const uint64_t networkId, const uint64_t memberId, const InetAddress &physicalAddress) @@ -630,8 +634,8 @@ void PostgreSQL::heartbeat() }; PGresult *res = PQexecParams(conn, - "INSERT INTO ztc_controller (id, cluster_host, last_alive, public_identity, v_major, v_minor, v_rev, v_build, host_port, use_rabbitmq, use_redis) " - "VALUES ($1, $2, TO_TIMESTAMP($3::double precision/1000), $4, $5, $6, $7, $8, $9, $10, $11) " + "INSERT INTO ztc_controller (id, cluster_host, last_alive, public_identity, v_major, v_minor, 
v_rev, v_build, host_port, use_redis) " + "VALUES ($1, $2, TO_TIMESTAMP($3::double precision/1000), $4, $5, $6, $7, $8, $9, $10) " "ON CONFLICT (id) DO UPDATE SET cluster_host = EXCLUDED.cluster_host, last_alive = EXCLUDED.last_alive, " "public_identity = EXCLUDED.public_identity, v_major = EXCLUDED.v_major, v_minor = EXCLUDED.v_minor, " "v_rev = EXCLUDED.v_rev, v_build = EXCLUDED.v_rev, host_port = EXCLUDED.host_port, " @@ -1401,6 +1405,15 @@ void PostgreSQL::commitThread() } void PostgreSQL::onlineNotificationThread() +{ + if (_rc != NULL) { + onlineNotification_Redis(); + } else { + onlineNotification_Postgres(); + } +} + +void PostgreSQL::onlineNotification_Postgres() { PGconn *conn = getPgConn(); if (PQstatus(conn) == CONNECTION_BAD) { @@ -1410,9 +1423,7 @@ void PostgreSQL::onlineNotificationThread() } _connected = 1; - //int64_t lastUpdatedNetworkStatus = 0; - std::unordered_map< std::pair,int64_t,_PairHasher > lastOnlineCumulative; - + nlohmann::json jtmp1, jtmp2; while (_run == 1) { if (PQstatus(conn) != CONNECTION_OK) { fprintf(stderr, "ERROR: Online Notification thread lost connection to Postgres."); @@ -1420,9 +1431,6 @@ void PostgreSQL::onlineNotificationThread() exit(5); } - // map used to send notifications to front end - std::unordered_map> updateMap; - std::unordered_map< std::pair,std::pair,_PairHasher > lastOnline; { std::lock_guard l(_lastOnline_l); @@ -1443,20 +1451,13 @@ void PostgreSQL::onlineNotificationThread() OSUtils::ztsnprintf(nwidTmp,sizeof(nwidTmp), "%.16llx", nwid_i); OSUtils::ztsnprintf(memTmp,sizeof(memTmp), "%.10llx", i->first.second); - auto found = _networks.find(nwid_i); - if (found == _networks.end()) { - continue; // skip members trying to join non-existant networks + if(!get(nwid_i, jtmp1, i->first.second, jtmp2)) { + continue; // skip non existent networks/members } std::string networkId(nwidTmp); std::string memberId(memTmp); - std::vector &members = updateMap[networkId]; - members.push_back(memberId); - - 
lastOnlineCumulative[i->first] = i->second.first; - - const char *qvals[2] = { networkId.c_str(), memberId.c_str() @@ -1526,6 +1527,107 @@ void PostgreSQL::onlineNotificationThread() } } +void PostgreSQL::onlineNotification_Redis() +{ + _connected = 1; + + char buf[11] = {0}; + std::string controllerId = std::string(_myAddress.toString(buf)); + + while (_run == 1) { + std::unordered_map< std::pair,std::pair,_PairHasher > lastOnline; + { + std::lock_guard l(_lastOnline_l); + lastOnline.swap(_lastOnline); + } + + if (_rc->clusterMode) { + auto tx = _cluster->redis(controllerId).transaction(true); + _doRedisUpdate(tx, controllerId, lastOnline); + } else { + auto tx = _redis->transaction(true); + _doRedisUpdate(tx, controllerId, lastOnline); + } + + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } +} + +void PostgreSQL::_doRedisUpdate(sw::redis::Transaction &tx, std::string &controllerId, + std::unordered_map< std::pair,std::pair,_PairHasher > &lastOnline) + +{ + nlohmann::json jtmp1, jtmp2; + for (auto i=lastOnline.begin(); i != lastOnline.end(); ++i) { + uint64_t nwid_i = i->first.first; + uint64_t memberid_i = i->first.second; + char nwidTmp[64]; + char memTmp[64]; + char ipTmp[64]; + OSUtils::ztsnprintf(nwidTmp,sizeof(nwidTmp), "%.16llx", nwid_i); + OSUtils::ztsnprintf(memTmp,sizeof(memTmp), "%.10llx", memberid_i); + + if (!get(nwid_i, jtmp1, memberid_i, jtmp2)){ + continue; // skip non existent members/networks + } + auto found = _networks.find(nwid_i); + if (found == _networks.end()) { + continue; // skip members trying to join non-existant networks + } + + std::string networkId(nwidTmp); + std::string memberId(memTmp); + + int64_t ts = i->second.first; + std::string ipAddr = i->second.second.toIpString(ipTmp); + std::string timestamp = std::to_string(ts); + + std::unordered_map record = { + {"id", memberId}, + {"address", ipAddr}, + {"last_updated", std::to_string(ts)} + }; + tx.zadd("nodes-online:{"+controllerId+"}", memberId, ts) + 
.zadd("network-nodes-online:{"+controllerId+"}:"+networkId, memberId, ts) + .sadd("network-nodes-all:{"+controllerId+"}:"+networkId, memberId) + .hmset("network:{"+controllerId+"}:"+networkId+":"+memberId, record.begin(), record.end()); + } + + tx.exec(); + + // expire records from all-nodes and network-nodes member list + uint64_t expireOld = OSUtils::now() - 300000; + + auto cursor = 0LL; + std::unordered_set keys; + // can't scan for keys in a transaction, so we need to fall back to _cluster or _redis + // to get all network-members keys + if(_rc->clusterMode) { + auto r = _cluster->redis(controllerId); + while(true) { + cursor = r.scan(cursor, "network-nodes-online:{"+controllerId+"}:*", INT_MAX, std::inserter(keys, keys.begin())); + if (cursor == 0) { + break; + } + } + } else { + while(true) { + cursor = _redis->scan(cursor, "network-nodes-online:"+controllerId+":*", INT_MAX, std::inserter(keys, keys.begin())); + if (cursor == 0) { + break; + } + } + } + + tx.zremrangebyscore("nodes-online:{"+controllerId+"}", sw::redis::RightBoundedInterval(expireOld, sw::redis::BoundType::LEFT_OPEN)); + + for(const auto &k : keys) { + tx.zremrangebyscore(k, sw::redis::RightBoundedInterval(expireOld, sw::redis::BoundType::LEFT_OPEN)); + } + + tx.exec(); +} + PGconn *PostgreSQL::getPgConn(OverrideMode m) { if (m == ALLOW_PGBOUNCER_OVERRIDE) { diff --git a/controller/PostgreSQL.hpp b/controller/PostgreSQL.hpp index 44347cd81..f61670132 100644 --- a/controller/PostgreSQL.hpp +++ b/controller/PostgreSQL.hpp @@ -70,6 +70,10 @@ private: void commitThread(); void onlineNotificationThread(); + void onlineNotification_Postgres(); + void onlineNotification_Redis(); + void _doRedisUpdate(sw::redis::Transaction &tx, std::string &controllerId, + std::unordered_map< std::pair,std::pair,_PairHasher > &lastOnline); enum OverrideMode { ALLOW_PGBOUNCER_OVERRIDE = 0, From 58d567c33189b988a38c09f4052632cb3879be7c Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Thu, 14 May 2020 20:09:25 -0700 
Subject: [PATCH 19/35] Formatting --- node/Bond.cpp | 189 ++++++++++++++++++++++++++++++++++------ node/Bond.hpp | 168 +++++++++++++++++------------------ node/BondController.hpp | 26 +++--- node/Flow.hpp | 12 +-- node/Path.hpp | 6 +- osdep/Slave.hpp | 7 +- 6 files changed, 271 insertions(+), 137 deletions(-) diff --git a/node/Bond.cpp b/node/Bond.cpp index 9a5ab1df8..2f283a696 100644 --- a/node/Bond.cpp +++ b/node/Bond.cpp @@ -140,7 +140,7 @@ SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) } } } - //fprintf(stderr, "resultant _rrIdx=%d\n", _rrIdx); + fprintf(stderr, "_rrIdx=%d\n", _rrIdx); if (_paths[_bondedIdx[_rrIdx]]) { return _paths[_bondedIdx[_rrIdx]]; } @@ -246,7 +246,7 @@ void Bond::recordIncomingPacket(const SharedPtr& path, uint64_t packetId, } /** * Learn new flows and pro-actively create entries for them in the bond so - * that the next time we send a packet out that is part of a flow we know + * that the next time we send a packet out that is part of a flow we know * which path to use. */ if ((flowId != ZT_QOS_NO_FLOW) @@ -385,7 +385,7 @@ SharedPtr Bond::createFlow(const SharedPtr &path, int32_t flowId, un } if (_flows.size() >= ZT_FLOW_MAX_COUNT) { fprintf(stderr, "max number of flows reached (%d), forcibly forgetting oldest flow\n", ZT_FLOW_MAX_COUNT); - forgetFlowsWhenNecessary(0,true,now); + forgetFlowsWhenNecessary(0,true,now); } SharedPtr flow = new Flow(flowId, now); _flows[flowId] = flow; @@ -588,7 +588,7 @@ void Bond::sendQOS_MEASUREMENT(void *tPtr,const SharedPtr &path,const int6 } else { RR->sw->send(tPtr,outp,false); } - // Account for the fact that a VERB_QOS_MEASUREMENT was just sent. Reset timers. + // Account for the fact that a VERB_QOS_MEASUREMENT was just sent. Reset timers. 
path->_packetsReceivedSinceLastQoS = 0; path->_lastQoSMeasurement = now; } @@ -608,7 +608,7 @@ void Bond::processBackgroundTasks(void *tPtr, const int64_t now) //fprintf(stderr, "_lastFrame=%llu, suggestedMonitorInterval=%d, _dynamicPathMonitorInterval=%d\n", // (now-_lastFrame), suggestedMonitorInterval, _dynamicPathMonitorInterval); } - + if (_slaveMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC) { _shouldCollectPathStatistics = true; } @@ -673,7 +673,7 @@ void Bond::processBackgroundTasks(void *tPtr, const int64_t now) if (((now - _lastPathNegotiationCheck) > ZT_PATH_NEGOTIATION_CHECK_INTERVAL) && _allowPathNegotiation) { _lastPathNegotiationCheck = now; pathNegotiationCheck(tPtr, now); - } + } } void Bond::applyUserPrefs() @@ -854,8 +854,8 @@ void Bond::estimatePathQuality(const int64_t now) float plr[ZT_MAX_PEER_NETWORK_PATHS]; float per[ZT_MAX_PEER_NETWORK_PATHS]; float thr[ZT_MAX_PEER_NETWORK_PATHS]; - float thm[ZT_MAX_PEER_NETWORK_PATHS]; - float thv[ZT_MAX_PEER_NETWORK_PATHS]; + float thm[ZT_MAX_PEER_NETWORK_PATHS]; + float thv[ZT_MAX_PEER_NETWORK_PATHS]; float maxLAT = 0; float maxPDV = 0; @@ -867,7 +867,7 @@ void Bond::estimatePathQuality(const int64_t now) float quality[ZT_MAX_PEER_NETWORK_PATHS]; uint8_t alloc[ZT_MAX_PEER_NETWORK_PATHS]; - + float totQuality = 0.0f; memset(&lat, 0, sizeof(lat)); @@ -950,7 +950,7 @@ void Bond::estimatePathQuality(const int64_t now) //fprintf(stdout, "EH %d: lat=%8.3f, ltm=%8.3f, pdv=%8.3f, plr=%5.3f, per=%5.3f, thr=%8f, thm=%5.3f, thv=%5.3f, avl=%5.3f, age=%8.2f, scp=%4d, q=%5.3f, qtot=%5.3f, ac=%d if=%s, path=%s\n", // i, lat[i], ltm[i], pdv[i], plr[i], per[i], thr[i], thm[i], thv[i], avl[i], age[i], scp[i], quality[i], totQuality, alloc[i], getSlave(_paths[i])->ifname().c_str(), pathStr); - + } // Convert metrics to relative quantities and apply contribution weights for(unsigned int i=0;ibonded()) { alloc[i] = std::ceil((quality[i] / totQuality) * (float)255); @@ -1011,8 +1011,8 @@ void 
Bond::estimatePathQuality(const int64_t now) if (_paths[i]) { _paths[i]->address().toString(pathStr); fprintf(stdout, "%s, %s, %8.3f, %8.3f, %8.3f, %5.3f, %5.3f, %5.3f, %8f, %5.3f, %5.3f, %d, %5.3f, %d, %d, %d, %d, %d, %d, ", - getSlave(_paths[i])->ifname().c_str(), pathStr, _paths[i]->latencyMean, lat[i],pdv[i], _paths[i]->packetLossRatio, plr[i],per[i],thr[i],thm[i],thv[i],(now - _paths[i]->lastIn()),quality[i],alloc[i], - _paths[i]->relativeByteLoad, _paths[i]->assignedFlowCount, _paths[i]->alive(now, true), _paths[i]->eligible(now,_ackSendInterval), _paths[i]->qosStatsOut.size()); + getSlave(_paths[i])->ifname().c_str(), pathStr, _paths[i]->latencyMean, lat[i],pdv[i], _paths[i]->packetLossRatio, plr[i],per[i],thr[i],thm[i],thv[i],(now - _paths[i]->lastIn()),quality[i],alloc[i], + _paths[i]->relativeByteLoad, _paths[i]->assignedFlowCount, _paths[i]->alive(now, true), _paths[i]->eligible(now,_ackSendInterval), _paths[i]->qosStatsOut.size()); } } fprintf(stdout, "\n"); @@ -1022,7 +1022,144 @@ void Bond::estimatePathQuality(const int64_t now) void Bond::processBalanceTasks(const int64_t now) { - // Omitted + //fprintf(stderr, "processBalanceTasks\n"); + char curPathStr[128]; + if (_allowFlowHashing) { + /** + * Clean up and reset flows if necessary + */ + if ((now - _lastFlowExpirationCheck) > ZT_MULTIPATH_FLOW_CHECK_INTERVAL) { + Mutex::Lock _l(_flows_m); + forgetFlowsWhenNecessary(ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL,false,now); + _lastFlowExpirationCheck = now; + } + if ((now - _lastFlowStatReset) > ZT_FLOW_STATS_RESET_INTERVAL) { + Mutex::Lock _l(_flows_m); + _lastFlowStatReset = now; + std::map >::iterator it = _flows.begin(); + while (it != _flows.end()) { + it->second->resetByteCounts(); + ++it; + } + } + /** + * Re-allocate flows from dead paths + */ + if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE) { + Mutex::Lock _l(_flows_m); + for (int i=0;ieligible(now,_ackSendInterval) && 
_paths[i]->_shouldReallocateFlows) { + _paths[i]->address().toString(curPathStr); + fprintf(stderr, "%d reallocating flows from dead path %s on %s\n", (RR->node->now() - RR->bc->getBondStartTime()), curPathStr, getSlave(_paths[i])->ifname().c_str()); + std::map >::iterator flow_it = _flows.begin(); + while (flow_it != _flows.end()) { + if (flow_it->second->assignedPath() == _paths[i]) { + if(assignFlowToBondedPath(flow_it->second, now)) { + _paths[i]->_assignedFlowCount--; + } + } + ++flow_it; + } + _paths[i]->_shouldReallocateFlows = false; + } + } + } + } + /** + * Tasks specific to (Balance Round Robin) + */ + if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR) { + if (_allowFlowHashing) { + // TODO: Should ideally failover from (idx) to a random slave, this is so that (idx+1) isn't overloaded + } + else if (!_allowFlowHashing) { + // Nothing + } + } + /** + * Tasks specific to (Balance XOR) + */ + if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR) { + // Nothing specific for XOR + } + /** + * Tasks specific to (Balance Aware) + */ + if ((_bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE)) { + if (_allowFlowHashing) { + Mutex::Lock _l(_flows_m); + /** + * Re-balance flows in proportion to slave capacity (or when eligibility changes) + */ + if ((now - _lastFlowRebalance) > ZT_FLOW_REBALANCE_INTERVAL) { + /** + * Determine "load" for bonded paths + */ + uint64_t totalBytes = 0; + for(unsigned int i=0;ibonded()) { + _paths[i]->_byteLoad = 0; + std::map >::iterator flow_it = _flows.begin(); + while (flow_it != _flows.end()) { + if (flow_it->second->assignedPath() == _paths[i]) { + _paths[i]->_byteLoad += flow_it->second->totalBytes(); + } + ++flow_it; + } + totalBytes += _paths[i]->_byteLoad; + } + } + /** + * Determine "affinity" for bonded path + */ + //fprintf(stderr, "\n\n"); + _totalBondUnderload = 0; + + for(unsigned int i=0;ibonded()) { + if (totalBytes) { + uint8_t relativeByteLoad = std::ceil(((float)_paths[i]->_byteLoad / (float)totalBytes) * (float)255); + 
//fprintf(stderr, "lastComputedAllocation = %d\n", _paths[i]->allocation); + //fprintf(stderr, " relativeByteLoad = %d\n", relativeByteLoad); + _paths[i]->_relativeByteLoad = relativeByteLoad; + uint8_t relativeUnderload = std::max(0, (int)_paths[i]->_allocation - (int)relativeByteLoad); + //fprintf(stderr, " relativeUnderload = %d\n", relativeUnderload); + _totalBondUnderload += relativeUnderload; + //fprintf(stderr, " _totalBondUnderload = %d\n\n", _totalBondUnderload); + //_paths[i]->affinity = (relativeUnderload > 0 ? relativeUnderload : _paths[i]->_allocation); + } + else { // set everything to base values + _totalBondUnderload = 0; + //_paths[i]->affinity = 0; + } + } + } + + //fprintf(stderr, "_totalBondUnderload=%d (end)\n\n", _totalBondUnderload); + + /** + * + */ + //fprintf(stderr, "_lastFlowRebalance\n"); + std::map >::iterator it = _flows.begin(); + while (it != _flows.end()) { + int32_t flowId = it->first; + SharedPtr flow = it->second; + if ((now - flow->_lastPathReassignment) > ZT_FLOW_MIN_REBALANCE_INTERVAL) { + //fprintf(stdout, " could move : %x\n", flowId); + } + ++it; + } + _lastFlowRebalance = now; + } + } + else if (!_allowFlowHashing) { + // Nothing + } + } } void Bond::dequeueNextActiveBackupPath(const uint64_t now) @@ -1042,7 +1179,7 @@ void Bond::dequeueNextActiveBackupPath(const uint64_t now) } void Bond::processActiveBackupTasks(const int64_t now) -{ +{ //fprintf(stderr, "%llu processActiveBackupTasks\n", (now - RR->bc->getBondStartTime())); char pathStr[128]; char prevPathStr[128]; char curPathStr[128]; @@ -1058,7 +1195,7 @@ void Bond::processActiveBackupTasks(const int64_t now) /** * [Automatic mode] * The user has not explicitly specified slaves or their failover schedule, - * the bonding policy will now select the first eligible path and set it as + * the bonding policy will now select the first eligible path and set it as * its active backup path, if a substantially better path is detected the bonding * policy will assign it as the 
new active backup path. If the path fails it will * simply find the next eligible path. @@ -1187,9 +1324,9 @@ void Bond::processActiveBackupTasks(const int64_t now) } SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); _paths[i]->address().toString(pathStr); - + int failoverScoreHandicap = _paths[i]->_failoverScore; - if (_paths[i]->preferred()) + if (_paths[i]->preferred()) { failoverScoreHandicap += ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED; //fprintf(stderr, "%s on %s ----> %d for preferred\n", pathStr, _paths[i]->ifname().c_str(), failoverScoreHandicap); @@ -1264,7 +1401,7 @@ void Bond::processActiveBackupTasks(const int64_t now) if (_paths[i].ptr() == negotiatedPath.ptr()) { _paths[i]->_negotiated = true; failoverScoreHandicap = ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED; - } else { + } else { _paths[i]->_negotiated = false; } _paths[i]->_failoverScore = _paths[i]->_allocation + failoverScoreHandicap; @@ -1386,7 +1523,7 @@ void Bond::setReasonableDefaults(int policy) _lastPathNegotiationReceived=0; _lastBackgroundTaskCheck=0; _lastPathNegotiationCheck=0; - + _lastFlowStatReset=0; _lastFlowExpirationCheck=0; _localUtility=0; @@ -1397,7 +1534,7 @@ void Bond::setReasonableDefaults(int policy) _pathNegotiationCutoffCount=0; _lastFlowRebalance=0; _totalBondUnderload = 0; - + //_maxAcceptableLatency _maxAcceptablePacketDelayVariance = 50; _maxAcceptablePacketLossRatio = 0.10; @@ -1445,7 +1582,7 @@ void Bond::setReasonableDefaults(int policy) case ZT_BONDING_POLICY_BALANCE_RR: _failoverInterval = 5000; _allowFlowHashing = false; - _packetsPerSlave = 8; + _packetsPerSlave = 512; _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; @@ -1550,8 +1687,8 @@ void Bond::setUserQualityWeights(float weights[], int len) bool Bond::relevant() { - return _peer->identity().address().toInt() == 0x16a03a3d03 - || _peer->identity().address().toInt() == 
0x4410300d03 + return _peer->identity().address().toInt() == 0x16a03a3d03 + || _peer->identity().address().toInt() == 0x4410300d03 || _peer->identity().address().toInt() == 0x795cbf86fa; } @@ -1566,7 +1703,7 @@ void Bond::dumpInfo(const int64_t now) //char oldPathStr[128]; char currPathStr[128]; - if (!relevant()) { + if (!relevant()) { return; } /* @@ -1589,7 +1726,7 @@ void Bond::dumpInfo(const int64_t now) } _lastPrintTS = now; _lastLogTS = now; - + fprintf(stderr, "\n\n"); for(int i=0; i getSlave(const SharedPtr& path); + SharedPtr getSlave(const SharedPtr& path); - /** - * Constructor. For use only in first initialization in Node - * - * @param renv Runtime environment - */ - Bond(const RuntimeEnvironment *renv); + /** + * Constructor. For use only in first initialization in Node + * + * @param renv Runtime environment + */ + Bond(const RuntimeEnvironment *renv); - /** - * Constructor. Creates a bond based off of ZT defaults - * - * @param renv Runtime environment - * @param policy Bonding policy - * @param peer - */ - Bond(const RuntimeEnvironment *renv, int policy, const SharedPtr& peer); + /** + * Constructor. Creates a bond based off of ZT defaults + * + * @param renv Runtime environment + * @param policy Bonding policy + * @param peer + */ + Bond(const RuntimeEnvironment *renv, int policy, const SharedPtr& peer); - /** - * Constructor. For use when user intends to manually specify parameters - * - * @param basePolicy - * @param policyAlias - * @param peer - */ - Bond(std::string& basePolicy, std::string& policyAlias, const SharedPtr& peer); + /** + * Constructor. For use when user intends to manually specify parameters + * + * @param basePolicy + * @param policyAlias + * @param peer + */ + Bond(std::string& basePolicy, std::string& policyAlias, const SharedPtr& peer); - /** - * Constructor. 
Creates a bond based off of a user-defined bond template - * - * @param renv Runtime environment - * @param original - * @param peer - */ - Bond(const RuntimeEnvironment *renv, const Bond &original, const SharedPtr& peer); + /** + * Constructor. Creates a bond based off of a user-defined bond template + * + * @param renv Runtime environment + * @param original + * @param peer + */ + Bond(const RuntimeEnvironment *renv, const Bond &original, const SharedPtr& peer); /** * @@ -101,7 +101,7 @@ public: * @param now Current time */ void nominatePath(const SharedPtr& path, int64_t now); - + /** * Propagate and memoize often-used bonding preferences for each path */ @@ -109,9 +109,9 @@ public: /** * Check path states and perform bond rebuilds if needed. - * + * * @param now Current time - * @param rebuild Whether or not the bond should be reconstructed. + * @param rebuild Whether or not the bond should be reconstructed. */ void curateBond(const int64_t now, bool rebuild); @@ -156,7 +156,7 @@ public: /** * Process the contents of an inbound VERB_ACK to gather path quality observations. - * + * * @param path Path over which packet was received * @param now Current time * @param ackedBytes Number of bytes ACKed by this VERB_ACK @@ -174,7 +174,7 @@ public: /** * Record statistics for an inbound packet. - * + * * @param path Path over which packet was received * @param packetId Packet ID * @param payloadLength Packet data length @@ -183,7 +183,7 @@ public: * @param now Current time */ void recordIncomingPacket(const SharedPtr& path, uint64_t packetId, uint16_t payloadLength, - Packet::Verb verb, int32_t flowId, int64_t now); + Packet::Verb verb, int32_t flowId, int64_t now); /** * Determines the most appropriate path for packet and flow egress. 
This decision is made by @@ -197,7 +197,7 @@ public: /** * Creates a new flow record - * + * * @param path Path over which flow shall be handled * @param flowId Flow ID * @param entropy A byte of entropy to be used by the bonding algorithm @@ -208,7 +208,7 @@ public: /** * Removes flow records that are past a certain age limit. - * + * * @param age Age threshold to be forgotten * @param oldest Whether only the oldest shall be forgotten * @param now Current time @@ -217,17 +217,17 @@ public: /** * Assigns a new flow to a bonded path - * + * * @param flow Flow to be assigned * @param now Current time */ bool assignFlowToBondedPath(SharedPtr &flow, int64_t now); - /** + /** * Determine whether a path change should occur given the remote peer's reported utility and our * local peer's known utility. This has the effect of assigning inbound and outbound traffic to - * the same path. - * + * the same path. + * * @param now Current time * @param path Path over which the negotiation request was received * @param remoteUtility How much utility the remote peer claims to gain by using the declared path @@ -245,7 +245,7 @@ public: /** * Sends a VERB_ACK to the remote peer. - * + * * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call * @param path Path over which packet should be sent * @param localSocket Local source socket @@ -253,11 +253,11 @@ public: * @param now Current time */ void sendACK(void *tPtr,const SharedPtr &path,int64_t localSocket, - const InetAddress &atAddress,int64_t now); + const InetAddress &atAddress,int64_t now); /** * Sends a VERB_QOS_MEASUREMENT to the remote peer. 
- * + * * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call * @param path Path over which packet should be sent * @param localSocket Local source socket @@ -265,11 +265,11 @@ public: * @param now Current time */ void sendQOS_MEASUREMENT(void *tPtr,const SharedPtr &path,int64_t localSocket, - const InetAddress &atAddress,int64_t now); + const InetAddress &atAddress,int64_t now); /** * Sends a VERB_PATH_NEGOTIATION_REQUEST to the remote peer. - * + * * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call * @param path Path over which packet should be sent */ @@ -280,10 +280,10 @@ public: * @param now Current time */ void processBalanceTasks(int64_t now); - + /** * Perform periodic tasks unique to active-backup - * + * * @param now Current time */ void processActiveBackupTasks(int64_t now); @@ -296,12 +296,12 @@ public: */ void dequeueNextActiveBackupPath(uint64_t now); - /** - * Set bond parameters to reasonable defaults, these may later be overwritten by + /** + * Set bond parameters to reasonable defaults, these may later be overwritten by * user-specified parameters. 
- * - * @param policy Bonding policy - */ + * + * @param policy Bonding policy + */ void setReasonableDefaults(int policy); /** @@ -450,19 +450,19 @@ public: */ inline uint16_t getUpDelay() { return _upDelay; } - /** - * @param upDelay Length of time before a newly-discovered path is admitted to the bond - */ + /** + * @param upDelay Length of time before a newly-discovered path is admitted to the bond + */ inline void setUpDelay(int upDelay) { if (upDelay >= 0) { _upDelay = upDelay; } } - /** - * @return Length of time before a newly-failed path is removed from the bond - */ + /** + * @return Length of time before a newly-failed path is removed from the bond + */ inline uint16_t getDownDelay() { return _downDelay; } - /** - * @param downDelay Length of time before a newly-failed path is removed from the bond - */ + /** + * @param downDelay Length of time before a newly-failed path is removed from the bond + */ inline void setDownDelay(int downDelay) { if (downDelay >= 0) { _downDelay = downDelay; } } /** @@ -470,11 +470,11 @@ public: */ inline uint16_t getBondMonitorInterval() { return _bondMonitorInterval; } - /** - * Set the current monitoring interval for the bond (can be overridden with intervals specific to certain slaves.) - * - * @param monitorInterval How often gratuitous VERB_HELLO(s) are sent to remote peer. - */ + /** + * Set the current monitoring interval for the bond (can be overridden with intervals specific to certain slaves.) + * + * @param monitorInterval How often gratuitous VERB_HELLO(s) are sent to remote peer. 
+ */ inline void setBondMonitorInterval(uint16_t interval) { _bondMonitorInterval = interval; } /** @@ -487,10 +487,10 @@ public: */ inline uint8_t getPolicy() { return _bondingPolicy; } - /** - * - * @param allowFlowHashing - */ + /** + * + * @param allowFlowHashing + */ inline void setFlowHashing(bool allowFlowHashing) { _allowFlowHashing = allowFlowHashing; } /** @@ -498,10 +498,10 @@ public: */ bool flowHashingEnabled() { return _allowFlowHashing; } - /** - * - * @param packetsPerSlave - */ + /** + * + * @param packetsPerSlave + */ inline void setPacketsPerSlave(int packetsPerSlave) { _packetsPerSlave = packetsPerSlave; } /** @@ -514,7 +514,7 @@ public: * * @return */ - inline uint8_t getSlaveSelectMethod() { return _abSlaveSelectMethod; } + inline uint8_t getSlaveSelectMethod() { return _abSlaveSelectMethod; } /** * diff --git a/node/BondController.hpp b/node/BondController.hpp index c8fa660b0..acc70d2ff 100644 --- a/node/BondController.hpp +++ b/node/BondController.hpp @@ -55,10 +55,10 @@ public: */ bool inUse() { return !_bondPolicyTemplates.empty() || _defaultBondingPolicy; } - /** - * @param basePolicyName Bonding policy name (See ZeroTierOne.h) - * @return The bonding policy code for a given human-readable bonding policy name - */ + /** + * @param basePolicyName Bonding policy name (See ZeroTierOne.h) + * @return The bonding policy code for a given human-readable bonding policy name + */ static int getPolicyCodeByStr(const std::string& basePolicyName) { if (basePolicyName == "active-backup") { return 1; } @@ -83,18 +83,18 @@ public: return "none"; } - /** - * Sets the default bonding policy for new or undefined bonds. + /** + * Sets the default bonding policy for new or undefined bonds. * - * @param bp Bonding policy - */ + * @param bp Bonding policy + */ void setBondingLayerDefaultPolicy(uint8_t bp) { _defaultBondingPolicy = bp; } - /** - * Sets the default (custom) bonding policy for new or undefined bonds. 
+ /** + * Sets the default (custom) bonding policy for new or undefined bonds. * - * @param alias Human-readable string alias for bonding policy - */ + * @param alias Human-readable string alias for bonding policy + */ void setBondingLayerDefaultPolicyStr(std::string alias) { _defaultBondingPolicyStr = alias; } /** @@ -119,7 +119,7 @@ public: bool addCustomPolicy(const SharedPtr& newBond); /** - * Assigns a specific bonding policy + * Assigns a specific bonding policy * * @param identity * @param policyAlias diff --git a/node/Flow.hpp b/node/Flow.hpp index cb8c3e4aa..5994a4fb2 100644 --- a/node/Flow.hpp +++ b/node/Flow.hpp @@ -24,10 +24,10 @@ namespace ZeroTier { */ struct Flow { - /** - * @param flowId Given flow ID - * @param now Current time - */ + /** + * @param flowId Given flow ID + * @param now Current time + */ Flow(int32_t flowId, int64_t now) : _flowId(flowId), _bytesInPerUnitTime(0), @@ -50,12 +50,12 @@ struct Flow * @return The Flow's ID */ int32_t id() { return _flowId; } - + /** * @return Number of incoming bytes processed on this flow per unit time */ int64_t bytesInPerUnitTime() { return _bytesInPerUnitTime; } - + /** * Record number of incoming bytes on this flow * diff --git a/node/Path.hpp b/node/Path.hpp index 9c54f718f..22a932edf 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -28,7 +28,6 @@ #include "Utils.hpp" #include "Packet.hpp" #include "RingBuffer.hpp" -//#include "Bond.hpp" #include "../osdep/Slave.hpp" @@ -48,7 +47,6 @@ class Path { friend class SharedPtr; friend class Bond; - //friend class SharedPtr; public: /** @@ -361,7 +359,7 @@ public: * @return the age of the path in terms of receiving packets */ inline int64_t age(int64_t now) { return (now - _lastIn); } - + /** * @return Time last trust-established packet was received */ @@ -634,7 +632,7 @@ private: * The variance in the estimated throughput of this path. */ float _throughputVariance; - + /** * The relative quality of this path to all others in the bond, [0-255]. 
*/ diff --git a/osdep/Slave.hpp b/osdep/Slave.hpp index b1ae326ea..a4caa983f 100644 --- a/osdep/Slave.hpp +++ b/osdep/Slave.hpp @@ -61,7 +61,7 @@ public: _userSpecifiedAlloc(userSpecifiedAlloc), _isUserSpecified(false) {} - + /** * @return The string representation of this slave's underlying interface's system name. */ @@ -163,7 +163,7 @@ private: /** * What preference (if any) a user has for IP protocol version used in * path aggregations. Preference is expressed in the order of the digits: - * + * * 0: no preference * 4: IPv4 only * 6: IPv6 only @@ -212,7 +212,7 @@ private: uint8_t _mode; /** - * The specific name of the interface to be used in the event that this + * The specific name of the interface to be used in the event that this * slave fails. */ std::string _failoverToSlaveStr; @@ -230,7 +230,6 @@ private: bool _isUserSpecified; AtomicCounter __refCount; - }; } // namespace ZeroTier From 7ed960297bb2719a361f13c673d2252e9ac52bdd Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 26 May 2020 17:57:09 -0700 Subject: [PATCH 20/35] Merge changes from dev into multipath --- controller/PostgreSQL.cpp | 136 +++++--------------------------------- node/NetworkConfig.cpp | 8 +-- node/NetworkConfig.hpp | 7 ++ 3 files changed, 28 insertions(+), 123 deletions(-) diff --git a/controller/PostgreSQL.cpp b/controller/PostgreSQL.cpp index 05d2de7b1..b8fd749a5 100644 --- a/controller/PostgreSQL.cpp +++ b/controller/PostgreSQL.cpp @@ -229,14 +229,12 @@ void PostgreSQL::eraseNetwork(const uint64_t networkId) tmp.first["objtype"] = "_delete_network"; tmp.second = true; _commitQueue.post(tmp); - nlohmann::json nullJson; - _networkChanged(tmp.first, nullJson, true); } void PostgreSQL::eraseMember(const uint64_t networkId, const uint64_t memberId) { char tmp2[24]; - std::pair tmp, nw; + std::pair tmp; Utils::hex(networkId, tmp2); tmp.first["nwid"] = tmp2; Utils::hex(memberId, tmp2); @@ -244,8 +242,6 @@ void PostgreSQL::eraseMember(const uint64_t networkId, const uint64_t 
memberId) tmp.first["objtype"] = "_delete_member"; tmp.second = true; _commitQueue.post(tmp); - nlohmann::json nullJson; - _memberChanged(tmp.first, nullJson, true); } void PostgreSQL::nodeIsOnline(const uint64_t networkId, const uint64_t memberId, const InetAddress &physicalAddress) @@ -634,7 +630,7 @@ void PostgreSQL::heartbeat() }; PGresult *res = PQexecParams(conn, - "INSERT INTO ztc_controller (id, cluster_host, last_alive, public_identity, v_major, v_minor, v_rev, v_build, host_port, use_redis) " + "INSERT INTO ztc_controller (id, cluster_host, last_alive, public_identity, v_major, v_minor, v_rev, v_build, host_port,use_redis) " "VALUES ($1, $2, TO_TIMESTAMP($3::double precision/1000), $4, $5, $6, $7, $8, $9, $10) " "ON CONFLICT (id) DO UPDATE SET cluster_host = EXCLUDED.cluster_host, last_alive = EXCLUDED.last_alive, " "public_identity = EXCLUDED.public_identity, v_major = EXCLUDED.v_major, v_minor = EXCLUDED.v_minor, " @@ -1405,15 +1401,6 @@ void PostgreSQL::commitThread() } void PostgreSQL::onlineNotificationThread() -{ - if (_rc != NULL) { - onlineNotification_Redis(); - } else { - onlineNotification_Postgres(); - } -} - -void PostgreSQL::onlineNotification_Postgres() { PGconn *conn = getPgConn(); if (PQstatus(conn) == CONNECTION_BAD) { @@ -1423,7 +1410,9 @@ void PostgreSQL::onlineNotification_Postgres() } _connected = 1; - nlohmann::json jtmp1, jtmp2; + //int64_t lastUpdatedNetworkStatus = 0; + std::unordered_map< std::pair,int64_t,_PairHasher > lastOnlineCumulative; + while (_run == 1) { if (PQstatus(conn) != CONNECTION_OK) { fprintf(stderr, "ERROR: Online Notification thread lost connection to Postgres."); @@ -1431,6 +1420,9 @@ void PostgreSQL::onlineNotification_Postgres() exit(5); } + // map used to send notifications to front end + std::unordered_map> updateMap; + std::unordered_map< std::pair,std::pair,_PairHasher > lastOnline; { std::lock_guard l(_lastOnline_l); @@ -1451,13 +1443,20 @@ void PostgreSQL::onlineNotification_Postgres() 
OSUtils::ztsnprintf(nwidTmp,sizeof(nwidTmp), "%.16llx", nwid_i); OSUtils::ztsnprintf(memTmp,sizeof(memTmp), "%.10llx", i->first.second); - if(!get(nwid_i, jtmp1, i->first.second, jtmp2)) { - continue; // skip non existent networks/members + auto found = _networks.find(nwid_i); + if (found == _networks.end()) { + continue; // skip members trying to join non-existant networks } std::string networkId(nwidTmp); std::string memberId(memTmp); + std::vector &members = updateMap[networkId]; + members.push_back(memberId); + + lastOnlineCumulative[i->first] = i->second.first; + + const char *qvals[2] = { networkId.c_str(), memberId.c_str() @@ -1527,107 +1526,6 @@ void PostgreSQL::onlineNotification_Postgres() } } -void PostgreSQL::onlineNotification_Redis() -{ - _connected = 1; - - char buf[11] = {0}; - std::string controllerId = std::string(_myAddress.toString(buf)); - - while (_run == 1) { - std::unordered_map< std::pair,std::pair,_PairHasher > lastOnline; - { - std::lock_guard l(_lastOnline_l); - lastOnline.swap(_lastOnline); - } - - if (_rc->clusterMode) { - auto tx = _cluster->redis(controllerId).transaction(true); - _doRedisUpdate(tx, controllerId, lastOnline); - } else { - auto tx = _redis->transaction(true); - _doRedisUpdate(tx, controllerId, lastOnline); - } - - std::this_thread::sleep_for(std::chrono::milliseconds(10)); - } -} - -void PostgreSQL::_doRedisUpdate(sw::redis::Transaction &tx, std::string &controllerId, - std::unordered_map< std::pair,std::pair,_PairHasher > &lastOnline) - -{ - nlohmann::json jtmp1, jtmp2; - for (auto i=lastOnline.begin(); i != lastOnline.end(); ++i) { - uint64_t nwid_i = i->first.first; - uint64_t memberid_i = i->first.second; - char nwidTmp[64]; - char memTmp[64]; - char ipTmp[64]; - OSUtils::ztsnprintf(nwidTmp,sizeof(nwidTmp), "%.16llx", nwid_i); - OSUtils::ztsnprintf(memTmp,sizeof(memTmp), "%.10llx", memberid_i); - - if (!get(nwid_i, jtmp1, memberid_i, jtmp2)){ - continue; // skip non existent members/networks - } - auto found = 
_networks.find(nwid_i); - if (found == _networks.end()) { - continue; // skip members trying to join non-existant networks - } - - std::string networkId(nwidTmp); - std::string memberId(memTmp); - - int64_t ts = i->second.first; - std::string ipAddr = i->second.second.toIpString(ipTmp); - std::string timestamp = std::to_string(ts); - - std::unordered_map record = { - {"id", memberId}, - {"address", ipAddr}, - {"last_updated", std::to_string(ts)} - }; - tx.zadd("nodes-online:{"+controllerId+"}", memberId, ts) - .zadd("network-nodes-online:{"+controllerId+"}:"+networkId, memberId, ts) - .sadd("network-nodes-all:{"+controllerId+"}:"+networkId, memberId) - .hmset("network:{"+controllerId+"}:"+networkId+":"+memberId, record.begin(), record.end()); - } - - tx.exec(); - - // expire records from all-nodes and network-nodes member list - uint64_t expireOld = OSUtils::now() - 300000; - - auto cursor = 0LL; - std::unordered_set keys; - // can't scan for keys in a transaction, so we need to fall back to _cluster or _redis - // to get all network-members keys - if(_rc->clusterMode) { - auto r = _cluster->redis(controllerId); - while(true) { - cursor = r.scan(cursor, "network-nodes-online:{"+controllerId+"}:*", INT_MAX, std::inserter(keys, keys.begin())); - if (cursor == 0) { - break; - } - } - } else { - while(true) { - cursor = _redis->scan(cursor, "network-nodes-online:"+controllerId+":*", INT_MAX, std::inserter(keys, keys.begin())); - if (cursor == 0) { - break; - } - } - } - - tx.zremrangebyscore("nodes-online:{"+controllerId+"}", sw::redis::RightBoundedInterval(expireOld, sw::redis::BoundType::LEFT_OPEN)); - - for(const auto &k : keys) { - tx.zremrangebyscore(k, sw::redis::RightBoundedInterval(expireOld, sw::redis::BoundType::LEFT_OPEN)); - } - - tx.exec(); -} - PGconn *PostgreSQL::getPgConn(OverrideMode m) { if (m == ALLOW_PGBOUNCER_OVERRIDE) { diff --git a/node/NetworkConfig.cpp b/node/NetworkConfig.cpp index e45a111d2..97985c7af 100644 --- a/node/NetworkConfig.cpp +++ 
b/node/NetworkConfig.cpp @@ -22,7 +22,7 @@ namespace ZeroTier { bool NetworkConfig::toDictionary(Dictionary &d,bool includeLegacy) const { Buffer *tmp = new Buffer(); - char tmp2[128]; + char tmp2[128] = {0}; try { d.clear(); @@ -84,7 +84,7 @@ bool NetworkConfig::toDictionary(Dictionary &d,b if (((int)lastrt < 32)||(lastrt == ZT_NETWORK_RULE_MATCH_ETHERTYPE)) { if (ets.length() > 0) ets.push_back(','); - char tmp2[16]; + char tmp2[16] = {0}; ets.append(Utils::hex((uint16_t)et,tmp2)); } et = 0; @@ -104,7 +104,7 @@ bool NetworkConfig::toDictionary(Dictionary &d,b if ((this->specialists[i] & ZT_NETWORKCONFIG_SPECIALIST_TYPE_ACTIVE_BRIDGE) != 0) { if (ab.length() > 0) ab.push_back(','); - char tmp2[16]; + char tmp2[16] = {0}; ab.append(Address(this->specialists[i]).toString(tmp2)); } } @@ -220,7 +220,7 @@ bool NetworkConfig::fromDictionary(const Dictionary Date: Tue, 26 May 2020 17:57:37 -0700 Subject: [PATCH 21/35] Remove vestigial constructor, fix typos, clean up code --- node/Bond.cpp | 38 ++++++++++++++++++++++++-------------- node/Bond.hpp | 12 ++---------- node/BondController.hpp | 4 ++-- node/Path.hpp | 20 +++++++------------- service/OneService.cpp | 2 +- 5 files changed, 36 insertions(+), 40 deletions(-) diff --git a/node/Bond.cpp b/node/Bond.cpp index 2f283a696..9aef8f815 100644 --- a/node/Bond.cpp +++ b/node/Bond.cpp @@ -29,7 +29,8 @@ Bond::Bond(const RuntimeEnvironment *renv, int policy, const SharedPtr& pe _policyAlias = BondController::getPolicyStrByCode(policy); } -Bond::Bond(std::string& basePolicy, std::string& policyAlias, const SharedPtr& peer) : +Bond::Bond(const RuntimeEnvironment *renv, std::string& basePolicy, std::string& policyAlias, const SharedPtr& peer) : + RR(renv), _policyAlias(policyAlias), _peer(peer) { @@ -1518,20 +1519,23 @@ void Bond::setReasonableDefaults(int policy) _upDelay = 0; _allowFlowHashing=false; _bondMonitorInterval=0; - _allowPathNegotiation=false; _shouldCollectPathStatistics=false; - _lastPathNegotiationReceived=0; 
_lastBackgroundTaskCheck=0; + + // Path negotiation + _allowPathNegotiation=false; + _lastPathNegotiationReceived=0; _lastPathNegotiationCheck=0; + _pathNegotiationCutoffCount=0; + _localUtility=0; _lastFlowStatReset=0; _lastFlowExpirationCheck=0; - _localUtility=0; + _numBondedPaths=0; _rrPacketsSentOnCurrSlave=0; _rrIdx=0; - _lastPathNegotiationReceived=0; - _pathNegotiationCutoffCount=0; + _lastFlowRebalance=0; _totalBondUnderload = 0; @@ -1543,12 +1547,6 @@ void Bond::setReasonableDefaults(int policy) _lastFrame=0; - // TODO: Remove - _header=false; - _lastLogTS = 0; - _lastPrintTS = 0; - - /** @@ -1582,7 +1580,7 @@ void Bond::setReasonableDefaults(int policy) case ZT_BONDING_POLICY_BALANCE_RR: _failoverInterval = 5000; _allowFlowHashing = false; - _packetsPerSlave = 512; + _packetsPerSlave = 1024; _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; @@ -1653,11 +1651,23 @@ void Bond::setReasonableDefaults(int policy) _qosSendInterval = _bondMonitorInterval * 4; _qosCutoffCount = 0; _lastQoSRateCheck = 0; + _lastQualityEstimation=0; throughputMeasurementInterval = _ackSendInterval * 2; BondController::setMinReqPathMonitorInterval(_bondMonitorInterval); _defaultPathRefractoryPeriod = 8000; + + + + + // TODO: Remove + _header=false; + _lastLogTS = 0; + _lastPrintTS = 0; + + + fprintf(stderr, "TIMERS: strat=%d, fi= %d, bmi= %d, qos= %d, ack= %d, estimateInt= %d, refractory= %d, ud= %d, dd= %d\n", _slaveMonitorStrategy, _failoverInterval, @@ -1669,7 +1679,7 @@ void Bond::setReasonableDefaults(int policy) _upDelay, _downDelay); - _lastQualityEstimation=0; + } void Bond::setUserQualityWeights(float weights[], int len) diff --git a/node/Bond.hpp b/node/Bond.hpp index 89e4c905a..62195b18e 100644 --- a/node/Bond.hpp +++ b/node/Bond.hpp @@ -54,13 +54,6 @@ public: SharedPtr getSlave(const SharedPtr& path); - /** - * Constructor. 
For use only in first initialization in Node - * - * @param renv Runtime environment - */ - Bond(const RuntimeEnvironment *renv); - /** * Constructor. Creates a bond based off of ZT defaults * @@ -77,7 +70,7 @@ public: * @param policyAlias * @param peer */ - Bond(std::string& basePolicy, std::string& policyAlias, const SharedPtr& peer); + Bond(const RuntimeEnvironment *renv, std::string& basePolicy, std::string& policyAlias, const SharedPtr& peer); /** * Constructor. Creates a bond based off of a user-defined bond template @@ -89,8 +82,7 @@ public: Bond(const RuntimeEnvironment *renv, const Bond &original, const SharedPtr& peer); /** - * - * @return + * @return The human-readable name of the bonding policy */ std::string policyAlias() { return _policyAlias; } diff --git a/node/BondController.hpp b/node/BondController.hpp index acc70d2ff..95fbf81fc 100644 --- a/node/BondController.hpp +++ b/node/BondController.hpp @@ -36,7 +36,7 @@ public: BondController(const RuntimeEnvironment *renv); /** - * @return The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements. + * @return Whether this slave is permitted to become a member of a bond. */ bool slaveAllowed(std::string &policyAlias, SharedPtr slave); @@ -46,7 +46,7 @@ public: int minReqPathMonitorInterval() { return _minReqPathMonitorInterval; } /** - * @return The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements. + * @param minReqPathMonitorInterval The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements. 
*/ static void setMinReqPathMonitorInterval(int minReqPathMonitorInterval) { _minReqPathMonitorInterval = minReqPathMonitorInterval; } diff --git a/node/Path.hpp b/node/Path.hpp index 22a932edf..1cbd588bc 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -127,8 +127,7 @@ public: _packetsReceivedSinceLastQoS(0), _bytesAckedSinceLastThroughputEstimation(0), _packetsIn(0), - _packetsOut(0), - _prevEligibility(false) + _packetsOut(0) {} Path(const int64_t localSocket,const InetAddress &addr) : @@ -177,8 +176,7 @@ public: _packetsReceivedSinceLastQoS(0), _bytesAckedSinceLastThroughputEstimation(0), _packetsIn(0), - _packetsOut(0), - _prevEligibility(false) + _packetsOut(0) {} /** @@ -187,10 +185,10 @@ public: * @param t Time of receive */ inline void received(const uint64_t t) { - _lastIn = t; - if (!_prevEligibility) { + if (!alive(t,_bonded)) { _lastAliveToggle = _lastIn; } + _lastIn = t; } /** @@ -506,7 +504,7 @@ private: uint64_t _lastQoSMeasurement; /** - * Last time that a the path's throughput was estimated. + * Last time that the path's throughput was estimated. */ uint64_t _lastThroughputEstimation; @@ -531,7 +529,7 @@ private: uint64_t _lastTrialBegin; /** - * Amount of time that this path is prevented from becoming a member of a bond. + * Amount of time that this path will be prevented from becoming a member of a bond. */ uint32_t _refractoryPeriod; @@ -576,7 +574,7 @@ private: bool _bonded; /** - * Whether this path was intentionally _negotiated by either peer. + * Whether this path was intentionally negotiated by either peer. 
*/ bool _negotiated; @@ -684,10 +682,6 @@ private: */ int _packetsIn; int _packetsOut; - - // TODO: Remove - - bool _prevEligibility; }; } // namespace ZeroTier diff --git a/service/OneService.cpp b/service/OneService.cpp index 04734d7e2..ab8594eec 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -1596,7 +1596,7 @@ public: continue; } // New bond, used as a copy template for new instances - SharedPtr newTemplateBond = new Bond(basePolicyStr, customPolicyStr, SharedPtr()); + SharedPtr newTemplateBond = new Bond(NULL, basePolicyStr, customPolicyStr, SharedPtr()); // Acceptable ranges newTemplateBond->setMaxAcceptableLatency(OSUtils::jsonInt(customPolicy["maxAcceptableLatency"],-1)); newTemplateBond->setMaxAcceptableMeanLatency(OSUtils::jsonInt(customPolicy["maxAcceptableMeanLatency"],-1)); From a8f830aa9c216e70edee3d671a184ee3bb998cb6 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 26 May 2020 18:29:19 -0700 Subject: [PATCH 22/35] Add multipath documentation to service/ --- service/MULTIPATH.md | 250 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 250 insertions(+) create mode 100644 service/MULTIPATH.md diff --git a/service/MULTIPATH.md b/service/MULTIPATH.md new file mode 100644 index 000000000..8a9e84603 --- /dev/null +++ b/service/MULTIPATH.md @@ -0,0 +1,250 @@ +### **2.1.5.** Link aggregation + +Link aggregation allows the simultaneous (or conditional) use of multiple physical links to enable increased throughput, load balancing, redundancy, and fault tolerance. There are a variety of standard policies available that can be used right out of the box with little to no configuration. These policies are directly inspired by [the policies offered by the Linux kernel](https://www.kernel.org/doc/Documentation/networking/bonding.txt). + +#### Standard Policies + +| Policy name | Fault tolerance | Min. failover (sec.) | Default Failover (sec.) 
| Balancing | Aggregation efficiency | Redundancy | Sequence Reordering | +|--------------------|:---------------------:|---------------------:|---------------------:|----------------------:|-----------------------:|-----------:|--------------------:| +| `none` | None | `60+` | `60+` | none | `none` |1 | No +| `active-backup` | Brief interruption | `0.25` | `10` | none | `low` |1 | Only during failover +| `broadcast` | Fully tolerant | `N/A` | `N/A` | none | `very low` |N | Often +| `balance-rr` | Self-healing | `0.25` | `10` | packet-based | `high` |1 | Often +| `balance-xor` | Self-healing | `0.25` | `10` | flow-based | `very high` |1 | Only during failover +| `balance-aware` | Self-healing | `0.25` | `10` | *adaptive* flow-based | `very high` |1 | Only during failover and re-balance + +A policy can be used easily without specifying any additional parameters: + +``` +{ + "settings": { + "defaultBondingPolicy": "active-backup" + } +} +``` + +#### Custom Policies + +To customize a bonding policy for your use-case simply specify a `basePolicy` and override chosen parameters. For example, to create a more aggressive `active-backup` policy with low monitoring overhead that will failover `0.250` seconds after it detects a link failure, one could do the following: + +``` +{ + "settings": + { + "defaultBondingPolicy": "aggressive-active-backup", + "policies": + { + "aggressive-active-backup": + { + "failoverInterval": 250, + "pathMonitorStrategy": "dynamic", + "basePolicy": "active-backup" + } + } + } +} +``` + +#### Specifying Slave interfaces + +Available system network interfaces are referred to as `slaves`. Different sets of slaves can be constructed for different bonding policies and used simultaneously. One can specify the links that ZeroTier should use in any given bonding policy simply by providing an array of slaves with names corresponding to interface names. 
If a user doesn't specify a set of interfaces to use, ZeroTier will assume every system interface is available for use. However, if the user **does** specify a set of interfaces, ZeroTier will only use what is specified. The same applies to failover rules, if none are specified, ZeroTier will failover to any operational slave. On the other hand, if the user does specify failover rules and there is ever a situation where a slave is available for usage but does not fit within the rules specified by the user, it will go unused. + +To specify that ZeroTier should only use `eth0` and `eth1` as primary slaves, and `eth2` as a backup spare and that it should prefer IPv4 over IPv6 except on `eth2` where only IPv6 is allowed: + +``` +{ + "settings": { + "defaultBondingPolicy": "aggressive-active-backup", + "policies": { + "aggressive-active-backup": { + "slaves": { + "eth0": { + "ipvPref": 46, + "failoverTo": "eth2", + "mode": "primary" + }, + "eth1": { + "ipvPref": 46, + "failoverTo": "eth2", + "mode": "primary" + }, + "eth2": { + "ipvPref": 6, + "mode": "spare" + } + } + } + } + } +} +``` + +Additional slave-specific parameters: + +``` +"slaves": +{ + "interfaceName": /* System-name of the network interface. */ + { + "failoverInterval": 0-65535, /* (optional) How quickly a path on this slave should failover after a detected failure. */ + "ipvPref": [0,4,6,46,64], /* (optional) IP version preference for detected paths on a slave. */ + "speed": 0-1000000, /* (optional) How fast this slave is (in arbitrary units). This is a useful way to manually allocate a bond. */ + "alloc": 0-255, /* (optional) A relative value representing a desired allocation. */ + "upDelay": 0-65535, /* (optional) How long after a path becomes alive before it is added to the bond. */ + "downDelay": 0-65535, /* (optional) How long after a path fails before it is removed from the bond. */ + "failoverTo": "spareInterfaceName", /* (optional) Which slave should be used next after a failure of this slave. 
*/
+    "enabled": true|false, /* (optional) Whether any paths on this slave are allowed to be used in this bond. */
+    "mode": "primary"|"spare" /* (optional) Whether this slave is used by default or only after failover events. */
+  }
+}
+```
+
+#### Peer-specific Bonds
+
+It is possible to direct ZeroTier to form a certain type of bond with specific peers of your choice. For instance, if one were to want `active-backup` by default but for certain peers to be bonded with a custom load-balanced bond such as `my-custom-balance-aware` one could do the following:
+
+```
+{
+  "settings":
+  {
+    "defaultBondingPolicy": "active-backup",
+    "policies":
+    {
+      "my-custom-balance-aware":
+      {
+        "failoverInterval": 2000,
+        "monitorStrategy": "dynamic",
+        "basePolicy": "balance-aware"
+      }
+    },
+    "peerSpecificBonds":
+    {
+      "f6203a2db3":"my-custom-balance-aware",
+      "45b0301da2":"my-custom-balance-aware",
+      "a92cb526fa":"my-custom-balance-aware"
+    }
+  }
+}
+```
+
+#### Active Backup (`active-backup`)
+
+Traffic is sent only on (one) path at any given time. A different path becomes active if the current path fails. This mode provides fault tolerance with a nearly immediate fail-over. This mode **does not** increase total throughput.
+
+ - `mode`: `primary, spare` Slave option which specifies which slave is the primary device. The specified device is intended to always be the active slave while it is available. There are exceptions to this behavior when using different `slaveSelectMethod` modes. There can only be one `primary` slave in this bonding policy.
+
+ - `slaveSelectMethod`: Specifies the selection policy for the active slave during failure and/or recovery events. This is similar to the Linux Kernel's `primary_reselect` option but with a minor extension:
+   - `optimize`: **(default if user provides no failover guidance)** The primary slave can change periodically if a superior path is detected.
+ - `always`: **(default when slaves are explicitly specified)**: Primary slave regains status as active slave whenever it comes back up. + - `better`: Primary slave regains status as active slave when it comes back up and (if) it is better than the currently-active slave. + - `failure`: Primary slave regains status as active slave only if the currently-active slave fails. + +``` +{ + "settings": + { + "defaultBondingPolicy": "active-backup", + "active-backup": + { + "slaveSelectMethod": "always", + "slaves": + { + "eth0": { "failoverTo": "eth1", "mode": "primary" }, + "eth1": { "mode": "spare" }, + "eth2": { "mode": "spare" }, + "eth3": { "mode": "spare" } + } + } + } +} +``` + +#### Broadcast (`broadcast`) + +Traffic is sent on (all) available paths simultaneously. This mode provides fault tolerance and effectively immediate failover due to transmission redundancy. This mode is a poor utilization of throughput resources and will **not** increase throughput but can prevent packet loss during a link failure. The only option available is `dedup` which will de-duplicate all packets on the receiving end if set to `true`. + +#### Balance Round Robin (`balance-rr`) + +Traffic is striped across multiple paths. Offers partial fault tolerance immediately, full fault tolerance eventually. This policy is unaware of protocols and is primarily intended for use with protocols that are not sensitive to reordering delays. The only option available for this policy is `packetsPerSlave` which specifies the number of packets to transmit via a path before moving to the next in the RR sequence. When set to `0` a path is chosen at random for each outgoing packet. The default value is `8`, low values can begin to add overhead to packet processing. 
+
+#### Balance XOR (`balance-xor`, similar to the Linux kernel's [balance-xor](https://www.kernel.org/doc/Documentation/networking/bonding.txt) with `xmit_hash_policy=layer3+4`)
+
+Traffic is categorized into *flows* based on *source port*, *destination port*, and *protocol type*; these flows are then hashed onto available slaves. Each flow will persist on its assigned slave interface for its entire life-cycle. Traffic that does not have an assigned port (such as ICMP pings) will be randomly distributed across slaves. The hash function is simply: `src_port ^ dst_port ^ proto`.
+
+#### Balance Aware (`balance-aware`, similar to Linux kernel's [`balance-*lb`](https://www.kernel.org/doc/Documentation/networking/bonding.txt) modes)
+
+Traffic is dynamically allocated and balanced across multiple slaves simultaneously according to the target allocation. Options allow for *packet* or *flow-based* processing, and active-flow reassignment. Flows mediated over recently failed slaves will be reassigned in a manner that respects the target allocation of the bond. An optional `balancePolicy` can be specified with the following effects: `flow-dynamic` (default) will hash flows onto slaves according to target allocation and may perform periodic re-assignments in order to preserve balance. `flow-static` will hash flows onto slaves according to target allocation but will not re-assign flows unless a failure occurs or the slave is no longer operating within acceptable parameters. And lastly `packet` which simply load balances packets across slaves according to target allocation but with no concern for sequence reordering.
+
+```
+{
+  "settings":
+  {
+    "defaultBondingPolicy": "balance-aware",
+    "balance-aware": {
+      "balancePolicy": "flow-dynamic"|"flow-static"|"packet"
+    }
+  }
+}
+```
+
+#### Link Quality
+
+ZeroTier measures various properties of a link (such as latency, throughput, jitter, packet loss ratio, etc) in order to arrive at a quality estimate.
This estimate is used by bonding policies to make allocation and failover decisions: + +| Policy name | Role | +|:---------------|:-----| +|`active-backup` | Determines the order of the failover queue. And if `activeReselect=optimize` whether a new active slave is selected. | +|`broadcast` | Does not use quality measurements. | +|`balance-rr` | May trigger removal of slave from bond. | +|`balance-xor` | May trigger removal of slave from bond. | +|`balance-aware` | Informs flow assignments and (re-)assignments. May trigger removal of slave from bond. | + +A slave's eligibility for being included in a bond is dependent on more than perceived quality. If a path on a slave begins to exhibit disruptive behavior such as extremely high packet loss, corruption, or periodic inability to process traffic it will be removed from the bond, its traffic will be appropriately reallocated and it will be punished. Punishments gradually fade and a slave can be readmitted to the bond over time. However, punishments increase exponentially if applied more than once within a given window of time. + +#### Asymmetric Links + +In cases where it is necessary to bond physical links that vary radically in terms of cost, throughput, latency, and or reliability, there are a couple of ways to automatically (or manually) allocate traffic among them. Traffic distribution and balancing can be either `packet` or `flow` based. Where packet-based is suitable for protocols not susceptible to reordering penalties and flow-based is suitable for protocols such as TCP where it is desirable to keep a conversation on a single link unless we can't avoid having to re-assign it. Additionally, a *target allocation* of traffic used by the bonding policy can be derived/specified in the following ways: + + - **Automatically**: This is the easiest and requires no user configuration. The bonding layer measures and senses the link properties and determines a target allocation based on perceived quality and capacity. 
Weaker, less reliable links will have less traffic allocated to them and stronger, more reliable links will have more traffic allocated to them. Optionally, the user can specify a set of weights (totaling `1.0`) to inform the bonding layer how important certain link properties are. For instance, one may primarily be concerned with latency and jitter but not total throughput:
+
+```
+"balance-aware": {
+  "quality": {
+    "lat": 0.3, /* Moving average of latency in milliseconds */
+    "ltm": 0.2, /* Maximum observed latency in milliseconds */
+    "pdv": 0.3, /* Packet delay variance in milliseconds. Similar to jitter */
+    "plr": 0.1, /* Packet loss ratio */
+    "per": 0.1, /* Packet error ratio */
+    "thr": 0.0, /* Mean throughput */
+    "thm": 0.0, /* Maximum observed throughput */
+    "thv": 0.0, /* Variance of throughput */
+    "avl": 0.0  /* Availability */
+  }
+}
+```
+In the absence of user guidance ZeroTier will attempt to form an understanding of each link's speed and capacity but this value can be inaccurate if the links are not routinely saturated. Therefore we provide a way to explicitly signal the capacity of each link in terms of arbitrary but relative values:
+
+```
+"slaves": {
+  "eth0": { "speed": 10000 },
+  "eth1": { "speed": 1000 },
+  "eth2": { "speed": 100 }
+}
+```
+
+The user specifies allocation percentages (totaling `1.0`). In this case quality measurements will only be used to determine a slave's eligibility to be a member of a bond, not how much traffic it will carry:
+
+```
+"slaves": {
+  "eth0": { "alloc": 0.50 },
+  "eth1": { "alloc": 0.25 },
+  "eth2": { "alloc": 0.25 }
+}
+```
+
+#### Performance and Overhead Considerations
+
+ - Only packets with internal IDs divisible by `16` are included in measurements; this amounts to about `6.25%` of all traffic.
+ - `failoverInterval` specifies how quickly failover should occur during a link failure.
In order to accomplish this a combination of active and passive measurement techniques are employed which may result in `VERB_HELLO` probes being sent every `failoverInterval / 4` time units. As a mitigation `monitorStrategy` may be set to `dynamic` so that probe frequency directly correlates with native application traffic. + + From 5e122b95e7cb594464ee2090ad0ab1a403f09367 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Sat, 30 May 2020 21:21:22 -0700 Subject: [PATCH 23/35] Fix segfault during balance-rr when link is brought down --- node/Bond.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/node/Bond.cpp b/node/Bond.cpp index 9aef8f815..28b998049 100644 --- a/node/Bond.cpp +++ b/node/Bond.cpp @@ -135,13 +135,14 @@ SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) int _tempIdx = _rrIdx; for (int searchCount = 0; searchCount < (_numBondedPaths-1); searchCount++) { _tempIdx = (_tempIdx == (_numBondedPaths-1)) ? 0 : _tempIdx+1; - if (_paths[_bondedIdx[_tempIdx]] && _paths[_bondedIdx[_tempIdx]]->eligible(now,_ackSendInterval)) { - _rrIdx = _tempIdx; - break; + if (_bondedIdx[_tempIdx] != ZT_MAX_PEER_NETWORK_PATHS) { + if (_paths[_bondedIdx[_tempIdx]] && _paths[_bondedIdx[_tempIdx]]->eligible(now,_ackSendInterval)) { + _rrIdx = _tempIdx; + break; + } } } } - fprintf(stderr, "_rrIdx=%d\n", _rrIdx); if (_paths[_bondedIdx[_rrIdx]]) { return _paths[_bondedIdx[_rrIdx]]; } From 1dca7b92cf2b04a4a592eaa58711c461aa22bd20 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Sun, 31 May 2020 17:30:41 -0700 Subject: [PATCH 24/35] Remove exit condition for bond creation during re-learning of previously-known paths --- node/BondController.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/BondController.cpp b/node/BondController.cpp index 4bc8d2261..cb4414f9f 100644 --- a/node/BondController.cpp +++ b/node/BondController.cpp @@ -107,7 +107,7 @@ SharedPtr BondController::createTransportTriggeredBond(const 
RuntimeEnviro } } else { - fprintf(stderr, "bond already exists for %llx, cannot re-register. exiting\n", identity); exit(0); // TODO: Remove + fprintf(stderr, "bond already exists for %llx.\n", identity); } if (bond) { _bonds[identity] = bond; From fa5c8ef434a31e91f02861bb169f22a9aae7b1f4 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Mon, 1 Jun 2020 22:58:58 -0700 Subject: [PATCH 25/35] Fix timers, fix flow count discrepancy after flow removal, fix balance-aware flow re-assignment when one or more links go down --- node/Bond.cpp | 217 +++++++++++++++++++++------------------- node/Bond.hpp | 5 +- node/BondController.cpp | 4 +- 3 files changed, 117 insertions(+), 109 deletions(-) diff --git a/node/Bond.cpp b/node/Bond.cpp index 28b998049..9a2f5c267 100644 --- a/node/Bond.cpp +++ b/node/Bond.cpp @@ -25,7 +25,7 @@ Bond::Bond(const RuntimeEnvironment *renv, int policy, const SharedPtr& pe RR(renv), _peer(peer) { - setReasonableDefaults(policy); + setReasonableDefaults(policy, SharedPtr(), false); _policyAlias = BondController::getPolicyStrByCode(policy); } @@ -34,31 +34,14 @@ Bond::Bond(const RuntimeEnvironment *renv, std::string& basePolicy, std::string& _policyAlias(policyAlias), _peer(peer) { - setReasonableDefaults(BondController::getPolicyCodeByStr(basePolicy)); + setReasonableDefaults(BondController::getPolicyCodeByStr(basePolicy), SharedPtr(), false); } -Bond::Bond(const RuntimeEnvironment *renv, const Bond &originalBond, const SharedPtr& peer) : +Bond::Bond(const RuntimeEnvironment *renv, SharedPtr originalBond, const SharedPtr& peer) : RR(renv), _peer(peer) { - // First, set everything to sane defaults - setReasonableDefaults(originalBond._bondingPolicy); - _policyAlias = originalBond._policyAlias; - // Second, apply user specified values (only if they make sense) - _downDelay = originalBond._downDelay; - _upDelay = originalBond._upDelay; - if (originalBond._bondMonitorInterval > 0 && originalBond._bondMonitorInterval < 65535) { - _bondMonitorInterval = 
originalBond._bondMonitorInterval; - } - else { - fprintf(stderr, "warning: bondMonitorInterval (%d) is out of range, using default (%d)\n", originalBond._bondMonitorInterval, _bondMonitorInterval); - } - if (originalBond._slaveMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE - && originalBond._failoverInterval != 0) { - fprintf(stderr, "warning: passive path monitoring was specified, this will prevent failovers from happening in a timely manner.\n"); - } - _abSlaveSelectMethod = originalBond._abSlaveSelectMethod; - memcpy(_qualityWeights, originalBond._qualityWeights, ZT_QOS_WEIGHT_SIZE * sizeof(float)); + setReasonableDefaults(originalBond->_bondingPolicy, originalBond, true); } void Bond::nominatePath(const SharedPtr& path, int64_t now) @@ -97,7 +80,7 @@ SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) /** * active-backup */ - if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { if (_abPath) { return _abPath; } @@ -105,7 +88,7 @@ SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) /** * broadcast */ - if (_bondingPolicy== ZT_BONDING_POLICY_BROADCAST) { + if (_bondingPolicy == ZT_BONDING_POLICY_BROADCAST) { return SharedPtr(); // Handled in Switch::_trySend() } if (!_numBondedPaths) { @@ -114,7 +97,7 @@ SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) /** * balance-rr */ - if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR) { + if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) { if (!_allowFlowHashing) { //fprintf(stderr, "_rrPacketsSentOnCurrSlave=%d, _numBondedPaths=%d, _rrIdx=%d\n", _rrPacketsSentOnCurrSlave, _numBondedPaths, _rrIdx); if (_packetsPerSlave == 0) { @@ -151,7 +134,7 @@ SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) /** * balance-xor */ - if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE) { + if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR || _bondingPolicy 
== ZT_BONDING_POLICY_BALANCE_AWARE) { if (!_allowFlowHashing || flowId == -1) { // No specific path required for unclassified traffic, send on anything return _paths[_bondedIdx[_freeRandomByte % _numBondedPaths]]; // TODO: Optimize @@ -252,9 +235,9 @@ void Bond::recordIncomingPacket(const SharedPtr& path, uint64_t packetId, * which path to use. */ if ((flowId != ZT_QOS_NO_FLOW) - && (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR - || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR - || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE)) { + && (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR + || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR + || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE)) { Mutex::Lock _l(_flows_m); SharedPtr flow; if (!_flows.count(flowId)) { @@ -335,6 +318,7 @@ bool Bond::assignFlowToBondedPath(SharedPtr &flow, int64_t now) unsigned int idx = ZT_MAX_PEER_NETWORK_PATHS; if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR) { idx = abs((int)(flow->id() % (_numBondedPaths))); + //fprintf(stderr, "flow->id()=%d, %x, _numBondedPaths=%d, idx=%d\n", flow->id(), flow->id(), _numBondedPaths, idx); flow->assignPath(_paths[_bondedIdx[idx]],now); } if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { @@ -347,15 +331,28 @@ bool Bond::assignFlowToBondedPath(SharedPtr &flow, int64_t now) fprintf(stderr, "no bonded paths for flow assignment\n"); return false; } + /* Since there may be scenarios where a path is removed before we can re-estimate + relative qualities (and thus allocations) we need to down-modulate the entropy + value that we use to randomly assign among the surviving paths, otherwise we risk + not being able to find a path to assign this flow to. 
*/ + int totalIncompleteAllocation = 0; + for(unsigned int i=0;ibonded()) { + totalIncompleteAllocation += _paths[i]->_allocation; + } + } + fprintf(stderr, "entropy = %d, totalIncompleteAllocation=%d\n", entropy, totalIncompleteAllocation); + entropy %= totalIncompleteAllocation; + fprintf(stderr, "new entropy = %d\n", entropy); for(unsigned int i=0;ibonded()) { SharedPtr slave = RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); _paths[i]->address().toString(curPathStr); uint8_t probabilitySegment = (_totalBondUnderload > 0) ? _paths[i]->_affinity : _paths[i]->_allocation; - //fprintf(stderr, "i=%2d, entropy=%3d, alloc=%3d, byteload=%4d, segment=%3d, _totalBondUnderload=%3d, ifname=%s, path=%20s\n", i, entropy, _paths[i]->allocation, _paths[i]->relativeByteLoad, probabilitySegment, _totalBondUnderload, slave->ifname().c_str(), curPathStr); + fprintf(stderr, "i=%2d, entropy=%3d, alloc=%3d, byteload=%4d, segment=%3d, _totalBondUnderload=%3d, ifname=%s, path=%20s\n", i, entropy, _paths[i]->_allocation, _paths[i]->_relativeByteLoad, probabilitySegment, _totalBondUnderload, slave->ifname().c_str(), curPathStr); if (entropy <= probabilitySegment) { idx = i; - //fprintf(stderr, "\t is best path\n"); + fprintf(stderr, "\t is best path\n"); break; } entropy -= probabilitySegment; @@ -423,6 +420,7 @@ void Bond::forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now) while (it != _flows.end()) { if (it->second->age(now) > age) { fprintf(stderr, "forgetting flow %x between this node and %llx, %lu active flow(s)\n", it->first, _peer->_id.address().toInt(), (_flows.size()-1)); + it->second->assignedPath()->_assignedFlowCount--; it = _flows.erase(it); } else { ++it; @@ -440,10 +438,10 @@ void Bond::forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now) } if (oldestFlow != _flows.end()) { fprintf(stderr, "forgetting oldest flow %x (of age %llu) between this node and %llx, %lu active flow(s)\n", oldestFlow->first, oldestFlow->second->age(now), 
_peer->_id.address().toInt(), (_flows.size()-1)); + oldestFlow->second->assignedPath()->_assignedFlowCount--; _flows.erase(oldestFlow); } } - fprintf(stderr, "000\n"); } void Bond::processIncomingPathNegotiationRequest(uint64_t now, SharedPtr &path, int16_t remoteUtility) @@ -610,17 +608,17 @@ void Bond::processBackgroundTasks(void *tPtr, const int64_t now) //fprintf(stderr, "_lastFrame=%llu, suggestedMonitorInterval=%d, _dynamicPathMonitorInterval=%d\n", // (now-_lastFrame), suggestedMonitorInterval, _dynamicPathMonitorInterval); } - + // TODO: Clarify and generalize this logic if (_slaveMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC) { _shouldCollectPathStatistics = true; } // Memoize oft-used properties in the packet ingress/egress logic path - if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE) { + if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { // Required for real-time balancing _shouldCollectPathStatistics = true; } - if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { if (_abSlaveSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_BETTER) { // Required for judging suitability of primary slave after recovery _shouldCollectPathStatistics = true; @@ -680,7 +678,7 @@ void Bond::processBackgroundTasks(void *tPtr, const int64_t now) void Bond::applyUserPrefs() { - fprintf(stderr, "applyUserPrefs, _minReqPathMonitorInterval=%d\n", RR->bc->minReqPathMonitorInterval()); + //fprintf(stderr, "applyUserPrefs, _minReqPathMonitorInterval=%d\n", RR->bc->minReqPathMonitorInterval()); for(unsigned int i=0;ibc->getBondStartTime())), rebuildBond); + //fprintf(stderr, "%lu curateBond (rebuildBond=%d), _numBondedPaths=%d\n", ((now - RR->bc->getBondStartTime())), rebuildBond, _numBondedPaths); char pathStr[128]; /** * Update path states @@ -727,6 +725,9 @@ void Bond::curateBond(const int64_t now, bool rebuildBond) continue; } bool currEligibility = _paths[i]->eligible(now,_ackSendInterval); + 
//_paths[i]->address().toString(pathStr); + //fprintf(stderr, "\n\n%ld path eligibility (for %s, %s):\n", (RR->node->now() - RR->bc->getBondStartTime()), getSlave(_paths[i])->ifname().c_str(), pathStr); + //_paths[i]->printEligible(now,_ackSendInterval); if (currEligibility != _paths[i]->_lastEligibilityState) { _paths[i]->address().toString(pathStr); //fprintf(stderr, "\n\n%ld path eligibility (for %s, %s) has changed (from %d to %d)\n", (RR->node->now() - RR->bc->getBondStartTime()), getSlave(_paths[i])->ifname().c_str(), pathStr, _paths[i]->lastCheckedEligibility, _paths[i]->eligible(now,_ackSendInterval)); @@ -754,9 +755,9 @@ void Bond::curateBond(const int64_t now, bool rebuildBond) * Curate the set of paths that are part of the bond proper. Selects a single path * per logical slave according to eligibility and user-specified constraints. */ - if ((_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR) - || (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR) - || (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE)) { + if ((_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) + || (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR) + || (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE)) { if (!_numBondedPaths) { rebuildBond = true; } @@ -822,7 +823,7 @@ void Bond::curateBond(const int64_t now, bool rebuildBond) } _numBondedPaths = updatedBondedPathCount; - if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR) { + if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) { // Cause a RR reset since the currently used index might no longer be valid _rrPacketsSentOnCurrSlave = _packetsPerSlave; } @@ -975,11 +976,9 @@ void Bond::estimatePathQuality(const int64_t now) _paths[i]->_allocation = alloc[i]; } } - /* if ((now - _lastLogTS) > 500) { if (!relevant()) {return;} //fprintf(stderr, "\n"); - _lastPrintTS = now; _lastLogTS = now; int numPlottablePaths=0; for(unsigned int i=0;iaddress().toString(pathStr); fprintf(stdout, "%s, %s, %8.3f, %8.3f, %8.3f, %5.3f, %5.3f, %5.3f, %8f, %5.3f, 
%5.3f, %d, %5.3f, %d, %d, %d, %d, %d, %d, ", - getSlave(_paths[i])->ifname().c_str(), pathStr, _paths[i]->latencyMean, lat[i],pdv[i], _paths[i]->packetLossRatio, plr[i],per[i],thr[i],thm[i],thv[i],(now - _paths[i]->lastIn()),quality[i],alloc[i], - _paths[i]->relativeByteLoad, _paths[i]->assignedFlowCount, _paths[i]->alive(now, true), _paths[i]->eligible(now,_ackSendInterval), _paths[i]->qosStatsOut.size()); + getSlave(_paths[i])->ifname().c_str(), pathStr, _paths[i]->_latencyMean, lat[i],pdv[i], _paths[i]->_packetLossRatio, plr[i],per[i],thr[i],thm[i],thv[i],(now - _paths[i]->lastIn()),quality[i],alloc[i], + _paths[i]->_relativeByteLoad, _paths[i]->_assignedFlowCount, _paths[i]->alive(now, true), _paths[i]->eligible(now,_ackSendInterval), _paths[i]->qosStatsOut.size()); } } fprintf(stdout, "\n"); } - */ } void Bond::processBalanceTasks(const int64_t now) @@ -1047,7 +1045,7 @@ void Bond::processBalanceTasks(const int64_t now) /** * Re-allocate flows from dead paths */ - if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE) { + if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { Mutex::Lock _l(_flows_m); for (int i=0;ibonded()) { if (totalBytes) { @@ -1139,7 +1138,7 @@ void Bond::processBalanceTasks(const int64_t now) } } } - +*/ //fprintf(stderr, "_totalBondUnderload=%d (end)\n\n", _totalBondUnderload); /** @@ -1502,7 +1501,7 @@ void Bond::processActiveBackupTasks(const int64_t now) } } -void Bond::setReasonableDefaults(int policy) +void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool useTemplate) { // If invalid bonding policy, try default int _defaultBondingPolicy = BondController::defaultBondingPolicy(); @@ -1548,7 +1547,10 @@ void Bond::setReasonableDefaults(int policy) _lastFrame=0; - + // TODO: Remove + _header=false; + _lastLogTS = RR->node->now(); + _lastPrintTS = RR->node->now(); /** * Paths are actively monitored to provide a 
real-time quality/preference-ordered rapid failover queue. @@ -1635,18 +1637,53 @@ void Bond::setReasonableDefaults(int policy) break; } + if (useTemplate) { + _policyAlias = templateBond->_policyAlias; + _failoverInterval = templateBond->_failoverInterval; + _downDelay = templateBond->_downDelay; + _upDelay = templateBond->_upDelay; + + fprintf(stderr, "TIMERS: strat=%d, fi= %d, bmi= %d, qos= %d, ack= %d, estimateInt= %d, refractory= %d, ud= %d, dd= %d\n", + _slaveMonitorStrategy, + _failoverInterval, + _bondMonitorInterval, + _qosSendInterval, + _ackSendInterval, + _qualityEstimationInterval, + _defaultPathRefractoryPeriod, + _upDelay, + _downDelay); + + if (templateBond->_slaveMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE + && templateBond->_failoverInterval != 0) { + fprintf(stderr, "warning: passive path monitoring was specified, this will prevent failovers from happening in a timely manner.\n"); + } + _abSlaveSelectMethod = templateBond->_abSlaveSelectMethod; + memcpy(_qualityWeights, templateBond->_qualityWeights, ZT_QOS_WEIGHT_SIZE * sizeof(float)); + } + + + // + // Second, apply user specified values (only if they make sense) + /** * Timer geometries and counters */ + // TODO: Think more about the maximum + /* + if (originalBond._failoverInterval > 250 && originalBond._failoverInterval < 65535) { + _failoverInterval = originalBond._failoverInterval; + } + else { + fprintf(stderr, "warning: _failoverInterval (%d) is out of range, using default (%d)\n", originalBond._failoverInterval, _failoverInterval); + } + */ + _bondMonitorInterval = _failoverInterval / 3; + BondController::setMinReqPathMonitorInterval(_bondMonitorInterval); _ackSendInterval = _failoverInterval; _qualityEstimationInterval = _failoverInterval * 2; - _dynamicPathMonitorInterval = 0; - - _downDelay=0; - _upDelay=0; - _ackCutoffCount = 0; _lastAckRateCheck = 0; _qosSendInterval = _bondMonitorInterval * 4; @@ -1654,33 +1691,7 @@ void Bond::setReasonableDefaults(int policy) 
_lastQoSRateCheck = 0; _lastQualityEstimation=0; throughputMeasurementInterval = _ackSendInterval * 2; - BondController::setMinReqPathMonitorInterval(_bondMonitorInterval); - _defaultPathRefractoryPeriod = 8000; - - - - - - // TODO: Remove - _header=false; - _lastLogTS = 0; - _lastPrintTS = 0; - - - - fprintf(stderr, "TIMERS: strat=%d, fi= %d, bmi= %d, qos= %d, ack= %d, estimateInt= %d, refractory= %d, ud= %d, dd= %d\n", - _slaveMonitorStrategy, - _failoverInterval, - _bondMonitorInterval, - _qosSendInterval, - _ackSendInterval, - _qualityEstimationInterval, - _defaultPathRefractoryPeriod, - _upDelay, - _downDelay); - - } void Bond::setUserQualityWeights(float weights[], int len) @@ -1721,22 +1732,20 @@ void Bond::dumpInfo(const int64_t now) fprintf(stderr, "---[ bp=%d, id=%llx, dd=%d, up=%d, pmi=%d, specifiedSlaves=%d, _specifiedPrimarySlave=%d, _specifiedFailInst=%d ]\n", _policy, _peer->identity().address().toInt(), _downDelay, _upDelay, _monitorInterval, _userHasSpecifiedSlaves, _userHasSpecifiedPrimarySlave, _userHasSpecifiedFailoverInstructions); - if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { fprintf(stderr, "Paths (bp=%d, stats=%d, primaryReselect=%d) :\n", _policy, _shouldCollectPathStatistics, _abSlaveSelectMethod); } - if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR - || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR - || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE) { + if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR + || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR + || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { fprintf(stderr, "Paths (bp=%d, stats=%d, fh=%d) :\n", _policy, _shouldCollectPathStatistics, _allowFlowHashing); }*/ - - if ((now - _lastLogTS) < 1000) { + if ((now - _lastPrintTS) < 1000) { return; } _lastPrintTS = now; - _lastLogTS = now; fprintf(stderr, "\n\n"); @@ -1792,21 +1801,21 @@ void Bond::dumpInfo(const int64_t now) } else { fprintf(stderr, " 
"); } - if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP && _abPath && (_abPath == _paths[i].ptr())) { + if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP && _abPath && (_abPath == _paths[i].ptr())) { fprintf(stderr, " ACTIVE "); - } else if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + } else if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { fprintf(stderr, " "); } - if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP && _abFailoverQueue.size() && (_abFailoverQueue.front().ptr() == _paths[i].ptr())) { + if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP && _abFailoverQueue.size() && (_abFailoverQueue.front().ptr() == _paths[i].ptr())) { fprintf(stderr, " NEXT "); - } else if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + } else if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { fprintf(stderr, " "); } fprintf(stderr, "%5s %s\n", slave->ifname().c_str(), pathStr); } } - if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { if (!_abFailoverQueue.empty()) { fprintf(stderr, "\nFailover Queue:\n"); for (std::list >::iterator it(_abFailoverQueue.begin()); it!=_abFailoverQueue.end();++it) { @@ -1827,28 +1836,26 @@ void Bond::dumpInfo(const int64_t now) } } - if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR - || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR - || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE) { - /* + if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR + || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR + || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { if (_numBondedPaths) { fprintf(stderr, "\nBonded Paths:\n"); for (int i=0; i<_numBondedPaths; ++i) { - _paths[_bondedIdx[i]].p->address().toString(currPathStr); - SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[_bondedIdx[i]].p->localSocket()); + _paths[_bondedIdx[i]]->address().toString(currPathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, 
_paths[_bondedIdx[i]]->localSocket()); fprintf(stderr, " [%d]\t%8s\tflows=%3d\tspeed=%7d\trelSpeed=%3d\tipvPref=%3d\tfscore=%9d\t\t%s\n", i, //fprintf(stderr, " [%d]\t%8s\tspeed=%7d\trelSpeed=%3d\tflowCount=%2d\tipvPref=%3d\tfscore=%9d\t\t%s\n", i, slave->ifname().c_str(), - numberOfAssignedFlows(_paths[_bondedIdx[i]].p), + _paths[_bondedIdx[i]]->_assignedFlowCount, slave->speed(), slave->relativeSpeed(), //_paths[_bondedIdx[i]].p->assignedFlows.size(), slave->ipvPref(), - _paths[_bondedIdx[i]].p->failoverScore(), + _paths[_bondedIdx[i]]->_failoverScore, currPathStr); } } - */ /* if (_allowFlowHashing) { //Mutex::Lock _l(_flows_m); diff --git a/node/Bond.hpp b/node/Bond.hpp index 62195b18e..e60e27a19 100644 --- a/node/Bond.hpp +++ b/node/Bond.hpp @@ -79,7 +79,7 @@ public: * @param original * @param peer */ - Bond(const RuntimeEnvironment *renv, const Bond &original, const SharedPtr& peer); + Bond(const RuntimeEnvironment *renv, SharedPtr originalBond, const SharedPtr& peer); /** * @return The human-readable name of the bonding policy @@ -293,8 +293,9 @@ public: * user-specified parameters. * * @param policy Bonding policy + * @param templateBond */ - void setReasonableDefaults(int policy); + void setReasonableDefaults(int policy, SharedPtr templateBond, bool useTemplate); /** * Check and assign user-specified quality weights to this bond. 
diff --git a/node/BondController.cpp b/node/BondController.cpp index cb4414f9f..06da41759 100644 --- a/node/BondController.cpp +++ b/node/BondController.cpp @@ -92,7 +92,7 @@ SharedPtr BondController::createTransportTriggeredBond(const RuntimeEnviro } if (!_defaultBondingPolicy && _defaultBondingPolicyStr.length()) { fprintf(stderr, " no assignment, using default custom (%s)\n", _defaultBondingPolicyStr.c_str()); - bond = new Bond(renv, *(_bondPolicyTemplates[_defaultBondingPolicyStr].ptr()), peer); + bond = new Bond(renv, _bondPolicyTemplates[_defaultBondingPolicyStr].ptr(), peer); } } else { @@ -102,7 +102,7 @@ SharedPtr BondController::createTransportTriggeredBond(const RuntimeEnviro bond = new Bond(renv, _defaultBondingPolicy, peer); } else { - bond = new Bond(renv, *(_bondPolicyTemplates[_policyTemplateAssignments[identity]].ptr()), peer); + bond = new Bond(renv, _bondPolicyTemplates[_policyTemplateAssignments[identity]].ptr(), peer); } } } From 5f0ee4fc78f438d00f382b4e29ebdde621e7e160 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 16 Jun 2020 12:30:21 -0700 Subject: [PATCH 26/35] Fix invalid defaultBondingPolicy conditions, Add ZT_MultipathFlowRebalanceStrategy, Add basic hysteresis mechanism to flow re-assignment --- include/ZeroTierOne.h | 27 ++++ node/Bond.cpp | 270 ++++++++++++++++++---------------------- node/Bond.hpp | 10 +- node/BondController.cpp | 5 +- node/Constants.hpp | 5 - node/Flow.hpp | 1 + node/IncomingPacket.cpp | 2 + node/Peer.cpp | 3 +- service/OneService.cpp | 1 + 9 files changed, 166 insertions(+), 158 deletions(-) diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index 890e56048..dfb520469 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -521,6 +521,33 @@ enum ZT_MultipathMonitorStrategy ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC = 3 }; +/** + * Strategy for re-balancing protocol flows + */ +enum ZT_MultipathFlowRebalanceStrategy +{ + /** + * Flows will only be re-balanced among slaves during + * 
assignment or failover. This minimizes the possibility + * of sequence reordering and is thus the default setting. + */ + ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_PASSIVE = 0, + + /** + * Flows that are active may be re-assigned to a new more + * suitable slave if it can be done without disrupting the flow. + * This setting can sometimes cause sequence re-ordering. + */ + ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_OPPORTUNISTIC = 1, + + /** + * Flows will be continuously re-assigned the most suitable slave + * in order to maximize "balance". This can often cause sequence + * reordering and is thus only recommended for protocols like UDP. + */ + ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_AGGRESSIVE = 2 +}; + /** * Indices for the path quality weight vector */ diff --git a/node/Bond.cpp b/node/Bond.cpp index 9a2f5c267..656285925 100644 --- a/node/Bond.cpp +++ b/node/Bond.cpp @@ -25,6 +25,10 @@ Bond::Bond(const RuntimeEnvironment *renv, int policy, const SharedPtr& pe RR(renv), _peer(peer) { + // TODO: Remove for production + _header=false; + _lastLogTS = RR->node->now(); + _lastPrintTS = RR->node->now(); setReasonableDefaults(policy, SharedPtr(), false); _policyAlias = BondController::getPolicyStrByCode(policy); } @@ -41,6 +45,10 @@ Bond::Bond(const RuntimeEnvironment *renv, SharedPtr originalBond, const S RR(renv), _peer(peer) { + // TODO: Remove for production + _header=false; + _lastLogTS = RR->node->now(); + _lastPrintTS = RR->node->now(); setReasonableDefaults(originalBond->_bondingPolicy, originalBond, true); } @@ -162,7 +170,7 @@ SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) void Bond::recordIncomingInvalidPacket(const SharedPtr& path) { - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordIncomingInvalidPacket() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + // char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordIncomingInvalidPacket() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); Mutex::Lock 
_l(_paths_m); for (int i=0; i& path) void Bond::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) { - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordOutgoingPacket() %s %s, packetId=%llx, payloadLength=%d, verb=%x, flowId=%lx\n", getSlave(path)->ifname().c_str(), pathStr, packetId, payloadLength, verb, flowId); + // char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordOutgoingPacket() %s %s, packetId=%llx, payloadLength=%d, verb=%x, flowId=%lx\n", getSlave(path)->ifname().c_str(), pathStr, packetId, payloadLength, verb, flowId); _freeRandomByte += (unsigned char)(packetId >> 8); // Grab entropy to use in path selection logic if (!_shouldCollectPathStatistics) { return; @@ -320,6 +328,7 @@ bool Bond::assignFlowToBondedPath(SharedPtr &flow, int64_t now) idx = abs((int)(flow->id() % (_numBondedPaths))); //fprintf(stderr, "flow->id()=%d, %x, _numBondedPaths=%d, idx=%d\n", flow->id(), flow->id(), _numBondedPaths, idx); flow->assignPath(_paths[_bondedIdx[idx]],now); + ++(_paths[_bondedIdx[idx]]->_assignedFlowCount); } if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { unsigned char entropy; @@ -341,29 +350,32 @@ bool Bond::assignFlowToBondedPath(SharedPtr &flow, int64_t now) totalIncompleteAllocation += _paths[i]->_allocation; } } - fprintf(stderr, "entropy = %d, totalIncompleteAllocation=%d\n", entropy, totalIncompleteAllocation); + //fprintf(stderr, "entropy = %d, totalIncompleteAllocation=%d\n", entropy, totalIncompleteAllocation); entropy %= totalIncompleteAllocation; - fprintf(stderr, "new entropy = %d\n", entropy); + //fprintf(stderr, "new entropy = %d\n", entropy); for(unsigned int i=0;ibonded()) { SharedPtr slave = RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); _paths[i]->address().toString(curPathStr); uint8_t probabilitySegment = (_totalBondUnderload > 0) ? 
_paths[i]->_affinity : _paths[i]->_allocation; - fprintf(stderr, "i=%2d, entropy=%3d, alloc=%3d, byteload=%4d, segment=%3d, _totalBondUnderload=%3d, ifname=%s, path=%20s\n", i, entropy, _paths[i]->_allocation, _paths[i]->_relativeByteLoad, probabilitySegment, _totalBondUnderload, slave->ifname().c_str(), curPathStr); + //fprintf(stderr, "i=%2d, entropy=%3d, alloc=%3d, byteload=%4d, segment=%3d, _totalBondUnderload=%3d, ifname=%s, path=%20s\n", i, entropy, _paths[i]->_allocation, _paths[i]->_relativeByteLoad, probabilitySegment, _totalBondUnderload, slave->ifname().c_str(), curPathStr); if (entropy <= probabilitySegment) { idx = i; - fprintf(stderr, "\t is best path\n"); + //fprintf(stderr, "\t is best path\n"); break; } entropy -= probabilitySegment; } } if (idx < ZT_MAX_PEER_NETWORK_PATHS) { + if (flow->_assignedPath) { + flow->_previouslyAssignedPath = flow->_assignedPath; + } flow->assignPath(_paths[idx],now); ++(_paths[idx]->_assignedFlowCount); } else { - fprintf(stderr, "could not assign flow?\n"); exit(0); // TODO: Remove + fprintf(stderr, "could not assign flow?\n"); exit(0); // TODO: Remove for production return false; } } @@ -397,6 +409,7 @@ SharedPtr Bond::createFlow(const SharedPtr &path, int32_t flowId, un if (path) { flow->assignPath(path,now); path->address().toString(curPathStr); + path->_assignedFlowCount++; SharedPtr slave = RR->bc->getSlaveBySocket(_policyAlias, flow->assignedPath()->localSocket()); fprintf(stderr, "assigned (rx) flow %x with peer %llx to path %s on %s\n", flow->id(), _peer->_id.address().toInt(), curPathStr, slave->ifname().c_str()); } @@ -818,7 +831,7 @@ void Bond::curateBond(const int64_t now, bool rebuildBond) ++it; ++updatedBondedPathCount; _paths[_bondedIdx[i]]->address().toString(pathStr); - fprintf(stderr, "setting i=%d, _bondedIdx[%d]=%d to bonded (%s %s)\n", i, i, _bondedIdx[i], getSlave(_paths[_bondedIdx[i]])->ifname().c_str(), pathStr); + //fprintf(stderr, "setting i=%d, _bondedIdx[%d]=%d to bonded (%s %s)\n", i, i, 
_bondedIdx[i], getSlave(_paths[_bondedIdx[i]])->ifname().c_str(), pathStr); } } _numBondedPaths = updatedBondedPathCount; @@ -834,8 +847,6 @@ void Bond::curateBond(const int64_t now, bool rebuildBond) void Bond::estimatePathQuality(const int64_t now) { char pathStr[128]; - //--- - uint32_t totUserSpecifiedSlaveSpeed = 0; if (_numBondedPaths) { // Compute relative user-specified speeds of slaves for(unsigned int i=0;i<_numBondedPaths;++i) { @@ -856,17 +867,11 @@ void Bond::estimatePathQuality(const int64_t now) float pdv[ZT_MAX_PEER_NETWORK_PATHS]; float plr[ZT_MAX_PEER_NETWORK_PATHS]; float per[ZT_MAX_PEER_NETWORK_PATHS]; - float thr[ZT_MAX_PEER_NETWORK_PATHS]; - float thm[ZT_MAX_PEER_NETWORK_PATHS]; - float thv[ZT_MAX_PEER_NETWORK_PATHS]; float maxLAT = 0; float maxPDV = 0; float maxPLR = 0; float maxPER = 0; - float maxTHR = 0; - float maxTHM = 0; - float maxTHV = 0; float quality[ZT_MAX_PEER_NETWORK_PATHS]; uint8_t alloc[ZT_MAX_PEER_NETWORK_PATHS]; @@ -877,9 +882,6 @@ void Bond::estimatePathQuality(const int64_t now) memset(&pdv, 0, sizeof(pdv)); memset(&plr, 0, sizeof(plr)); memset(&per, 0, sizeof(per)); - memset(&thr, 0, sizeof(thr)); - memset(&thm, 0, sizeof(thm)); - memset(&thv, 0, sizeof(thv)); memset(&quality, 0, sizeof(quality)); memset(&alloc, 0, sizeof(alloc)); @@ -901,24 +903,6 @@ void Bond::estimatePathQuality(const int64_t now) _paths[i]->_throughputVariance = 0; } } - /* - else { - // Use estimated metrics - if (_paths[i]->throughputSamples.count()) { - // If we have samples, use them - _paths[i]->throughputMean = (uint64_t)_paths[i]->throughputSamples.mean(); - if (_paths[i]->throughputMean > 0) { - _paths[i]->throughputVarianceSamples.push((float)_paths[i]->throughputSamples.stddev() / (float)_paths[i]->throughputMean); - _paths[i]->throughputVariance = _paths[i]->throughputVarianceSamples.mean(); - } - } - else { - // No samples have been collected yet, assume best case scenario - _paths[i]->throughputMean = ZT_QOS_THR_NORM_MAX; - 
_paths[i]->throughputVariance = 0; - } - } - */ // Drain unacknowledged QoS records std::map::iterator it = _paths[i]->qosStatsOut.begin(); uint64_t currentLostRecords = 0; @@ -934,23 +918,16 @@ void Bond::estimatePathQuality(const int64_t now) quality[i]=0; totQuality=0; // Normalize raw observations according to sane limits and/or user specified values - lat[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_latencyMean, 0, _maxAcceptableLatency, 0, 1)); - pdv[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_latencyVariance, 0, _maxAcceptablePacketDelayVariance, 0, 1)); - plr[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_packetLossRatio, 0, _maxAcceptablePacketLossRatio, 0, 1)); - per[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_packetErrorRatio, 0, _maxAcceptablePacketErrorRatio, 0, 1)); - //thr[i] = 1.0; //Utils::normalize(_paths[i]->throughputMean, 0, ZT_QOS_THR_NORM_MAX, 0, 1); - //thm[i] = 1.0; //Utils::normalize(_paths[i]->throughputMax, 0, ZT_QOS_THM_NORM_MAX, 0, 1); - //thv[i] = 1.0; //1.0 / expf(4*Utils::normalize(_paths[i]->throughputVariance, 0, ZT_QOS_THV_NORM_MAX, 0, 1)); + lat[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_latencyMean, 0, _maxAcceptableLatency, 0, 1)); + pdv[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_latencyVariance, 0, _maxAcceptablePacketDelayVariance, 0, 1)); + plr[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_packetLossRatio, 0, _maxAcceptablePacketLossRatio, 0, 1)); + per[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_packetErrorRatio, 0, _maxAcceptablePacketErrorRatio, 0, 1)); //scp[i] = _paths[i]->ipvPref != 0 ? 1.0 : Utils::normalize(_paths[i]->ipScope(), InetAddress::IP_SCOPE_NONE, InetAddress::IP_SCOPE_PRIVATE, 0, 1); // Record bond-wide maximums to determine relative values maxLAT = lat[i] > maxLAT ? lat[i] : maxLAT; maxPDV = pdv[i] > maxPDV ? pdv[i] : maxPDV; maxPLR = plr[i] > maxPLR ? plr[i] : maxPLR; maxPER = per[i] > maxPER ? per[i] : maxPER; - //maxTHR = thr[i] > maxTHR ? 
thr[i] : maxTHR; - //maxTHM = thm[i] > maxTHM ? thm[i] : maxTHM; - //maxTHV = thv[i] > maxTHV ? thv[i] : maxTHV; - //fprintf(stdout, "EH %d: lat=%8.3f, ltm=%8.3f, pdv=%8.3f, plr=%5.3f, per=%5.3f, thr=%8f, thm=%5.3f, thv=%5.3f, avl=%5.3f, age=%8.2f, scp=%4d, q=%5.3f, qtot=%5.3f, ac=%d if=%s, path=%s\n", // i, lat[i], ltm[i], pdv[i], plr[i], per[i], thr[i], thm[i], thv[i], avl[i], age[i], scp[i], quality[i], totQuality, alloc[i], getSlave(_paths[i])->ifname().c_str(), pathStr); @@ -962,9 +939,6 @@ void Bond::estimatePathQuality(const int64_t now) quality[i] += ((maxPDV > 0.0f ? pdv[i] / maxPDV : 0.0f) * _qualityWeights[ZT_QOS_PDV_IDX]); quality[i] += ((maxPLR > 0.0f ? plr[i] / maxPLR : 0.0f) * _qualityWeights[ZT_QOS_PLR_IDX]); quality[i] += ((maxPER > 0.0f ? per[i] / maxPER : 0.0f) * _qualityWeights[ZT_QOS_PER_IDX]); - //quality[i] += ((maxTHR > 0.0f ? thr[i] / maxTHR : 0.0f) * _qualityWeights[ZT_QOS_THR_IDX]); - //quality[i] += ((maxTHM > 0.0f ? thm[i] / maxTHM : 0.0f) * _qualityWeights[ZT_QOS_THM_IDX]); - //quality[i] += ((maxTHV > 0.0f ? 
thv[i] / maxTHV : 0.0f) * _qualityWeights[ZT_QOS_THV_IDX]); //quality[i] += (scp[i] * _qualityWeights[ZT_QOS_SCP_IDX]); totQuality += quality[i]; } @@ -1007,6 +981,7 @@ void Bond::estimatePathQuality(const int64_t now) } _header=true; } + /* fprintf(stdout, "%ld, %d, %d, %d, ",((now - RR->bc->getBondStartTime())),_numBondedPaths,_totalBondUnderload, _flows.size()); for(unsigned int i=0;iifname().c_str(), pathStr, _paths[i]->_latencyMean, lat[i],pdv[i], _paths[i]->_packetLossRatio, plr[i],per[i],thr[i],thm[i],thv[i],(now - _paths[i]->lastIn()),quality[i],alloc[i], _paths[i]->_relativeByteLoad, _paths[i]->_assignedFlowCount, _paths[i]->alive(now, true), _paths[i]->eligible(now,_ackSendInterval), _paths[i]->qosStatsOut.size()); } - } - fprintf(stdout, "\n"); + }*/ + //fprintf(stdout, "\n"); } } void Bond::processBalanceTasks(const int64_t now) { - //fprintf(stderr, "processBalanceTasks\n"); char curPathStr[128]; + + // TODO: Generalize + int totalAllocation = 0; + for (int i=0;ibonded() && _paths[i]->eligible(now,_ackSendInterval)) { + totalAllocation+=_paths[i]->_allocation; + } + } + unsigned char minimumAllocationValue = 0.33 * ((float)totalAllocation / (float)_numBondedPaths); + if (_allowFlowHashing) { /** * Clean up and reset flows if necessary @@ -1067,6 +1054,32 @@ void Bond::processBalanceTasks(const int64_t now) } } } + /** + * Re-allocate flows from under-performing + * NOTE: This could be part of the above block but was kept separate for clarity. 
+ */ + if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { + Mutex::Lock _l(_flows_m); + for (int i=0;ibonded() && _paths[i]->eligible(now,_ackSendInterval) && (_paths[i]->_allocation < minimumAllocationValue) && _paths[i]->_assignedFlowCount) { + _paths[i]->address().toString(curPathStr); + fprintf(stderr, "%d reallocating flows from under-performing path %s on %s\n", (RR->node->now() - RR->bc->getBondStartTime()), curPathStr, getSlave(_paths[i])->ifname().c_str()); + std::map >::iterator flow_it = _flows.begin(); + while (flow_it != _flows.end()) { + if (flow_it->second->assignedPath() == _paths[i]) { + if(assignFlowToBondedPath(flow_it->second, now)) { + _paths[i]->_assignedFlowCount--; + } + } + ++flow_it; + } + _paths[i]->_shouldReallocateFlows = false; + } + } + } } /** * Tasks specific to (Balance Round Robin) @@ -1091,70 +1104,47 @@ void Bond::processBalanceTasks(const int64_t now) if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { if (_allowFlowHashing) { Mutex::Lock _l(_flows_m); - /** - * Re-balance flows in proportion to slave capacity (or when eligibility changes) - */ - if ((now - _lastFlowRebalance) > ZT_FLOW_REBALANCE_INTERVAL) { + if (_flowRebalanceStrategy == ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_PASSIVE) { + // Do nothing here, this is taken care of in the more general case above. + } + if (_flowRebalanceStrategy == ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_OPPORTUNISTIC) { + // If the flow is temporarily inactive we should take this opportunity to re-assign the flow if needed. 
+ } + if (_flowRebalanceStrategy == ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_AGGRESSIVE) { /** - * Determine "load" for bonded paths + * Return flows to the original path if it has once again become available */ - uint64_t totalBytes = 0; - for(unsigned int i=0;ibonded()) { - _paths[i]->_byteLoad = 0; - std::map >::iterator flow_it = _flows.begin(); - while (flow_it != _flows.end()) { - if (flow_it->second->assignedPath() == _paths[i]) { - _paths[i]->_byteLoad += flow_it->second->totalBytes(); - } - ++flow_it; + if ((now - _lastFlowRebalance) > ZT_FLOW_REBALANCE_INTERVAL) { + std::map >::iterator flow_it = _flows.begin(); + while (flow_it != _flows.end()) { + if (flow_it->second->_previouslyAssignedPath && flow_it->second->_previouslyAssignedPath->eligible(now, _ackSendInterval) + && (flow_it->second->_previouslyAssignedPath->_allocation >= (minimumAllocationValue * 2))) { + fprintf(stderr, "moving flow back onto its previous path assignment (based on eligibility)\n"); + (flow_it->second->_assignedPath->_assignedFlowCount)--; + flow_it->second->assignPath(flow_it->second->_previouslyAssignedPath,now); + (flow_it->second->_previouslyAssignedPath->_assignedFlowCount)++; } - totalBytes += _paths[i]->_byteLoad; + ++flow_it; } + _lastFlowRebalance = now; } /** - * Determine "affinity" for bonded path + * Return flows to the original path if it has once again become (performant) */ - //fprintf(stderr, "\n\n"); - - _totalBondUnderload = 0; -/* - for(unsigned int i=0;ibonded()) { - if (totalBytes) { - uint8_t relativeByteLoad = std::ceil(((float)_paths[i]->_byteLoad / (float)totalBytes) * (float)255); - //fprintf(stderr, "lastComputedAllocation = %d\n", _paths[i]->allocation); - //fprintf(stderr, " relativeByteLoad = %d\n", relativeByteLoad); - _paths[i]->_relativeByteLoad = relativeByteLoad; - uint8_t relativeUnderload = std::max(0, (int)_paths[i]->_allocation - (int)relativeByteLoad); - //fprintf(stderr, " relativeUnderload = %d\n", relativeUnderload); - _totalBondUnderload 
+= relativeUnderload; - //fprintf(stderr, " _totalBondUnderload = %d\n\n", _totalBondUnderload); - //_paths[i]->affinity = (relativeUnderload > 0 ? relativeUnderload : _paths[i]->_allocation); - } - else { // set everything to base values - _totalBondUnderload = 0; - //_paths[i]->affinity = 0; + if ((now - _lastFlowRebalance) > ZT_FLOW_REBALANCE_INTERVAL) { + std::map >::iterator flow_it = _flows.begin(); + while (flow_it != _flows.end()) { + if (flow_it->second->_previouslyAssignedPath && flow_it->second->_previouslyAssignedPath->eligible(now, _ackSendInterval) + && (flow_it->second->_previouslyAssignedPath->_allocation >= (minimumAllocationValue * 2))) { + fprintf(stderr, "moving flow back onto its previous path assignment (based on performance)\n"); + (flow_it->second->_assignedPath->_assignedFlowCount)--; + flow_it->second->assignPath(flow_it->second->_previouslyAssignedPath,now); + (flow_it->second->_previouslyAssignedPath->_assignedFlowCount)++; } + ++flow_it; } + _lastFlowRebalance = now; } -*/ - //fprintf(stderr, "_totalBondUnderload=%d (end)\n\n", _totalBondUnderload); - - /** - * - */ - //fprintf(stderr, "_lastFlowRebalance\n"); - std::map >::iterator it = _flows.begin(); - while (it != _flows.end()) { - int32_t flowId = it->first; - SharedPtr flow = it->second; - if ((now - flow->_lastPathReassignment) > ZT_FLOW_MIN_REBALANCE_INTERVAL) { - //fprintf(stdout, " could move : %x\n", flowId); - } - ++it; - } - _lastFlowRebalance = now; } } else if (!_allowFlowHashing) { @@ -1440,7 +1430,7 @@ void Bond::processActiveBackupTasks(const int64_t now) if (!_abFailoverQueue.empty()) { fprintf(stderr, "%llu AB: (failure) there are (%lu) slaves in queue to choose from...\n", ((now - RR->bc->getBondStartTime())), _abFailoverQueue.size()); dequeueNextActiveBackupPath(now); - _abPath->address().toString(curPathStr); fprintf(stderr, "%llu sAB: (failure) switched to %s on %s\n", ((now - RR->bc->getBondStartTime())), curPathStr, getSlave(_abPath)->ifname().c_str()); + 
_abPath->address().toString(curPathStr); fprintf(stderr, "%llu AB: (failure) switched to %s on %s\n", ((now - RR->bc->getBondStartTime())), curPathStr, getSlave(_abPath)->ifname().c_str()); } else { fprintf(stderr, "%llu AB: (failure) nothing available in the slave queue, doing nothing.\n", ((now - RR->bc->getBondStartTime()))); } @@ -1515,12 +1505,16 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool _bondingPolicy= policy; } + _freeRandomByte = 0; + _lastCheckUserPreferences = 0; + _lastBackgroundTaskCheck = 0; + _downDelay = 0; _upDelay = 0; _allowFlowHashing=false; _bondMonitorInterval=0; _shouldCollectPathStatistics=false; - _lastBackgroundTaskCheck=0; + // Path negotiation _allowPathNegotiation=false; @@ -1539,7 +1533,7 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool _lastFlowRebalance=0; _totalBondUnderload = 0; - //_maxAcceptableLatency + _maxAcceptableLatency = 100; _maxAcceptablePacketDelayVariance = 50; _maxAcceptablePacketLossRatio = 0.10; _maxAcceptablePacketErrorRatio = 0.10; @@ -1547,17 +1541,18 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool _lastFrame=0; - // TODO: Remove - _header=false; - _lastLogTS = RR->node->now(); - _lastPrintTS = RR->node->now(); + + + /* ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_PASSIVE is the most conservative strategy and is + least likely to cause unexpected behavior */ + _flowRebalanceStrategy = ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_AGGRESSIVE; /** * Paths are actively monitored to provide a real-time quality/preference-ordered rapid failover queue. 
*/ switch (policy) { case ZT_BONDING_POLICY_ACTIVE_BACKUP: - _failoverInterval = 5000; + _failoverInterval = 500; _abSlaveSelectMethod = ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE; _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; _qualityWeights[ZT_QOS_LAT_IDX] = 0.2f; @@ -1581,7 +1576,7 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool * Paths are monitored to determine when/if one needs to be added or removed from the rotation */ case ZT_BONDING_POLICY_BALANCE_RR: - _failoverInterval = 5000; + _failoverInterval = 500; _allowFlowHashing = false; _packetsPerSlave = 1024; _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; @@ -1600,8 +1595,8 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool * path and where to place the next flow. */ case ZT_BONDING_POLICY_BALANCE_XOR: - _failoverInterval = 5000;; - _upDelay=_bondMonitorInterval*2; + _failoverInterval = 500; + _upDelay = _bondMonitorInterval * 2; _allowFlowHashing = true; _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; @@ -1623,13 +1618,13 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool _failoverInterval = 3000; _allowFlowHashing = true; _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; - _qualityWeights[ZT_QOS_LAT_IDX] = 0.3f; + _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_PDV_IDX] = 0.1f; - _qualityWeights[ZT_QOS_PLR_IDX] = 0.1f; - _qualityWeights[ZT_QOS_PER_IDX] = 0.1f; + _qualityWeights[ZT_QOS_PDV_IDX] = 0.4f; + _qualityWeights[ZT_QOS_PLR_IDX] = 0.2f; + _qualityWeights[ZT_QOS_PER_IDX] = 0.0f; _qualityWeights[ZT_QOS_THR_IDX] = 0.0f; - _qualityWeights[ZT_QOS_THM_IDX] = 0.4f; + _qualityWeights[ZT_QOS_THM_IDX] = 0.0f; _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; break; @@ -1637,6 +1632,8 @@ void Bond::setReasonableDefaults(int 
policy, SharedPtr templateBond, bool break; } + /* If a user has specified custom parameters for this bonding policy, overlay + them onto the defaults that were previously set */ if (useTemplate) { _policyAlias = templateBond->_policyAlias; _failoverInterval = templateBond->_failoverInterval; @@ -1742,7 +1739,7 @@ void Bond::dumpInfo(const int64_t now) fprintf(stderr, "Paths (bp=%d, stats=%d, fh=%d) :\n", _policy, _shouldCollectPathStatistics, _allowFlowHashing); }*/ - if ((now - _lastPrintTS) < 1000) { + if ((now - _lastPrintTS) < 2000) { return; } _lastPrintTS = now; @@ -1856,30 +1853,7 @@ void Bond::dumpInfo(const int64_t now) currPathStr); } } - /* - if (_allowFlowHashing) { - //Mutex::Lock _l(_flows_m); - if (_flows.size()) { - fprintf(stderr, "\nFlows:\n"); - std::map >::iterator it = _flows.begin(); - while (it != _flows.end()) { - it->second->assignedPath()->address().toString(currPathStr); - SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, it->second->assignedPath()->localSocket()); - fprintf(stderr, " [%4x] in=%16llu, out=%16llu, bytes=%16llu, last=%16llu, if=%8s\t\t%s\n", - it->second->id(), - it->second->bytesInPerUnitTime(), - it->second->bytesOutPerUnitTime(), - it->second->totalBytes(), - it->second->age(now), - slave->ifname().c_str(), - currPathStr); - ++it; - } - } - } - */ } - //fprintf(stderr, "\n\n\n\n\n"); } } // namespace ZeroTier \ No newline at end of file diff --git a/node/Bond.hpp b/node/Bond.hpp index e60e27a19..353ed9317 100644 --- a/node/Bond.hpp +++ b/node/Bond.hpp @@ -87,7 +87,7 @@ public: std::string policyAlias() { return _policyAlias; } /** - * Inform the bond about the path that its peer just learned about + * Inform the bond about the path that its peer (owning object) just learned about * * @param path Newly-learned Path which should now be handled by the Bond * @param now Current time @@ -434,7 +434,12 @@ public: inline void setFailoverInterval(uint32_t interval) { _failoverInterval = interval; } /** - * @param strategy 
The strategy that the bond uses to prob for path aliveness and quality + * @param strategy Strategy that the bond uses to re-assign protocol flows. + */ + inline void setFlowRebalanceStrategy(uint32_t strategy) { _flowRebalanceStrategy = strategy; } + + /** + * @param strategy Strategy that the bond uses to prob for path aliveness and quality */ inline void setSlaveMonitorStrategy(uint8_t strategy) { _slaveMonitorStrategy = strategy; } @@ -578,6 +583,7 @@ private: // balance-aware uint64_t _totalBondUnderload; + uint8_t _flowRebalanceStrategy; // dynamic slave monitoring uint8_t _slaveMonitorStrategy; diff --git a/node/BondController.cpp b/node/BondController.cpp index 06da41759..6b21d9998 100644 --- a/node/BondController.cpp +++ b/node/BondController.cpp @@ -11,6 +11,7 @@ */ /****/ +#include "Constants.hpp" #include "BondController.hpp" #include "Peer.hpp" @@ -23,6 +24,7 @@ BondController::BondController(const RuntimeEnvironment *renv) : RR(renv) { bondStartTime = RR->node->now(); + _defaultBondingPolicy = ZT_BONDING_POLICY_NONE; } bool BondController::slaveAllowed(std::string &policyAlias, SharedPtr slave) @@ -83,10 +85,9 @@ SharedPtr BondController::createTransportTriggeredBond(const RuntimeEnviro Bond *bond = nullptr; if (!_bonds.count(identity)) { std::string policyAlias; - int _defaultBondingPolicy = defaultBondingPolicy(); fprintf(stderr, "new bond, registering for %llx\n", identity); if (!_policyTemplateAssignments.count(identity)) { - if (defaultBondingPolicy()) { + if (_defaultBondingPolicy) { fprintf(stderr, " no assignment, using default (%d)\n", _defaultBondingPolicy); bond = new Bond(renv, _defaultBondingPolicy, peer); } diff --git a/node/Constants.hpp b/node/Constants.hpp index c27e02319..9f2cd80a5 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -341,11 +341,6 @@ */ #define ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE 32 -/** - * Number of samples to consider when processing long-term trends - */ -#define ZT_QOS_LONGTERM_SAMPLE_WIN_SIZE 
(ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE * 4) - /** * Max allowable time spent in any queue (in ms) */ diff --git a/node/Flow.hpp b/node/Flow.hpp index 5994a4fb2..b19fd475c 100644 --- a/node/Flow.hpp +++ b/node/Flow.hpp @@ -116,6 +116,7 @@ struct Flow int64_t _lastActivity; int64_t _lastPathReassignment; SharedPtr _assignedPath; + SharedPtr _previouslyAssignedPath; }; } // namespace ZeroTier diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index 702c08090..43e36f3ce 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -221,9 +221,11 @@ bool IncomingPacket::_doACK(const RuntimeEnvironment *RR,void *tPtr,const Shared bool IncomingPacket::_doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) { SharedPtr bond = peer->bond(); + /* TODO: Fix rate gate issue if (!bond || !bond->rateGateQoS(RR->node->now())) { return true; } + */ /* Dissect incoming QoS packet. From this we can compute latency values and their variance. * The latency variance is used as a measure of "jitter". 
*/ if (payloadLength() > ZT_QOS_MAX_PACKET_SIZE || payloadLength() < ZT_QOS_MIN_PACKET_SIZE) { diff --git a/node/Peer.cpp b/node/Peer.cpp index 1ee0c1240..30911b43c 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -55,7 +55,8 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _remoteMultipathSupported(false), _canUseMultipath(false), _shouldCollectPathStatistics(0), - _lastComputedAggregateMeanLatency(0) + _lastComputedAggregateMeanLatency(0), + _bondingPolicy(0) { if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) { throw ZT_EXCEPTION_INVALID_ARGUMENT; diff --git a/service/OneService.cpp b/service/OneService.cpp index ab8594eec..ec24f7ade 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -1621,6 +1621,7 @@ public: // Bond-specific properties newTemplateBond->setUpDelay(OSUtils::jsonInt(customPolicy["upDelay"],-1)); newTemplateBond->setDownDelay(OSUtils::jsonInt(customPolicy["downDelay"],-1)); + newTemplateBond->setFlowRebalanceStrategy(OSUtils::jsonInt(customPolicy["flowRebalanceStrategy"],(uint64_t)0)); newTemplateBond->setFailoverInterval(OSUtils::jsonInt(customPolicy["failoverInterval"],(uint64_t)0)); newTemplateBond->setPacketsPerSlave(OSUtils::jsonInt(customPolicy["packetsPerSlave"],-1)); std::string slaveMonitorStrategyStr(OSUtils::jsonString(customPolicy["slaveMonitorStrategy"],"")); From a33a494d6070c12669bb9696616d725155c01dc7 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Wed, 17 Jun 2020 14:54:13 -0700 Subject: [PATCH 27/35] Adjust terminology --- include/ZeroTierOne.h | 28 +-- node/Bond.cpp | 364 +++++++++++++++++----------------- node/Bond.hpp | 60 +++--- node/BondController.cpp | 80 ++++---- node/BondController.hpp | 34 ++-- node/Constants.hpp | 6 +- node/Node.cpp | 2 - node/Path.hpp | 20 +- node/Peer.cpp | 4 +- osdep/Binder.hpp | 14 +- osdep/{Slave.hpp => Link.hpp} | 84 ++++---- service/MULTIPATH.md | 103 +++++----- service/OneService.cpp | 83 ++++---- 13 files changed, 440 
insertions(+), 442 deletions(-) rename osdep/{Slave.hpp => Link.hpp} (58%) diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index dfb520469..afa75c290 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -455,39 +455,39 @@ enum ZT_MultipathBondingPolicy }; /** - * Multipath active re-selection policy (slaveSelectMethod) + * Multipath active re-selection policy (linkSelectMethod) */ -enum ZT_MultipathSlaveSelectMethod +enum ZT_MultipathLinkSelectMethod { /** - * Primary slave regains status as active slave whenever it comes back up - * (default when slaves are explicitly specified) + * Primary link regains status as active link whenever it comes back up + * (default when links are explicitly specified) */ ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS = 0, /** - * Primary slave regains status as active slave when it comes back up and - * (if) it is better than the currently-active slave. + * Primary link regains status as active link when it comes back up and + * (if) it is better than the currently-active link. */ ZT_MULTIPATH_RESELECTION_POLICY_BETTER = 1, /** - * Primary slave regains status as active slave only if the currently-active - * slave fails. + * Primary link regains status as active link only if the currently-active + * link fails. */ ZT_MULTIPATH_RESELECTION_POLICY_FAILURE = 2, /** - * The primary slave can change if a superior path is detected. + * The primary link can change if a superior path is detected. 
* (default if user provides no fail-over guidance) */ ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE = 3 }; /** - * Mode of multipath slave interface + * Mode of multipath link interface */ -enum ZT_MultipathSlaveMode +enum ZT_MultipathLinkMode { ZT_MULTIPATH_SLAVE_MODE_PRIMARY = 0, ZT_MULTIPATH_SLAVE_MODE_SPARE = 1 @@ -527,7 +527,7 @@ enum ZT_MultipathMonitorStrategy enum ZT_MultipathFlowRebalanceStrategy { /** - * Flows will only be re-balanced among slaves during + * Flows will only be re-balanced among links during * assignment or failover. This minimizes the possibility * of sequence reordering and is thus the default setting. */ @@ -535,13 +535,13 @@ enum ZT_MultipathFlowRebalanceStrategy /** * Flows that are active may be re-assigned to a new more - * suitable slave if it can be done without disrupting the flow. + * suitable link if it can be done without disrupting the flow. * This setting can sometimes cause sequence re-ordering. */ ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_OPPORTUNISTIC = 0, /** - * Flows will be continuously re-assigned the most suitable slave + * Flows will be continuously re-assigned the most suitable link * in order to maximize "balance". This can often cause sequence * reordering and is thus only reccomended for protocols like UDP. 
*/ diff --git a/node/Bond.cpp b/node/Bond.cpp index 656285925..0338f5195 100644 --- a/node/Bond.cpp +++ b/node/Bond.cpp @@ -54,9 +54,9 @@ Bond::Bond(const RuntimeEnvironment *renv, SharedPtr originalBond, const S void Bond::nominatePath(const SharedPtr& path, int64_t now) { - char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "nominatePath: %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "nominatePath: %s %s\n", getLink(path)->ifname().c_str(), pathStr); Mutex::Lock _l(_paths_m); - if (!RR->bc->slaveAllowed(_policyAlias, getSlave(path))) { + if (!RR->bc->linkAllowed(_policyAlias, getLink(path))) { return; } bool alreadyPresent = false; @@ -72,7 +72,7 @@ void Bond::nominatePath(const SharedPtr& path, int64_t now) if (!_paths[i]) { fprintf(stderr, "notifyOfNewPath(): Setting path %s to idx=%d\n", pathStr, i); _paths[i] = path; - //_paths[i]->slave = RR->bc->getSlaveBySocket(_policyAlias, path->localSocket()); + //_paths[i]->link = RR->bc->getLinkBySocket(_policyAlias, path->localSocket()); _paths[i]->startTrial(now); break; } @@ -107,18 +107,18 @@ SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) */ if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) { if (!_allowFlowHashing) { - //fprintf(stderr, "_rrPacketsSentOnCurrSlave=%d, _numBondedPaths=%d, _rrIdx=%d\n", _rrPacketsSentOnCurrSlave, _numBondedPaths, _rrIdx); - if (_packetsPerSlave == 0) { + //fprintf(stderr, "_rrPacketsSentOnCurrLink=%d, _numBondedPaths=%d, _rrIdx=%d\n", _rrPacketsSentOnCurrLink, _numBondedPaths, _rrIdx); + if (_packetsPerLink == 0) { // Randomly select a path return _paths[_bondedIdx[_freeRandomByte % _numBondedPaths]]; // TODO: Optimize } - if (_rrPacketsSentOnCurrSlave < _packetsPerSlave) { - // Continue to use this slave - ++_rrPacketsSentOnCurrSlave; + if (_rrPacketsSentOnCurrLink < _packetsPerLink) { + // Continue to use this link + ++_rrPacketsSentOnCurrLink; return 
_paths[_bondedIdx[_rrIdx]]; } // Reset striping counter - _rrPacketsSentOnCurrSlave = 0; + _rrPacketsSentOnCurrLink = 0; if (_numBondedPaths == 1) { _rrIdx = 0; } @@ -170,7 +170,7 @@ SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) void Bond::recordIncomingInvalidPacket(const SharedPtr& path) { - // char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordIncomingInvalidPacket() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + // char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordIncomingInvalidPacket() %s %s\n", getLink(path)->ifname().c_str(), pathStr); Mutex::Lock _l(_paths_m); for (int i=0; i& path) void Bond::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) { - // char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordOutgoingPacket() %s %s, packetId=%llx, payloadLength=%d, verb=%x, flowId=%lx\n", getSlave(path)->ifname().c_str(), pathStr, packetId, payloadLength, verb, flowId); + // char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordOutgoingPacket() %s %s, packetId=%llx, payloadLength=%d, verb=%x, flowId=%lx\n", getLink(path)->ifname().c_str(), pathStr, packetId, payloadLength, verb, flowId); _freeRandomByte += (unsigned char)(packetId >> 8); // Grab entropy to use in path selection logic if (!_shouldCollectPathStatistics) { return; @@ -218,7 +218,7 @@ void Bond::recordOutgoingPacket(const SharedPtr &path, const uint64_t pack void Bond::recordIncomingPacket(const SharedPtr& path, uint64_t packetId, uint16_t payloadLength, Packet::Verb verb, int32_t flowId, int64_t now) { - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordIncomingPacket() %s %s, packetId=%llx, payloadLength=%d, verb=%x, flowId=%lx\n", getSlave(path)->ifname().c_str(), pathStr, packetId, payloadLength, verb, flowId); + //char 
pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordIncomingPacket() %s %s, packetId=%llx, payloadLength=%d, verb=%x, flowId=%lx\n", getLink(path)->ifname().c_str(), pathStr, packetId, payloadLength, verb, flowId); bool isFrame = (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME); bool shouldRecord = (packetId & (ZT_QOS_ACK_DIVISOR - 1) && (verb != Packet::VERB_ACK) @@ -261,7 +261,7 @@ void Bond::recordIncomingPacket(const SharedPtr& path, uint64_t packetId, void Bond::receivedQoS(const SharedPtr& path, int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts) { - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "receivedQoS() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "receivedQoS() %s %s\n", getLink(path)->ifname().c_str(), pathStr); Mutex::Lock _l(_paths_m); // Look up egress times and compute latency values for each record std::map::iterator it; @@ -273,13 +273,13 @@ void Bond::receivedQoS(const SharedPtr& path, int64_t now, int count, uint } } path->qosRecordSize.push(count); - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "receivedQoS() on path %s %s, count=%d, successful=%d, qosStatsOut.size()=%d\n", getSlave(path)->ifname().c_str(), pathStr, count, path->aknowledgedQoSRecordCountSinceLastCheck, path->qosStatsOut.size()); + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "receivedQoS() on path %s %s, count=%d, successful=%d, qosStatsOut.size()=%d\n", getLink(path)->ifname().c_str(), pathStr, count, path->aknowledgedQoSRecordCountSinceLastCheck, path->qosStatsOut.size()); } void Bond::receivedAck(const SharedPtr& path, int64_t now, int32_t ackedBytes) { Mutex::Lock _l(_paths_m); - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "receivedAck() %s %s, (ackedBytes=%d, lastAckReceived=%lld, ackAge=%lld)\n", getSlave(path)->ifname().c_str(), pathStr, ackedBytes, 
path->lastAckReceived, path->ackAge(now)); + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "receivedAck() %s %s, (ackedBytes=%d, lastAckReceived=%lld, ackAge=%lld)\n", getLink(path)->ifname().c_str(), pathStr, ackedBytes, path->lastAckReceived, path->ackAge(now)); path->_lastAckReceived = now; path->_unackedBytes = (ackedBytes > path->_unackedBytes) ? 0 : path->_unackedBytes - ackedBytes; int64_t timeSinceThroughputEstimate = (now - path->_lastThroughputEstimation); @@ -300,7 +300,7 @@ void Bond::receivedAck(const SharedPtr& path, int64_t now, int32_t ackedBy int32_t Bond::generateQoSPacket(const SharedPtr& path, int64_t now, char *qosBuffer) { - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "generateQoSPacket() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "generateQoSPacket() %s %s\n", getLink(path)->ifname().c_str(), pathStr); int32_t len = 0; std::map::iterator it = path->qosStatsIn.begin(); int i=0; @@ -355,10 +355,10 @@ bool Bond::assignFlowToBondedPath(SharedPtr &flow, int64_t now) //fprintf(stderr, "new entropy = %d\n", entropy); for(unsigned int i=0;ibonded()) { - SharedPtr slave = RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); _paths[i]->address().toString(curPathStr); uint8_t probabilitySegment = (_totalBondUnderload > 0) ? 
_paths[i]->_affinity : _paths[i]->_allocation; - //fprintf(stderr, "i=%2d, entropy=%3d, alloc=%3d, byteload=%4d, segment=%3d, _totalBondUnderload=%3d, ifname=%s, path=%20s\n", i, entropy, _paths[i]->_allocation, _paths[i]->_relativeByteLoad, probabilitySegment, _totalBondUnderload, slave->ifname().c_str(), curPathStr); + //fprintf(stderr, "i=%2d, entropy=%3d, alloc=%3d, byteload=%4d, segment=%3d, _totalBondUnderload=%3d, ifname=%s, path=%20s\n", i, entropy, _paths[i]->_allocation, _paths[i]->_relativeByteLoad, probabilitySegment, _totalBondUnderload, link->ifname().c_str(), curPathStr); if (entropy <= probabilitySegment) { idx = i; //fprintf(stderr, "\t is best path\n"); @@ -380,8 +380,8 @@ bool Bond::assignFlowToBondedPath(SharedPtr &flow, int64_t now) } } flow->assignedPath()->address().toString(curPathStr); - SharedPtr slave = RR->bc->getSlaveBySocket(_policyAlias, flow->assignedPath()->localSocket()); - fprintf(stderr, "assigned (tx) flow %x with peer %llx to path %s on %s (idx=%d)\n", flow->id(), _peer->_id.address().toInt(), curPathStr, slave->ifname().c_str(), idx); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, flow->assignedPath()->localSocket()); + fprintf(stderr, "assigned (tx) flow %x with peer %llx to path %s on %s (idx=%d)\n", flow->id(), _peer->_id.address().toInt(), curPathStr, link->ifname().c_str(), idx); return true; } @@ -410,8 +410,8 @@ SharedPtr Bond::createFlow(const SharedPtr &path, int32_t flowId, un flow->assignPath(path,now); path->address().toString(curPathStr); path->_assignedFlowCount++; - SharedPtr slave = RR->bc->getSlaveBySocket(_policyAlias, flow->assignedPath()->localSocket()); - fprintf(stderr, "assigned (rx) flow %x with peer %llx to path %s on %s\n", flow->id(), _peer->_id.address().toInt(), curPathStr, slave->ifname().c_str()); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, flow->assignedPath()->localSocket()); + fprintf(stderr, "assigned (rx) flow %x with peer %llx to path %s on %s\n", flow->id(), 
_peer->_id.address().toInt(), curPathStr, link->ifname().c_str()); } /** * Add a flow when no path was provided. This means that it is an outgoing packet @@ -460,7 +460,7 @@ void Bond::forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now) void Bond::processIncomingPathNegotiationRequest(uint64_t now, SharedPtr &path, int16_t remoteUtility) { //fprintf(stderr, "processIncomingPathNegotiationRequest\n"); - if (_abSlaveSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { + if (_abLinkSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { return; } Mutex::Lock _l(_paths_m); @@ -469,18 +469,18 @@ void Bond::processIncomingPathNegotiationRequest(uint64_t now, SharedPtr & if (!_lastPathNegotiationCheck) { return; } - SharedPtr slave = RR->bc->getSlaveBySocket(_policyAlias, path->localSocket()); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, path->localSocket()); if (remoteUtility > _localUtility) { - fprintf(stderr, "peer suggests path, its utility (%d) is greater than ours (%d), we will switch to %s on %s (ls=%llx)\n", remoteUtility, _localUtility, pathStr, slave->ifname().c_str(), path->localSocket()); + fprintf(stderr, "peer suggests path, its utility (%d) is greater than ours (%d), we will switch to %s on %s (ls=%llx)\n", remoteUtility, _localUtility, pathStr, link->ifname().c_str(), path->localSocket()); negotiatedPath = path; } if (remoteUtility < _localUtility) { - fprintf(stderr, "peer suggests path, its utility (%d) is less than ours (%d), we will NOT switch to %s on %s (ls=%llx)\n", remoteUtility, _localUtility, pathStr, slave->ifname().c_str(), path->localSocket()); + fprintf(stderr, "peer suggests path, its utility (%d) is less than ours (%d), we will NOT switch to %s on %s (ls=%llx)\n", remoteUtility, _localUtility, pathStr, link->ifname().c_str(), path->localSocket()); } if (remoteUtility == _localUtility) { fprintf(stderr, "peer suggest path, but utility is equal, picking choice made by peer with greater identity.\n"); if 
(_peer->_id.address().toInt() > RR->node->identity().address().toInt()) { - fprintf(stderr, "peer identity was greater, going with their choice of %s on %s (ls=%llx)\n", pathStr, slave->ifname().c_str(), path->localSocket()); + fprintf(stderr, "peer identity was greater, going with their choice of %s on %s (ls=%llx)\n", pathStr, link->ifname().c_str(), path->localSocket()); negotiatedPath = path; } else { fprintf(stderr, "our identity was greater, no change\n"); @@ -532,8 +532,8 @@ void Bond::pathNegotiationCheck(void *tPtr, const int64_t now) ++_numSentPathNegotiationRequests; _lastSentPathNegotiationRequest = now; _paths[maxOutPathIdx]->address().toString(pathStr); - SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[maxOutPathIdx]->localSocket()); - fprintf(stderr, "sending request to use %s on %s, ls=%llx, utility=%d\n", pathStr, slave->ifname().c_str(), _paths[maxOutPathIdx]->localSocket(), _localUtility); + SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[maxOutPathIdx]->localSocket()); + fprintf(stderr, "sending request to use %s on %s, ls=%llx, utility=%d\n", pathStr, link->ifname().c_str(), _paths[maxOutPathIdx]->localSocket(), _localUtility); } } /** @@ -551,8 +551,8 @@ void Bond::pathNegotiationCheck(void *tPtr, const int64_t now) void Bond::sendPATH_NEGOTIATION_REQUEST(void *tPtr, const SharedPtr &path) { - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "sendPATH_NEGOTIATION_REQUEST() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); - if (_abSlaveSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "sendPATH_NEGOTIATION_REQUEST() %s %s\n", getLink(path)->ifname().c_str(), pathStr); + if (_abLinkSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { return; } Packet outp(_peer->_id.address(),RR->identity.address(),Packet::VERB_PATH_NEGOTIATION_REQUEST); @@ -566,7 +566,7 @@ void Bond::sendPATH_NEGOTIATION_REQUEST(void 
*tPtr, const SharedPtr &path) void Bond::sendACK(void *tPtr,const SharedPtr &path,const int64_t localSocket, const InetAddress &atAddress,int64_t now) { - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "sendACK() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "sendACK() %s %s\n", getLink(path)->ifname().c_str(), pathStr); Packet outp(_peer->_id.address(),RR->identity.address(),Packet::VERB_ACK); int32_t bytesToAck = 0; std::map::iterator it = path->ackStatsIn.begin(); @@ -589,7 +589,7 @@ void Bond::sendACK(void *tPtr,const SharedPtr &path,const int64_t localSoc void Bond::sendQOS_MEASUREMENT(void *tPtr,const SharedPtr &path,const int64_t localSocket, const InetAddress &atAddress,int64_t now) { - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "sendQOS() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "sendQOS() %s %s\n", getLink(path)->ifname().c_str(), pathStr); const int64_t _now = RR->node->now(); Packet outp(_peer->_id.address(),RR->identity.address(),Packet::VERB_QOS_MEASUREMENT); char qosData[ZT_QOS_MAX_PACKET_SIZE]; @@ -615,14 +615,14 @@ void Bond::processBackgroundTasks(void *tPtr, const int64_t now) _lastBackgroundTaskCheck = now; // Compute dynamic path monitor timer interval - if (_slaveMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC) { + if (_linkMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC) { int suggestedMonitorInterval = (now - _lastFrame) / 100; _dynamicPathMonitorInterval = std::min(ZT_PATH_HEARTBEAT_PERIOD, ((suggestedMonitorInterval > _bondMonitorInterval) ? 
suggestedMonitorInterval : _bondMonitorInterval)); //fprintf(stderr, "_lastFrame=%llu, suggestedMonitorInterval=%d, _dynamicPathMonitorInterval=%d\n", // (now-_lastFrame), suggestedMonitorInterval, _dynamicPathMonitorInterval); } // TODO: Clarify and generalize this logic - if (_slaveMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC) { + if (_linkMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC) { _shouldCollectPathStatistics = true; } @@ -632,11 +632,11 @@ void Bond::processBackgroundTasks(void *tPtr, const int64_t now) _shouldCollectPathStatistics = true; } if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - if (_abSlaveSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_BETTER) { - // Required for judging suitability of primary slave after recovery + if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_BETTER) { + // Required for judging suitability of primary link after recovery _shouldCollectPathStatistics = true; } - if (_abSlaveSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { + if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { // Required for judging suitability of new candidate primary _shouldCollectPathStatistics = true; } @@ -696,18 +696,18 @@ void Bond::applyUserPrefs() if (!_paths[i]) { continue; } - SharedPtr sl = getSlave(_paths[i]); + SharedPtr sl = getLink(_paths[i]); if (sl) { - if (sl->monitorInterval() == 0) { // If no interval was specified for this slave, use more generic bond-wide interval + if (sl->monitorInterval() == 0) { // If no interval was specified for this link, use more generic bond-wide interval sl->setMonitorInterval(_bondMonitorInterval); } RR->bc->setMinReqPathMonitorInterval((sl->monitorInterval() < RR->bc->minReqPathMonitorInterval()) ? 
sl->monitorInterval() : RR->bc->minReqPathMonitorInterval()); - bool bFoundCommonSlave = false; - SharedPtr commonSlave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + bool bFoundCommonLink = false; + SharedPtr commonLink =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); for(unsigned int j=0;jbc->getSlaveBySocket(_policyAlias, _paths[j]->localSocket()) == commonSlave) { - bFoundCommonSlave = true; + if (RR->bc->getLinkBySocket(_policyAlias, _paths[j]->localSocket()) == commonLink) { + bFoundCommonLink = true; } } } @@ -717,7 +717,7 @@ void Bond::applyUserPrefs() _paths[i]->_ipvPref = sl->ipvPref(); _paths[i]->_mode = sl->mode(); _paths[i]->_enabled = sl->enabled(); - _paths[i]->_onlyPathOnSlave = !bFoundCommonSlave; + _paths[i]->_onlyPathOnLink = !bFoundCommonLink; } } if (_peer) { @@ -739,11 +739,11 @@ void Bond::curateBond(const int64_t now, bool rebuildBond) } bool currEligibility = _paths[i]->eligible(now,_ackSendInterval); //_paths[i]->address().toString(pathStr); - //fprintf(stderr, "\n\n%ld path eligibility (for %s, %s):\n", (RR->node->now() - RR->bc->getBondStartTime()), getSlave(_paths[i])->ifname().c_str(), pathStr); + //fprintf(stderr, "\n\n%ld path eligibility (for %s, %s):\n", (RR->node->now() - RR->bc->getBondStartTime()), getLink(_paths[i])->ifname().c_str(), pathStr); //_paths[i]->printEligible(now,_ackSendInterval); if (currEligibility != _paths[i]->_lastEligibilityState) { _paths[i]->address().toString(pathStr); - //fprintf(stderr, "\n\n%ld path eligibility (for %s, %s) has changed (from %d to %d)\n", (RR->node->now() - RR->bc->getBondStartTime()), getSlave(_paths[i])->ifname().c_str(), pathStr, _paths[i]->lastCheckedEligibility, _paths[i]->eligible(now,_ackSendInterval)); + //fprintf(stderr, "\n\n%ld path eligibility (for %s, %s) has changed (from %d to %d)\n", (RR->node->now() - RR->bc->getBondStartTime()), getLink(_paths[i])->ifname().c_str(), pathStr, _paths[i]->lastCheckedEligibility, 
_paths[i]->eligible(now,_ackSendInterval)); if (currEligibility) { rebuildBond = true; } @@ -766,7 +766,7 @@ void Bond::curateBond(const int64_t now, bool rebuildBond) } /** * Curate the set of paths that are part of the bond proper. Selects a single path - * per logical slave according to eligibility and user-specified constraints. + * per logical link according to eligibility and user-specified constraints. */ if ((_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) || (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR) @@ -777,68 +777,68 @@ void Bond::curateBond(const int64_t now, bool rebuildBond) // TODO: Optimize if (rebuildBond) { int updatedBondedPathCount = 0; - std::map,int> slaveMap; + std::map,int> linkMap; for (int i=0;iallowed() && (_paths[i]->eligible(now,_ackSendInterval) || !_numBondedPaths)) { - SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); - if (!slaveMap.count(slave)) { - slaveMap[slave] = i; + SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); + if (!linkMap.count(link)) { + linkMap[link] = i; } else { bool overriden = false; _paths[i]->address().toString(pathStr); - //fprintf(stderr, " slave representative path already exists! (%s %s)\n", getSlave(_paths[i])->ifname().c_str(), pathStr); - if (_paths[i]->preferred() && !_paths[slaveMap[slave]]->preferred()) { + //fprintf(stderr, " link representative path already exists! 
(%s %s)\n", getLink(_paths[i])->ifname().c_str(), pathStr); + if (_paths[i]->preferred() && !_paths[linkMap[link]]->preferred()) { // Override previous choice if preferred //fprintf(stderr, "overriding since its preferred!\n"); - if (_paths[slaveMap[slave]]->_assignedFlowCount) { - _paths[slaveMap[slave]]->_deprecated = true; + if (_paths[linkMap[link]]->_assignedFlowCount) { + _paths[linkMap[link]]->_deprecated = true; } else { - _paths[slaveMap[slave]]->_deprecated = true; - _paths[slaveMap[slave]]->setBonded(false); + _paths[linkMap[link]]->_deprecated = true; + _paths[linkMap[link]]->setBonded(false); } - slaveMap[slave] = i; + linkMap[link] = i; overriden = true; } - if ((_paths[i]->preferred() && _paths[slaveMap[slave]]->preferred()) - || (!_paths[i]->preferred() && !_paths[slaveMap[slave]]->preferred())) { - if (_paths[i]->preferenceRank() > _paths[slaveMap[slave]]->preferenceRank()) { + if ((_paths[i]->preferred() && _paths[linkMap[link]]->preferred()) + || (!_paths[i]->preferred() && !_paths[linkMap[link]]->preferred())) { + if (_paths[i]->preferenceRank() > _paths[linkMap[link]]->preferenceRank()) { // Override if higher preference //fprintf(stderr, "overriding according to preference preferenceRank!\n"); - if (_paths[slaveMap[slave]]->_assignedFlowCount) { - _paths[slaveMap[slave]]->_deprecated = true; + if (_paths[linkMap[link]]->_assignedFlowCount) { + _paths[linkMap[link]]->_deprecated = true; } else { - _paths[slaveMap[slave]]->_deprecated = true; - _paths[slaveMap[slave]]->setBonded(false); + _paths[linkMap[link]]->_deprecated = true; + _paths[linkMap[link]]->setBonded(false); } - slaveMap[slave] = i; + linkMap[link] = i; } } } } } - std::map,int>::iterator it = slaveMap.begin(); + std::map,int>::iterator it = linkMap.begin(); for (int i=0; isecond; _paths[_bondedIdx[i]]->setBonded(true); ++it; ++updatedBondedPathCount; _paths[_bondedIdx[i]]->address().toString(pathStr); - //fprintf(stderr, "setting i=%d, _bondedIdx[%d]=%d to bonded (%s %s)\n", i, 
i, _bondedIdx[i], getSlave(_paths[_bondedIdx[i]])->ifname().c_str(), pathStr); + //fprintf(stderr, "setting i=%d, _bondedIdx[%d]=%d to bonded (%s %s)\n", i, i, _bondedIdx[i], getLink(_paths[_bondedIdx[i]])->ifname().c_str(), pathStr); } } _numBondedPaths = updatedBondedPathCount; if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) { // Cause a RR reset since the currently used index might no longer be valid - _rrPacketsSentOnCurrSlave = _packetsPerSlave; + _rrPacketsSentOnCurrLink = _packetsPerLink; } } } @@ -847,18 +847,18 @@ void Bond::curateBond(const int64_t now, bool rebuildBond) void Bond::estimatePathQuality(const int64_t now) { char pathStr[128]; - uint32_t totUserSpecifiedSlaveSpeed = 0; - if (_numBondedPaths) { // Compute relative user-specified speeds of slaves + uint32_t totUserSpecifiedLinkSpeed = 0; + if (_numBondedPaths) { // Compute relative user-specified speeds of links for(unsigned int i=0;i<_numBondedPaths;++i) { - SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); if (_paths[i] && _paths[i]->allowed()) { - totUserSpecifiedSlaveSpeed += slave->speed(); + totUserSpecifiedLinkSpeed += link->speed(); } } for(unsigned int i=0;i<_numBondedPaths;++i) { - SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); if (_paths[i] && _paths[i]->allowed()) { - slave->setRelativeSpeed(round( ((float)slave->speed() / (float)totUserSpecifiedSlaveSpeed) * 255)); + link->setRelativeSpeed(round( ((float)link->speed() / (float)totUserSpecifiedLinkSpeed) * 255)); } } } @@ -895,11 +895,11 @@ void Bond::estimatePathQuality(const int64_t now) _paths[i]->_latencyVariance = _paths[i]->latencySamples.stddev(); _paths[i]->_packetErrorRatio = 1.0 - (_paths[i]->packetValiditySamples.count() ? 
_paths[i]->packetValiditySamples.mean() : 1.0); - if (userHasSpecifiedSlaveSpeeds()) { + if (userHasSpecifiedLinkSpeeds()) { // Use user-reported metrics - SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); - if (slave) { - _paths[i]->_throughputMean = slave->speed(); + SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); + if (link) { + _paths[i]->_throughputMean = link->speed(); _paths[i]->_throughputVariance = 0; } } @@ -929,7 +929,7 @@ void Bond::estimatePathQuality(const int64_t now) maxPLR = plr[i] > maxPLR ? plr[i] : maxPLR; maxPER = per[i] > maxPER ? per[i] : maxPER; //fprintf(stdout, "EH %d: lat=%8.3f, ltm=%8.3f, pdv=%8.3f, plr=%5.3f, per=%5.3f, thr=%8f, thm=%5.3f, thv=%5.3f, avl=%5.3f, age=%8.2f, scp=%4d, q=%5.3f, qtot=%5.3f, ac=%d if=%s, path=%s\n", - // i, lat[i], ltm[i], pdv[i], plr[i], per[i], thr[i], thm[i], thv[i], avl[i], age[i], scp[i], quality[i], totQuality, alloc[i], getSlave(_paths[i])->ifname().c_str(), pathStr); + // i, lat[i], ltm[i], pdv[i], plr[i], per[i], thr[i], thm[i], thv[i], avl[i], age[i], scp[i], quality[i], totQuality, alloc[i], getLink(_paths[i])->ifname().c_str(), pathStr); } // Convert metrics to relative quantities and apply contribution weights @@ -962,7 +962,7 @@ void Bond::estimatePathQuality(const int64_t now) //fprintf(stderr, "%lu FIN [%d/%d]: pmi=%5d, lat=%4.3f, ltm=%4.3f, pdv=%4.3f, plr=%4.3f, per=%4.3f, thr=%4.3f, thm=%4.3f, thv=%4.3f, age=%4.3f, scp=%4d, q=%4.3f, qtot=%4.3f, ac=%4d, asf=%3d, if=%s, path=%20s, bond=%d, qosout=%d, plrraw=%d\n", // ((now - RR->bc->getBondStartTime())), i, _numBondedPaths, _paths[i]->monitorInterval, // lat[i], ltm[i], pdv[i], plr[i], per[i], thr[i], thm[i], thv[i], age[i], scp[i], - // quality[i], totQuality, alloc[i], _paths[i]->assignedFlowCount, getSlave(_paths[i])->ifname().c_str(), pathStr, _paths[i]->bonded(), _paths[i]->qosStatsOut.size(), _paths[i]->packetLossRatio); + // quality[i], totQuality, alloc[i], 
_paths[i]->assignedFlowCount, getLink(_paths[i])->ifname().c_str(), pathStr, _paths[i]->bonded(), _paths[i]->qosStatsOut.size(), _paths[i]->packetLossRatio); } } if (numPlottablePaths < 2) { @@ -973,7 +973,7 @@ void Bond::estimatePathQuality(const int64_t now) for(unsigned int i=0;iaddress().toString(pathStr); - std::string label = std::string((pathStr)) + " " + getSlave(_paths[i])->ifname(); + std::string label = std::string((pathStr)) + " " + getLink(_paths[i])->ifname(); for (int i=0; i<19; ++i) { fprintf(stdout, "%s, ", label.c_str()); } @@ -987,7 +987,7 @@ void Bond::estimatePathQuality(const int64_t now) if (_paths[i]) { _paths[i]->address().toString(pathStr); fprintf(stdout, "%s, %s, %8.3f, %8.3f, %8.3f, %5.3f, %5.3f, %5.3f, %8f, %5.3f, %5.3f, %d, %5.3f, %d, %d, %d, %d, %d, %d, ", - getSlave(_paths[i])->ifname().c_str(), pathStr, _paths[i]->_latencyMean, lat[i],pdv[i], _paths[i]->_packetLossRatio, plr[i],per[i],thr[i],thm[i],thv[i],(now - _paths[i]->lastIn()),quality[i],alloc[i], + getLink(_paths[i])->ifname().c_str(), pathStr, _paths[i]->_latencyMean, lat[i],pdv[i], _paths[i]->_packetLossRatio, plr[i],per[i],thr[i],thm[i],thv[i],(now - _paths[i]->lastIn()),quality[i],alloc[i], _paths[i]->_relativeByteLoad, _paths[i]->_assignedFlowCount, _paths[i]->alive(now, true), _paths[i]->eligible(now,_ackSendInterval), _paths[i]->qosStatsOut.size()); } }*/ @@ -1040,7 +1040,7 @@ void Bond::processBalanceTasks(const int64_t now) } if (!_paths[i]->eligible(now,_ackSendInterval) && _paths[i]->_shouldReallocateFlows) { _paths[i]->address().toString(curPathStr); - fprintf(stderr, "%d reallocating flows from dead path %s on %s\n", (RR->node->now() - RR->bc->getBondStartTime()), curPathStr, getSlave(_paths[i])->ifname().c_str()); + fprintf(stderr, "%d reallocating flows from dead path %s on %s\n", (RR->node->now() - RR->bc->getBondStartTime()), curPathStr, getLink(_paths[i])->ifname().c_str()); std::map >::iterator flow_it = _flows.begin(); while (flow_it != _flows.end()) { if 
(flow_it->second->assignedPath() == _paths[i]) { @@ -1066,7 +1066,7 @@ void Bond::processBalanceTasks(const int64_t now) } if (_paths[i] && _paths[i]->bonded() && _paths[i]->eligible(now,_ackSendInterval) && (_paths[i]->_allocation < minimumAllocationValue) && _paths[i]->_assignedFlowCount) { _paths[i]->address().toString(curPathStr); - fprintf(stderr, "%d reallocating flows from under-performing path %s on %s\n", (RR->node->now() - RR->bc->getBondStartTime()), curPathStr, getSlave(_paths[i])->ifname().c_str()); + fprintf(stderr, "%d reallocating flows from under-performing path %s on %s\n", (RR->node->now() - RR->bc->getBondStartTime()), curPathStr, getLink(_paths[i])->ifname().c_str()); std::map >::iterator flow_it = _flows.begin(); while (flow_it != _flows.end()) { if (flow_it->second->assignedPath() == _paths[i]) { @@ -1086,7 +1086,7 @@ void Bond::processBalanceTasks(const int64_t now) */ if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) { if (_allowFlowHashing) { - // TODO: Should ideally failover from (idx) to a random slave, this is so that (idx+1) isn't overloaded + // TODO: Should ideally failover from (idx) to a random link, this is so that (idx+1) isn't overloaded } else if (!_allowFlowHashing) { // Nothing @@ -1176,29 +1176,29 @@ void Bond::processActiveBackupTasks(const int64_t now) SharedPtr prevActiveBackupPath = _abPath; SharedPtr nonPreferredPath; - bool bFoundPrimarySlave = false; + bool bFoundPrimaryLink = false; /** - * Select initial "active" active-backup slave + * Select initial "active" active-backup link */ if (!_abPath) { fprintf(stderr, "%llu no active backup path yet...\n", ((now - RR->bc->getBondStartTime()))); /** * [Automatic mode] - * The user has not explicitly specified slaves or their failover schedule, + * The user has not explicitly specified links or their failover schedule, * the bonding policy will now select the first eligible path and set it as * its active backup path, if a substantially better path is detected the 
bonding * policy will assign it as the new active backup path. If the path fails it will * simply find the next eligible path. */ - if (!userHasSpecifiedSlaves()) { - fprintf(stderr, "%llu AB: (auto) user did not specify any slaves. waiting until we know more\n", ((now - RR->bc->getBondStartTime()))); + if (!userHasSpecifiedLinks()) { + fprintf(stderr, "%llu AB: (auto) user did not specify any links. waiting until we know more\n", ((now - RR->bc->getBondStartTime()))); for (int i=0; ieligible(now,_ackSendInterval)) { _paths[i]->address().toString(curPathStr); - SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); - if (slave) { - fprintf(stderr, "%llu AB: (initial) [%d] found eligible path %s on: %s\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, slave->ifname().c_str()); + SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); + if (link) { + fprintf(stderr, "%llu AB: (initial) [%d] found eligible path %s on: %s\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, link->ifname().c_str()); } _abPath = _paths[i]; break; @@ -1207,57 +1207,57 @@ void Bond::processActiveBackupTasks(const int64_t now) } /** * [Manual mode] - * The user has specified slaves or failover rules that the bonding policy should adhere to. + * The user has specified links or failover rules that the bonding policy should adhere to. 
*/ - else if (userHasSpecifiedSlaves()) { - fprintf(stderr, "%llu AB: (manual) no active backup slave, checking local.conf\n", ((now - RR->bc->getBondStartTime()))); - if (userHasSpecifiedPrimarySlave()) { - fprintf(stderr, "%llu AB: (manual) user has specified primary slave, looking for it.\n", ((now - RR->bc->getBondStartTime()))); + else if (userHasSpecifiedLinks()) { + fprintf(stderr, "%llu AB: (manual) no active backup link, checking local.conf\n", ((now - RR->bc->getBondStartTime()))); + if (userHasSpecifiedPrimaryLink()) { + fprintf(stderr, "%llu AB: (manual) user has specified primary link, looking for it.\n", ((now - RR->bc->getBondStartTime()))); for (int i=0; i slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); - if (_paths[i]->eligible(now,_ackSendInterval) && slave->primary()) { + SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); + if (_paths[i]->eligible(now,_ackSendInterval) && link->primary()) { if (!_paths[i]->preferred()) { _paths[i]->address().toString(curPathStr); - fprintf(stderr, "%llu AB: (initial) [%d] found path on primary slave, taking note in case we don't find a preferred path\n", ((now - RR->bc->getBondStartTime())), i); + fprintf(stderr, "%llu AB: (initial) [%d] found path on primary link, taking note in case we don't find a preferred path\n", ((now - RR->bc->getBondStartTime())), i); nonPreferredPath = _paths[i]; - bFoundPrimarySlave = true; + bFoundPrimaryLink = true; } if (_paths[i]->preferred()) { _abPath = _paths[i]; _abPath->address().toString(curPathStr); - SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); - if (slave) { - fprintf(stderr, "%llu AB: (initial) [%d] found preferred path %s on primary slave: %s\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, slave->ifname().c_str()); + SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); + if (link) { + fprintf(stderr, "%llu AB: (initial) [%d] found preferred 
path %s on primary link: %s\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, link->ifname().c_str()); } - bFoundPrimarySlave = true; + bFoundPrimaryLink = true; break; } } } if (_abPath) { _abPath->address().toString(curPathStr); - SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _abPath->localSocket()); - if (slave) { - fprintf(stderr, "%llu AB: (initial) found preferred primary path: %s on %s\n", ((now - RR->bc->getBondStartTime())), curPathStr, slave->ifname().c_str()); + SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _abPath->localSocket()); + if (link) { + fprintf(stderr, "%llu AB: (initial) found preferred primary path: %s on %s\n", ((now - RR->bc->getBondStartTime())), curPathStr, link->ifname().c_str()); } } else { - if (bFoundPrimarySlave && nonPreferredPath) { + if (bFoundPrimaryLink && nonPreferredPath) { fprintf(stderr, "%llu AB: (initial) found a non-preferred primary path\n", ((now - RR->bc->getBondStartTime()))); _abPath = nonPreferredPath; } } if (!_abPath) { - fprintf(stderr, "%llu AB: (initial) designated primary slave is not yet ready\n", ((now - RR->bc->getBondStartTime()))); + fprintf(stderr, "%llu AB: (initial) designated primary link is not yet ready\n", ((now - RR->bc->getBondStartTime()))); // TODO: Should fail-over to specified backup or just wait? 
} } - else if (!userHasSpecifiedPrimarySlave()) { + else if (!userHasSpecifiedPrimaryLink()) { int _abIdx = ZT_MAX_PEER_NETWORK_PATHS; - fprintf(stderr, "%llu AB: (initial) user did not specify primary slave, just picking something\n", ((now - RR->bc->getBondStartTime()))); + fprintf(stderr, "%llu AB: (initial) user did not specify primary link, just picking something\n", ((now - RR->bc->getBondStartTime()))); for (int i=0; ieligible(now,_ackSendInterval)) { _abIdx = i; @@ -1269,9 +1269,9 @@ void Bond::processActiveBackupTasks(const int64_t now) } else { _abPath = _paths[_abIdx]; - SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _abPath->localSocket()); - if (slave) { - fprintf(stderr, "%llu AB: (initial) selected non-primary slave idx=%d, %s on %s\n", ((now - RR->bc->getBondStartTime())), _abIdx, pathStr, slave->ifname().c_str()); + SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _abPath->localSocket()); + if (link) { + fprintf(stderr, "%llu AB: (initial) selected non-primary link idx=%d, %s on %s\n", ((now - RR->bc->getBondStartTime())), _abIdx, pathStr, link->ifname().c_str()); } } } @@ -1281,14 +1281,14 @@ void Bond::processActiveBackupTasks(const int64_t now) * Update and maintain the active-backup failover queue */ if (_abPath) { - // Don't worry about the failover queue until we have an active slave - // Remove ineligible paths from the failover slave queue + // Don't worry about the failover queue until we have an active link + // Remove ineligible paths from the failover link queue for (std::list >::iterator it(_abFailoverQueue.begin()); it!=_abFailoverQueue.end();) { if ((*it) && !(*it)->eligible(now,_ackSendInterval)) { (*it)->address().toString(curPathStr); - SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, (*it)->localSocket()); - if (slave) { - fprintf(stderr, "%llu AB: (fq) %s on %s is now ineligible, removing from failover queue\n", ((now - RR->bc->getBondStartTime())), curPathStr, slave->ifname().c_str()); + SharedPtr link 
=RR->bc->getLinkBySocket(_policyAlias, (*it)->localSocket()); + if (link) { + fprintf(stderr, "%llu AB: (fq) %s on %s is now ineligible, removing from failover queue\n", ((now - RR->bc->getBondStartTime())), curPathStr, link->ifname().c_str()); } it = _abFailoverQueue.erase(it); } else { @@ -1313,7 +1313,7 @@ void Bond::processActiveBackupTasks(const int64_t now) if (!_paths[i] || !_paths[i]->allowed() || !_paths[i]->eligible(now,_ackSendInterval)) { continue; } - SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); _paths[i]->address().toString(pathStr); int failoverScoreHandicap = _paths[i]->_failoverScore; @@ -1322,8 +1322,8 @@ void Bond::processActiveBackupTasks(const int64_t now) failoverScoreHandicap += ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED; //fprintf(stderr, "%s on %s ----> %d for preferred\n", pathStr, _paths[i]->ifname().c_str(), failoverScoreHandicap); } - if (slave->primary()) { - // If using "optimize" primary reselect mode, ignore user slave designations + if (link->primary()) { + // If using "optimize" primary reselect mode, ignore user link designations failoverScoreHandicap += ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY; //fprintf(stderr, "%s on %s ----> %d for primary\n", pathStr, _paths[i]->ifname().c_str(), failoverScoreHandicap); } @@ -1333,17 +1333,17 @@ void Bond::processActiveBackupTasks(const int64_t now) _paths[i]->_failoverScore = newHandicap; //fprintf(stderr, "%s on %s ----> %d for allocation\n", pathStr, _paths[i]->ifname().c_str(), newHandicap); } - SharedPtr failoverSlave; - if (slave->failoverToSlave().length()) { - failoverSlave = RR->bc->getSlaveByName(_policyAlias, slave->failoverToSlave()); + SharedPtr failoverLink; + if (link->failoverToLink().length()) { + failoverLink = RR->bc->getLinkByName(_policyAlias, link->failoverToLink()); } - if (failoverSlave) { + if (failoverLink) { for (int j=0; 
jaddress().toString(pathStr); int inheritedHandicap = failoverScoreHandicap - 10; int newHandicap = _paths[j]->_failoverScore > inheritedHandicap ? _paths[j]->_failoverScore : inheritedHandicap; - //fprintf(stderr, "\thanding down %s on %s ----> %d\n", pathStr, getSlave(_paths[j])->ifname().c_str(), newHandicap); + //fprintf(stderr, "\thanding down %s on %s ----> %d\n", pathStr, getLink(_paths[j])->ifname().c_str(), newHandicap); if (!_paths[j]->preferred()) { newHandicap--; } @@ -1360,7 +1360,7 @@ void Bond::processActiveBackupTasks(const int64_t now) } if (!bFoundPathInQueue) { _paths[i]->address().toString(curPathStr); - fprintf(stderr, "%llu AB: (fq) [%d] added %s on %s to queue\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, getSlave(_paths[i])->ifname().c_str()); + fprintf(stderr, "%llu AB: (fq) [%d] added %s on %s to queue\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, getLink(_paths[i])->ifname().c_str()); _abFailoverQueue.push_front(_paths[i]); } } @@ -1385,8 +1385,8 @@ void Bond::processActiveBackupTasks(const int64_t now) if (!_paths[i]->eligible(now,includeRefractoryPeriod)) { failoverScoreHandicap = -10000; } - if (getSlave(_paths[i])->primary() && _abSlaveSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { - // If using "optimize" primary reselect mode, ignore user slave designations + if (getLink(_paths[i])->primary() && _abLinkSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { + // If using "optimize" primary reselect mode, ignore user link designations failoverScoreHandicap = ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY; } if (_paths[i].ptr() == negotiatedPath.ptr()) { @@ -1405,7 +1405,7 @@ void Bond::processActiveBackupTasks(const int64_t now) } if (!bFoundPathInQueue) { _paths[i]->address().toString(curPathStr); - fprintf(stderr, "%llu AB: (fq) [%d] added %s on %s to queue\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, getSlave(_paths[i])->ifname().c_str()); + fprintf(stderr, "%llu AB: (fq) [%d] added %s on 
%s to queue\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, getLink(_paths[i])->ifname().c_str()); _abFailoverQueue.push_front(_paths[i]); } } @@ -1428,11 +1428,11 @@ void Bond::processActiveBackupTasks(const int64_t now) if (_abPath && !_abPath->eligible(now,_ackSendInterval)) { // Implicit ZT_MULTIPATH_RESELECTION_POLICY_FAILURE _abPath->address().toString(curPathStr); fprintf(stderr, "%llu AB: (failure) failover event!, active backup path (%s) is no-longer eligible\n", ((now - RR->bc->getBondStartTime())), curPathStr); if (!_abFailoverQueue.empty()) { - fprintf(stderr, "%llu AB: (failure) there are (%lu) slaves in queue to choose from...\n", ((now - RR->bc->getBondStartTime())), _abFailoverQueue.size()); + fprintf(stderr, "%llu AB: (failure) there are (%lu) links in queue to choose from...\n", ((now - RR->bc->getBondStartTime())), _abFailoverQueue.size()); dequeueNextActiveBackupPath(now); - _abPath->address().toString(curPathStr); fprintf(stderr, "%llu AB: (failure) switched to %s on %s\n", ((now - RR->bc->getBondStartTime())), curPathStr, getSlave(_abPath)->ifname().c_str()); + _abPath->address().toString(curPathStr); fprintf(stderr, "%llu AB: (failure) switched to %s on %s\n", ((now - RR->bc->getBondStartTime())), curPathStr, getLink(_abPath)->ifname().c_str()); } else { - fprintf(stderr, "%llu AB: (failure) nothing available in the slave queue, doing nothing.\n", ((now - RR->bc->getBondStartTime()))); + fprintf(stderr, "%llu AB: (failure) nothing available in the link queue, doing nothing.\n", ((now - RR->bc->getBondStartTime()))); } } /** @@ -1441,38 +1441,38 @@ void Bond::processActiveBackupTasks(const int64_t now) if (prevActiveBackupPath != _abPath) { _lastActiveBackupPathChange = now; } - if (_abSlaveSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS) { - if (_abPath && !getSlave(_abPath)->primary() - && getSlave(_abFailoverQueue.front())->primary()) { + if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS) { + if (_abPath && 
!getLink(_abPath)->primary() + && getLink(_abFailoverQueue.front())->primary()) { fprintf(stderr, "%llu AB: (always) switching to available primary\n", ((now - RR->bc->getBondStartTime()))); dequeueNextActiveBackupPath(now); } } - if (_abSlaveSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_BETTER) { - if (_abPath && !getSlave(_abPath)->primary()) { - fprintf(stderr, "%llu AB: (better) active backup has switched to \"better\" primary slave according to re-select policy.\n", ((now - RR->bc->getBondStartTime()))); - if (getSlave(_abFailoverQueue.front())->primary() + if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_BETTER) { + if (_abPath && !getLink(_abPath)->primary()) { + fprintf(stderr, "%llu AB: (better) active backup has switched to \"better\" primary link according to re-select policy.\n", ((now - RR->bc->getBondStartTime()))); + if (getLink(_abFailoverQueue.front())->primary() && (_abFailoverQueue.front()->_failoverScore > _abPath->_failoverScore)) { dequeueNextActiveBackupPath(now); fprintf(stderr, "%llu AB: (better) switched back to user-defined primary\n", ((now - RR->bc->getBondStartTime()))); } } } - if (_abSlaveSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE && !_abFailoverQueue.empty()) { + if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE && !_abFailoverQueue.empty()) { /** * Implement link negotiation that was previously-decided */ if (_abFailoverQueue.front()->_negotiated) { dequeueNextActiveBackupPath(now); _abPath->address().toString(prevPathStr); - fprintf(stderr, "%llu AB: (optimize) switched to negotiated path %s on %s\n", ((now - RR->bc->getBondStartTime())), prevPathStr, getSlave(_abPath)->ifname().c_str()); + fprintf(stderr, "%llu AB: (optimize) switched to negotiated path %s on %s\n", ((now - RR->bc->getBondStartTime())), prevPathStr, getLink(_abPath)->ifname().c_str()); _lastPathNegotiationCheck = now; } else { // Try to find a better path and automatically switch to it -- not too often, though. 
if ((now - _lastActiveBackupPathChange) > ZT_MULTIPATH_MIN_ACTIVE_BACKUP_AUTOFLOP_INTERVAL) { if (!_abFailoverQueue.empty()) { - //fprintf(stderr, "AB: (optimize) there are (%d) slaves in queue to choose from...\n", _abFailoverQueue.size()); + //fprintf(stderr, "AB: (optimize) there are (%d) links in queue to choose from...\n", _abFailoverQueue.size()); int newFScore = _abFailoverQueue.front()->_failoverScore; int prevFScore = _abPath->_failoverScore; // Establish a minimum switch threshold to prevent flapping @@ -1483,7 +1483,7 @@ void Bond::processActiveBackupTasks(const int64_t now) _abPath->address().toString(prevPathStr); dequeueNextActiveBackupPath(now); _abPath->address().toString(curPathStr); - fprintf(stderr, "%llu AB: (optimize) switched from %s on %s (fs=%d) to %s on %s (fs=%d)\n", ((now - RR->bc->getBondStartTime())), prevPathStr, getSlave(oldPath)->ifname().c_str(), prevFScore, curPathStr, getSlave(_abPath)->ifname().c_str(), newFScore); + fprintf(stderr, "%llu AB: (optimize) switched from %s on %s (fs=%d) to %s on %s (fs=%d)\n", ((now - RR->bc->getBondStartTime())), prevPathStr, getLink(oldPath)->ifname().c_str(), prevFScore, curPathStr, getLink(_abPath)->ifname().c_str(), newFScore); } } } @@ -1527,7 +1527,7 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool _lastFlowExpirationCheck=0; _numBondedPaths=0; - _rrPacketsSentOnCurrSlave=0; + _rrPacketsSentOnCurrLink=0; _rrIdx=0; _lastFlowRebalance=0; @@ -1537,7 +1537,7 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool _maxAcceptablePacketDelayVariance = 50; _maxAcceptablePacketLossRatio = 0.10; _maxAcceptablePacketErrorRatio = 0.10; - _userHasSpecifiedSlaveSpeeds=0; + _userHasSpecifiedLinkSpeeds=0; _lastFrame=0; @@ -1553,8 +1553,8 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool switch (policy) { case ZT_BONDING_POLICY_ACTIVE_BACKUP: _failoverInterval = 500; - _abSlaveSelectMethod = ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE; - 
_slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; + _abLinkSelectMethod = ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE; + _linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; _qualityWeights[ZT_QOS_LAT_IDX] = 0.2f; _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; _qualityWeights[ZT_QOS_PDV_IDX] = 0.2f; @@ -1578,8 +1578,8 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool case ZT_BONDING_POLICY_BALANCE_RR: _failoverInterval = 500; _allowFlowHashing = false; - _packetsPerSlave = 1024; - _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; + _packetsPerLink = 1024; + _linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; _qualityWeights[ZT_QOS_PDV_IDX] = 0.2f; @@ -1598,7 +1598,7 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool _failoverInterval = 500; _upDelay = _bondMonitorInterval * 2; _allowFlowHashing = true; - _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; + _linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; _qualityWeights[ZT_QOS_PDV_IDX] = 0.2f; @@ -1617,7 +1617,7 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool case ZT_BONDING_POLICY_BALANCE_AWARE: _failoverInterval = 3000; _allowFlowHashing = true; - _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; + _linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; _qualityWeights[ZT_QOS_PDV_IDX] = 0.4f; @@ -1641,7 +1641,7 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool _upDelay = templateBond->_upDelay; fprintf(stderr, "TIMERS: strat=%d, fi= %d, bmi= %d, qos= %d, ack= %d, estimateInt= %d, refractory= %d, ud= %d, dd= %d\n", - _slaveMonitorStrategy, + 
_linkMonitorStrategy, _failoverInterval, _bondMonitorInterval, _qosSendInterval, @@ -1651,11 +1651,11 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool _upDelay, _downDelay); - if (templateBond->_slaveMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE + if (templateBond->_linkMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE && templateBond->_failoverInterval != 0) { fprintf(stderr, "warning: passive path monitoring was specified, this will prevent failovers from happening in a timely manner.\n"); } - _abSlaveSelectMethod = templateBond->_abSlaveSelectMethod; + _abLinkSelectMethod = templateBond->_abLinkSelectMethod; memcpy(_qualityWeights, templateBond->_qualityWeights, ZT_QOS_WEIGHT_SIZE * sizeof(float)); } @@ -1711,9 +1711,9 @@ bool Bond::relevant() { || _peer->identity().address().toInt() == 0x795cbf86fa; } -SharedPtr Bond::getSlave(const SharedPtr& path) +SharedPtr Bond::getLink(const SharedPtr& path) { - return RR->bc->getSlaveBySocket(_policyAlias, path->localSocket()); + return RR->bc->getLinkBySocket(_policyAlias, path->localSocket()); } void Bond::dumpInfo(const int64_t now) @@ -1726,12 +1726,12 @@ void Bond::dumpInfo(const int64_t now) return; } /* - fprintf(stderr, "---[ bp=%d, id=%llx, dd=%d, up=%d, pmi=%d, specifiedSlaves=%d, _specifiedPrimarySlave=%d, _specifiedFailInst=%d ]\n", - _policy, _peer->identity().address().toInt(), _downDelay, _upDelay, _monitorInterval, _userHasSpecifiedSlaves, _userHasSpecifiedPrimarySlave, _userHasSpecifiedFailoverInstructions); + fprintf(stderr, "---[ bp=%d, id=%llx, dd=%d, up=%d, pmi=%d, specifiedLinks=%d, _specifiedPrimaryLink=%d, _specifiedFailInst=%d ]\n", + _policy, _peer->identity().address().toInt(), _downDelay, _upDelay, _monitorInterval, _userHasSpecifiedLinks, _userHasSpecifiedPrimaryLink, _userHasSpecifiedFailoverInstructions); if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { fprintf(stderr, "Paths (bp=%d, stats=%d, primaryReselect=%d) :\n", - 
_policy, _shouldCollectPathStatistics, _abSlaveSelectMethod); + _policy, _shouldCollectPathStatistics, _abLinkSelectMethod); } if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR @@ -1748,13 +1748,13 @@ void Bond::dumpInfo(const int64_t now) for(int i=0; i slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); _paths[i]->address().toString(pathStr); fprintf(stderr, " %2d: lat=%8.3f, ac=%3d, fail%5s, fscore=%6d, in=%7d, out=%7d, age=%7ld, ack=%7ld, ref=%6d, ls=%llx", i, _paths[i]->_latencyMean, _paths[i]->_allocation, - slave->failoverToSlave().c_str(), + link->failoverToLink().c_str(), _paths[i]->_failoverScore, _paths[i]->_packetsIn, _paths[i]->_packetsOut, @@ -1763,12 +1763,12 @@ void Bond::dumpInfo(const int64_t now) _paths[i]->_refractoryPeriod, _paths[i]->localSocket() ); - if (slave->spare()) { + if (link->spare()) { fprintf(stderr, " SPR."); } else { fprintf(stderr, " "); } - if (slave->primary()) { + if (link->primary()) { fprintf(stderr, " PRIM."); } else { fprintf(stderr, " "); @@ -1808,7 +1808,7 @@ void Bond::dumpInfo(const int64_t now) } else if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { fprintf(stderr, " "); } - fprintf(stderr, "%5s %s\n", slave->ifname().c_str(), pathStr); + fprintf(stderr, "%5s %s\n", link->ifname().c_str(), pathStr); } } @@ -1817,12 +1817,12 @@ void Bond::dumpInfo(const int64_t now) fprintf(stderr, "\nFailover Queue:\n"); for (std::list >::iterator it(_abFailoverQueue.begin()); it!=_abFailoverQueue.end();++it) { (*it)->address().toString(currPathStr); - SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, (*it)->localSocket()); + SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, (*it)->localSocket()); fprintf(stderr, "\t%8s\tspeed=%7d\trelSpeed=%3d\tipvPref=%3d\tfscore=%9d\t\t%s\n", - slave->ifname().c_str(), - slave->speed(), - slave->relativeSpeed(), - 
slave->ipvPref(), + link->ifname().c_str(), + link->speed(), + link->relativeSpeed(), + link->ipvPref(), (*it)->_failoverScore, currPathStr); } @@ -1840,15 +1840,15 @@ void Bond::dumpInfo(const int64_t now) fprintf(stderr, "\nBonded Paths:\n"); for (int i=0; i<_numBondedPaths; ++i) { _paths[_bondedIdx[i]]->address().toString(currPathStr); - SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[_bondedIdx[i]]->localSocket()); + SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[_bondedIdx[i]]->localSocket()); fprintf(stderr, " [%d]\t%8s\tflows=%3d\tspeed=%7d\trelSpeed=%3d\tipvPref=%3d\tfscore=%9d\t\t%s\n", i, //fprintf(stderr, " [%d]\t%8s\tspeed=%7d\trelSpeed=%3d\tflowCount=%2d\tipvPref=%3d\tfscore=%9d\t\t%s\n", i, - slave->ifname().c_str(), + link->ifname().c_str(), _paths[_bondedIdx[i]]->_assignedFlowCount, - slave->speed(), - slave->relativeSpeed(), + link->speed(), + link->relativeSpeed(), //_paths[_bondedIdx[i]].p->assignedFlows.size(), - slave->ipvPref(), + link->ipvPref(), _paths[_bondedIdx[i]]->_failoverScore, currPathStr); } diff --git a/node/Bond.hpp b/node/Bond.hpp index 353ed9317..4eee20f36 100644 --- a/node/Bond.hpp +++ b/node/Bond.hpp @@ -18,13 +18,13 @@ #include "Path.hpp" #include "Peer.hpp" -#include "../osdep/Slave.hpp" +#include "../osdep/Link.hpp" #include "Flow.hpp" namespace ZeroTier { class RuntimeEnvironment; -class Slave; +class Link; class Bond { @@ -52,7 +52,7 @@ public: void dumpInfo(const int64_t now); bool relevant(); - SharedPtr getSlave(const SharedPtr& path); + SharedPtr getLink(const SharedPtr& path); /** * Constructor. Creates a bond based off of ZT defaults @@ -281,7 +281,7 @@ public: void processActiveBackupTasks(int64_t now); /** - * Switches the active slave in an active-backup scenario to the next best during + * Switches the active link in an active-backup scenario to the next best during * a failover event. 
* * @param now Current time @@ -348,24 +348,24 @@ public: } /** - * @return Whether the user has defined slaves for use on this bond + * @return Whether the user has defined links for use on this bond */ - inline bool userHasSpecifiedSlaves() { return _userHasSpecifiedSlaves; } + inline bool userHasSpecifiedLinks() { return _userHasSpecifiedLinks; } /** - * @return Whether the user has defined a set of failover slave(s) for this bond + * @return Whether the user has defined a set of failover link(s) for this bond */ inline bool userHasSpecifiedFailoverInstructions() { return _userHasSpecifiedFailoverInstructions; }; /** - * @return Whether the user has specified a primary slave + * @return Whether the user has specified a primary link */ - inline bool userHasSpecifiedPrimarySlave() { return _userHasSpecifiedPrimarySlave; } + inline bool userHasSpecifiedPrimaryLink() { return _userHasSpecifiedPrimaryLink; } /** - * @return Whether the user has specified slave speeds + * @return Whether the user has specified link speeds */ - inline bool userHasSpecifiedSlaveSpeeds() { return _userHasSpecifiedSlaveSpeeds; } + inline bool userHasSpecifiedLinkSpeeds() { return _userHasSpecifiedLinkSpeeds; } /** * Periodically perform maintenance tasks for each active bond. @@ -441,7 +441,7 @@ public: /** * @param strategy Strategy that the bond uses to prob for path aliveness and quality */ - inline void setSlaveMonitorStrategy(uint8_t strategy) { _slaveMonitorStrategy = strategy; } + inline void setLinkMonitorStrategy(uint8_t strategy) { _linkMonitorStrategy = strategy; } /** * @return the current up delay parameter @@ -464,12 +464,12 @@ public: inline void setDownDelay(int downDelay) { if (downDelay >= 0) { _downDelay = downDelay; } } /** - * @return the current monitoring interval for the bond (can be overridden with intervals specific to certain slaves.) + * @return the current monitoring interval for the bond (can be overridden with intervals specific to certain links.) 
*/ inline uint16_t getBondMonitorInterval() { return _bondMonitorInterval; } /** - * Set the current monitoring interval for the bond (can be overridden with intervals specific to certain slaves.) + * Set the current monitoring interval for the bond (can be overridden with intervals specific to certain links.) * * @param monitorInterval How often gratuitous VERB_HELLO(s) are sent to remote peer. */ @@ -498,21 +498,21 @@ public: /** * - * @param packetsPerSlave + * @param packetsPerLink */ - inline void setPacketsPerSlave(int packetsPerSlave) { _packetsPerSlave = packetsPerSlave; } + inline void setPacketsPerLink(int packetsPerLink) { _packetsPerLink = packetsPerLink; } /** * - * @param slaveSelectMethod + * @param linkSelectMethod */ - inline void setSlaveSelectMethod(uint8_t method) { _abSlaveSelectMethod = method; } + inline void setLinkSelectMethod(uint8_t method) { _abLinkSelectMethod = method; } /** * * @return */ - inline uint8_t getSlaveSelectMethod() { return _abSlaveSelectMethod; } + inline uint8_t getLinkSelectMethod() { return _abLinkSelectMethod; } /** * @@ -568,25 +568,25 @@ private: // active-backup SharedPtr _abPath; // current active path std::list > _abFailoverQueue; - uint8_t _abSlaveSelectMethod; // slave re-selection policy for the primary slave in active-backup + uint8_t _abLinkSelectMethod; // link re-selection policy for the primary link in active-backup uint64_t _lastActiveBackupPathChange; // balance-rr uint8_t _rrIdx; // index to path currently in use during Round Robin operation - uint16_t _rrPacketsSentOnCurrSlave; // number of packets sent on this slave since the most recent path switch. + uint16_t _rrPacketsSentOnCurrLink; // number of packets sent on this link since the most recent path switch. /** * How many packets will be sent on a path before moving to the next path * in the round-robin sequence. A value of zero will cause a random path * selection for each outgoing packet. 
*/ - int _packetsPerSlave; + int _packetsPerLink; // balance-aware uint64_t _totalBondUnderload; uint8_t _flowRebalanceStrategy; - // dynamic slave monitoring - uint8_t _slaveMonitorStrategy; + // dynamic link monitoring + uint8_t _linkMonitorStrategy; uint64_t _lastFrame; uint32_t _dynamicPathMonitorInterval; @@ -651,14 +651,14 @@ private: Mutex _flows_m; /** - * Whether the user has specified slaves for this bond. + * Whether the user has specified links for this bond. */ - bool _userHasSpecifiedSlaves; + bool _userHasSpecifiedLinks; /** - * Whether the user has specified a primary slave for this bond. + * Whether the user has specified a primary link for this bond. */ - bool _userHasSpecifiedPrimarySlave; + bool _userHasSpecifiedPrimaryLink; /** * Whether the user has specified failover instructions for this bond. @@ -666,9 +666,9 @@ private: bool _userHasSpecifiedFailoverInstructions; /** - * Whether the user has specified slaves speeds for this bond. + * Whether the user has specified link speeds for this bond. 
*/ - bool _userHasSpecifiedSlaveSpeeds; + bool _userHasSpecifiedLinkSpeeds; /** * How frequently (in ms) a VERB_ECHO is sent to a peer to verify that a diff --git a/node/BondController.cpp b/node/BondController.cpp index 6b21d9998..f7159dbc3 100644 --- a/node/BondController.cpp +++ b/node/BondController.cpp @@ -27,33 +27,33 @@ BondController::BondController(const RuntimeEnvironment *renv) : _defaultBondingPolicy = ZT_BONDING_POLICY_NONE; } -bool BondController::slaveAllowed(std::string &policyAlias, SharedPtr slave) +bool BondController::linkAllowed(std::string &policyAlias, SharedPtr link) { bool foundInDefinitions = false; - if (_slaveDefinitions.count(policyAlias)) { - auto it = _slaveDefinitions[policyAlias].begin(); - while (it != _slaveDefinitions[policyAlias].end()) { - if (slave->ifname() == (*it)->ifname()) { + if (_linkDefinitions.count(policyAlias)) { + auto it = _linkDefinitions[policyAlias].begin(); + while (it != _linkDefinitions[policyAlias].end()) { + if (link->ifname() == (*it)->ifname()) { foundInDefinitions = true; break; } ++it; } } - return _slaveDefinitions[policyAlias].empty() || foundInDefinitions; + return _linkDefinitions[policyAlias].empty() || foundInDefinitions; } -void BondController::addCustomSlave(std::string& policyAlias, SharedPtr slave) +void BondController::addCustomLink(std::string& policyAlias, SharedPtr link) { - Mutex::Lock _l(_slaves_m); - _slaveDefinitions[policyAlias].push_back(slave); - auto search = _interfaceToSlaveMap[policyAlias].find(slave->ifname()); - if (search == _interfaceToSlaveMap[policyAlias].end()) { - slave->setAsUserSpecified(true); - _interfaceToSlaveMap[policyAlias].insert(std::pair>(slave->ifname(), slave)); + Mutex::Lock _l(_links_m); + _linkDefinitions[policyAlias].push_back(link); + auto search = _interfaceToLinkMap[policyAlias].find(link->ifname()); + if (search == _interfaceToLinkMap[policyAlias].end()) { + link->setAsUserSpecified(true); + 
_interfaceToLinkMap[policyAlias].insert(std::pair>(link->ifname(), link)); } else { - fprintf(stderr, "slave already exists=%s\n", slave->ifname().c_str()); - // Slave is already defined, overlay user settings + fprintf(stderr, "link already exists=%s\n", link->ifname().c_str()); + // Link is already defined, overlay user settings } } @@ -115,20 +115,20 @@ SharedPtr BondController::createTransportTriggeredBond(const RuntimeEnviro /** * Determine if user has specified anything that could affect the bonding policy's decisions */ - if (_interfaceToSlaveMap.count(bond->policyAlias())) { - std::map >::iterator it = _interfaceToSlaveMap[bond->policyAlias()].begin(); - while (it != _interfaceToSlaveMap[bond->policyAlias()].end()) { + if (_interfaceToLinkMap.count(bond->policyAlias())) { + std::map >::iterator it = _interfaceToLinkMap[bond->policyAlias()].begin(); + while (it != _interfaceToLinkMap[bond->policyAlias()].end()) { if (it->second->isUserSpecified()) { - bond->_userHasSpecifiedSlaves = true; + bond->_userHasSpecifiedLinks = true; } if (it->second->isUserSpecified() && it->second->primary()) { - bond->_userHasSpecifiedPrimarySlave = true; + bond->_userHasSpecifiedPrimaryLink = true; } if (it->second->isUserSpecified() && it->second->userHasSpecifiedFailoverInstructions()) { bond->_userHasSpecifiedFailoverInstructions = true; } if (it->second->isUserSpecified() && (it->second->speed() > 0)) { - bond->_userHasSpecifiedSlaveSpeeds = true; + bond->_userHasSpecifiedLinkSpeeds = true; } ++it; } @@ -138,16 +138,16 @@ SharedPtr BondController::createTransportTriggeredBond(const RuntimeEnviro return SharedPtr(); } -SharedPtr BondController::getSlaveBySocket(const std::string& policyAlias, uint64_t localSocket) +SharedPtr BondController::getLinkBySocket(const std::string& policyAlias, uint64_t localSocket) { - Mutex::Lock _l(_slaves_m); + Mutex::Lock _l(_links_m); char ifname[16]; _phy->getIfName((PhySocket *) ((uintptr_t)localSocket), ifname, 16); std::string 
ifnameStr(ifname); - auto search = _interfaceToSlaveMap[policyAlias].find(ifnameStr); - if (search == _interfaceToSlaveMap[policyAlias].end()) { - SharedPtr s = new Slave(ifnameStr, 0, 0, 0, 0, 0, true, ZT_MULTIPATH_SLAVE_MODE_SPARE, "", 0.0); - _interfaceToSlaveMap[policyAlias].insert(std::pair >(ifnameStr, s)); + auto search = _interfaceToLinkMap[policyAlias].find(ifnameStr); + if (search == _interfaceToLinkMap[policyAlias].end()) { + SharedPtr s = new Link(ifnameStr, 0, 0, 0, 0, 0, true, ZT_MULTIPATH_SLAVE_MODE_SPARE, "", 0.0); + _interfaceToLinkMap[policyAlias].insert(std::pair >(ifnameStr, s)); return s; } else { @@ -155,14 +155,14 @@ SharedPtr BondController::getSlaveBySocket(const std::string& policyAlias } } -SharedPtr BondController::getSlaveByName(const std::string& policyAlias, const std::string& ifname) +SharedPtr BondController::getLinkByName(const std::string& policyAlias, const std::string& ifname) { - Mutex::Lock _l(_slaves_m); - auto search = _interfaceToSlaveMap[policyAlias].find(ifname); - if (search != _interfaceToSlaveMap[policyAlias].end()) { + Mutex::Lock _l(_links_m); + auto search = _interfaceToLinkMap[policyAlias].find(ifname); + if (search != _interfaceToLinkMap[policyAlias].end()) { return search->second; } - return SharedPtr(); + return SharedPtr(); } bool BondController::allowedToBind(const std::string& ifname) @@ -172,18 +172,18 @@ bool BondController::allowedToBind(const std::string& ifname) if (!_defaultBondingPolicy) { return true; // no restrictions } - Mutex::Lock _l(_slaves_m); - if (_interfaceToSlaveMap.empty()) { + Mutex::Lock _l(_links_m); + if (_interfaceToLinkMap.empty()) { return true; // no restrictions } - std::map > >::iterator policyItr = _interfaceToSlaveMap.begin(); - while (policyItr != _interfaceToSlaveMap.end()) { - std::map >::iterator slaveItr = policyItr->second.begin(); - while (slaveItr != policyItr->second.end()) { - if (slaveItr->first == ifname) { + std::map > >::iterator policyItr = 
_interfaceToLinkMap.begin(); + while (policyItr != _interfaceToLinkMap.end()) { + std::map >::iterator linkItr = policyItr->second.begin(); + while (linkItr != policyItr->second.end()) { + if (linkItr->first == ifname) { return true; } - ++slaveItr; + ++linkItr; } ++policyItr; } diff --git a/node/BondController.hpp b/node/BondController.hpp index 95fbf81fc..2e0c15072 100644 --- a/node/BondController.hpp +++ b/node/BondController.hpp @@ -19,7 +19,7 @@ #include "SharedPtr.hpp" #include "../osdep/Phy.hpp" -#include "../osdep/Slave.hpp" +#include "../osdep/Link.hpp" namespace ZeroTier { @@ -36,9 +36,9 @@ public: BondController(const RuntimeEnvironment *renv); /** - * @return Whether this slave is permitted to become a member of a bond. + * @return Whether this link is permitted to become a member of a bond. */ - bool slaveAllowed(std::string &policyAlias, SharedPtr slave); + bool linkAllowed(std::string &policyAlias, SharedPtr link); /** * @return The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements. @@ -103,12 +103,12 @@ public: static int defaultBondingPolicy() { return _defaultBondingPolicy; } /** - * Add a user-defined slave to a given bonding policy. + * Add a user-defined link to a given bonding policy. * * @param policyAlias User-defined custom name for variant of bonding policy - * @param slave Pointer to new slave definition + * @param link Pointer to new link definition */ - void addCustomSlave(std::string& policyAlias, SharedPtr slave); + void addCustomLink(std::string& policyAlias, SharedPtr link); /** * Add a user-defined bonding policy that is based on one of the standard types. @@ -145,22 +145,22 @@ public: void processBackgroundTasks(void *tPtr, int64_t now); /** - * Gets a reference to a physical slave definition given a policy alias and a local socket. + * Gets a reference to a physical link definition given a policy alias and a local socket. 
* * @param policyAlias Policy in use * @param localSocket Local source socket - * @return Physical slave definition + * @return Physical link definition */ - SharedPtr getSlaveBySocket(const std::string& policyAlias, uint64_t localSocket); + SharedPtr getLinkBySocket(const std::string& policyAlias, uint64_t localSocket); /** - * Gets a reference to a physical slave definition given its human-readable system name. + * Gets a reference to a physical link definition given its human-readable system name. * * @param policyAlias Policy in use * @param ifname Alphanumeric human-readable name - * @return Physical slave definition + * @return Physical link definition */ - SharedPtr getSlaveByName(const std::string& policyAlias, const std::string& ifname); + SharedPtr getLinkByName(const std::string& policyAlias, const std::string& ifname); /** * @param ifname Name of interface that we want to know if we can bind to @@ -175,7 +175,7 @@ private: const RuntimeEnvironment *RR; Mutex _bonds_m; - Mutex _slaves_m; + Mutex _links_m; /** * The last time that the bond controller updated the set of bonds. 
@@ -213,14 +213,14 @@ private: std::map > _bondPolicyTemplates; /** - * Set of slaves defined for a given bonding policy + * Set of links defined for a given bonding policy */ - std::map > > _slaveDefinitions; + std::map > > _linkDefinitions; /** - * Set of slave objects mapped to their physical interfaces + * Set of link objects mapped to their physical interfaces */ - std::map > > _interfaceToSlaveMap; + std::map > > _interfaceToLinkMap; // TODO: Remove uint64_t bondStartTime; diff --git a/node/Constants.hpp b/node/Constants.hpp index 9f2cd80a5..9b1d21f9f 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -405,12 +405,12 @@ #define ZT_FLOW_MAX_COUNT (1024*64) /** - * How often flows are rebalanced across slave interfaces (if at all) + * How often flows are rebalanced across link (if at all) */ #define ZT_FLOW_MIN_REBALANCE_INTERVAL 5000 /** - * How often flows are rebalanced across slave interfaces (if at all) + * How often flows are rebalanced across link (if at all) */ #define ZT_FLOW_REBALANCE_INTERVAL 5000 @@ -428,7 +428,7 @@ /** * Minimum amount of time (since a previous transition) before the active-backup bonding - * policy is allowed to transition to a different slave. Only valid for active-backup. + * policy is allowed to transition to a different link. Only valid for active-backup. 
*/ #define ZT_MULTIPATH_MIN_ACTIVE_BACKUP_AUTOFLOP_INTERVAL 10000 diff --git a/node/Node.cpp b/node/Node.cpp index e71c1424c..16484dac0 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -501,14 +501,12 @@ ZT_PeerList *Node::peers() const p->pathCount = 0; for(std::vector< SharedPtr >::iterator path(paths.begin());path!=paths.end();++path) { memcpy(&(p->paths[p->pathCount].address),&((*path)->address()),sizeof(struct sockaddr_storage)); - //memcpy(&(p->paths[p->pathCount].ifname,&((*path)->slave()),32);) p->paths[p->pathCount].localSocket = (*path)->localSocket(); p->paths[p->pathCount].lastSend = (*path)->lastOut(); p->paths[p->pathCount].lastReceive = (*path)->lastIn(); p->paths[p->pathCount].trustedPathId = RR->topology->getOutboundPathTrust((*path)->address()); p->paths[p->pathCount].expired = 0; p->paths[p->pathCount].preferred = ((*path) == bestp) ? 1 : 0; - //p->paths[p->pathCount].age = (*path)->age(_now); p->paths[p->pathCount].scope = (*path)->ipScope(); ++p->pathCount; } diff --git a/node/Path.hpp b/node/Path.hpp index 1cbd588bc..5a3a1ef82 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -29,7 +29,7 @@ #include "Packet.hpp" #include "RingBuffer.hpp" -#include "../osdep/Slave.hpp" +#include "../osdep/Link.hpp" /** * Maximum return value of preferenceRank() @@ -103,7 +103,7 @@ public: _downDelay(0), _ipvPref(0), _mode(0), - _onlyPathOnSlave(false), + _onlyPathOnLink(false), _enabled(false), _bonded(false), _negotiated(false), @@ -152,7 +152,7 @@ public: _downDelay(0), _ipvPref(0), _mode(0), - _onlyPathOnSlave(false), + _onlyPathOnLink(false), _enabled(false), _bonded(false), _negotiated(false), @@ -431,10 +431,10 @@ public: } /** - * @return True if a path is preferred over another on the same physical slave (according to user pref.) + * @return True if a path is preferred over another on the same physical link (according to user pref.) 
*/ inline bool preferred() { - return _onlyPathOnSlave + return _onlyPathOnLink || (_addr.isV4() && (_ipvPref == 4 || _ipvPref == 46)) || (_addr.isV6() && (_ipvPref == 6 || _ipvPref == 64)); } @@ -549,22 +549,22 @@ private: uint32_t _downDelay; /** - * IP version preference inherited from the physical slave. + * IP version preference inherited from the physical link. */ uint8_t _ipvPref; /** - * Mode inherited from the physical slave. + * Mode inherited from the physical link. */ uint8_t _mode; /** - * IP version preference inherited from the physical slave. + * IP version preference inherited from the physical link. */ - bool _onlyPathOnSlave; + bool _onlyPathOnLink; /** - * Enabled state inherited from the physical slave. + * Enabled state inherited from the physical link. */ bool _enabled; diff --git a/node/Peer.cpp b/node/Peer.cpp index 30911b43c..565118867 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -55,8 +55,8 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _remoteMultipathSupported(false), _canUseMultipath(false), _shouldCollectPathStatistics(0), - _lastComputedAggregateMeanLatency(0), - _bondingPolicy(0) + _bondingPolicy(0), + _lastComputedAggregateMeanLatency(0) { if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) { throw ZT_EXCEPTION_INVALID_ARGUMENT; diff --git a/osdep/Binder.hpp b/osdep/Binder.hpp index 0fde33452..8f572a4f5 100644 --- a/osdep/Binder.hpp +++ b/osdep/Binder.hpp @@ -347,11 +347,11 @@ public: } } - // Generate set of unique interface names (used for formation of logical slave set in multipath code) + // Generate set of unique interface names (used for formation of logical link set in multipath code) for(std::map::const_iterator ii(localIfAddrs.begin());ii!=localIfAddrs.end();++ii) { - slaveIfNames.insert(ii->second); + linkIfNames.insert(ii->second); } - for (std::set::iterator si(slaveIfNames.begin());si!=slaveIfNames.end();si++) { + for (std::set::iterator 
si(linkIfNames.begin());si!=linkIfNames.end();si++) { bool bFoundMatch = false; for(std::map::const_iterator ii(localIfAddrs.begin());ii!=localIfAddrs.end();++ii) { if (ii->second == *si) { @@ -360,7 +360,7 @@ public: } } if (!bFoundMatch) { - slaveIfNames.erase(si); + linkIfNames.erase(si); } } @@ -461,15 +461,15 @@ public: return false; } - inline std::set getSlaveInterfaceNames() + inline std::set getLinkInterfaceNames() { Mutex::Lock _l(_lock); - return slaveIfNames; + return linkIfNames; } private: - std::set slaveIfNames; + std::set linkIfNames; _Binding _bindings[ZT_BINDER_MAX_BINDINGS]; std::atomic _bindingCount; Mutex _lock; diff --git a/osdep/Slave.hpp b/osdep/Link.hpp similarity index 58% rename from osdep/Slave.hpp rename to osdep/Link.hpp index a4caa983f..6cbbbdfbe 100644 --- a/osdep/Slave.hpp +++ b/osdep/Link.hpp @@ -11,8 +11,8 @@ */ /****/ -#ifndef ZT_SLAVE_HPP -#define ZT_SLAVE_HPP +#ifndef ZT_LINK_HPP +#define ZT_LINK_HPP #include @@ -20,13 +20,13 @@ namespace ZeroTier { -class Slave +class Link { - friend class SharedPtr; + friend class SharedPtr; public: - Slave() {} + Link() {} /** * @@ -35,60 +35,60 @@ public: * @param speed * @param enabled * @param mode - * @param failoverToSlaveStr + * @param failoverToLinkStr * @param userSpecifiedAlloc */ - Slave(std::string& ifnameStr, + Link(std::string& ifnameStr, uint8_t ipvPref, uint32_t speed, - uint32_t slaveMonitorInterval, + uint32_t linkMonitorInterval, uint32_t upDelay, uint32_t downDelay, bool enabled, uint8_t mode, - std::string failoverToSlaveStr, + std::string failoverToLinkStr, float userSpecifiedAlloc) : _ifnameStr(ifnameStr), _ipvPref(ipvPref), _speed(speed), _relativeSpeed(0), - _slaveMonitorInterval(slaveMonitorInterval), + _linkMonitorInterval(linkMonitorInterval), _upDelay(upDelay), _downDelay(downDelay), _enabled(enabled), _mode(mode), - _failoverToSlaveStr(failoverToSlaveStr), + _failoverToLinkStr(failoverToLinkStr), _userSpecifiedAlloc(userSpecifiedAlloc), _isUserSpecified(false) 
{} /** - * @return The string representation of this slave's underlying interface's system name. + * @return The string representation of this link's underlying interface's system name. */ inline std::string ifname() { return _ifnameStr; } /** - * @return Whether this slave is designated as a primary. + * @return Whether this link is designated as a primary. */ inline bool primary() { return _mode == ZT_MULTIPATH_SLAVE_MODE_PRIMARY; } /** - * @return Whether this slave is designated as a spare. + * @return Whether this link is designated as a spare. */ inline bool spare() { return _mode == ZT_MULTIPATH_SLAVE_MODE_SPARE; } /** - * @return The name of the slave interface that should be used in the event of a failure. + * @return The name of the link interface that should be used in the event of a failure. */ - inline std::string failoverToSlave() { return _failoverToSlaveStr; } + inline std::string failoverToLink() { return _failoverToLinkStr; } /** - * @return Whether this slave interface was specified by the user or auto-detected. + * @return Whether this link interface was specified by the user or auto-detected. */ inline bool isUserSpecified() { return _isUserSpecified; } /** - * Signify that this slave was specified by the user and not the result of auto-detection. + * Signify that this link was specified by the user and not the result of auto-detection. * * @param isUserSpecified */ @@ -97,59 +97,59 @@ public: /** * @return Whether or not the user has specified failover instructions. */ - inline bool userHasSpecifiedFailoverInstructions() { return _failoverToSlaveStr.length(); } + inline bool userHasSpecifiedFailoverInstructions() { return _failoverToLinkStr.length(); } /** - * @return The speed of the slave relative to others in the bond. + * @return The speed of the link relative to others in the bond. */ inline uint8_t relativeSpeed() { return _relativeSpeed; } /** - * Sets the speed of the slave relative to others in the bond. 
+ * Sets the speed of the link relative to others in the bond. * - * @param relativeSpeed The speed relative to the rest of the slave interfaces. + * @param relativeSpeed The speed relative to the rest of the link. */ inline void setRelativeSpeed(uint8_t relativeSpeed) { _relativeSpeed = relativeSpeed; } /** - * Sets the speed of the slave relative to others in the bond. + * Sets the speed of the link relative to others in the bond. * * @param relativeSpeed */ - inline void setMonitorInterval(uint32_t interval) { _slaveMonitorInterval = interval; } + inline void setMonitorInterval(uint32_t interval) { _linkMonitorInterval = interval; } /** - * @return The absolute speed of the slave interface (as specified by the user.) + * @return The absolute speed of the link (as specified by the user.) */ - inline uint32_t monitorInterval() { return _slaveMonitorInterval; } + inline uint32_t monitorInterval() { return _linkMonitorInterval; } /** - * @return The absolute speed of the slave interface (as specified by the user.) + * @return The absolute speed of the link (as specified by the user.) */ inline uint32_t speed() { return _speed; } /** - * @return The address preference for this slave interface (as specified by the user.) + * @return The address preference for this link (as specified by the user.) */ inline uint8_t ipvPref() { return _ipvPref; } /** - * @return The mode (e.g. primary/spare) for this slave interface (as specified by the user.) + * @return The mode (e.g. primary/spare) for this link (as specified by the user.) */ inline uint8_t mode() { return _mode; } /** - * @return The upDelay parameter for all paths on this slave interface. + * @return The upDelay parameter for all paths on this link. */ inline uint32_t upDelay() { return _upDelay; } /** - * @return The downDelay parameter for all paths on this slave interface. + * @return The downDelay parameter for all paths on this link. 
*/ inline uint32_t downDelay() { return _downDelay; } /** - * @return Whether this slave is enabled or disabled + * @return Whether this link is enabled or disabled */ inline uint8_t enabled() { return _enabled; } @@ -173,21 +173,21 @@ private: uint8_t _ipvPref; /** - * User-specified speed of this slave/link + * User-specified speed of this link */ uint32_t _speed; /** - * Speed relative to other specified slaves/links (computed by Bond) + * Speed relative to other specified links (computed by Bond) */ uint8_t _relativeSpeed; /** - * User-specified interval for monitoring paths on this specific slave + * User-specified interval for monitoring paths on this specific link * instead of using the more generic interval specified for the entire * bond. */ - uint32_t _slaveMonitorInterval; + uint32_t _linkMonitorInterval; /** * How long before a path is considered to be usable after coming online. (when using policies that @@ -202,20 +202,20 @@ private: uint32_t _downDelay; /** - * Whether this slave is enabled, or (disabled (possibly bad config)) + * Whether this link is enabled, or (disabled (possibly bad config)) */ uint8_t _enabled; /** - * Whether this slave is designated as a primary, a spare, or no preference. + * Whether this link is designated as a primary, a spare, or no preference. */ uint8_t _mode; /** - * The specific name of the interface to be used in the event that this - * slave fails. + * The specific name of the link to be used in the event that this + * link fails. */ - std::string _failoverToSlaveStr; + std::string _failoverToLinkStr; /** * User-specified allocation @@ -223,7 +223,7 @@ private: float _userSpecifiedAlloc; /** - * Whether or not this slave was created as a result of manual user specification. This is + * Whether or not this link was created as a result of manual user specification. This is * important to know because certain policy decisions are dependent on whether the user * intents to use a specific set of interfaces. 
*/ diff --git a/service/MULTIPATH.md b/service/MULTIPATH.md index 8a9e84603..106125a72 100644 --- a/service/MULTIPATH.md +++ b/service/MULTIPATH.md @@ -1,17 +1,17 @@ -### **2.1.5.** Link aggregation +### Bonding (link aggregation) -Link aggregation allows the simultaneous (or conditional) use of multiple physical links to enable increased throughput, load balancing, redundancy, and fault tolerance. There are a variety of standard policies available that can be used right out of the box with little to no configuration. These policies are directly inspired by [the policies offered by the Linux kernel](https://www.kernel.org/doc/Documentation/networking/bonding.txt). +Link aggregation allows the simultaneous (or conditional) use of multiple physical links to enable increased throughput, load balancing, redundancy, and fault tolerance. There are a variety of standard policies available that can be used right out of the box with little to no configuration. These policies are directly inspired by [the policies offered by the Linux kernel](https://www.kernel.org/doc/Documentation/networking/bonding.txt) but are now offered in user-space and hence available on all platforms that ZeroTier supports. -#### Standard Policies +#### Standard policies -| Policy name | Fault tolerance | Min. failover (sec.) | Default Failover (sec.) 
| Balancing | Aggregation efficiency | Redundancy | Sequence Reordering | -|--------------------|:---------------------:|---------------------:|---------------------:|----------------------:|-----------------------:|-----------:|--------------------:| -| `none` | None | `60+` | `60+` | none | `none` |1 | No -| `active-backup` | Brief interruption | `0.25` | `10` | none | `low` |1 | Only during failover -| `broadcast` | Fully tolerant | `N/A` | `N/A` | none | `very low` |N | Often -| `balance-rr` | Self-healing | `0.25` | `10` | packet-based | `high` |1 | Often -| `balance-xor` | Self-healing | `0.25` | `10` | flow-based | `very high` |1 | Only during failover -| `balance-aware` | Self-healing | `0.25` | `10` | *adaptive* flow-based | `very high` |1 | Only during failover and re-balance +| Policy name | Fault tolerance | Min. failover (sec.) | Default Failover (sec.)| Balancing | Aggregation efficiency | Redundancy | Sequence Reordering | +|--------------------|:---------------------:|---------------------:|-----------------------:|----------------------:|-----------------------:|-----------:|--------------------:| +| `none` | None | `60+` | `60+` | none | `none` |1 | No +| `active-backup` | Brief interruption | `0.25` | `10` | none | `low` |1 | Only during failover +| `broadcast` | Fully tolerant | `N/A` | `N/A` | none | `very low` |N | Often +| `balance-rr` | Self-healing | `0.25` | `10` | packet-based | `high` |1 | Often +| `balance-xor` | Self-healing | `0.25` | `10` | flow-based | `very high` |1 | Only during failover +| `balance-aware` | Self-healing | `0.25` | `10` | *adaptive* flow-based | `very high` |1 | Only during failover and re-balance A policy can be used easily without specifying any additional parameters: @@ -23,7 +23,7 @@ A policy can be used easily without specifying any additional parameters: } ``` -#### Custom Policies +#### Custom policies To customize a bonding policy for your use-case simply specify a `basePolicy` and override chosen 
parameters. For example, to create a more aggressive `active-backup` policy with low monitoring overhead that will failover `0.250` seconds after it detects a link failure, one could do the following: @@ -45,11 +45,11 @@ To customize a bonding policy for your use-case simply specify a `basePolicy` an } ``` -#### Specifying Slave interfaces +#### Specifying links -Available system network interfaces are referred to as `slaves`. Different sets of slaves can be constructed for different bonding policies and used simultaneously. One can specify the links that ZeroTier should use in any given bonding policy simply by providing an array of slaves with names corresponding to interface names. If a user doesn't specify a set of interfaces to use, ZeroTier will assume every system interface is available for use. However, if the user **does** specify a set of interfaces, ZeroTier will only use what is specified. The same applies to failover rules, if none are specified, ZeroTier will failover to any operational slave. On the other hand, if the user does specify failover rules and there is ever a situation where a slave is available for usage but does not fit within the rules specified by the user, it will go unused. +Bonds are composed of multiple `links`. Different sets of links can be constructed for different bonding policies and used simultaneously. One can specify the links that ZeroTier should use in any given bonding policy simply by providing an array of links with names corresponding to interface names. If a user doesn't specify a set of interfaces to use, ZeroTier will assume every system interface is available for use. However, if the user **does** specify a set of interfaces, ZeroTier will only use what is specified. The same applies to failover rules, if none are specified, ZeroTier will failover to any operational link. 
On the other hand, if the user does specify failover rules and there is ever a situation where a link is available for usage but does not fit within the rules specified by the user, it will go unused. -To specify that ZeroTier should only use `eth0` and `eth1` as primary slaves, and `eth2` as a backup spare and that it should prefer IPv4 over IPv6 except on `eth2` where only IPv6 is allowed: +To specify that ZeroTier should only use `eth0` and `eth1` as primary links, and `eth2` as a backup spare and that it should prefer IPv4 over IPv6 except on `eth2` where only IPv6 is allowed: ``` { @@ -57,7 +57,7 @@ To specify that ZeroTier should only use `eth0` and `eth1` as primary slaves, an "defaultBondingPolicy": "aggressive-active-backup", "policies": { "aggressive-active-backup": { - "slaves": { + "links": { "eth0": { "ipvPref": 46, "failoverTo": "eth2", @@ -79,27 +79,27 @@ To specify that ZeroTier should only use `eth0` and `eth1` as primary slaves, an } ``` -Additional slave-specific parameters: +Additional link-specific parameters: ``` -"slaves": +"links": { "interfaceName": /* System-name of the network interface. */ { - "failoverInterval": 0-65535, /* (optional) How quickly a path on this slave should failover after a detected failure. */ - "ipvPref": [0,4,6,46,64], /* (optional) IP version preference for detected paths on a slave. */ - "speed": 0-1000000, /* (optional) How fast this slave is (in arbitrary units). This is a useful way to manually allocate a bond. */ + "failoverInterval": 0-65535, /* (optional) How quickly a path on this link should failover after a detected failure. */ + "ipvPref": [0,4,6,46,64], /* (optional) IP version preference for detected paths on a link. */ + "speed": 0-1000000, /* (optional) How fast this link is (in arbitrary units). This is a useful way to manually allocate a bond. */ "alloc": 0-255, /* (optional) A relative value representing a desired allocation. 
*/ "upDelay": 0-65535, /* (optional) How long after a path becomes alive before it is added to the bond. */ "downDelay": 0-65535, /* (optional) How long after a path fails before it is removed from the bond. */ - "failoverTo": "spareInterfaceName", /* (optional) Which slave should be used next after a failure of this slave. */ - "enabled": true|false, /* (optional) Whether any paths on this slave are allowed to be used this bond. */ - "mode": "primary"|"spare" /* (optional) Whether this slave is used by default or only after failover events. */ + "failoverTo": "spareInterfaceName", /* (optional) Which link should be used next after a failure of this link. */ + "enabled": true|false, /* (optional) Whether any paths on this link are allowed to be used this bond. */ + "mode": "primary"|"spare" /* (optional) Whether this link is used by default or only after failover events. */ } } ``` -#### Peer-specific Bonds +#### Peer-specific bonds It is possible to direct ZeroTier to form a certain type of bond with specific peers of your choice. For instance, if one were to want `active-backup` by default but for certain peers to be bonded with a custom load-balanced bond such as `my-custom-balance-aware` one could do the following: @@ -127,17 +127,17 @@ It is possible to direct ZeroTier to form a certain type of bond with specific p } ``` -#### Active Backup (`active-backup`) +#### Active backup (`active-backup`) Traffic is sent only on (one) path at any given time. A different path becomes active if the current path fails. This mode provides fault tolerance with a nearly immediate fail-over. This mode **does not** increase total throughput. - - `mode`: `primary, spare` Slave option which specifies which slave is the primary device. The specified device is intended to always be the active slave while it is available. There are exceptions to this behavior when using different `slaveSelectMethod` modes. There can only be one `primary` slave in this bonding policy. 
+ - `mode`: `primary, spare` Link option which specifies which link is the primary device. The specified device is intended to always be the active link while it is available. There are exceptions to this behavior when using different `linkSelectMethod` modes. There can only be one `primary` link in this bonding policy. - - `slaveSelectMethod`: Specifies the selection policy for the active slave during failure and/or recovery events. This is similar to the Linux Kernel's `primary_reselect` option but with a minor extension: - - `optimize`: **(default if user provides no failover guidance)** The primary slave can change periodically if a superior path is detected. - - `always`: **(default when slaves are explicitly specified)**: Primary slave regains status as active slave whenever it comes back up. - - `better`: Primary slave regains status as active slave when it comes back up and (if) it is better than the currently-active slave. - - `failure`: Primary slave regains status as active slave only if the currently-active slave fails. + - `linkSelectMethod`: Specifies the selection policy for the active link during failure and/or recovery events. This is similar to the Linux Kernel's `primary_reselect` option but with a minor extension: + - `optimize`: **(default if user provides no failover guidance)** The primary link can change periodically if a superior path is detected. + - `always`: **(default when links are explicitly specified)**: Primary link regains status as active link whenever it comes back up. + - `better`: Primary link regains status as active link when it comes back up and (if) it is better than the currently-active link. + - `failure`: Primary link regains status as active link only if the currently-active link fails. ``` { @@ -146,8 +146,8 @@ Traffic is sent only on (one) path at any given time. 
A different path becomes a "defaultBondingPolicy": "active-backup", "active-backup": { - "slaveSelectMethod": "always", - "slaves": + "linkSelectMethod": "always", + "links": { "eth0": { "failoverTo": "eth1", "mode": "primary" }, "eth1": { "mode": "spare" }, @@ -163,17 +163,17 @@ Traffic is sent only on (one) path at any given time. A different path becomes a Traffic is sent on (all) available paths simultaneously. This mode provides fault tolerance and effectively immediate failover due to transmission redundancy. This mode is a poor utilization of throughput resources and will **not** increase throughput but can prevent packet loss during a link failure. The only option available is `dedup` which will de-duplicate all packets on the receiving end if set to `true`. -#### Balance Round Robin (`balance-rr`) +#### Balance round robin (`balance-rr`) -Traffic is striped across multiple paths. Offers partial fault tolerance immediately, full fault tolerance eventually. This policy is unaware of protocols and is primarily intended for use with protocols that are not sensitive to reordering delays. The only option available for this policy is `packetsPerSlave` which specifies the number of packets to transmit via a path before moving to the next in the RR sequence. When set to `0` a path is chosen at random for each outgoing packet. The default value is `8`, low values can begin to add overhead to packet processing. +Traffic is striped across multiple paths. Offers partial fault tolerance immediately, full fault tolerance eventually. This policy is unaware of protocols and is primarily intended for use with protocols that are not sensitive to reordering delays. The only option available for this policy is `packetsPerLink` which specifies the number of packets to transmit via a path before moving to the next in the RR sequence. When set to `0` a path is chosen at random for each outgoing packet. 
The default value is `8`, low values can begin to add overhead to packet processing. #### Balance XOR (`balance-xor`, similar to the Linux kernel's [balance-xor](https://www.kernel.org/doc/Documentation/networking/bonding.txt) with `xmit_hash_policy=layer3+4`) -Traffic is categorized into *flows* based on *source port*, *destination port*, and *protocol type* these flows are then hashed onto available slaves. Each flow will persist on its assigned slave interface for its entire life-cycle. Traffic that does not have an assigned port (such as ICMP pings) will be randomly distributed across slaves. The hash function is simply: `src_port ^ dst_port ^ proto`. +Traffic is categorized into *flows* based on *source port*, *destination port*, and *protocol type* these flows are then hashed onto available links. Each flow will persist on its assigned link interface for its entire life-cycle. Traffic that does not have an assigned port (such as ICMP pings) will be randomly distributed across links. The hash function is simply: `src_port ^ dst_port ^ proto`. -#### Balance Aware (`balance-aware`, similar to Linux kernel's [`balance-*lb`](https://www.kernel.org/doc/Documentation/networking/bonding.txt) modes) +#### Balance aware (`balance-aware`, similar to Linux kernel's [`balance-*lb`](https://www.kernel.org/doc/Documentation/networking/bonding.txt) modes) -Traffic is dynamically allocated and balanced across multiple slaves simultaneously according to the target allocation. Options allow for *packet* or *flow-based* processing, and active-flow reassignment. Flows mediated over a recently failed slaves will be reassigned in a manner that respects the target allocation of the bond. An optional `balancePolicy` can be specified with the following effects: `flow-dynamic` (default) will hash flows onto slaves according to target allocation and may perform periodic re-assignments in order to preserve balance. 
`flow-static`, will hash flows onto slaves according to target allocation but will not re-assign flows unless a failure occurs or the slave is no longer operating within acceptable parameters. And lastly `packet` which simply load balances packets across slaves according to target allocation but with no concern for sequence reordering. +Traffic is dynamically allocated and balanced across multiple links simultaneously according to the target allocation. Options allow for *packet* or *flow-based* processing, and active-flow reassignment. Flows mediated over a recently failed links will be reassigned in a manner that respects the target allocation of the bond. An optional `balancePolicy` can be specified with the following effects: `flow-dynamic` (default) will hash flows onto links according to target allocation and may perform periodic re-assignments in order to preserve balance. `flow-static`, will hash flows onto links according to target allocation but will not re-assign flows unless a failure occurs or the link is no longer operating within acceptable parameters. And lastly `packet` which simply load balances packets across links according to target allocation but with no concern for sequence reordering. ``` { @@ -187,21 +187,21 @@ Traffic is dynamically allocated and balanced across multiple slaves simultaneou } ``` -#### Link Quality +#### Link quality ZeroTier measures various properties of a link (such as latency, throughput, jitter, packet loss ratio, etc) in order to arrive at a quality estimate. This estimate is used by bonding policies to make allocation and failover decisions: | Policy name | Role | |:---------------|:-----| -|`active-backup` | Determines the order of the failover queue. And if `activeReselect=optimize` whether a new active slave is selected. | +|`active-backup` | Determines the order of the failover queue. And if `activeReselect=optimize` whether a new active link is selected. | |`broadcast` | Does not use quality measurements. 
| -|`balance-rr` | May trigger removal of slave from bond. | -|`balance-xor` | May trigger removal of slave from bond. | -|`balance-aware` | Informs flow assignments and (re-)assignments. May trigger removal of slave from bond. | +|`balance-rr` | May trigger removal of link from bond. | +|`balance-xor` | May trigger removal of link from bond. | +|`balance-aware` | Informs flow assignments and (re-)assignments. May trigger removal of link from bond. | -A slave's eligibility for being included in a bond is dependent on more than perceived quality. If a path on a slave begins to exhibit disruptive behavior such as extremely high packet loss, corruption, or periodic inability to process traffic it will be removed from the bond, its traffic will be appropriately reallocated and it will be punished. Punishments gradually fade and a slave can be readmitted to the bond over time. However, punishments increase exponentially if applied more than once within a given window of time. +A link's eligibility for being included in a bond is dependent on more than perceived quality. If a path on a link begins to exhibit disruptive behavior such as extremely high packet loss, corruption, or periodic inability to process traffic it will be removed from the bond, its traffic will be appropriately reallocated and it will be punished. Punishments gradually fade and a link can be readmitted to the bond over time. However, punishments increase exponentially if applied more than once within a given window of time. -#### Asymmetric Links +#### Asymmetric links In cases where it is necessary to bond physical links that vary radically in terms of cost, throughput, latency, and or reliability, there are a couple of ways to automatically (or manually) allocate traffic among them. Traffic distribution and balancing can be either `packet` or `flow` based. 
Where packet-based is suitable for protocols not susceptible to reordering penalties and flow-based is suitable for protocols such as TCP where it is desirable to keep a conversation on a single link unless we can't avoid having to re-assign it. Additionally, a *target allocation* of traffic used by the bonding policy can be derived/specified in the following ways: @@ -215,9 +215,6 @@ In cases where it is necessary to bond physical links that vary radically in ter "pdv": 0.3, /* Packet delay variance in milliseconds. Similar to jitter */ "plr": 0.1, /* Packet loss ratio */ "per": 0.1, /* Packet error ratio */ - "thr": 0.0, /* Mean throughput */ - "thm": 0.0, /* Maximum observed throughput */ - "thv": 0.0, /* Variance of throughput */ "avl": 0.0, /* Availability */ } } @@ -225,24 +222,24 @@ In cases where it is necessary to bond physical links that vary radically in ter In the absence of user guidance ZeroTier will attempt to form an understanding of each link's speed and capacity but this value can be inaccurate if the links are not routinely saturated. Therefore we provide a way to explicitly signal the capacity of each link in terms of arbitrary but relative values: ``` -"slaves": { +"links": { "eth0": { "speed": 10000 }, "eth1": { "speed": 1000 }, "eth2": { "speed": 100 } } ``` -The user specifies allocation percentages (totaling `1.0`). In this case quality measurements will only be used to determine a slave's eligibility to be a member of a bond, now how much traffic it will carry: +The user specifies allocation percentages (totaling `1.0`). 
In this case quality measurements will only be used to determine a link's eligibility to be a member of a bond, now how much traffic it will carry: ``` -"slaves": { +"links": { "eth0": { "alloc": 0.50 }, "eth1": { "alloc": 0.25 }, "eth2": { "alloc": 0.25 } } ``` -#### Performance and Overhead Considerations +#### Performance and overhead considerations - Only packets with internal IDs divisible by `16` are included in measurements, this amounts to about `6.25%` of all traffic. - `failoverInterval` specifies how quickly failover should occur during a link failure. In order to accomplish this a combination of active and passive measurement techniques are employed which may result in `VERB_HELLO` probes being sent every `failoverInterval / 4` time units. As a mitigation `monitorStrategy` may be set to `dynamic` so that probe frequency directly correlates with native application traffic. diff --git a/service/OneService.cpp b/service/OneService.cpp index ec24f7ade..7f73e903f 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -50,7 +50,7 @@ #include "../osdep/Binder.hpp" #include "../osdep/ManagedRoute.hpp" #include "../osdep/BlockingQueue.hpp" -#include "../osdep/Slave.hpp" +#include "../osdep/Link.hpp" #include "OneService.hpp" #include "SoftwareUpdater.hpp" @@ -307,7 +307,7 @@ static void _peerBondToJson(nlohmann::json &pj,const ZT_Peer *peer) //j["ifname"] = peer->paths[i].ifname; pa.push_back(j); } - pj["slaves"] = pa; + pj["links"] = pa; } static void _moonToJson(nlohmann::json &mj,const World &world) @@ -1623,58 +1623,61 @@ public: newTemplateBond->setDownDelay(OSUtils::jsonInt(customPolicy["downDelay"],-1)); newTemplateBond->setFlowRebalanceStrategy(OSUtils::jsonInt(customPolicy["flowRebalanceStrategy"],(uint64_t)0)); newTemplateBond->setFailoverInterval(OSUtils::jsonInt(customPolicy["failoverInterval"],(uint64_t)0)); - newTemplateBond->setPacketsPerSlave(OSUtils::jsonInt(customPolicy["packetsPerSlave"],-1)); - std::string 
slaveMonitorStrategyStr(OSUtils::jsonString(customPolicy["slaveMonitorStrategy"],"")); - uint8_t slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DEFAULT; - if (slaveMonitorStrategyStr == "passive") { newTemplateBond->setSlaveMonitorStrategy(ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE); } - if (slaveMonitorStrategyStr == "active") { newTemplateBond->setSlaveMonitorStrategy(ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_ACTIVE); } - if (slaveMonitorStrategyStr == "dynamic") { newTemplateBond->setSlaveMonitorStrategy(ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC); } - // Policy-Specific slave set - json &slaves = customPolicy["slaves"]; - for (json::iterator slaveItr = slaves.begin(); slaveItr != slaves.end();++slaveItr) { - fprintf(stderr, "\t--- slave (%s)\n", slaveItr.key().c_str()); - std::string slaveNameStr(slaveItr.key()); - json &slave = slaveItr.value(); + newTemplateBond->setPacketsPerLink(OSUtils::jsonInt(customPolicy["packetsPerLink"],-1)); - bool enabled = OSUtils::jsonInt(slave["enabled"],true); - uint32_t speed = OSUtils::jsonInt(slave["speed"],0); - float alloc = (float)OSUtils::jsonDouble(slave["alloc"],0); + std::string linkMonitorStrategyStr(OSUtils::jsonString(customPolicy["linkMonitorStrategy"],"")); + uint8_t linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DEFAULT; + if (linkMonitorStrategyStr == "passive") { linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE; } + if (linkMonitorStrategyStr == "active") { linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_ACTIVE; } + if (linkMonitorStrategyStr == "dynamic") { linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; } + newTemplateBond->setLinkMonitorStrategy(linkMonitorStrategy); + + // Policy-Specific link set + json &links = customPolicy["links"]; + for (json::iterator linkItr = links.begin(); linkItr != links.end();++linkItr) { + fprintf(stderr, "\t--- link (%s)\n", linkItr.key().c_str()); + std::string linkNameStr(linkItr.key()); + json &link = 
linkItr.value(); + + bool enabled = OSUtils::jsonInt(link["enabled"],true); + uint32_t speed = OSUtils::jsonInt(link["speed"],0); + float alloc = (float)OSUtils::jsonDouble(link["alloc"],0); if (speed && alloc) { - fprintf(stderr, "error: cannot specify both speed (%d) and alloc (%f) for slave (%s), pick one, slave disabled.\n", - speed, alloc, slaveNameStr.c_str()); + fprintf(stderr, "error: cannot specify both speed (%d) and alloc (%f) for link (%s), pick one, link disabled.\n", + speed, alloc, linkNameStr.c_str()); enabled = false; } - uint32_t upDelay = OSUtils::jsonInt(slave["upDelay"],-1); - uint32_t downDelay = OSUtils::jsonInt(slave["downDelay"],-1); - uint8_t ipvPref = OSUtils::jsonInt(slave["ipvPref"],0); - uint32_t slaveMonitorInterval = OSUtils::jsonInt(slave["monitorInterval"],(uint64_t)0); - std::string failoverToStr(OSUtils::jsonString(slave["failoverTo"],"")); + uint32_t upDelay = OSUtils::jsonInt(link["upDelay"],-1); + uint32_t downDelay = OSUtils::jsonInt(link["downDelay"],-1); + uint8_t ipvPref = OSUtils::jsonInt(link["ipvPref"],0); + uint32_t linkMonitorInterval = OSUtils::jsonInt(link["monitorInterval"],(uint64_t)0); + std::string failoverToStr(OSUtils::jsonString(link["failoverTo"],"")); // Mode - std::string slaveModeStr(OSUtils::jsonString(slave["mode"],"spare")); - uint8_t slaveMode = ZT_MULTIPATH_SLAVE_MODE_SPARE; - if (slaveModeStr == "primary") { slaveMode = ZT_MULTIPATH_SLAVE_MODE_PRIMARY; } - if (slaveModeStr == "spare") { slaveMode = ZT_MULTIPATH_SLAVE_MODE_SPARE; } + std::string linkModeStr(OSUtils::jsonString(link["mode"],"spare")); + uint8_t linkMode = ZT_MULTIPATH_SLAVE_MODE_SPARE; + if (linkModeStr == "primary") { linkMode = ZT_MULTIPATH_SLAVE_MODE_PRIMARY; } + if (linkModeStr == "spare") { linkMode = ZT_MULTIPATH_SLAVE_MODE_SPARE; } // ipvPref if ((ipvPref != 0) && (ipvPref != 4) && (ipvPref != 6) && (ipvPref != 46) && (ipvPref != 64)) { - fprintf(stderr, "error: invalid ipvPref value (%d), slave disabled.\n", ipvPref); + 
fprintf(stderr, "error: invalid ipvPref value (%d), link disabled.\n", ipvPref); enabled = false; } - if (slaveMode == ZT_MULTIPATH_SLAVE_MODE_SPARE && failoverToStr.length()) { - fprintf(stderr, "error: cannot specify failover slaves for spares, slave disabled.\n"); + if (linkMode == ZT_MULTIPATH_SLAVE_MODE_SPARE && failoverToStr.length()) { + fprintf(stderr, "error: cannot specify failover links for spares, link disabled.\n"); failoverToStr = ""; enabled = false; } - _node->bondController()->addCustomSlave(customPolicyStr, new Slave(slaveNameStr,ipvPref,speed,slaveMonitorInterval,upDelay,downDelay,enabled,slaveMode,failoverToStr,alloc)); + _node->bondController()->addCustomLink(customPolicyStr, new Link(linkNameStr,ipvPref,speed,linkMonitorInterval,upDelay,downDelay,enabled,linkMode,failoverToStr,alloc)); } // TODO: This is dumb - std::string slaveSelectMethodStr(OSUtils::jsonString(customPolicy["activeReselect"],"optimize")); - if (slaveSelectMethodStr == "always") { newTemplateBond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS); } - if (slaveSelectMethodStr == "better") { newTemplateBond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_BETTER); } - if (slaveSelectMethodStr == "failure") { newTemplateBond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_FAILURE); } - if (slaveSelectMethodStr == "optimize") { newTemplateBond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE); } - if (newTemplateBond->getSlaveSelectMethod() < 0 || newTemplateBond->getSlaveSelectMethod() > 3) { - fprintf(stderr, "warning: invalid value (%s) for slaveSelectMethod, assuming mode: always\n", slaveSelectMethodStr.c_str()); + std::string linkSelectMethodStr(OSUtils::jsonString(customPolicy["activeReselect"],"optimize")); + if (linkSelectMethodStr == "always") { newTemplateBond->setLinkSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS); } + if (linkSelectMethodStr == "better") { 
newTemplateBond->setLinkSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_BETTER); } + if (linkSelectMethodStr == "failure") { newTemplateBond->setLinkSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_FAILURE); } + if (linkSelectMethodStr == "optimize") { newTemplateBond->setLinkSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE); } + if (newTemplateBond->getLinkSelectMethod() < 0 || newTemplateBond->getLinkSelectMethod() > 3) { + fprintf(stderr, "warning: invalid value (%s) for linkSelectMethod, assuming mode: always\n", linkSelectMethodStr.c_str()); } /* newBond->setPolicy(_node->bondController()->getPolicyCodeByStr(basePolicyStr)); @@ -1693,7 +1696,7 @@ public: } // Check settings if (defaultBondingPolicyStr.length() && !defaultBondingPolicy && !_node->bondController()->inUse()) { - fprintf(stderr, "error: unknown policy (%s) specified by defaultBondingPolicy, slave disabled.\n", defaultBondingPolicyStr.c_str()); + fprintf(stderr, "error: unknown policy (%s) specified by defaultBondingPolicy, link disabled.\n", defaultBondingPolicyStr.c_str()); } } From 29e7fa5c4be1a320679875231a5cb98906a4c4fe Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Mon, 6 Jul 2020 14:07:31 -0700 Subject: [PATCH 28/35] Revert to ancient path redundancy check logic --- node/Peer.cpp | 55 ++++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/node/Peer.cpp b/node/Peer.cpp index 565118867..f99396aaa 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -115,45 +115,50 @@ void Peer::received( } bool attemptToContact = false; - - int replaceIdx = ZT_MAX_PEER_NETWORK_PATHS; if ((!havePath)&&(RR->node->shouldUsePathForZeroTierTraffic(tPtr,_id.address(),path->localSocket(),path->address()))) { Mutex::Lock _l(_paths_m); + + // Paths are redunant if they duplicate an alive path to the same IP or + // with the same local socket and address family. 
+ bool redundant = false; for(unsigned int i=0;ialive(now)) && ( ((_paths[i].p->localSocket() == path->localSocket())&&(_paths[i].p->address().ss_family == path->address().ss_family)) && (_paths[i].p->address().ipsEqual2(path->address())) ) ) { - // port - if (_paths[i].p->address().port() == path->address().port()) { - replaceIdx = i; - break; - } + if ( (_paths[i].p->alive(now)) && ( ((_paths[i].p->localSocket() == path->localSocket())&&(_paths[i].p->address().ss_family == path->address().ss_family)) || (_paths[i].p->address().ipsEqual2(path->address())) ) ) { + redundant = true; + break; } - } + } else break; } - if (replaceIdx == ZT_MAX_PEER_NETWORK_PATHS) { + + if (!redundant) { + unsigned int replacePath = ZT_MAX_PEER_NETWORK_PATHS; + int replacePathQuality = 0; for(unsigned int i=0;iquality(now); + if (q > replacePathQuality) { + replacePathQuality = q; + replacePath = i; + } + } else { + replacePath = i; break; } } - } - if (replaceIdx != ZT_MAX_PEER_NETWORK_PATHS) { - if (verb == Packet::VERB_OK) { - RR->t->peerLearnedNewPath(tPtr,networkId,*this,path,packetId); - performMultipathStateCheck(now); - if (_bondToPeer) { - _bondToPeer->nominatePath(path, now); + + if (replacePath != ZT_MAX_PEER_NETWORK_PATHS) { + if (verb == Packet::VERB_OK) { + RR->t->peerLearnedNewPath(tPtr,networkId,*this,path,packetId); + _paths[replacePath].lr = now; + _paths[replacePath].p = path; + _paths[replacePath].priority = 1; + } else { + attemptToContact = true; } - _paths[replaceIdx].lr = now; - _paths[replaceIdx].p = path; - _paths[replaceIdx].priority = 1; - } else { - attemptToContact = true; } } } + if (attemptToContact) { attemptToContactAt(tPtr,path->localSocket(),path->address(),now,true); path->sent(now); From bd6c97aeb8cfc11b597217d6fe515075e49011d8 Mon Sep 17 00:00:00 2001 From: Grant Limberg Date: Mon, 6 Jul 2020 14:35:05 -0700 Subject: [PATCH 29/35] dont strip binary --- make-mac.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/make-mac.mk 
b/make-mac.mk index f7ff8759b..ada65ff77 100644 --- a/make-mac.mk +++ b/make-mac.mk @@ -99,7 +99,7 @@ mac-agent: FORCE one: $(CORE_OBJS) $(ONE_OBJS) one.o mac-agent $(CXX) $(CXXFLAGS) -o zerotier-one $(CORE_OBJS) $(ONE_OBJS) one.o $(LIBS) - $(STRIP) zerotier-one + # $(STRIP) zerotier-one ln -sf zerotier-one zerotier-idtool ln -sf zerotier-one zerotier-cli $(CODESIGN) -f -s $(CODESIGN_APP_CERT) zerotier-one From decd5add2a9ee1302f23cb901518b9c5262fc99c Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 14 Jul 2020 17:05:05 -0700 Subject: [PATCH 30/35] Fix segfault on unique interface name list generation (for multipath) --- osdep/Binder.hpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/osdep/Binder.hpp b/osdep/Binder.hpp index 8f572a4f5..8076b6e92 100644 --- a/osdep/Binder.hpp +++ b/osdep/Binder.hpp @@ -348,10 +348,11 @@ public: } // Generate set of unique interface names (used for formation of logical link set in multipath code) + // TODO: Could be gated not to run if multipath is not enabled. 
for(std::map::const_iterator ii(localIfAddrs.begin());ii!=localIfAddrs.end();++ii) { linkIfNames.insert(ii->second); } - for (std::set::iterator si(linkIfNames.begin());si!=linkIfNames.end();si++) { + for (std::set::iterator si(linkIfNames.begin());si!=linkIfNames.end();) { bool bFoundMatch = false; for(std::map::const_iterator ii(localIfAddrs.begin());ii!=localIfAddrs.end();++ii) { if (ii->second == *si) { @@ -360,7 +361,10 @@ public: } } if (!bFoundMatch) { - linkIfNames.erase(si); + linkIfNames.erase(si++); + } + else { + ++si; } } From dc784f62131b8c2774e5f75944953c0b93a6c1d2 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Tue, 21 Jul 2020 10:22:10 -0700 Subject: [PATCH 31/35] Comment out Bond and BondController debug traces --- node/Bond.cpp | 202 ++++++++++++++++++++-------------------- node/BondController.cpp | 16 ++-- service/OneService.cpp | 4 +- 3 files changed, 113 insertions(+), 109 deletions(-) diff --git a/node/Bond.cpp b/node/Bond.cpp index 0338f5195..e96355ec9 100644 --- a/node/Bond.cpp +++ b/node/Bond.cpp @@ -54,7 +54,7 @@ Bond::Bond(const RuntimeEnvironment *renv, SharedPtr originalBond, const S void Bond::nominatePath(const SharedPtr& path, int64_t now) { - char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "nominatePath: %s %s\n", getLink(path)->ifname().c_str(), pathStr); + char pathStr[128];path->address().toString(pathStr);//fprintf(stderr, "nominatePath: %s %s\n", getLink(path)->ifname().c_str(), pathStr); Mutex::Lock _l(_paths_m); if (!RR->bc->linkAllowed(_policyAlias, getLink(path))) { return; @@ -62,7 +62,7 @@ void Bond::nominatePath(const SharedPtr& path, int64_t now) bool alreadyPresent = false; for (int i=0; i& path, int64_t now) if (!alreadyPresent) { for (int i=0; ilink = RR->bc->getLinkBySocket(_policyAlias, path->localSocket()); _paths[i]->startTrial(now); @@ -170,7 +170,7 @@ SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) void Bond::recordIncomingInvalidPacket(const SharedPtr& path) { - // char 
pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordIncomingInvalidPacket() %s %s\n", getLink(path)->ifname().c_str(), pathStr); + // char pathStr[128];path->address().toString(pathStr);//fprintf(stderr, "recordIncomingInvalidPacket() %s %s\n", getLink(path)->ifname().c_str(), pathStr); Mutex::Lock _l(_paths_m); for (int i=0; i& path) void Bond::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) { - // char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordOutgoingPacket() %s %s, packetId=%llx, payloadLength=%d, verb=%x, flowId=%lx\n", getLink(path)->ifname().c_str(), pathStr, packetId, payloadLength, verb, flowId); + // char pathStr[128];path->address().toString(pathStr);//fprintf(stderr, "recordOutgoingPacket() %s %s, packetId=%llx, payloadLength=%d, verb=%x, flowId=%lx\n", getLink(path)->ifname().c_str(), pathStr, packetId, payloadLength, verb, flowId); _freeRandomByte += (unsigned char)(packetId >> 8); // Grab entropy to use in path selection logic if (!_shouldCollectPathStatistics) { return; @@ -218,7 +218,7 @@ void Bond::recordOutgoingPacket(const SharedPtr &path, const uint64_t pack void Bond::recordIncomingPacket(const SharedPtr& path, uint64_t packetId, uint16_t payloadLength, Packet::Verb verb, int32_t flowId, int64_t now) { - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordIncomingPacket() %s %s, packetId=%llx, payloadLength=%d, verb=%x, flowId=%lx\n", getLink(path)->ifname().c_str(), pathStr, packetId, payloadLength, verb, flowId); + //char pathStr[128];path->address().toString(pathStr);//fprintf(stderr, "recordIncomingPacket() %s %s, packetId=%llx, payloadLength=%d, verb=%x, flowId=%lx\n", getLink(path)->ifname().c_str(), pathStr, packetId, payloadLength, verb, flowId); bool isFrame = (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME); bool shouldRecord = (packetId & 
(ZT_QOS_ACK_DIVISOR - 1) && (verb != Packet::VERB_ACK) @@ -261,7 +261,7 @@ void Bond::recordIncomingPacket(const SharedPtr& path, uint64_t packetId, void Bond::receivedQoS(const SharedPtr& path, int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts) { - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "receivedQoS() %s %s\n", getLink(path)->ifname().c_str(), pathStr); + //char pathStr[128];path->address().toString(pathStr);//fprintf(stderr, "receivedQoS() %s %s\n", getLink(path)->ifname().c_str(), pathStr); Mutex::Lock _l(_paths_m); // Look up egress times and compute latency values for each record std::map::iterator it; @@ -273,13 +273,13 @@ void Bond::receivedQoS(const SharedPtr& path, int64_t now, int count, uint } } path->qosRecordSize.push(count); - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "receivedQoS() on path %s %s, count=%d, successful=%d, qosStatsOut.size()=%d\n", getLink(path)->ifname().c_str(), pathStr, count, path->aknowledgedQoSRecordCountSinceLastCheck, path->qosStatsOut.size()); + //char pathStr[128];path->address().toString(pathStr);//fprintf(stderr, "receivedQoS() on path %s %s, count=%d, successful=%d, qosStatsOut.size()=%d\n", getLink(path)->ifname().c_str(), pathStr, count, path->aknowledgedQoSRecordCountSinceLastCheck, path->qosStatsOut.size()); } void Bond::receivedAck(const SharedPtr& path, int64_t now, int32_t ackedBytes) { Mutex::Lock _l(_paths_m); - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "receivedAck() %s %s, (ackedBytes=%d, lastAckReceived=%lld, ackAge=%lld)\n", getLink(path)->ifname().c_str(), pathStr, ackedBytes, path->lastAckReceived, path->ackAge(now)); + //char pathStr[128];path->address().toString(pathStr);//fprintf(stderr, "receivedAck() %s %s, (ackedBytes=%d, lastAckReceived=%lld, ackAge=%lld)\n", getLink(path)->ifname().c_str(), pathStr, ackedBytes, path->lastAckReceived, path->ackAge(now)); path->_lastAckReceived = now; path->_unackedBytes = 
(ackedBytes > path->_unackedBytes) ? 0 : path->_unackedBytes - ackedBytes; int64_t timeSinceThroughputEstimate = (now - path->_lastThroughputEstimation); @@ -300,7 +300,7 @@ void Bond::receivedAck(const SharedPtr& path, int64_t now, int32_t ackedBy int32_t Bond::generateQoSPacket(const SharedPtr& path, int64_t now, char *qosBuffer) { - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "generateQoSPacket() %s %s\n", getLink(path)->ifname().c_str(), pathStr); + //char pathStr[128];path->address().toString(pathStr);//fprintf(stderr, "generateQoSPacket() %s %s\n", getLink(path)->ifname().c_str(), pathStr); int32_t len = 0; std::map::iterator it = path->qosStatsIn.begin(); int i=0; @@ -337,7 +337,7 @@ bool Bond::assignFlowToBondedPath(SharedPtr &flow, int64_t now) entropy %= _totalBondUnderload; } if (!_numBondedPaths) { - fprintf(stderr, "no bonded paths for flow assignment\n"); + //fprintf(stderr, "no bonded paths for flow assignment\n"); return false; } /* Since there may be scenarios where a path is removed before we can re-estimate @@ -375,13 +375,13 @@ bool Bond::assignFlowToBondedPath(SharedPtr &flow, int64_t now) ++(_paths[idx]->_assignedFlowCount); } else { - fprintf(stderr, "could not assign flow?\n"); exit(0); // TODO: Remove for production + //fprintf(stderr, "could not assign flow?\n"); exit(0); // TODO: Remove for production return false; } } flow->assignedPath()->address().toString(curPathStr); SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, flow->assignedPath()->localSocket()); - fprintf(stderr, "assigned (tx) flow %x with peer %llx to path %s on %s (idx=%d)\n", flow->id(), _peer->_id.address().toInt(), curPathStr, link->ifname().c_str(), idx); + //fprintf(stderr, "assigned (tx) flow %x with peer %llx to path %s on %s (idx=%d)\n", flow->id(), _peer->_id.address().toInt(), curPathStr, link->ifname().c_str(), idx); return true; } @@ -391,16 +391,16 @@ SharedPtr Bond::createFlow(const SharedPtr &path, int32_t flowId, un char 
curPathStr[128]; // --- if (!_numBondedPaths) { - fprintf(stderr, "there are no bonded paths, cannot assign flow\n"); + //fprintf(stderr, "there are no bonded paths, cannot assign flow\n"); return SharedPtr(); } if (_flows.size() >= ZT_FLOW_MAX_COUNT) { - fprintf(stderr, "max number of flows reached (%d), forcibly forgetting oldest flow\n", ZT_FLOW_MAX_COUNT); + //fprintf(stderr, "max number of flows reached (%d), forcibly forgetting oldest flow\n", ZT_FLOW_MAX_COUNT); forgetFlowsWhenNecessary(0,true,now); } SharedPtr flow = new Flow(flowId, now); _flows[flowId] = flow; - fprintf(stderr, "new flow %x detected with peer %llx, %lu active flow(s)\n", flowId, _peer->_id.address().toInt(), (_flows.size())); + //fprintf(stderr, "new flow %x detected with peer %llx, %lu active flow(s)\n", flowId, _peer->_id.address().toInt(), (_flows.size())); /** * Add a flow with a given Path already provided. This is the case when a packet * is received on a path but no flow exists, in this case we simply assign the path @@ -411,7 +411,7 @@ SharedPtr Bond::createFlow(const SharedPtr &path, int32_t flowId, un path->address().toString(curPathStr); path->_assignedFlowCount++; SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, flow->assignedPath()->localSocket()); - fprintf(stderr, "assigned (rx) flow %x with peer %llx to path %s on %s\n", flow->id(), _peer->_id.address().toInt(), curPathStr, link->ifname().c_str()); + //fprintf(stderr, "assigned (rx) flow %x with peer %llx to path %s on %s\n", flow->id(), _peer->_id.address().toInt(), curPathStr, link->ifname().c_str()); } /** * Add a flow when no path was provided. 
This means that it is an outgoing packet @@ -432,7 +432,7 @@ void Bond::forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now) if (age) { // Remove by specific age while (it != _flows.end()) { if (it->second->age(now) > age) { - fprintf(stderr, "forgetting flow %x between this node and %llx, %lu active flow(s)\n", it->first, _peer->_id.address().toInt(), (_flows.size()-1)); + //fprintf(stderr, "forgetting flow %x between this node and %llx, %lu active flow(s)\n", it->first, _peer->_id.address().toInt(), (_flows.size()-1)); it->second->assignedPath()->_assignedFlowCount--; it = _flows.erase(it); } else { @@ -450,7 +450,7 @@ void Bond::forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now) ++it; } if (oldestFlow != _flows.end()) { - fprintf(stderr, "forgetting oldest flow %x (of age %llu) between this node and %llx, %lu active flow(s)\n", oldestFlow->first, oldestFlow->second->age(now), _peer->_id.address().toInt(), (_flows.size()-1)); + //fprintf(stderr, "forgetting oldest flow %x (of age %llu) between this node and %llx, %lu active flow(s)\n", oldestFlow->first, oldestFlow->second->age(now), _peer->_id.address().toInt(), (_flows.size()-1)); oldestFlow->second->assignedPath()->_assignedFlowCount--; _flows.erase(oldestFlow); } @@ -471,19 +471,19 @@ void Bond::processIncomingPathNegotiationRequest(uint64_t now, SharedPtr & } SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, path->localSocket()); if (remoteUtility > _localUtility) { - fprintf(stderr, "peer suggests path, its utility (%d) is greater than ours (%d), we will switch to %s on %s (ls=%llx)\n", remoteUtility, _localUtility, pathStr, link->ifname().c_str(), path->localSocket()); + //fprintf(stderr, "peer suggests path, its utility (%d) is greater than ours (%d), we will switch to %s on %s (ls=%llx)\n", remoteUtility, _localUtility, pathStr, link->ifname().c_str(), path->localSocket()); negotiatedPath = path; } if (remoteUtility < _localUtility) { - fprintf(stderr, "peer suggests path, 
its utility (%d) is less than ours (%d), we will NOT switch to %s on %s (ls=%llx)\n", remoteUtility, _localUtility, pathStr, link->ifname().c_str(), path->localSocket()); + //fprintf(stderr, "peer suggests path, its utility (%d) is less than ours (%d), we will NOT switch to %s on %s (ls=%llx)\n", remoteUtility, _localUtility, pathStr, link->ifname().c_str(), path->localSocket()); } if (remoteUtility == _localUtility) { - fprintf(stderr, "peer suggest path, but utility is equal, picking choice made by peer with greater identity.\n"); + //fprintf(stderr, "peer suggest path, but utility is equal, picking choice made by peer with greater identity.\n"); if (_peer->_id.address().toInt() > RR->node->identity().address().toInt()) { - fprintf(stderr, "peer identity was greater, going with their choice of %s on %s (ls=%llx)\n", pathStr, link->ifname().c_str(), path->localSocket()); + //fprintf(stderr, "peer identity was greater, going with their choice of %s on %s (ls=%llx)\n", pathStr, link->ifname().c_str(), path->localSocket()); negotiatedPath = path; } else { - fprintf(stderr, "our identity was greater, no change\n"); + //fprintf(stderr, "our identity was greater, no change\n"); } } } @@ -522,18 +522,18 @@ void Bond::pathNegotiationCheck(void *tPtr, const int64_t now) _localUtility -= ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED; } if ((now - _lastSentPathNegotiationRequest) > ZT_PATH_NEGOTIATION_CUTOFF_TIME) { - fprintf(stderr, "BT: (sync) it's been long enough, sending more requests.\n"); + //fprintf(stderr, "BT: (sync) it's been long enough, sending more requests.\n"); _numSentPathNegotiationRequests = 0; } if (_numSentPathNegotiationRequests < ZT_PATH_NEGOTIATION_TRY_COUNT) { if (_localUtility >= 0) { - fprintf(stderr, "BT: (sync) paths appear to be out of sync (utility=%d)\n", _localUtility); + //fprintf(stderr, "BT: (sync) paths appear to be out of sync (utility=%d)\n", _localUtility); sendPATH_NEGOTIATION_REQUEST(tPtr, _paths[maxOutPathIdx]); 
++_numSentPathNegotiationRequests; _lastSentPathNegotiationRequest = now; _paths[maxOutPathIdx]->address().toString(pathStr); SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[maxOutPathIdx]->localSocket()); - fprintf(stderr, "sending request to use %s on %s, ls=%llx, utility=%d\n", pathStr, link->ifname().c_str(), _paths[maxOutPathIdx]->localSocket(), _localUtility); + //fprintf(stderr, "sending request to use %s on %s, ls=%llx, utility=%d\n", pathStr, link->ifname().c_str(), _paths[maxOutPathIdx]->localSocket(), _localUtility); } } /** @@ -542,7 +542,7 @@ void Bond::pathNegotiationCheck(void *tPtr, const int64_t now) else if ((now - _lastSentPathNegotiationRequest) > (2 * ZT_PATH_NEGOTIATION_CHECK_INTERVAL)) { if (_localUtility == 0) { // There's no loss to us, just switch without sending a another request - fprintf(stderr, "BT: (sync) giving up, switching to remote peer's path.\n"); + //fprintf(stderr, "BT: (sync) giving up, switching to remote peer's path.\n"); negotiatedPath = _paths[maxInPathIdx]; } } @@ -551,7 +551,7 @@ void Bond::pathNegotiationCheck(void *tPtr, const int64_t now) void Bond::sendPATH_NEGOTIATION_REQUEST(void *tPtr, const SharedPtr &path) { - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "sendPATH_NEGOTIATION_REQUEST() %s %s\n", getLink(path)->ifname().c_str(), pathStr); + //char pathStr[128];path->address().toString(pathStr);//fprintf(stderr, "sendPATH_NEGOTIATION_REQUEST() %s %s\n", getLink(path)->ifname().c_str(), pathStr); if (_abLinkSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { return; } @@ -566,7 +566,7 @@ void Bond::sendPATH_NEGOTIATION_REQUEST(void *tPtr, const SharedPtr &path) void Bond::sendACK(void *tPtr,const SharedPtr &path,const int64_t localSocket, const InetAddress &atAddress,int64_t now) { - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "sendACK() %s %s\n", getLink(path)->ifname().c_str(), pathStr); + //char 
pathStr[128];path->address().toString(pathStr);//fprintf(stderr, "sendACK() %s %s\n", getLink(path)->ifname().c_str(), pathStr); Packet outp(_peer->_id.address(),RR->identity.address(),Packet::VERB_ACK); int32_t bytesToAck = 0; std::map::iterator it = path->ackStatsIn.begin(); @@ -589,7 +589,7 @@ void Bond::sendACK(void *tPtr,const SharedPtr &path,const int64_t localSoc void Bond::sendQOS_MEASUREMENT(void *tPtr,const SharedPtr &path,const int64_t localSocket, const InetAddress &atAddress,int64_t now) { - //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "sendQOS() %s %s\n", getLink(path)->ifname().c_str(), pathStr); + //char pathStr[128];path->address().toString(pathStr);//fprintf(stderr, "sendQOS() %s %s\n", getLink(path)->ifname().c_str(), pathStr); const int64_t _now = RR->node->now(); Packet outp(_peer->_id.address(),RR->identity.address(),Packet::VERB_QOS_MEASUREMENT); char qosData[ZT_QOS_MAX_PACKET_SIZE]; @@ -969,24 +969,24 @@ void Bond::estimatePathQuality(const int64_t now) return; } if (!_header) { - fprintf(stdout, "now, bonded, relativeUnderload, flows, "); + //fprintf(stdout, "now, bonded, relativeUnderload, flows, "); for(unsigned int i=0;iaddress().toString(pathStr); std::string label = std::string((pathStr)) + " " + getLink(_paths[i])->ifname(); for (int i=0; i<19; ++i) { - fprintf(stdout, "%s, ", label.c_str()); + //fprintf(stdout, "%s, ", label.c_str()); } } } _header=true; } /* - fprintf(stdout, "%ld, %d, %d, %d, ",((now - RR->bc->getBondStartTime())),_numBondedPaths,_totalBondUnderload, _flows.size()); + //fprintf(stdout, "%ld, %d, %d, %d, ",((now - RR->bc->getBondStartTime())),_numBondedPaths,_totalBondUnderload, _flows.size()); for(unsigned int i=0;iaddress().toString(pathStr); - fprintf(stdout, "%s, %s, %8.3f, %8.3f, %8.3f, %5.3f, %5.3f, %5.3f, %8f, %5.3f, %5.3f, %d, %5.3f, %d, %d, %d, %d, %d, %d, ", + //fprintf(stdout, "%s, %s, %8.3f, %8.3f, %8.3f, %5.3f, %5.3f, %5.3f, %8f, %5.3f, %5.3f, %d, %5.3f, %d, %d, %d, %d, %d, %d, 
", getLink(_paths[i])->ifname().c_str(), pathStr, _paths[i]->_latencyMean, lat[i],pdv[i], _paths[i]->_packetLossRatio, plr[i],per[i],thr[i],thm[i],thv[i],(now - _paths[i]->lastIn()),quality[i],alloc[i], _paths[i]->_relativeByteLoad, _paths[i]->_assignedFlowCount, _paths[i]->alive(now, true), _paths[i]->eligible(now,_ackSendInterval), _paths[i]->qosStatsOut.size()); } @@ -1040,7 +1040,7 @@ void Bond::processBalanceTasks(const int64_t now) } if (!_paths[i]->eligible(now,_ackSendInterval) && _paths[i]->_shouldReallocateFlows) { _paths[i]->address().toString(curPathStr); - fprintf(stderr, "%d reallocating flows from dead path %s on %s\n", (RR->node->now() - RR->bc->getBondStartTime()), curPathStr, getLink(_paths[i])->ifname().c_str()); + //fprintf(stderr, "%d reallocating flows from dead path %s on %s\n", (RR->node->now() - RR->bc->getBondStartTime()), curPathStr, getLink(_paths[i])->ifname().c_str()); std::map >::iterator flow_it = _flows.begin(); while (flow_it != _flows.end()) { if (flow_it->second->assignedPath() == _paths[i]) { @@ -1066,7 +1066,7 @@ void Bond::processBalanceTasks(const int64_t now) } if (_paths[i] && _paths[i]->bonded() && _paths[i]->eligible(now,_ackSendInterval) && (_paths[i]->_allocation < minimumAllocationValue) && _paths[i]->_assignedFlowCount) { _paths[i]->address().toString(curPathStr); - fprintf(stderr, "%d reallocating flows from under-performing path %s on %s\n", (RR->node->now() - RR->bc->getBondStartTime()), curPathStr, getLink(_paths[i])->ifname().c_str()); + //fprintf(stderr, "%d reallocating flows from under-performing path %s on %s\n", (RR->node->now() - RR->bc->getBondStartTime()), curPathStr, getLink(_paths[i])->ifname().c_str()); std::map >::iterator flow_it = _flows.begin(); while (flow_it != _flows.end()) { if (flow_it->second->assignedPath() == _paths[i]) { @@ -1119,7 +1119,7 @@ void Bond::processBalanceTasks(const int64_t now) while (flow_it != _flows.end()) { if (flow_it->second->_previouslyAssignedPath && 
flow_it->second->_previouslyAssignedPath->eligible(now, _ackSendInterval) && (flow_it->second->_previouslyAssignedPath->_allocation >= (minimumAllocationValue * 2))) { - fprintf(stderr, "moving flow back onto its previous path assignment (based on eligibility)\n"); + //fprintf(stderr, "moving flow back onto its previous path assignment (based on eligibility)\n"); (flow_it->second->_assignedPath->_assignedFlowCount)--; flow_it->second->assignPath(flow_it->second->_previouslyAssignedPath,now); (flow_it->second->_previouslyAssignedPath->_assignedFlowCount)++; @@ -1136,7 +1136,7 @@ void Bond::processBalanceTasks(const int64_t now) while (flow_it != _flows.end()) { if (flow_it->second->_previouslyAssignedPath && flow_it->second->_previouslyAssignedPath->eligible(now, _ackSendInterval) && (flow_it->second->_previouslyAssignedPath->_allocation >= (minimumAllocationValue * 2))) { - fprintf(stderr, "moving flow back onto its previous path assignment (based on performance)\n"); + //fprintf(stderr, "moving flow back onto its previous path assignment (based on performance)\n"); (flow_it->second->_assignedPath->_assignedFlowCount)--; flow_it->second->assignPath(flow_it->second->_previouslyAssignedPath,now); (flow_it->second->_previouslyAssignedPath->_assignedFlowCount)++; @@ -1182,7 +1182,7 @@ void Bond::processActiveBackupTasks(const int64_t now) * Select initial "active" active-backup link */ if (!_abPath) { - fprintf(stderr, "%llu no active backup path yet...\n", ((now - RR->bc->getBondStartTime()))); + //fprintf(stderr, "%llu no active backup path yet...\n", ((now - RR->bc->getBondStartTime()))); /** * [Automatic mode] * The user has not explicitly specified links or their failover schedule, @@ -1192,13 +1192,13 @@ void Bond::processActiveBackupTasks(const int64_t now) * simply find the next eligible path. */ if (!userHasSpecifiedLinks()) { - fprintf(stderr, "%llu AB: (auto) user did not specify any links. 
waiting until we know more\n", ((now - RR->bc->getBondStartTime()))); + //fprintf(stderr, "%llu AB: (auto) user did not specify any links. waiting until we know more\n", ((now - RR->bc->getBondStartTime()))); for (int i=0; ieligible(now,_ackSendInterval)) { _paths[i]->address().toString(curPathStr); SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); if (link) { - fprintf(stderr, "%llu AB: (initial) [%d] found eligible path %s on: %s\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, link->ifname().c_str()); + //fprintf(stderr, "%llu AB: (initial) [%d] found eligible path %s on: %s\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, link->ifname().c_str()); } _abPath = _paths[i]; break; @@ -1210,9 +1210,9 @@ void Bond::processActiveBackupTasks(const int64_t now) * The user has specified links or failover rules that the bonding policy should adhere to. */ else if (userHasSpecifiedLinks()) { - fprintf(stderr, "%llu AB: (manual) no active backup link, checking local.conf\n", ((now - RR->bc->getBondStartTime()))); + //fprintf(stderr, "%llu AB: (manual) no active backup link, checking local.conf\n", ((now - RR->bc->getBondStartTime()))); if (userHasSpecifiedPrimaryLink()) { - fprintf(stderr, "%llu AB: (manual) user has specified primary link, looking for it.\n", ((now - RR->bc->getBondStartTime()))); + //fprintf(stderr, "%llu AB: (manual) user has specified primary link, looking for it.\n", ((now - RR->bc->getBondStartTime()))); for (int i=0; ieligible(now,_ackSendInterval) && link->primary()) { if (!_paths[i]->preferred()) { _paths[i]->address().toString(curPathStr); - fprintf(stderr, "%llu AB: (initial) [%d] found path on primary link, taking note in case we don't find a preferred path\n", ((now - RR->bc->getBondStartTime())), i); + //fprintf(stderr, "%llu AB: (initial) [%d] found path on primary link, taking note in case we don't find a preferred path\n", ((now - RR->bc->getBondStartTime())), i); nonPreferredPath = _paths[i]; 
bFoundPrimaryLink = true; } @@ -1230,7 +1230,7 @@ void Bond::processActiveBackupTasks(const int64_t now) _abPath->address().toString(curPathStr); SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); if (link) { - fprintf(stderr, "%llu AB: (initial) [%d] found preferred path %s on primary link: %s\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, link->ifname().c_str()); + //fprintf(stderr, "%llu AB: (initial) [%d] found preferred path %s on primary link: %s\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, link->ifname().c_str()); } bFoundPrimaryLink = true; break; @@ -1241,23 +1241,23 @@ void Bond::processActiveBackupTasks(const int64_t now) _abPath->address().toString(curPathStr); SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _abPath->localSocket()); if (link) { - fprintf(stderr, "%llu AB: (initial) found preferred primary path: %s on %s\n", ((now - RR->bc->getBondStartTime())), curPathStr, link->ifname().c_str()); + //fprintf(stderr, "%llu AB: (initial) found preferred primary path: %s on %s\n", ((now - RR->bc->getBondStartTime())), curPathStr, link->ifname().c_str()); } } else { if (bFoundPrimaryLink && nonPreferredPath) { - fprintf(stderr, "%llu AB: (initial) found a non-preferred primary path\n", ((now - RR->bc->getBondStartTime()))); + //fprintf(stderr, "%llu AB: (initial) found a non-preferred primary path\n", ((now - RR->bc->getBondStartTime()))); _abPath = nonPreferredPath; } } if (!_abPath) { - fprintf(stderr, "%llu AB: (initial) designated primary link is not yet ready\n", ((now - RR->bc->getBondStartTime()))); + //fprintf(stderr, "%llu AB: (initial) designated primary link is not yet ready\n", ((now - RR->bc->getBondStartTime()))); // TODO: Should fail-over to specified backup or just wait? 
} } else if (!userHasSpecifiedPrimaryLink()) { int _abIdx = ZT_MAX_PEER_NETWORK_PATHS; - fprintf(stderr, "%llu AB: (initial) user did not specify primary link, just picking something\n", ((now - RR->bc->getBondStartTime()))); + //fprintf(stderr, "%llu AB: (initial) user did not specify primary link, just picking something\n", ((now - RR->bc->getBondStartTime()))); for (int i=0; ieligible(now,_ackSendInterval)) { _abIdx = i; @@ -1265,13 +1265,13 @@ void Bond::processActiveBackupTasks(const int64_t now) } } if (_abIdx == ZT_MAX_PEER_NETWORK_PATHS) { - fprintf(stderr, "%llu AB: (initial) unable to find a candidate next-best, no change\n", ((now - RR->bc->getBondStartTime()))); + //fprintf(stderr, "%llu AB: (initial) unable to find a candidate next-best, no change\n", ((now - RR->bc->getBondStartTime()))); } else { _abPath = _paths[_abIdx]; SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _abPath->localSocket()); if (link) { - fprintf(stderr, "%llu AB: (initial) selected non-primary link idx=%d, %s on %s\n", ((now - RR->bc->getBondStartTime())), _abIdx, pathStr, link->ifname().c_str()); + //fprintf(stderr, "%llu AB: (initial) selected non-primary link idx=%d, %s on %s\n", ((now - RR->bc->getBondStartTime())), _abIdx, pathStr, link->ifname().c_str()); } } } @@ -1288,7 +1288,7 @@ void Bond::processActiveBackupTasks(const int64_t now) (*it)->address().toString(curPathStr); SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, (*it)->localSocket()); if (link) { - fprintf(stderr, "%llu AB: (fq) %s on %s is now ineligible, removing from failover queue\n", ((now - RR->bc->getBondStartTime())), curPathStr, link->ifname().c_str()); + //fprintf(stderr, "%llu AB: (fq) %s on %s is now ineligible, removing from failover queue\n", ((now - RR->bc->getBondStartTime())), curPathStr, link->ifname().c_str()); } it = _abFailoverQueue.erase(it); } else { @@ -1360,7 +1360,7 @@ void Bond::processActiveBackupTasks(const int64_t now) } if (!bFoundPathInQueue) { 
_paths[i]->address().toString(curPathStr); - fprintf(stderr, "%llu AB: (fq) [%d] added %s on %s to queue\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, getLink(_paths[i])->ifname().c_str()); + //fprintf(stderr, "%llu AB: (fq) [%d] added %s on %s to queue\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, getLink(_paths[i])->ifname().c_str()); _abFailoverQueue.push_front(_paths[i]); } } @@ -1405,7 +1405,7 @@ void Bond::processActiveBackupTasks(const int64_t now) } if (!bFoundPathInQueue) { _paths[i]->address().toString(curPathStr); - fprintf(stderr, "%llu AB: (fq) [%d] added %s on %s to queue\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, getLink(_paths[i])->ifname().c_str()); + //fprintf(stderr, "%llu AB: (fq) [%d] added %s on %s to queue\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, getLink(_paths[i])->ifname().c_str()); _abFailoverQueue.push_front(_paths[i]); } } @@ -1413,7 +1413,7 @@ void Bond::processActiveBackupTasks(const int64_t now) } _abFailoverQueue.sort(PathQualityComparator()); if (_abFailoverQueue.empty()) { - fprintf(stderr, "%llu AB: (fq) the failover queue is empty, the active-backup bond is no longer fault-tolerant\n", ((now - RR->bc->getBondStartTime()))); + //fprintf(stderr, "%llu AB: (fq) the failover queue is empty, the active-backup bond is no longer fault-tolerant\n", ((now - RR->bc->getBondStartTime()))); } } /** @@ -1426,13 +1426,13 @@ void Bond::processActiveBackupTasks(const int64_t now) * Fulfill primary reselect obligations */ if (_abPath && !_abPath->eligible(now,_ackSendInterval)) { // Implicit ZT_MULTIPATH_RESELECTION_POLICY_FAILURE - _abPath->address().toString(curPathStr); fprintf(stderr, "%llu AB: (failure) failover event!, active backup path (%s) is no-longer eligible\n", ((now - RR->bc->getBondStartTime())), curPathStr); + _abPath->address().toString(curPathStr); //fprintf(stderr, "%llu AB: (failure) failover event!, active backup path (%s) is no-longer eligible\n", ((now - 
RR->bc->getBondStartTime())), curPathStr); if (!_abFailoverQueue.empty()) { - fprintf(stderr, "%llu AB: (failure) there are (%lu) links in queue to choose from...\n", ((now - RR->bc->getBondStartTime())), _abFailoverQueue.size()); + //fprintf(stderr, "%llu AB: (failure) there are (%lu) links in queue to choose from...\n", ((now - RR->bc->getBondStartTime())), _abFailoverQueue.size()); dequeueNextActiveBackupPath(now); - _abPath->address().toString(curPathStr); fprintf(stderr, "%llu AB: (failure) switched to %s on %s\n", ((now - RR->bc->getBondStartTime())), curPathStr, getLink(_abPath)->ifname().c_str()); + _abPath->address().toString(curPathStr); //fprintf(stderr, "%llu AB: (failure) switched to %s on %s\n", ((now - RR->bc->getBondStartTime())), curPathStr, getLink(_abPath)->ifname().c_str()); } else { - fprintf(stderr, "%llu AB: (failure) nothing available in the link queue, doing nothing.\n", ((now - RR->bc->getBondStartTime()))); + //fprintf(stderr, "%llu AB: (failure) nothing available in the link queue, doing nothing.\n", ((now - RR->bc->getBondStartTime()))); } } /** @@ -1444,17 +1444,17 @@ void Bond::processActiveBackupTasks(const int64_t now) if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS) { if (_abPath && !getLink(_abPath)->primary() && getLink(_abFailoverQueue.front())->primary()) { - fprintf(stderr, "%llu AB: (always) switching to available primary\n", ((now - RR->bc->getBondStartTime()))); + //fprintf(stderr, "%llu AB: (always) switching to available primary\n", ((now - RR->bc->getBondStartTime()))); dequeueNextActiveBackupPath(now); } } if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_BETTER) { if (_abPath && !getLink(_abPath)->primary()) { - fprintf(stderr, "%llu AB: (better) active backup has switched to \"better\" primary link according to re-select policy.\n", ((now - RR->bc->getBondStartTime()))); + //fprintf(stderr, "%llu AB: (better) active backup has switched to \"better\" primary link according to re-select 
policy.\n", ((now - RR->bc->getBondStartTime()))); if (getLink(_abFailoverQueue.front())->primary() && (_abFailoverQueue.front()->_failoverScore > _abPath->_failoverScore)) { dequeueNextActiveBackupPath(now); - fprintf(stderr, "%llu AB: (better) switched back to user-defined primary\n", ((now - RR->bc->getBondStartTime()))); + //fprintf(stderr, "%llu AB: (better) switched back to user-defined primary\n", ((now - RR->bc->getBondStartTime()))); } } } @@ -1465,7 +1465,7 @@ void Bond::processActiveBackupTasks(const int64_t now) if (_abFailoverQueue.front()->_negotiated) { dequeueNextActiveBackupPath(now); _abPath->address().toString(prevPathStr); - fprintf(stderr, "%llu AB: (optimize) switched to negotiated path %s on %s\n", ((now - RR->bc->getBondStartTime())), prevPathStr, getLink(_abPath)->ifname().c_str()); + //fprintf(stderr, "%llu AB: (optimize) switched to negotiated path %s on %s\n", ((now - RR->bc->getBondStartTime())), prevPathStr, getLink(_abPath)->ifname().c_str()); _lastPathNegotiationCheck = now; } else { @@ -1483,7 +1483,7 @@ void Bond::processActiveBackupTasks(const int64_t now) _abPath->address().toString(prevPathStr); dequeueNextActiveBackupPath(now); _abPath->address().toString(curPathStr); - fprintf(stderr, "%llu AB: (optimize) switched from %s on %s (fs=%d) to %s on %s (fs=%d)\n", ((now - RR->bc->getBondStartTime())), prevPathStr, getLink(oldPath)->ifname().c_str(), prevFScore, curPathStr, getLink(_abPath)->ifname().c_str(), newFScore); + //fprintf(stderr, "%llu AB: (optimize) switched from %s on %s (fs=%d) to %s on %s (fs=%d)\n", ((now - RR->bc->getBondStartTime())), prevPathStr, getLink(oldPath)->ifname().c_str(), prevFScore, curPathStr, getLink(_abPath)->ifname().c_str(), newFScore); } } } @@ -1640,7 +1640,7 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool _downDelay = templateBond->_downDelay; _upDelay = templateBond->_upDelay; - fprintf(stderr, "TIMERS: strat=%d, fi= %d, bmi= %d, qos= %d, ack= %d, estimateInt= %d, 
refractory= %d, ud= %d, dd= %d\n", + /*fprintf(stderr, "TIMERS: strat=%d, fi= %d, bmi= %d, qos= %d, ack= %d, estimateInt= %d, refractory= %d, ud= %d, dd= %d\n", _linkMonitorStrategy, _failoverInterval, _bondMonitorInterval, @@ -1650,10 +1650,11 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool _defaultPathRefractoryPeriod, _upDelay, _downDelay); + */ if (templateBond->_linkMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE && templateBond->_failoverInterval != 0) { - fprintf(stderr, "warning: passive path monitoring was specified, this will prevent failovers from happening in a timely manner.\n"); + //fprintf(stderr, "warning: passive path monitoring was specified, this will prevent failovers from happening in a timely manner.\n"); } _abLinkSelectMethod = templateBond->_abLinkSelectMethod; memcpy(_qualityWeights, templateBond->_qualityWeights, ZT_QOS_WEIGHT_SIZE * sizeof(float)); @@ -1672,7 +1673,7 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool _failoverInterval = originalBond._failoverInterval; } else { - fprintf(stderr, "warning: _failoverInterval (%d) is out of range, using default (%d)\n", originalBond._failoverInterval, _failoverInterval); + //fprintf(stderr, "warning: _failoverInterval (%d) is out of range, using default (%d)\n", originalBond._failoverInterval, _failoverInterval); } */ @@ -1726,17 +1727,17 @@ void Bond::dumpInfo(const int64_t now) return; } /* - fprintf(stderr, "---[ bp=%d, id=%llx, dd=%d, up=%d, pmi=%d, specifiedLinks=%d, _specifiedPrimaryLink=%d, _specifiedFailInst=%d ]\n", + //fprintf(stderr, "---[ bp=%d, id=%llx, dd=%d, up=%d, pmi=%d, specifiedLinks=%d, _specifiedPrimaryLink=%d, _specifiedFailInst=%d ]\n", _policy, _peer->identity().address().toInt(), _downDelay, _upDelay, _monitorInterval, _userHasSpecifiedLinks, _userHasSpecifiedPrimaryLink, _userHasSpecifiedFailoverInstructions); if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - fprintf(stderr, "Paths (bp=%d, 
stats=%d, primaryReselect=%d) :\n", + //fprintf(stderr, "Paths (bp=%d, stats=%d, primaryReselect=%d) :\n", _policy, _shouldCollectPathStatistics, _abLinkSelectMethod); } if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { - fprintf(stderr, "Paths (bp=%d, stats=%d, fh=%d) :\n", + //fprintf(stderr, "Paths (bp=%d, stats=%d, fh=%d) :\n", _policy, _shouldCollectPathStatistics, _allowFlowHashing); }*/ if ((now - _lastPrintTS) < 2000) { @@ -1744,13 +1745,13 @@ void Bond::dumpInfo(const int64_t now) } _lastPrintTS = now; - fprintf(stderr, "\n\n"); + //fprintf(stderr, "\n\n"); for(int i=0; i link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); _paths[i]->address().toString(pathStr); - fprintf(stderr, " %2d: lat=%8.3f, ac=%3d, fail%5s, fscore=%6d, in=%7d, out=%7d, age=%7ld, ack=%7ld, ref=%6d, ls=%llx", + /*fprintf(stderr, " %2d: lat=%8.3f, ac=%3d, fail%5s, fscore=%6d, in=%7d, out=%7d, age=%7ld, ack=%7ld, ref=%6d, ls=%llx", i, _paths[i]->_latencyMean, _paths[i]->_allocation, @@ -1763,73 +1764,75 @@ void Bond::dumpInfo(const int64_t now) _paths[i]->_refractoryPeriod, _paths[i]->localSocket() ); + */ if (link->spare()) { - fprintf(stderr, " SPR."); + //fprintf(stderr, " SPR."); } else { - fprintf(stderr, " "); + //fprintf(stderr, " "); } if (link->primary()) { - fprintf(stderr, " PRIM."); + //fprintf(stderr, " PRIM."); } else { - fprintf(stderr, " "); + //fprintf(stderr, " "); } if (_paths[i]->allowed()) { - fprintf(stderr, " ALL."); + //fprintf(stderr, " ALL."); } else { - fprintf(stderr, " "); + //fprintf(stderr, " "); } if (_paths[i]->eligible(now,_ackSendInterval)) { - fprintf(stderr, " ELI."); + //fprintf(stderr, " ELI."); } else { - fprintf(stderr, " "); + //fprintf(stderr, " "); } if (_paths[i]->preferred()) { - fprintf(stderr, " PREF."); + //fprintf(stderr, " PREF."); } else { - fprintf(stderr, " "); + //fprintf(stderr, " "); } if 
(_paths[i]->_negotiated) { - fprintf(stderr, " NEG."); + //fprintf(stderr, " NEG."); } else { - fprintf(stderr, " "); + //fprintf(stderr, " "); } if (_paths[i]->bonded()) { - fprintf(stderr, " BOND "); + //fprintf(stderr, " BOND "); } else { - fprintf(stderr, " "); + //fprintf(stderr, " "); } if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP && _abPath && (_abPath == _paths[i].ptr())) { - fprintf(stderr, " ACTIVE "); + //fprintf(stderr, " ACTIVE "); } else if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - fprintf(stderr, " "); + //fprintf(stderr, " "); } if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP && _abFailoverQueue.size() && (_abFailoverQueue.front().ptr() == _paths[i].ptr())) { - fprintf(stderr, " NEXT "); + //fprintf(stderr, " NEXT "); } else if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - fprintf(stderr, " "); + //fprintf(stderr, " "); } - fprintf(stderr, "%5s %s\n", link->ifname().c_str(), pathStr); + //fprintf(stderr, "%5s %s\n", link->ifname().c_str(), pathStr); } } if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { if (!_abFailoverQueue.empty()) { - fprintf(stderr, "\nFailover Queue:\n"); + //fprintf(stderr, "\nFailover Queue:\n"); for (std::list >::iterator it(_abFailoverQueue.begin()); it!=_abFailoverQueue.end();++it) { (*it)->address().toString(currPathStr); SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, (*it)->localSocket()); - fprintf(stderr, "\t%8s\tspeed=%7d\trelSpeed=%3d\tipvPref=%3d\tfscore=%9d\t\t%s\n", + /*fprintf(stderr, "\t%8s\tspeed=%7d\trelSpeed=%3d\tipvPref=%3d\tfscore=%9d\t\t%s\n", link->ifname().c_str(), link->speed(), link->relativeSpeed(), link->ipvPref(), (*it)->_failoverScore, currPathStr); + */ } } else { - fprintf(stderr, "\nFailover Queue size = %lu\n", _abFailoverQueue.size()); + //fprintf(stderr, "\nFailover Queue size = %lu\n", _abFailoverQueue.size()); } } @@ -1837,20 +1840,21 @@ void Bond::dumpInfo(const int64_t now) || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR || 
_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { if (_numBondedPaths) { - fprintf(stderr, "\nBonded Paths:\n"); + //fprintf(stderr, "\nBonded Paths:\n"); for (int i=0; i<_numBondedPaths; ++i) { _paths[_bondedIdx[i]]->address().toString(currPathStr); SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[_bondedIdx[i]]->localSocket()); - fprintf(stderr, " [%d]\t%8s\tflows=%3d\tspeed=%7d\trelSpeed=%3d\tipvPref=%3d\tfscore=%9d\t\t%s\n", i, - //fprintf(stderr, " [%d]\t%8s\tspeed=%7d\trelSpeed=%3d\tflowCount=%2d\tipvPref=%3d\tfscore=%9d\t\t%s\n", i, + //fprintf(stderr, " [%d]\t%8s\tflows=%3d\tspeed=%7d\trelSpeed=%3d\tipvPref=%3d\tfscore=%9d\t\t%s\n", i, + /*fprintf(stderr, " [%d]\t%8s\tspeed=%7d\trelSpeed=%3d\tflowCount=%2d\tipvPref=%3d\tfscore=%9d\t\t%s\n", i, link->ifname().c_str(), _paths[_bondedIdx[i]]->_assignedFlowCount, link->speed(), link->relativeSpeed(), - //_paths[_bondedIdx[i]].p->assignedFlows.size(), + _paths[_bondedIdx[i]].p->assignedFlows.size(), link->ipvPref(), _paths[_bondedIdx[i]]->_failoverScore, currPathStr); + */ } } } diff --git a/node/BondController.cpp b/node/BondController.cpp index f7159dbc3..357daef7e 100644 --- a/node/BondController.cpp +++ b/node/BondController.cpp @@ -52,7 +52,7 @@ void BondController::addCustomLink(std::string& policyAlias, SharedPtr lin link->setAsUserSpecified(true); _interfaceToLinkMap[policyAlias].insert(std::pair>(link->ifname(), link)); } else { - fprintf(stderr, "link already exists=%s\n", link->ifname().c_str()); + //fprintf(stderr, "link already exists=%s\n", link->ifname().c_str()); // Link is already defined, overlay user settings } } @@ -79,27 +79,27 @@ bool BondController::assignBondingPolicyToPeer(int64_t identity, const std::stri SharedPtr BondController::createTransportTriggeredBond(const RuntimeEnvironment *renv, const SharedPtr& peer) { - fprintf(stderr, "createTransportTriggeredBond\n"); + //fprintf(stderr, "createTransportTriggeredBond\n"); Mutex::Lock _l(_bonds_m); int64_t identity = 
peer->identity().address().toInt(); Bond *bond = nullptr; if (!_bonds.count(identity)) { std::string policyAlias; - fprintf(stderr, "new bond, registering for %llx\n", identity); + //fprintf(stderr, "new bond, registering for %llx\n", identity); if (!_policyTemplateAssignments.count(identity)) { if (_defaultBondingPolicy) { - fprintf(stderr, " no assignment, using default (%d)\n", _defaultBondingPolicy); + //fprintf(stderr, " no assignment, using default (%d)\n", _defaultBondingPolicy); bond = new Bond(renv, _defaultBondingPolicy, peer); } if (!_defaultBondingPolicy && _defaultBondingPolicyStr.length()) { - fprintf(stderr, " no assignment, using default custom (%s)\n", _defaultBondingPolicyStr.c_str()); + //fprintf(stderr, " no assignment, using default custom (%s)\n", _defaultBondingPolicyStr.c_str()); bond = new Bond(renv, _bondPolicyTemplates[_defaultBondingPolicyStr].ptr(), peer); } } else { - fprintf(stderr, " assignment found for %llx, using it as a template (%s)\n", identity,_policyTemplateAssignments[identity].c_str()); + //fprintf(stderr, " assignment found for %llx, using it as a template (%s)\n", identity,_policyTemplateAssignments[identity].c_str()); if (!_bondPolicyTemplates[_policyTemplateAssignments[identity]]) { - fprintf(stderr, "unable to locate template (%s), ignoring assignment for (%llx), using defaults\n", _policyTemplateAssignments[identity].c_str(), identity); + //fprintf(stderr, "unable to locate template (%s), ignoring assignment for (%llx), using defaults\n", _policyTemplateAssignments[identity].c_str(), identity); bond = new Bond(renv, _defaultBondingPolicy, peer); } else { @@ -108,7 +108,7 @@ SharedPtr BondController::createTransportTriggeredBond(const RuntimeEnviro } } else { - fprintf(stderr, "bond already exists for %llx.\n", identity); + //fprintf(stderr, "bond already exists for %llx.\n", identity); } if (bond) { _bonds[identity] = bond; diff --git a/service/OneService.cpp b/service/OneService.cpp index 7f73e903f..96c9babbd 100644 
--- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -1581,7 +1581,7 @@ public: // Custom Policies json &customBondingPolicies = settings["policies"]; for (json::iterator policyItr = customBondingPolicies.begin(); policyItr != customBondingPolicies.end();++policyItr) { - fprintf(stderr, "\n\n--- (%s)\n", policyItr.key().c_str()); + //fprintf(stderr, "\n\n--- (%s)\n", policyItr.key().c_str()); // Custom Policy std::string customPolicyStr(policyItr.key()); json &customPolicy = policyItr.value(); @@ -1635,7 +1635,7 @@ public: // Policy-Specific link set json &links = customPolicy["links"]; for (json::iterator linkItr = links.begin(); linkItr != links.end();++linkItr) { - fprintf(stderr, "\t--- link (%s)\n", linkItr.key().c_str()); + //fprintf(stderr, "\t--- link (%s)\n", linkItr.key().c_str()); std::string linkNameStr(linkItr.key()); json &link = linkItr.value(); From a1b2ff772a246a0367e1bb4439c6eb2827e960ab Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Thu, 23 Jul 2020 00:15:38 -0700 Subject: [PATCH 32/35] Add new replacement condition in peer path redundancy logic to fix duplicate paths --- node/Peer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/node/Peer.cpp b/node/Peer.cpp index f99396aaa..ad3d47106 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -139,6 +139,9 @@ void Peer::received( if (q > replacePathQuality) { replacePathQuality = q; replacePath = i; + if (!_paths[i].p->alive(now)) { + break; // Stop searching, we found an identical dead path, replace the object + } } } else { replacePath = i; From 29ebda62ef38c9239802e2a61daf53f52147047f Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Thu, 23 Jul 2020 00:32:39 -0700 Subject: [PATCH 33/35] Remove (some) debug functions and traces --- node/Bond.cpp | 146 +--------------------------------------- node/BondController.cpp | 1 - node/Packet.hpp | 2 +- 3 files changed, 3 insertions(+), 146 deletions(-) diff --git a/node/Bond.cpp b/node/Bond.cpp index e96355ec9..609d62bc6 100644 --- 
a/node/Bond.cpp +++ b/node/Bond.cpp @@ -1660,7 +1660,6 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool memcpy(_qualityWeights, templateBond->_qualityWeights, ZT_QOS_WEIGHT_SIZE * sizeof(float)); } - // // Second, apply user specified values (only if they make sense) @@ -1705,11 +1704,8 @@ void Bond::setUserQualityWeights(float weights[], int len) } } - bool Bond::relevant() { - return _peer->identity().address().toInt() == 0x16a03a3d03 - || _peer->identity().address().toInt() == 0x4410300d03 - || _peer->identity().address().toInt() == 0x795cbf86fa; + return false; } SharedPtr Bond::getLink(const SharedPtr& path) @@ -1719,145 +1715,7 @@ SharedPtr Bond::getLink(const SharedPtr& path) void Bond::dumpInfo(const int64_t now) { - char pathStr[128]; - //char oldPathStr[128]; - char currPathStr[128]; - - if (!relevant()) { - return; - } - /* - //fprintf(stderr, "---[ bp=%d, id=%llx, dd=%d, up=%d, pmi=%d, specifiedLinks=%d, _specifiedPrimaryLink=%d, _specifiedFailInst=%d ]\n", - _policy, _peer->identity().address().toInt(), _downDelay, _upDelay, _monitorInterval, _userHasSpecifiedLinks, _userHasSpecifiedPrimaryLink, _userHasSpecifiedFailoverInstructions); - - if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - //fprintf(stderr, "Paths (bp=%d, stats=%d, primaryReselect=%d) :\n", - _policy, _shouldCollectPathStatistics, _abLinkSelectMethod); - } - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR - || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR - || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { - //fprintf(stderr, "Paths (bp=%d, stats=%d, fh=%d) :\n", - _policy, _shouldCollectPathStatistics, _allowFlowHashing); - }*/ - if ((now - _lastPrintTS) < 2000) { - return; - } - _lastPrintTS = now; - - //fprintf(stderr, "\n\n"); - - for(int i=0; i link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); - _paths[i]->address().toString(pathStr); - /*fprintf(stderr, " %2d: lat=%8.3f, ac=%3d, fail%5s, fscore=%6d, in=%7d, 
out=%7d, age=%7ld, ack=%7ld, ref=%6d, ls=%llx", - i, - _paths[i]->_latencyMean, - _paths[i]->_allocation, - link->failoverToLink().c_str(), - _paths[i]->_failoverScore, - _paths[i]->_packetsIn, - _paths[i]->_packetsOut, - (long)_paths[i]->age(now), - (long)_paths[i]->ackAge(now), - _paths[i]->_refractoryPeriod, - _paths[i]->localSocket() - ); - */ - if (link->spare()) { - //fprintf(stderr, " SPR."); - } else { - //fprintf(stderr, " "); - } - if (link->primary()) { - //fprintf(stderr, " PRIM."); - } else { - //fprintf(stderr, " "); - } - if (_paths[i]->allowed()) { - //fprintf(stderr, " ALL."); - } else { - //fprintf(stderr, " "); - } - if (_paths[i]->eligible(now,_ackSendInterval)) { - //fprintf(stderr, " ELI."); - } else { - //fprintf(stderr, " "); - } - if (_paths[i]->preferred()) { - //fprintf(stderr, " PREF."); - } else { - //fprintf(stderr, " "); - } - if (_paths[i]->_negotiated) { - //fprintf(stderr, " NEG."); - } else { - //fprintf(stderr, " "); - } - if (_paths[i]->bonded()) { - //fprintf(stderr, " BOND "); - } else { - //fprintf(stderr, " "); - } - if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP && _abPath && (_abPath == _paths[i].ptr())) { - //fprintf(stderr, " ACTIVE "); - } else if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - //fprintf(stderr, " "); - } - if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP && _abFailoverQueue.size() && (_abFailoverQueue.front().ptr() == _paths[i].ptr())) { - //fprintf(stderr, " NEXT "); - } else if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - //fprintf(stderr, " "); - } - //fprintf(stderr, "%5s %s\n", link->ifname().c_str(), pathStr); - } - } - - if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - if (!_abFailoverQueue.empty()) { - //fprintf(stderr, "\nFailover Queue:\n"); - for (std::list >::iterator it(_abFailoverQueue.begin()); it!=_abFailoverQueue.end();++it) { - (*it)->address().toString(currPathStr); - SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, (*it)->localSocket()); 
- /*fprintf(stderr, "\t%8s\tspeed=%7d\trelSpeed=%3d\tipvPref=%3d\tfscore=%9d\t\t%s\n", - link->ifname().c_str(), - link->speed(), - link->relativeSpeed(), - link->ipvPref(), - (*it)->_failoverScore, - currPathStr); - */ - } - } - else - { - //fprintf(stderr, "\nFailover Queue size = %lu\n", _abFailoverQueue.size()); - } - } - - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR - || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR - || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { - if (_numBondedPaths) { - //fprintf(stderr, "\nBonded Paths:\n"); - for (int i=0; i<_numBondedPaths; ++i) { - _paths[_bondedIdx[i]]->address().toString(currPathStr); - SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[_bondedIdx[i]]->localSocket()); - //fprintf(stderr, " [%d]\t%8s\tflows=%3d\tspeed=%7d\trelSpeed=%3d\tipvPref=%3d\tfscore=%9d\t\t%s\n", i, - /*fprintf(stderr, " [%d]\t%8s\tspeed=%7d\trelSpeed=%3d\tflowCount=%2d\tipvPref=%3d\tfscore=%9d\t\t%s\n", i, - link->ifname().c_str(), - _paths[_bondedIdx[i]]->_assignedFlowCount, - link->speed(), - link->relativeSpeed(), - _paths[_bondedIdx[i]].p->assignedFlows.size(), - link->ipvPref(), - _paths[_bondedIdx[i]]->_failoverScore, - currPathStr); - */ - } - } - } + // Omitted } } // namespace ZeroTier \ No newline at end of file diff --git a/node/BondController.cpp b/node/BondController.cpp index 357daef7e..4fed2befd 100644 --- a/node/BondController.cpp +++ b/node/BondController.cpp @@ -79,7 +79,6 @@ bool BondController::assignBondingPolicyToPeer(int64_t identity, const std::stri SharedPtr BondController::createTransportTriggeredBond(const RuntimeEnvironment *renv, const SharedPtr& peer) { - //fprintf(stderr, "createTransportTriggeredBond\n"); Mutex::Lock _l(_bonds_m); int64_t identity = peer->identity().address().toInt(); Bond *bond = nullptr; diff --git a/node/Packet.hpp b/node/Packet.hpp index ca789db81..f1112403e 100644 --- a/node/Packet.hpp +++ b/node/Packet.hpp @@ -56,7 +56,7 @@ * + Inline push of 
CertificateOfMembership deprecated * 9 - 1.2.0 ... 1.2.14 * 10 - 1.4.0 ... CURRENT - * + Multipath capability and load balancing + * + Multipath capability and load balancing (tentative) */ #define ZT_PROTO_VERSION 10 From c92e030a4bf5443c3e0765645ef7bf32130ec4c0 Mon Sep 17 00:00:00 2001 From: Travis LaDuke Date: Fri, 27 Sep 2019 14:50:21 -0700 Subject: [PATCH 34/35] Create a bash completion script. Just adding it to the repo, but it still needs to be dealt with during install. Probably put it in $ZT_HOME and then symlink to the proper place for the distro? searches $ZT_HOME/networks.d/ for network IDs searches HISTORY for 16 digit numbers that look like network IDs. --- zerotier-cli-completion.bash | 57 ++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 zerotier-cli-completion.bash diff --git a/zerotier-cli-completion.bash b/zerotier-cli-completion.bash new file mode 100644 index 000000000..cd6da0d9a --- /dev/null +++ b/zerotier-cli-completion.bash @@ -0,0 +1,57 @@ +#compdef zerotier-cli +#autoload + + +_get_network_ids () +{ + if [[ "$OSTYPE" == "darwin"* ]]; then + COMPREPLY=($(compgen -W "$(ls -1 /Library/Application\ Support/ZeroTier/One/networks.d | cut -c 1-16)" -- ${cur})) + else + COMPREPLY=($(compgen -W "$(ls -1 /var/lib/zerotier-one/networks.d | cut -c 1-16)" -- ${cur})) + fi +} + +_get_network_ids_from_history () +{ + COMPREPLY=($(compgen -W "$(fc -l -1000 -1 | sed -n 's/.*\([[:xdigit:]]\{16\}\).*/\1/p')" -- ${cur})) +} + +_zerotier-cli_completions() +{ + local cur prev + + cur=${COMP_WORDS[COMP_CWORD]} + prev=${COMP_WORDS[COMP_CWORD-1]} + + case ${COMP_CWORD} in + 1) + COMPREPLY=($(compgen -W "info listpeers peers listnetworks join leave set get listmoons orbit deorbit" -- ${cur})) + ;; + 2) + case ${prev} in + leave) + _get_network_ids + ;; + join) + _get_network_ids_from_history + ;; + set) + _get_network_ids + ;; + get) + _get_network_ids + ;; + *) + COMPREPLY=() + ;; + esac + ;; + *) + COMPREPLY=() + ;; + esac 
+} + +complete -F _zerotier-cli_completions zerotier-cli + + From 9f4985b11a5a4f93c9435094adfce378d573c7a6 Mon Sep 17 00:00:00 2001 From: Joseph Henry Date: Mon, 27 Jul 2020 23:01:12 -0700 Subject: [PATCH 35/35] Add basic bond health status reporting (listbonds) --- include/ZeroTierOne.h | 35 +++++++++++++++++++---- node/Bond.cpp | 51 ++++++++++++++++++++++++++++++++++ node/Bond.hpp | 19 +++++++++++++ node/Node.cpp | 3 ++ one.cpp | 63 ++++++++++++++++++++++++++++++++++++++++++ service/OneService.cpp | 34 +++++++++++++++++++++++ 6 files changed, 199 insertions(+), 6 deletions(-) diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index afa75c290..e1c8f7df3 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -1475,17 +1475,40 @@ typedef struct enum ZT_PeerRole role; /** - * Number of paths (size of paths[]) - */ - unsigned int pathCount; - - /** - * Whether multiple paths to this peer are bonded + * Whether a multi-link bond has formed */ bool isBonded; + /** + * The bonding policy used to bond to this peer + */ int bondingPolicy; + /** + * The health status of the bond to this peer + */ + bool isHealthy; + + /** + * The number of links that comprise the bond to this peer that are considered alive + */ + int numAliveLinks; + + /** + * The number of links that comprise the bond to this peer + */ + int numTotalLinks; + + /** + * The user-specified bond template name + */ + char customBondName[32]; + + /** + * Number of paths (size of paths[]) + */ + unsigned int pathCount; + /** * Known network paths to peer */ diff --git a/node/Bond.cpp b/node/Bond.cpp index 609d62bc6..e7ae33c85 100644 --- a/node/Bond.cpp +++ b/node/Bond.cpp @@ -730,6 +730,9 @@ void Bond::curateBond(const int64_t now, bool rebuildBond) { //fprintf(stderr, "%lu curateBond (rebuildBond=%d), _numBondedPaths=%d\n", ((now - RR->bc->getBondStartTime())), rebuildBond, _numBondedPaths); char pathStr[128]; + + uint8_t tmpNumAliveLinks = 0; + uint8_t tmpNumTotalLinks = 0; /** * Update path 
states */ @@ -737,6 +740,10 @@ void Bond::curateBond(const int64_t now, bool rebuildBond) if (!_paths[i]) { continue; } + tmpNumTotalLinks++; + if (_paths[i]->alive(now, true)) { + tmpNumAliveLinks++; + } bool currEligibility = _paths[i]->eligible(now,_ackSendInterval); //_paths[i]->address().toString(pathStr); //fprintf(stderr, "\n\n%ld path eligibility (for %s, %s):\n", (RR->node->now() - RR->bc->getBondStartTime()), getLink(_paths[i])->ifname().c_str(), pathStr); @@ -764,6 +771,46 @@ void Bond::curateBond(const int64_t now, bool rebuildBond) } _paths[i]->_lastEligibilityState = currEligibility; } + _numAliveLinks = tmpNumAliveLinks; + _numTotalLinks = tmpNumTotalLinks; + + /* Determine health status to report to user */ + + bool tmpHealthStatus = true; + + if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { + if (_numAliveLinks < 2) { + // Considered healthy if there is at least one failover link + tmpHealthStatus = false; + } + } + if (_bondingPolicy == ZT_BONDING_POLICY_BROADCAST) { + if (_numAliveLinks < 1) { + // Considerd healthy if we're able to send frames at all + tmpHealthStatus = false; + } + } + if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) { + if (_numAliveLinks < _numTotalLinks) { + // Considerd healthy if all known paths are alive, this should be refined to account for user bond config settings + tmpHealthStatus = false; + } + } + if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR) { + if (_numAliveLinks < _numTotalLinks) { + // Considerd healthy if all known paths are alive, this should be refined to account for user bond config settings + tmpHealthStatus = false; + } + } + if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { + if (_numAliveLinks < _numTotalLinks) { + // Considerd healthy if all known paths are alive, this should be refined to account for user bond config settings + tmpHealthStatus = false; + } + } + + _isHealthy = tmpHealthStatus; + /** * Curate the set of paths that are part of the bond proper. 
Selects a single path * per logical link according to eligibility and user-specified constraints. @@ -1509,6 +1556,10 @@ void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool _lastCheckUserPreferences = 0; _lastBackgroundTaskCheck = 0; + _isHealthy = false; + _numAliveLinks = 0; + _numTotalLinks = 0; + _downDelay = 0; _upDelay = 0; _allowFlowHashing=false; diff --git a/node/Bond.hpp b/node/Bond.hpp index 4eee20f36..c87caf281 100644 --- a/node/Bond.hpp +++ b/node/Bond.hpp @@ -485,6 +485,21 @@ public: */ inline uint8_t getPolicy() { return _bondingPolicy; } + /** + * @return the health status of the bond + */ + inline bool isHealthy() { return _isHealthy; } + + /** + * @return the number of links comprising this bond which are considered alive + */ + inline uint8_t getNumAliveLinks() { return _numAliveLinks; }; + + /** + * @return the number of links comprising this bond + */ + inline uint8_t getNumTotalLinks() { return _numTotalLinks; } + /** * * @param allowFlowHashing @@ -626,6 +641,10 @@ private: uint16_t _maxAcceptablePacketDelayVariance; uint8_t _minAcceptableAllocation; + bool _isHealthy; + uint8_t _numAliveLinks; + uint8_t _numTotalLinks; + /** * Default initial punishment inflicted on misbehaving paths. Punishment slowly * drains linearly. For each eligibility change the remaining punishment is doubled. 
diff --git a/node/Node.cpp b/node/Node.cpp index 16484dac0..e4b0a0735 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -513,6 +513,9 @@ ZT_PeerList *Node::peers() const if (pi->second->bond()) { p->isBonded = pi->second->bond(); p->bondingPolicy = pi->second->bond()->getPolicy(); + p->isHealthy = pi->second->bond()->isHealthy(); + p->numAliveLinks = pi->second->bond()->getNumAliveLinks(); + p->numTotalLinks = pi->second->bond()->getNumTotalLinks(); } } diff --git a/one.cpp b/one.cpp index 99a3a575b..27f6a06ae 100644 --- a/one.cpp +++ b/one.cpp @@ -72,6 +72,8 @@ #include "osdep/Http.hpp" #include "osdep/Thread.hpp" +#include "node/BondController.hpp" + #include "service/OneService.hpp" #include "ext/json/json.hpp" @@ -467,6 +469,67 @@ static int cli(int argc,char **argv) printf("%u %s %s" ZT_EOL_S,scode,command.c_str(),responseBody.c_str()); return 1; } + } else if (command == "listbonds") { + const unsigned int scode = Http::GET(1024 * 1024 * 16,60000,(const struct sockaddr *)&addr,"/bonds",requestHeaders,responseHeaders,responseBody); + + if (scode == 0) { + printf("Error connecting to the ZeroTier service: %s\n\nPlease check that the service is running and that TCP port 9993 can be contacted via 127.0.0.1." ZT_EOL_S, responseBody.c_str()); + return 1; + } + + nlohmann::json j; + try { + j = OSUtils::jsonParse(responseBody); + } catch (std::exception &exc) { + printf("%u %s invalid JSON response (%s)" ZT_EOL_S,scode,command.c_str(),exc.what()); + return 1; + } catch ( ... 
) { + printf("%u %s invalid JSON response (unknown exception)" ZT_EOL_S,scode,command.c_str()); + return 1; + } + + if (scode == 200) { + if (json) { + printf("%s" ZT_EOL_S,OSUtils::jsonDump(j).c_str()); + } else { + printf(" " ZT_EOL_S); + if (j.is_array()) { + for(unsigned long k=0;k= ZT_BONDING_POLICY_NONE && bondingPolicy <= ZT_BONDING_POLICY_BALANCE_AWARE) { + policyStr = BondController::getPolicyStrByCode(bondingPolicy); + } + + printf("%10s %32s %8s %d/%d" ZT_EOL_S, + OSUtils::jsonString(p ["address"],"-").c_str(), + policyStr.c_str(), + healthStr.c_str(), + numAliveLinks, + numTotalLinks); + } + } + } + } + return 0; + } else { + printf("%u %s %s" ZT_EOL_S,scode,command.c_str(),responseBody.c_str()); + return 1; + } } else if (command == "listnetworks") { const unsigned int scode = Http::GET(1024 * 1024 * 16,60000,(const struct sockaddr *)&addr,"/network",requestHeaders,responseHeaders,responseBody); diff --git a/service/OneService.cpp b/service/OneService.cpp index 96c9babbd..c5d8076d4 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -253,6 +253,11 @@ static void _peerToJson(nlohmann::json &pj,const ZT_Peer *peer) pj["version"] = tmp; pj["latency"] = peer->latency; pj["role"] = prole; + pj["isBonded"] = peer->isBonded; + pj["bondingPolicy"] = peer->bondingPolicy; + pj["isHealthy"] = peer->isHealthy; + pj["numAliveLinks"] = peer->numAliveLinks; + pj["numTotalLinks"] = peer->numTotalLinks; nlohmann::json pa = nlohmann::json::array(); for(unsigned int i=0;ipathCount;++i) { @@ -1348,6 +1353,35 @@ public: } else scode = 404; _node->freeQueryResult((void *)pl); } else scode = 500; + } else if (ps[0] == "bonds") { + ZT_PeerList *pl = _node->peers(); + if (pl) { + if (ps.size() == 1) { + // Return [array] of all peers + + res = nlohmann::json::array(); + for(unsigned long i=0;ipeerCount;++i) { + nlohmann::json pj; + _peerToJson(pj,&(pl->peers[i])); + res.push_back(pj); + } + + scode = 200; + } else if (ps.size() == 2) { + // Return a single peer 
by ID or 404 if not found + + uint64_t wantp = Utils::hexStrToU64(ps[1].c_str()); + for(unsigned long i=0;ipeerCount;++i) { + if (pl->peers[i].address == wantp) { + _peerToJson(res,&(pl->peers[i])); + scode = 200; + break; + } + } + + } else scode = 404; + _node->freeQueryResult((void *)pl); + } else scode = 500; } else { if (_controller) { scode = _controller->handleControlPlaneHttpGET(std::vector(ps.begin()+1,ps.end()),urlArgs,headers,body,responseBody,responseContentType);