diff --git a/controller/PostgreSQL.cpp b/controller/PostgreSQL.cpp index 64b0767bf..4550e8722 100644 --- a/controller/PostgreSQL.cpp +++ b/controller/PostgreSQL.cpp @@ -150,7 +150,7 @@ PostgreSQL::PostgreSQL(const Identity &myId, const char *path, int listenPort, R { char myAddress[64]; _myAddressStr = myId.address().toString(myAddress); - _connString = std::string(path) + " application_name=controller_" + _myAddressStr; + _connString = std::string(path); auto f = std::make_shared(_connString); _pool = std::make_shared >( 15, 5, std::static_pointer_cast(f)); @@ -357,7 +357,7 @@ std::string PostgreSQL::getSSOAuthURL(const nlohmann::json &member, const std::s std::string nonce = ""; // check if the member exists first. - pqxx::row count = w.exec_params1("SELECT count(id) FROM ztc_member WHERE id = $1 AND network_id = $2", memberId, networkId); + pqxx::row count = w.exec_params1("SELECT count(id) FROM ztc_member WHERE id = $1 AND network_id = $2 AND deleted = false", memberId, networkId); if (count[0].as() == 1) { // find an unused nonce, if one exists. pqxx::result r = w.exec_params("SELECT nonce FROM ztc_sso_expiry " diff --git a/ext/central-controller-docker/main.sh b/ext/central-controller-docker/main.sh index fc724017b..7a1086907 100755 --- a/ext/central-controller-docker/main.sh +++ b/ext/central-controller-docker/main.sh @@ -62,9 +62,11 @@ popd DEFAULT_PORT=9993 +APP_NAME="controller-$(cat /var/lib/zerotier-one/identity.public | cut -d ':' -f 1)" + echo "{ \"settings\": { - \"controllerDbPath\": \"postgres:host=${ZT_DB_HOST} port=${ZT_DB_PORT} dbname=${ZT_DB_NAME} user=${ZT_DB_USER} password=${ZT_DB_PASSWORD} sslmode=prefer sslcert=${DB_CLIENT_CERT} sslkey=${DB_CLIENT_KEY} sslrootcert=${DB_SERVER_CA}\", + \"controllerDbPath\": \"postgres:host=${ZT_DB_HOST} port=${ZT_DB_PORT} dbname=${ZT_DB_NAME} user=${ZT_DB_USER} password=${ZT_DB_PASSWORD} application_name=${APP_NAME} sslmode=prefer sslcert=${DB_CLIENT_CERT} sslkey=${DB_CLIENT_KEY} sslrootcert=${DB_SERVER_CA}\", \"portMappingEnabled\": true, \"softwareUpdate\": \"disable\", \"interfacePrefixBlacklist\": [ diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index f0a232480..2bdea6474 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -420,157 +420,6 @@ enum ZT_ResultCode */ #define ZT_ResultCode_isFatal(x) ((((int)(x)) >= 100)&&(((int)(x)) < 1000)) - -/** - * Multipath bonding policy - */ -enum ZT_MultipathBondingPolicy -{ - /** - * Normal operation. No fault tolerance, no load balancing - */ - ZT_BONDING_POLICY_NONE = 0, - - /** - * Sends traffic out on only one path at a time. Configurable immediate - * fail-over. - */ - ZT_BONDING_POLICY_ACTIVE_BACKUP = 1, - - /** - * Sends traffic out on all paths - */ - ZT_BONDING_POLICY_BROADCAST = 2, - - /** - * Stripes packets across all paths - */ - ZT_BONDING_POLICY_BALANCE_RR = 3, - - /** - * Packets destined for specific peers will always be sent over the same - * path. - */ - ZT_BONDING_POLICY_BALANCE_XOR = 4, - - /** - * Balances flows among all paths according to path performance - */ - ZT_BONDING_POLICY_BALANCE_AWARE = 5 -}; - -/** - * Multipath active re-selection policy (linkSelectMethod) - */ -enum ZT_MultipathLinkSelectMethod -{ - /** - * Primary link regains status as active link whenever it comes back up - * (default when links are explicitly specified) - */ - ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS = 0, - - /** - * Primary link regains status as active link when it comes back up and - * (if) it is better than the currently-active link. - */ - ZT_MULTIPATH_RESELECTION_POLICY_BETTER = 1, - - /** - * Primary link regains status as active link only if the currently-active - * link fails. - */ - ZT_MULTIPATH_RESELECTION_POLICY_FAILURE = 2, - - /** - * The primary link can change if a superior path is detected. - * (default if user provides no fail-over guidance) - */ - ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE = 3 -}; - -/** - * Mode of multipath link interface - */ -enum ZT_MultipathLinkMode -{ - ZT_MULTIPATH_SLAVE_MODE_PRIMARY = 0, - ZT_MULTIPATH_SLAVE_MODE_SPARE = 1 -}; - -/** - * Strategy for path monitoring - */ -enum ZT_MultipathMonitorStrategy -{ - /** - * Use bonding policy's default strategy - */ - ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DEFAULT = 0, - - /** - * Does not actively send probes to judge aliveness, will rely - * on conventional traffic and summary statistics. - */ - ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE = 1, - - /** - * Sends probes at a constant rate to judge aliveness. - */ - ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_ACTIVE = 2, - - /** - * Sends probes at varying rates which correlate to native - * traffic loads to judge aliveness. - */ - ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC = 3 -}; - -/** - * Strategy for re-balancing protocol flows - */ -enum ZT_MultipathFlowRebalanceStrategy -{ - /** - * Flows will only be re-balanced among links during - * assignment or failover. This minimizes the possibility - * of sequence reordering and is thus the default setting. - */ - ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_PASSIVE = 0, - - /** - * Flows that are active may be re-assigned to a new more - * suitable link if it can be done without disrupting the flow. - * This setting can sometimes cause sequence re-ordering. - */ - ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_OPPORTUNISTIC = 0, - - /** - * Flows will be continuously re-assigned the most suitable link - * in order to maximize "balance". This can often cause sequence - * reordering and is thus only reccomended for protocols like UDP. - */ - ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_AGGRESSIVE = 2 -}; - -/** - * Indices for the path quality weight vector - */ -enum ZT_MultipathQualityWeightIndex -{ - ZT_QOS_LAT_IDX, - ZT_QOS_LTM_IDX, - ZT_QOS_PDV_IDX, - ZT_QOS_PLR_IDX, - ZT_QOS_PER_IDX, - ZT_QOS_THR_IDX, - ZT_QOS_THM_IDX, - ZT_QOS_THV_IDX, - ZT_QOS_AGE_IDX, - ZT_QOS_SCP_IDX, - ZT_QOS_WEIGHT_SIZE -}; - /** * Status codes sent to status update callback when things happen */ diff --git a/node/Bond.cpp b/node/Bond.cpp index 3d64f3554..d0678a447 100644 --- a/node/Bond.cpp +++ b/node/Bond.cpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2013-2020 ZeroTier, Inc. + * Copyright (c)2013-2021 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2025-01-01 + * Change Date: 2026-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -12,92 +12,256 @@ /****/ #include "Bond.hpp" - -#include "../osdep/OSUtils.hpp" #include "Switch.hpp" #include +#include namespace ZeroTier { -Bond::Bond(const RuntimeEnvironment* renv, int policy, const SharedPtr& peer) - : RR(renv) - , _peer(peer) - , _qosCutoffCount(0) - , _ackCutoffCount(0) - , _lastAckRateCheck(0) - , _lastQoSRateCheck(0) - , _lastQualityEstimation(0) - , _lastCheckUserPreferences(0) - , _lastBackgroundTaskCheck(0) - , _lastBondStatusLog(0) - , _lastPathNegotiationReceived(0) - , _lastPathNegotiationCheck(0) - , _lastSentPathNegotiationRequest(0) - , _lastFlowStatReset(0) - , _lastFlowExpirationCheck(0) - , _lastFlowRebalance(0) - , _lastFrame(0) - , _lastActiveBackupPathChange(0) +static unsigned char s_freeRandomByteCounter = 0; + +int Bond::_minReqMonitorInterval = ZT_BOND_FAILOVER_DEFAULT_INTERVAL; +uint8_t Bond::_defaultPolicy = ZT_BOND_POLICY_NONE; + +Phy* Bond::_phy; + +Mutex Bond::_bonds_m; +Mutex Bond::_links_m; + +std::string Bond::_defaultPolicyStr; +std::map > Bond::_bonds; +std::map Bond::_policyTemplateAssignments; +std::map > Bond::_bondPolicyTemplates; +std::map > > Bond::_linkDefinitions; +std::map > > Bond::_interfaceToLinkMap; + +bool Bond::linkAllowed(std::string& policyAlias, SharedPtr link) { - setReasonableDefaults(policy, SharedPtr(), false); - _policyAlias = BondController::getPolicyStrByCode(policy); + bool foundInDefinitions = false; + if (_linkDefinitions.count(policyAlias)) { + auto it = _linkDefinitions[policyAlias].begin(); + while (it != _linkDefinitions[policyAlias].end()) { + if (link->ifname() == (*it)->ifname()) { + foundInDefinitions = true; + break; + } + ++it; + } + } + return _linkDefinitions[policyAlias].empty() || foundInDefinitions; +} + +void Bond::addCustomLink(std::string& policyAlias, SharedPtr link) +{ + Mutex::Lock _l(_links_m); + _linkDefinitions[policyAlias].push_back(link); + auto search = _interfaceToLinkMap[policyAlias].find(link->ifname()); + if (search == _interfaceToLinkMap[policyAlias].end()) { + link->setAsUserSpecified(true); + _interfaceToLinkMap[policyAlias].insert(std::pair >(link->ifname(), link)); + } +} + +bool Bond::addCustomPolicy(const SharedPtr& newBond) +{ + Mutex::Lock _l(_bonds_m); + if (! _bondPolicyTemplates.count(newBond->policyAlias())) { + _bondPolicyTemplates[newBond->policyAlias()] = newBond; + return true; + } + return false; +} + +bool Bond::assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias) +{ + Mutex::Lock _l(_bonds_m); + if (! _policyTemplateAssignments.count(identity)) { + _policyTemplateAssignments[identity] = policyAlias; + return true; + } + return false; +} + +SharedPtr Bond::getBondByPeerId(int64_t identity) +{ + Mutex::Lock _l(_bonds_m); + return _bonds.count(identity) ? _bonds[identity] : SharedPtr(); +} + +SharedPtr Bond::createTransportTriggeredBond(const RuntimeEnvironment* renv, const SharedPtr& peer) +{ + Mutex::Lock _l(_bonds_m); + int64_t identity = peer->identity().address().toInt(); + Bond* bond = nullptr; + if (! _bonds.count(identity)) { + std::string policyAlias; + if (! _policyTemplateAssignments.count(identity)) { + if (_defaultPolicy) { + bond = new Bond(renv, _defaultPolicy, peer); + bond->log("new default bond"); + } + if (! _defaultPolicy && _defaultPolicyStr.length()) { + bond = new Bond(renv, _bondPolicyTemplates[_defaultPolicyStr].ptr(), peer); + bond->log("new default custom bond"); + } + } + else { + if (! _bondPolicyTemplates[_policyTemplateAssignments[identity]]) { + bond = new Bond(renv, _defaultPolicy, peer); + bond->log("peer-specific bond, was specified as %s but the bond definition was not found, using default %s", _policyTemplateAssignments[identity].c_str(), getPolicyStrByCode(_defaultPolicy).c_str()); + } + else { + bond = new Bond(renv, _bondPolicyTemplates[_policyTemplateAssignments[identity]].ptr(), peer); + bond->log("new default bond"); + } + } + } + if (bond) { + _bonds[identity] = bond; + /** + * Determine if user has specified anything that could affect the bonding policy's decisions + */ + if (_interfaceToLinkMap.count(bond->policyAlias())) { + std::map >::iterator it = _interfaceToLinkMap[bond->policyAlias()].begin(); + while (it != _interfaceToLinkMap[bond->policyAlias()].end()) { + if (it->second->isUserSpecified()) { + bond->_userHasSpecifiedLinks = true; + } + if (it->second->isUserSpecified() && it->second->primary()) { + bond->_userHasSpecifiedPrimaryLink = true; + } + if (it->second->isUserSpecified() && it->second->userHasSpecifiedFailoverInstructions()) { + bond->_userHasSpecifiedFailoverInstructions = true; + } + if (it->second->isUserSpecified() && (it->second->speed() > 0)) { + bond->_userHasSpecifiedLinkSpeeds = true; + } + ++it; + } + } + return bond; + } + return SharedPtr(); +} + +SharedPtr Bond::getLinkBySocket(const std::string& policyAlias, uint64_t localSocket) +{ + Mutex::Lock _l(_links_m); + char ifname[32] = { 0 }; // 256 because interfaces on Windows can potentially be that long + _phy->getIfName((PhySocket*)((uintptr_t)localSocket), ifname, sizeof(ifname) - 1); + // fprintf(stderr, "ifname %s\n",ifname); + std::string ifnameStr(ifname); + auto search = _interfaceToLinkMap[policyAlias].find(ifnameStr); + if (search == _interfaceToLinkMap[policyAlias].end()) { + // If the link wasn't already known, add a new entry + // fprintf(stderr, "adding new link?? %s\n", ifnameStr.c_str()); + SharedPtr s = new Link(ifnameStr, 0, 0, true, ZT_BOND_SLAVE_MODE_SPARE, "", 0.0); + _interfaceToLinkMap[policyAlias].insert(std::pair >(ifnameStr, s)); + return s; + } + else { + return search->second; + } +} + +SharedPtr Bond::getLinkByName(const std::string& policyAlias, const std::string& ifname) +{ + Mutex::Lock _l(_links_m); + auto search = _interfaceToLinkMap[policyAlias].find(ifname); + if (search != _interfaceToLinkMap[policyAlias].end()) { + return search->second; + } + return SharedPtr(); +} + +void Bond::processBackgroundTasks(void* tPtr, const int64_t now) +{ + unsigned long _currMinReqMonitorInterval = ZT_BOND_FAILOVER_DEFAULT_INTERVAL; + Mutex::Lock _l(_bonds_m); + std::map >::iterator bondItr = _bonds.begin(); + while (bondItr != _bonds.end()) { + // Update Bond Controller's background processing timer + _currMinReqMonitorInterval = std::min(_currMinReqMonitorInterval, (unsigned long)(bondItr->second->monitorInterval())); + // Process bond tasks + bondItr->second->processBackgroundBondTasks(tPtr, now); + ++bondItr; + } + _minReqMonitorInterval = std::min(_currMinReqMonitorInterval, (unsigned long)ZT_BOND_FAILOVER_DEFAULT_INTERVAL); +} + +Bond::Bond(const RuntimeEnvironment* renv) : RR(renv) +{ +} + +Bond::Bond(const RuntimeEnvironment* renv, int policy, const SharedPtr& peer) : RR(renv), _freeRandomByte((unsigned char)((uintptr_t)this >> 4) ^ ++s_freeRandomByteCounter), _peer(peer), _peerId(_peer->_id.address().toInt()) +{ + setBondParameters(policy, SharedPtr(), false); + _policyAlias = getPolicyStrByCode(policy); } Bond::Bond(const RuntimeEnvironment* renv, std::string& basePolicy, std::string& policyAlias, const SharedPtr& peer) : RR(renv), _policyAlias(policyAlias), _peer(peer) { - setReasonableDefaults(BondController::getPolicyCodeByStr(basePolicy), SharedPtr(), false); + setBondParameters(getPolicyCodeByStr(basePolicy), SharedPtr(), false); } Bond::Bond(const RuntimeEnvironment* renv, SharedPtr originalBond, const SharedPtr& peer) : RR(renv) + , _freeRandomByte((unsigned char)((uintptr_t)this >> 4) ^ ++s_freeRandomByteCounter) , _peer(peer) - , _lastAckRateCheck(0) - , _lastQoSRateCheck(0) - , _lastQualityEstimation(0) - , _lastCheckUserPreferences(0) - , _lastBackgroundTaskCheck(0) - , _lastBondStatusLog(0) - , _lastPathNegotiationReceived(0) - , _lastPathNegotiationCheck(0) - , _lastFlowStatReset(0) - , _lastFlowExpirationCheck(0) - , _lastFlowRebalance(0) - , _lastFrame(0) + , _peerId(_peer->_id.address().toInt()) { - setReasonableDefaults(originalBond->_bondingPolicy, originalBond, true); + setBondParameters(originalBond->_policy, originalBond, true); } -void Bond::nominatePath(const SharedPtr& path, int64_t now) +void Bond::nominatePathToBond(const SharedPtr& path, int64_t now) { - char traceMsg[256]; - char pathStr[128]; + char pathStr[64] = { 0 }; path->address().toString(pathStr); Mutex::Lock _l(_paths_m); + /** + * Ensure the link is allowed and the path is not already present + */ if (! RR->bc->linkAllowed(_policyAlias, getLink(path))) { return; } bool alreadyPresent = false; for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (path.ptr() == _paths[i].ptr()) { - // Previously encountered path, not notifying bond + // Sanity check + if (path.ptr() == _paths[i].p.ptr()) { alreadyPresent = true; break; } } if (! alreadyPresent) { + /** + * Find somewhere to stick it + */ for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (! _paths[i]) { - _paths[i] = path; - sprintf( - traceMsg, - "%s (bond) Nominating link %s/%s to peer %llx. It has now entered its trial period", - OSUtils::humanReadableTimestamp().c_str(), - getLink(path)->ifname().c_str(), - pathStr, - (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); - _paths[i]->startTrial(now); + if (! _paths[i].p) { + _paths[i].set(now, path); + /** + * Set user preferences and update state variables of other paths on the same link + */ + SharedPtr sl = getLink(_paths[i].p); + if (sl) { + // Determine if there are any other paths on this link + bool bFoundCommonLink = false; + SharedPtr commonLink = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); + for (unsigned int j = 0; j < ZT_MAX_PEER_NETWORK_PATHS; ++j) { + if (_paths[j].p && _paths[j].p.ptr() != _paths[i].p.ptr()) { + if (RR->bc->getLinkBySocket(_policyAlias, _paths[j].p->localSocket()) == commonLink) { + bFoundCommonLink = true; + _paths[j].onlyPathOnLink = false; + } + } + } + _paths[i].ipvPref = sl->ipvPref(); + _paths[i].mode = sl->mode(); + _paths[i].enabled = sl->enabled(); + _paths[i].onlyPathOnLink = ! bFoundCommonLink; + } + log("nominate link %s/%s (now in trial period)", getLink(path)->ifname().c_str(), pathStr); break; } } @@ -106,21 +270,29 @@ void Bond::nominatePath(const SharedPtr& path, int64_t now) estimatePathQuality(now); } +void Bond::addPathToBond(int nominatedIdx, int bondedIdx) +{ + // Map bonded set to nominated set + _bondIdxMap[bondedIdx] = nominatedIdx; + // Tell the bonding layer that we can now use this bond for traffic + _paths[nominatedIdx].bonded = true; +} + SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) { Mutex::Lock _l(_paths_m); /** * active-backup */ - if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - if (_abPath) { - return _abPath; + if (_policy == ZT_BOND_POLICY_ACTIVE_BACKUP) { + if (_abPathIdx != ZT_MAX_PEER_NETWORK_PATHS && _paths[_abPathIdx].p) { + return _paths[_abPathIdx].p; } } /** * broadcast */ - if (_bondingPolicy == ZT_BONDING_POLICY_BROADCAST) { + if (_policy == ZT_BOND_POLICY_BROADCAST) { return SharedPtr(); // Handled in Switch::_trySend() } if (! _numBondedPaths) { @@ -129,16 +301,16 @@ SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) /** * balance-rr */ - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) { + if (_policy == ZT_BOND_POLICY_BALANCE_RR) { if (! _allowFlowHashing) { if (_packetsPerLink == 0) { // Randomly select a path - return _paths[_bondedIdx[_freeRandomByte % _numBondedPaths]]; // TODO: Optimize + return _paths[_bondIdxMap[_freeRandomByte % _numBondedPaths]].p; } if (_rrPacketsSentOnCurrLink < _packetsPerLink) { // Continue to use this link ++_rrPacketsSentOnCurrLink; - return _paths[_bondedIdx[_rrIdx]]; + return _paths[_bondIdxMap[_rrIdx]].p; } // Reset striping counter _rrPacketsSentOnCurrLink = 0; @@ -149,42 +321,42 @@ SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) int _tempIdx = _rrIdx; for (int searchCount = 0; searchCount < (_numBondedPaths - 1); searchCount++) { _tempIdx = (_tempIdx == (_numBondedPaths - 1)) ? 0 : _tempIdx + 1; - if (_bondedIdx[_tempIdx] != ZT_MAX_PEER_NETWORK_PATHS) { - if (_paths[_bondedIdx[_tempIdx]] && _paths[_bondedIdx[_tempIdx]]->eligible(now, _ackSendInterval)) { + if (_bondIdxMap[_tempIdx] != ZT_MAX_PEER_NETWORK_PATHS) { + if (_paths[_bondIdxMap[_tempIdx]].p && _paths[_bondIdxMap[_tempIdx]].eligible) { _rrIdx = _tempIdx; break; } } } } - if (_paths[_bondedIdx[_rrIdx]]) { - return _paths[_bondedIdx[_rrIdx]]; + if (_paths[_bondIdxMap[_rrIdx]].p) { + return _paths[_bondIdxMap[_rrIdx]].p; } } } /** * balance-xor */ - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { + if (_policy == ZT_BOND_POLICY_BALANCE_XOR || _policy == ZT_BOND_POLICY_BALANCE_AWARE) { if (! _allowFlowHashing || flowId == -1) { // No specific path required for unclassified traffic, send on anything - return _paths[_bondedIdx[_freeRandomByte % _numBondedPaths]]; // TODO: Optimize + int m_idx = _bondIdxMap[_freeRandomByte % _numBondedPaths]; + return _paths[m_idx].p; } else if (_allowFlowHashing) { - // TODO: Optimize Mutex::Lock _l(_flows_m); SharedPtr flow; if (_flows.count(flowId)) { flow = _flows[flowId]; - flow->updateActivity(now); + flow->lastActivity = now; } else { unsigned char entropy; Utils::getSecureRandom(&entropy, 1); - flow = createFlow(SharedPtr(), flowId, entropy, now); + flow = createFlow(ZT_MAX_PEER_NETWORK_PATHS, flowId, entropy, now); } if (flow) { - return flow->assignedPath(); + return _paths[flow->assignedPath].p; } } } @@ -193,90 +365,104 @@ SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) void Bond::recordIncomingInvalidPacket(const SharedPtr& path) { - // char traceMsg[256]; char pathStr[128]; path->address().toString(pathStr); - // sprintf(traceMsg, "%s (qos) Invalid packet on link %s/%s from peer %llx", - // OSUtils::humanReadableTimestamp().c_str(), getLink(path)->ifname().c_str(), pathStr, (unsigned long long)(_peer->_id.address().toInt())); - // RR->t->bondStateMessage(NULL, traceMsg); + // char pathStr[64] = { 0 }; path->address().toString(pathStr); + // log("%s (qos) Invalid packet on link %s/%s from peer %llx", + // getLink(path)->ifname().c_str(), pathStr); Mutex::Lock _l(_paths_m); for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (_paths[i] == path) { - _paths[i]->packetValiditySamples.push(false); + if (_paths[i].p == path) { + _paths[i].packetValiditySamples.push(false); } } } -void Bond::recordOutgoingPacket(const SharedPtr& path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) +void Bond::recordOutgoingPacket(const SharedPtr& path, uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) { - // char traceMsg[256]; char pathStr[128]; path->address().toString(pathStr); - // sprintf(traceMsg, "%s (bond) Outgoing packet on link %s/%s to peer %llx", - // OSUtils::humanReadableTimestamp().c_str(), getLink(path)->ifname().c_str(), pathStr, (unsigned long long)(_peer->_id.address().toInt())); - // RR->t->bondStateMessage(NULL, traceMsg); _freeRandomByte += (unsigned char)(packetId >> 8); // Grab entropy to use in path selection logic - if (! _shouldCollectPathStatistics) { - return; + bool isFrame = (verb == Packet::Packet::VERB_ECHO || verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME); + if (isFrame) { + // char pathStr[64] = { 0 }; + // path->address().toString(pathStr); + // int pathIdx = getNominatedPathIdx(path); + // log("outgoing packet via [%d]", pathIdx); + // log("outgoing packet via %s/%s", getLink(path)->ifname().c_str(), pathStr); } - bool isFrame = (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME); bool shouldRecord = (packetId & (ZT_QOS_ACK_DIVISOR - 1) && (verb != Packet::VERB_ACK) && (verb != Packet::VERB_QOS_MEASUREMENT)); if (isFrame || shouldRecord) { Mutex::Lock _l(_paths_m); + int pathIdx = getNominatedPathIdx(path); + if (pathIdx == ZT_MAX_PEER_NETWORK_PATHS) { + return; + } if (isFrame) { - ++(path->_packetsOut); + ++(_paths[pathIdx].packetsOut); _lastFrame = now; } if (shouldRecord) { - path->_unackedBytes += payloadLength; + //_paths[pathIdx].unackedBytes += payloadLength; // Take note that we're expecting a VERB_ACK on this path as of a specific time - if (path->qosStatsOut.size() < ZT_QOS_MAX_OUTSTANDING_RECORDS) { - path->qosStatsOut[packetId] = now; + if (_paths[pathIdx].qosStatsOut.size() < ZT_QOS_MAX_OUTSTANDING_RECORDS) { + _paths[pathIdx].qosStatsOut[packetId] = now; } } } if (_allowFlowHashing && (flowId != ZT_QOS_NO_FLOW)) { Mutex::Lock _l(_flows_m); if (_flows.count(flowId)) { - _flows[flowId]->recordOutgoingBytes(payloadLength); + _flows[flowId]->bytesOut += payloadLength; } } } void Bond::recordIncomingPacket(const SharedPtr& path, uint64_t packetId, uint16_t payloadLength, Packet::Verb verb, int32_t flowId, int64_t now) { - // char traceMsg[256]; char pathStr[128]; path->address().toString(pathStr); - // sprintf(traceMsg, "%s (bond) Incoming packet on link %s/%s from peer %llx [id=%llx, len=%d, verb=%d, flowId=%x]", - // OSUtils::humanReadableTimestamp().c_str(), getLink(path)->ifname().c_str(), pathStr, (unsigned long long)(_peer->_id.address().toInt()), packetId, payloadLength, verb, flowId); - // RR->t->bondStateMessage(NULL, traceMsg); - bool isFrame = (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME); + bool isFrame = (verb == Packet::Packet::VERB_ECHO || verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME); + if (isFrame) { + // char pathStr[64] = { 0 }; path->address().toString(pathStr); + // int pathIdx = getNominatedPathIdx(path); + // log("incoming packet via [%d] [id=%llx, len=%d, verb=%d, flowId=%x]", pathIdx, packetId, payloadLength, verb, flowId); + // log("incoming packet via %s/%s (ls=%llx) [id=%llx, len=%d, verb=%d, flowId=%x]", getLink(path)->ifname().c_str(), pathStr, path->localSocket(), packetId, payloadLength, verb, flowId); + } bool shouldRecord = (packetId & (ZT_QOS_ACK_DIVISOR - 1) && (verb != Packet::VERB_ACK) && (verb != Packet::VERB_QOS_MEASUREMENT)); + Mutex::Lock _l(_paths_m); + int pathIdx = getNominatedPathIdx(path); + if (pathIdx == ZT_MAX_PEER_NETWORK_PATHS) { + return; + } + // Take note of the time that this previously-dead path received a packet + if (! _paths[pathIdx].alive) { + _paths[pathIdx].lastAliveToggle = now; + } if (isFrame || shouldRecord) { - Mutex::Lock _l(_paths_m); - if (isFrame) { - ++(path->_packetsIn); - _lastFrame = now; - } - if (shouldRecord) { - path->ackStatsIn[packetId] = payloadLength; - ++(path->_packetsReceivedSinceLastAck); - path->qosStatsIn[packetId] = now; - ++(path->_packetsReceivedSinceLastQoS); - path->packetValiditySamples.push(true); + if (_paths[pathIdx].allowed()) { + if (isFrame) { + ++(_paths[pathIdx].packetsIn); + _lastFrame = now; + } + if (shouldRecord) { + _paths[pathIdx].qosStatsIn[packetId] = now; + ++(_paths[pathIdx].packetsReceivedSinceLastQoS); + _paths[pathIdx].packetValiditySamples.push(true); + } } } + /** * Learn new flows and pro-actively create entries for them in the bond so * that the next time we send a packet out that is part of a flow we know * which path to use. */ - if ((flowId != ZT_QOS_NO_FLOW) && (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE)) { + if ((flowId != ZT_QOS_NO_FLOW) && (_policy == ZT_BOND_POLICY_BALANCE_RR || _policy == ZT_BOND_POLICY_BALANCE_XOR || _policy == ZT_BOND_POLICY_BALANCE_AWARE)) { Mutex::Lock _l(_flows_m); SharedPtr flow; if (! _flows.count(flowId)) { - flow = createFlow(path, flowId, 0, now); + flow = createFlow(pathIdx, flowId, 0, now); } else { flow = _flows[flowId]; } if (flow) { - flow->recordIncomingBytes(payloadLength); + flow->bytesIn += payloadLength; } } } @@ -284,55 +470,31 @@ void Bond::recordIncomingPacket(const SharedPtr& path, uint64_t packetId, void Bond::receivedQoS(const SharedPtr& path, int64_t now, int count, uint64_t* rx_id, uint16_t* rx_ts) { Mutex::Lock _l(_paths_m); - // char traceMsg[256]; char pathStr[128]; path->address().toString(pathStr); - // sprintf(traceMsg, "%s (qos) Received QoS packet sampling %d frames from peer %llx via %s/%s", - // OSUtils::humanReadableTimestamp().c_str(), count, (unsigned long long)(_peer->_id.address().toInt()), getLink(path)->ifname().c_str(), pathStr); - // RR->t->bondStateMessage(NULL, traceMsg); + int pathIdx = getNominatedPathIdx(path); + if (pathIdx == ZT_MAX_PEER_NETWORK_PATHS) { + return; + } + // char pathStr[64] = { 0 }; path->address().toString(pathStr); + // log("received QoS packet (sampling %d frames) via %s/%s", count, getLink(path)->ifname().c_str(), pathStr); // Look up egress times and compute latency values for each record std::map::iterator it; for (int j = 0; j < count; j++) { - it = path->qosStatsOut.find(rx_id[j]); - if (it != path->qosStatsOut.end()) { - path->latencySamples.push(((uint16_t)(now - it->second) - rx_ts[j]) / 2); - path->qosStatsOut.erase(it); + it = _paths[pathIdx].qosStatsOut.find(rx_id[j]); + if (it != _paths[pathIdx].qosStatsOut.end()) { + _paths[pathIdx].latencySamples.push(((uint16_t)(now - it->second) - rx_ts[j]) / 2); + _paths[pathIdx].qosStatsOut.erase(it); } } - path->qosRecordSize.push(count); + _paths[pathIdx].qosRecordSize.push(count); } -void Bond::receivedAck(const SharedPtr& path, int64_t now, int32_t ackedBytes) -{ - Mutex::Lock _l(_paths_m); - // char traceMsg[256]; char pathStr[128]; path->address().toString(pathStr); - // sprintf(traceMsg, "%s (qos) Received ACK packet for %d bytes from peer %llx via %s/%s", - // OSUtils::humanReadableTimestamp().c_str(), ackedBytes, (unsigned long long)(_peer->_id.address().toInt()), getLink(path)->ifname().c_str(), pathStr); - // RR->t->bondStateMessage(NULL, traceMsg); - path->_lastAckReceived = now; - path->_unackedBytes = (ackedBytes > path->_unackedBytes) ? 0 : path->_unackedBytes - ackedBytes; - int64_t timeSinceThroughputEstimate = (now - path->_lastThroughputEstimation); - if (timeSinceThroughputEstimate >= throughputMeasurementInterval) { - // TODO: See if this floating point math can be reduced - uint64_t throughput = (uint64_t)((float)(path->_bytesAckedSinceLastThroughputEstimation) / ((float)timeSinceThroughputEstimate / (float)1000)); - throughput /= 1000; - if (throughput > 0.0) { - path->throughputSamples.push(throughput); - path->_throughputMax = throughput > path->_throughputMax ? throughput : path->_throughputMax; - } - path->_lastThroughputEstimation = now; - path->_bytesAckedSinceLastThroughputEstimation = 0; - } - else { - path->_bytesAckedSinceLastThroughputEstimation += ackedBytes; - } -} - -int32_t Bond::generateQoSPacket(const SharedPtr& path, int64_t now, char* qosBuffer) +int32_t Bond::generateQoSPacket(int pathIdx, int64_t now, char* qosBuffer) { int32_t len = 0; - std::map::iterator it = path->qosStatsIn.begin(); + std::map::iterator it = _paths[pathIdx].qosStatsIn.begin(); int i = 0; - int numRecords = std::min(path->_packetsReceivedSinceLastQoS, ZT_QOS_TABLE_SIZE); - while (i < numRecords && it != path->qosStatsIn.end()) { + int numRecords = std::min(_paths[pathIdx].packetsReceivedSinceLastQoS, ZT_QOS_TABLE_SIZE); + while (i < numRecords && it != _paths[pathIdx].qosStatsIn.end()) { uint64_t id = it->first; memcpy(qosBuffer, &id, sizeof(uint64_t)); qosBuffer += sizeof(uint64_t); @@ -340,7 +502,7 @@ int32_t Bond::generateQoSPacket(const SharedPtr& path, int64_t now, char* memcpy(qosBuffer, &holdingTime, sizeof(uint16_t)); qosBuffer += sizeof(uint16_t); len += sizeof(uint64_t) + sizeof(uint16_t); - path->qosStatsIn.erase(it++); + _paths[pathIdx].qosStatsIn.erase(it++); ++i; } return len; @@ -348,35 +510,23 @@ int32_t Bond::generateQoSPacket(const SharedPtr& path, int64_t now, char* bool Bond::assignFlowToBondedPath(SharedPtr& flow, int64_t now) { - char traceMsg[256]; - char curPathStr[128]; + char curPathStr[64] = { 0 }; unsigned int idx = ZT_MAX_PEER_NETWORK_PATHS; - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR) { - idx = abs((int)(flow->id() % (_numBondedPaths))); - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[_bondedIdx[idx]]->localSocket()); - _paths[_bondedIdx[idx]]->address().toString(curPathStr); - sprintf( - traceMsg, - "%s (balance-xor) Assigned outgoing flow %x to peer %llx to link %s/%s, %lu active flow(s)", - OSUtils::humanReadableTimestamp().c_str(), - flow->id(), - (unsigned long long)(_peer->_id.address().toInt()), - link->ifname().c_str(), - curPathStr, - (unsigned long)_flows.size()); - RR->t->bondStateMessage(NULL, traceMsg); - flow->assignPath(_paths[_bondedIdx[idx]], now); - ++(_paths[_bondedIdx[idx]]->_assignedFlowCount); + if (_policy == ZT_BOND_POLICY_BALANCE_XOR) { + idx = abs((int)(flow->id % (_numBondedPaths))); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[_bondIdxMap[idx]].p->localSocket()); + _paths[_bondIdxMap[idx]].p->address().toString(curPathStr); + flow->assignPath(_bondIdxMap[idx], now); + ++(_paths[_bondIdxMap[idx]].assignedFlowCount); } - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { + if (_policy == ZT_BOND_POLICY_BALANCE_AWARE) { unsigned char entropy; Utils::getSecureRandom(&entropy, 1); if (_totalBondUnderload) { entropy %= _totalBondUnderload; } if (! _numBondedPaths) { - sprintf(traceMsg, "%s (balance-aware) There are no bonded paths, cannot assign flow %x\n", OSUtils::humanReadableTimestamp().c_str(), flow->id()); - RR->t->bondStateMessage(NULL, traceMsg); + log("unable to assign flow %x (bond has no links)\n", flow->id); return false; } /* Since there may be scenarios where a path is removed before we can re-estimate @@ -385,16 +535,16 @@ bool Bond::assignFlowToBondedPath(SharedPtr& flow, int64_t now) not being able to find a path to assign this flow to. */ int totalIncompleteAllocation = 0; for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (_paths[i] && _paths[i]->bonded()) { - totalIncompleteAllocation += _paths[i]->_allocation; + if (_paths[i].p && _paths[i].bonded) { + totalIncompleteAllocation += _paths[i].allocation; } } entropy %= totalIncompleteAllocation; for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (_paths[i] && _paths[i]->bonded()) { - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); - _paths[i]->address().toString(curPathStr); - uint8_t probabilitySegment = (_totalBondUnderload > 0) ? _paths[i]->_affinity : _paths[i]->_allocation; + if (_paths[i].p && _paths[i].bonded) { + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); + _paths[i].p->address().toString(curPathStr); + uint8_t probabilitySegment = (_totalBondUnderload > 0) ? _paths[i].affinity : _paths[i].allocation; if (entropy <= probabilitySegment) { idx = i; break; @@ -403,61 +553,35 @@ bool Bond::assignFlowToBondedPath(SharedPtr& flow, int64_t now) } } if (idx < ZT_MAX_PEER_NETWORK_PATHS) { - if (flow->_assignedPath) { - flow->_previouslyAssignedPath = flow->_assignedPath; - } - flow->assignPath(_paths[idx], now); - ++(_paths[idx]->_assignedFlowCount); + flow->assignPath(idx, now); + ++(_paths[idx].assignedFlowCount); } else { - fprintf(stderr, "could not assign flow?\n"); - exit(0); // TODO: Remove for production + log("unable to assign out-flow %x (unknown reason)", flow->id); return false; } } - if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - if (_abOverflowEnabled) { - flow->assignPath(_abPath, now); - } - else { - sprintf(traceMsg, "%s (bond) Unable to assign outgoing flow %x to peer %llx, no active overflow link", OSUtils::humanReadableTimestamp().c_str(), flow->id(), (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); - return false; + if (_policy == ZT_BOND_POLICY_ACTIVE_BACKUP) { + if (_abPathIdx == ZT_MAX_PEER_NETWORK_PATHS) { + log("unable to assign out-flow %x (no active backup link)", flow->id); } + flow->assignPath(_abPathIdx, now); } - flow->assignedPath()->address().toString(curPathStr); - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, flow->assignedPath()->localSocket()); - sprintf( - traceMsg, - "%s (bond) Assigned outgoing flow %x to peer %llx to link %s/%s, %lu active flow(s)", - OSUtils::humanReadableTimestamp().c_str(), - flow->id(), - (unsigned long long)(_peer->_id.address().toInt()), - link->ifname().c_str(), - curPathStr, - (unsigned long)_flows.size()); - RR->t->bondStateMessage(NULL, traceMsg); + _paths[flow->assignedPath].p->address().toString(curPathStr); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[flow->assignedPath].p->localSocket()); + log("assign out-flow %x to link %s/%s (%lu / %lu flows)", flow->id, link->ifname().c_str(), curPathStr, _paths[flow->assignedPath].assignedFlowCount, (unsigned long)_flows.size()); return true; } -SharedPtr Bond::createFlow(const SharedPtr& path, int32_t flowId, unsigned char entropy, int64_t now) +SharedPtr Bond::createFlow(int pathIdx, int32_t flowId, unsigned char entropy, int64_t now) { - char traceMsg[256]; - char curPathStr[128]; - // --- + char curPathStr[64] = { 0 }; if (! _numBondedPaths) { - sprintf(traceMsg, "%s (bond) There are no bonded paths to peer %llx, cannot assign flow %x\n", OSUtils::humanReadableTimestamp().c_str(), (unsigned long long)(_peer->_id.address().toInt()), flowId); - RR->t->bondStateMessage(NULL, traceMsg); + log("unable to assign flow %x (bond has no links)\n", flowId); return SharedPtr(); } if (_flows.size() >= ZT_FLOW_MAX_COUNT) { - sprintf( - traceMsg, - "%s (bond) Maximum number of flows on bond to peer %llx reached (%d), forcibly forgetting oldest flow\n", - OSUtils::humanReadableTimestamp().c_str(), - (unsigned long long)(_peer->_id.address().toInt()), - ZT_FLOW_MAX_COUNT); - RR->t->bondStateMessage(NULL, traceMsg); + log("forget oldest flow (max flows reached: %d)\n", ZT_FLOW_MAX_COUNT); forgetFlowsWhenNecessary(0, true, now); } SharedPtr flow = new Flow(flowId, now); @@ -467,27 +591,18 @@ SharedPtr Bond::createFlow(const SharedPtr& path, int32_t flowId, un * is received on a path but no flow exists, in this case we simply assign the path * that the remote peer chose for us. */ - if (path) { - flow->assignPath(path, now); - path->address().toString(curPathStr); - path->_assignedFlowCount++; - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, flow->assignedPath()->localSocket()); - sprintf( - traceMsg, - "%s (bond) Assigned incoming flow %x from peer %llx to link %s/%s, %lu active flow(s)", - OSUtils::humanReadableTimestamp().c_str(), - flow->id(), - (unsigned long long)(_peer->_id.address().toInt()), - link->ifname().c_str(), - curPathStr, - (unsigned long)_flows.size()); - RR->t->bondStateMessage(NULL, traceMsg); + if (pathIdx != ZT_MAX_PEER_NETWORK_PATHS) { + flow->assignPath(pathIdx, now); + _paths[pathIdx].p->address().toString(curPathStr); + _paths[pathIdx].assignedFlowCount++; + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[flow->assignedPath].p->localSocket()); + log("assign in-flow %x to link %s/%s (%lu / %lu)", flow->id, link->ifname().c_str(), curPathStr, _paths[pathIdx].assignedFlowCount, (unsigned long)_flows.size()); } /** * Add a flow when no path was provided. This means that it is an outgoing packet * and that it is up to the local peer to decide how to load-balance its transmission. */ - else if (! path) { + else { assignFlowToBondedPath(flow, now); } return flow; @@ -495,22 +610,14 @@ SharedPtr Bond::createFlow(const SharedPtr& path, int32_t flowId, un void Bond::forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now) { - char traceMsg[256]; std::map >::iterator it = _flows.begin(); std::map >::iterator oldestFlow = _flows.end(); SharedPtr expiredFlow; if (age) { // Remove by specific age while (it != _flows.end()) { if (it->second->age(now) > age) { - sprintf( - traceMsg, - "%s (bond) Forgetting flow %x between this node and peer %llx, %lu active flow(s)", - OSUtils::humanReadableTimestamp().c_str(), - it->first, - (unsigned long long)(_peer->_id.address().toInt()), - (unsigned long)(_flows.size() - 1)); - RR->t->bondStateMessage(NULL, traceMsg); - it->second->assignedPath()->_assignedFlowCount--; + log("forget flow %x (age %llu) (%lu / %lu)", it->first, (unsigned long long)it->second->age(now), _paths[it->second->assignedPath].assignedFlowCount, (unsigned long)(_flows.size() - 1)); + _paths[it->second->assignedPath].assignedFlowCount--; it = _flows.erase(it); } else { @@ -528,16 +635,8 @@ void Bond::forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now) ++it; } if (oldestFlow != _flows.end()) { - sprintf( - traceMsg, - "%s (bond) Forgetting oldest flow %x (of age %llu) between this node and peer %llx, %lu active flow(s)", - OSUtils::humanReadableTimestamp().c_str(), - oldestFlow->first, - (unsigned long long)oldestFlow->second->age(now), - (unsigned long long)(_peer->_id.address().toInt()), - (unsigned long)(_flows.size() - 1)); - RR->t->bondStateMessage(NULL, traceMsg); - oldestFlow->second->assignedPath()->_assignedFlowCount--; + log("forget oldest flow %x (age %llu) (total flows: %lu)", oldestFlow->first, (unsigned long long)oldestFlow->second->age(now), (unsigned long)(_flows.size() - 1)); + _paths[oldestFlow->second->assignedPath].assignedFlowCount--; _flows.erase(oldestFlow); } } @@ -545,96 +644,69 @@ void Bond::forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now) void Bond::processIncomingPathNegotiationRequest(uint64_t now, SharedPtr& path, int16_t remoteUtility) { - char traceMsg[256]; - if (_abLinkSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { + char pathStr[64] = { 0 }; + if (_abLinkSelectMethod != ZT_BOND_RESELECTION_POLICY_OPTIMIZE) { return; } Mutex::Lock _l(_paths_m); - char pathStr[128]; - path->address().toString(pathStr); + int pathIdx = getNominatedPathIdx(path); + if (pathIdx == ZT_MAX_PEER_NETWORK_PATHS) { + return; + } + _paths[pathIdx].p->address().toString(pathStr); if (! _lastPathNegotiationCheck) { return; } - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, path->localSocket()); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[pathIdx].p->localSocket()); if (remoteUtility > _localUtility) { - char pathStr[128]; - path->address().toString(pathStr); - sprintf( - traceMsg, - "%s (bond) Peer %llx suggests using alternate link %s/%s. Remote utility (%d) is GREATER than local utility (%d), switching to said link\n", - OSUtils::humanReadableTimestamp().c_str(), - (unsigned long long)(_peer->_id.address().toInt()), - link->ifname().c_str(), - pathStr, - remoteUtility, - _localUtility); - RR->t->bondStateMessage(NULL, traceMsg); - negotiatedPath = path; + _paths[pathIdx].p->address().toString(pathStr); + log("peer suggests alternate link %s/%s, remote utility (%d) greater than local utility (%d), switching to suggested link\n", link->ifname().c_str(), pathStr, remoteUtility, _localUtility); + negotiatedPathIdx = pathIdx; } if (remoteUtility < _localUtility) { - sprintf( - traceMsg, - "%s (bond) Peer %llx suggests using alternate link %s/%s. Remote utility (%d) is LESS than local utility (%d), not switching\n", - OSUtils::humanReadableTimestamp().c_str(), - (unsigned long long)(_peer->_id.address().toInt()), - link->ifname().c_str(), - pathStr, - remoteUtility, - _localUtility); - RR->t->bondStateMessage(NULL, traceMsg); + log("peer suggests alternate link %s/%s, remote utility (%d) less than local utility (%d), not switching\n", link->ifname().c_str(), pathStr, remoteUtility, _localUtility); } if (remoteUtility == _localUtility) { - sprintf( - traceMsg, - "%s (bond) Peer %llx suggests using alternate link %s/%s. Remote utility (%d) is equal to local utility (%d)\n", - OSUtils::humanReadableTimestamp().c_str(), - (unsigned long long)(_peer->_id.address().toInt()), - link->ifname().c_str(), - pathStr, - remoteUtility, - _localUtility); - RR->t->bondStateMessage(NULL, traceMsg); + log("peer suggests alternate link %s/%s, remote utility (%d) equal to local utility (%d)\n", link->ifname().c_str(), pathStr, remoteUtility, _localUtility); if (_peer->_id.address().toInt() > RR->node->identity().address().toInt()) { - sprintf(traceMsg, "%s (bond) Agreeing with peer %llx to use alternate link %s/%s\n", OSUtils::humanReadableTimestamp().c_str(), (unsigned long long)(_peer->_id.address().toInt()), link->ifname().c_str(), pathStr); - RR->t->bondStateMessage(NULL, traceMsg); - negotiatedPath = path; + log("agree with peer to use alternate link %s/%s\n", link->ifname().c_str(), pathStr); + negotiatedPathIdx = pathIdx; } else { - sprintf(traceMsg, "%s (bond) Ignoring petition from peer %llx to use alternate link %s/%s\n", OSUtils::humanReadableTimestamp().c_str(), (unsigned long long)(_peer->_id.address().toInt()), link->ifname().c_str(), pathStr); - RR->t->bondStateMessage(NULL, traceMsg); + log("ignore petition from peer to use alternate link %s/%s\n", link->ifname().c_str(), pathStr); } } } -void Bond::pathNegotiationCheck(void* tPtr, const int64_t now) +void Bond::pathNegotiationCheck(void* tPtr, int64_t now) { - char pathStr[128]; + char pathStr[64] = { 0 }; int maxInPathIdx = ZT_MAX_PEER_NETWORK_PATHS; int maxOutPathIdx = ZT_MAX_PEER_NETWORK_PATHS; uint64_t maxInCount = 0; uint64_t maxOutCount = 0; for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (! _paths[i]) { + if (! _paths[i].p) { continue; } - if (_paths[i]->_packetsIn > maxInCount) { - maxInCount = _paths[i]->_packetsIn; + if (_paths[i].packetsIn > maxInCount) { + maxInCount = _paths[i].packetsIn; maxInPathIdx = i; } - if (_paths[i]->_packetsOut > maxOutCount) { - maxOutCount = _paths[i]->_packetsOut; + if (_paths[i].packetsOut > maxOutCount) { + maxOutCount = _paths[i].packetsOut; maxOutPathIdx = i; } - _paths[i]->resetPacketCounts(); + _paths[i].resetPacketCounts(); } bool _peerLinksSynchronized = ((maxInPathIdx != ZT_MAX_PEER_NETWORK_PATHS) && (maxOutPathIdx != ZT_MAX_PEER_NETWORK_PATHS) && (maxInPathIdx != maxOutPathIdx)) ? false : true; /** * Determine utility and attempt to petition remote peer to switch to our chosen path */ if (! _peerLinksSynchronized) { - _localUtility = _paths[maxOutPathIdx]->_failoverScore - _paths[maxInPathIdx]->_failoverScore; - if (_paths[maxOutPathIdx]->_negotiated) { - _localUtility -= ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED; + _localUtility = _paths[maxOutPathIdx].failoverScore - _paths[maxInPathIdx].failoverScore; + if (_paths[maxOutPathIdx].negotiated) { + _localUtility -= ZT_BOND_FAILOVER_HANDICAP_NEGOTIATED; } if ((now - _lastSentPathNegotiationRequest) > ZT_PATH_NEGOTIATION_CUTOFF_TIME) { // fprintf(stderr, "BT: (sync) it's been long enough, sending more requests.\n"); @@ -643,320 +715,222 @@ void Bond::pathNegotiationCheck(void* tPtr, const int64_t now) if (_numSentPathNegotiationRequests < ZT_PATH_NEGOTIATION_TRY_COUNT) { if (_localUtility >= 0) { // fprintf(stderr, "BT: (sync) paths appear to be out of sync (utility=%d)\n", _localUtility); - sendPATH_NEGOTIATION_REQUEST(tPtr, _paths[maxOutPathIdx]); + sendPATH_NEGOTIATION_REQUEST(tPtr, _paths[maxOutPathIdx].p); ++_numSentPathNegotiationRequests; _lastSentPathNegotiationRequest = now; - _paths[maxOutPathIdx]->address().toString(pathStr); - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[maxOutPathIdx]->localSocket()); - // fprintf(stderr, "sending request to use %s on %s, ls=%llx, utility=%d\n", pathStr, link->ifname().c_str(), _paths[maxOutPathIdx]->localSocket(), _localUtility); + _paths[maxOutPathIdx].p->address().toString(pathStr); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[maxOutPathIdx].p->localSocket()); + // fprintf(stderr, "sending request to use %s on %s, ls=%llx, utility=%d\n", pathStr, link->ifname().c_str(), _paths[maxOutPathIdx].p->localSocket(), _localUtility); } } /** * Give up negotiating and consider switching */ - else if ((now - _lastSentPathNegotiationRequest) > (2 * ZT_PATH_NEGOTIATION_CHECK_INTERVAL)) { + else if ((now - _lastSentPathNegotiationRequest) > (2 * ZT_BOND_OPTIMIZE_INTERVAL)) { if (_localUtility == 0) { // There's no loss to us, just switch without sending a another request // fprintf(stderr, "BT: (sync) giving up, switching to remote peer's path.\n"); - negotiatedPath = _paths[maxInPathIdx]; + negotiatedPathIdx = maxInPathIdx; } } } } -void Bond::sendPATH_NEGOTIATION_REQUEST(void* tPtr, const SharedPtr& path) +void Bond::sendPATH_NEGOTIATION_REQUEST(void* tPtr, int pathIdx) { - char traceMsg[256]; - char pathStr[128]; - path->address().toString(pathStr); - sprintf( - traceMsg, - "%s (bond) Sending link negotiation request to peer %llx via link %s/%s, local utility is %d", - OSUtils::humanReadableTimestamp().c_str(), - (unsigned long long)(_peer->_id.address().toInt()), - getLink(path)->ifname().c_str(), - pathStr, - _localUtility); - RR->t->bondStateMessage(NULL, traceMsg); - if (_abLinkSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { + char pathStr[64] = { 0 }; + _paths[pathIdx].p->address().toString(pathStr); + log("send link negotiation request to peer via link %s/%s, local utility is %d", getLink(_paths[pathIdx].p)->ifname().c_str(), pathStr, _localUtility); + if (_abLinkSelectMethod != ZT_BOND_RESELECTION_POLICY_OPTIMIZE) { return; } Packet outp(_peer->_id.address(), RR->identity.address(), Packet::VERB_PATH_NEGOTIATION_REQUEST); outp.append(_localUtility); - if (path->address()) { + if (_paths[pathIdx].p->address()) { outp.armor(_peer->key(), false, _peer->aesKeysIfSupported()); - RR->node->putPacket(tPtr, path->localSocket(), path->address(), outp.data(), outp.size()); + RR->node->putPacket(tPtr, _paths[pathIdx].p->localSocket(), _paths[pathIdx].p->address(), outp.data(), outp.size()); } } -void Bond::sendACK(void* tPtr, const SharedPtr& path, const int64_t localSocket, const InetAddress& atAddress, int64_t now) +void Bond::sendQOS_MEASUREMENT(void* tPtr, int pathIdx, int64_t localSocket, const InetAddress& atAddress, int64_t now) { - Packet outp(_peer->_id.address(), RR->identity.address(), Packet::VERB_ACK); - int32_t bytesToAck = 0; - std::map::iterator it = path->ackStatsIn.begin(); - while (it != path->ackStatsIn.end()) { - bytesToAck += it->second; - ++it; - } - // char traceMsg[256]; char pathStr[128]; path->address().toString(pathStr); - // sprintf(traceMsg, "%s (qos) Sending ACK packet for %d bytes to peer %llx via link %s/%s", - // OSUtils::humanReadableTimestamp().c_str(), bytesToAck, (unsigned long long)(_peer->_id.address().toInt()), getLink(path)->ifname().c_str(), pathStr); - // RR->t->bondStateMessage(NULL, traceMsg); - outp.append(bytesToAck); - if (atAddress) { - outp.armor(_peer->key(), false, _peer->aesKeysIfSupported()); - RR->node->putPacket(tPtr, localSocket, atAddress, outp.data(), outp.size()); - } - else { - RR->sw->send(tPtr, outp, false); - } - path->ackStatsIn.clear(); - path->_packetsReceivedSinceLastAck = 0; - path->_lastAckSent = now; -} - -void Bond::sendQOS_MEASUREMENT(void* tPtr, const SharedPtr& path, const int64_t localSocket, const InetAddress& atAddress, int64_t now) -{ - // char traceMsg[256]; char pathStr[128]; path->address().toString(pathStr); - // sprintf(traceMsg, "%s (qos) Sending QoS packet to peer %llx via link %s/%s", - // OSUtils::humanReadableTimestamp().c_str(), (unsigned long long)(_peer->_id.address().toInt()), getLink(path)->ifname().c_str(), pathStr); - // RR->t->bondStateMessage(NULL, traceMsg); - const int64_t _now = RR->node->now(); + char pathStr[64] = { 0 }; + _paths[pathIdx].p->address().toString(pathStr); + int64_t _now = RR->node->now(); Packet outp(_peer->_id.address(), RR->identity.address(), Packet::VERB_QOS_MEASUREMENT); char qosData[ZT_QOS_MAX_PACKET_SIZE]; - int16_t len = generateQoSPacket(path, _now, qosData); - outp.append(qosData, len); - if (atAddress) { - outp.armor(_peer->key(), false, _peer->aesKeysIfSupported()); - RR->node->putPacket(tPtr, localSocket, atAddress, outp.data(), outp.size()); + int16_t len = generateQoSPacket(pathIdx, _now, qosData); + _overheadBytes += len; + if (len) { + outp.append(qosData, len); + if (atAddress) { + outp.armor(_peer->key(), false, _peer->aesKeysIfSupported()); + RR->node->putPacket(tPtr, localSocket, atAddress, outp.data(), outp.size()); + } + else { + RR->sw->send(tPtr, outp, false); + } + _paths[pathIdx].packetsReceivedSinceLastQoS = 0; + _paths[pathIdx].lastQoSMeasurement = now; } - else { - RR->sw->send(tPtr, outp, false); - } - // Account for the fact that a VERB_QOS_MEASUREMENT was just sent. Reset timers. - path->_packetsReceivedSinceLastQoS = 0; - path->_lastQoSMeasurement = now; + // log("send QOS via link %s/%s (len=%d)", getLink(_paths[pathIdx].p)->ifname().c_str(), pathStr, len); } -void Bond::processBackgroundTasks(void* tPtr, const int64_t now) +void Bond::processBackgroundBondTasks(void* tPtr, int64_t now) { - Mutex::Lock _l(_paths_m); - if (! _peer->_canUseMultipath || (now - _lastBackgroundTaskCheck) < ZT_BOND_BACKGROUND_TASK_MIN_INTERVAL) { + if (! _peer->_localMultipathSupported || (now - _lastBackgroundTaskCheck) < ZT_BOND_BACKGROUND_TASK_MIN_INTERVAL) { return; } _lastBackgroundTaskCheck = now; - - // Compute dynamic path monitor timer interval - if (_linkMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC) { - int suggestedMonitorInterval = (now - _lastFrame) / 100; - _dynamicPathMonitorInterval = std::min(ZT_PATH_HEARTBEAT_PERIOD, ((suggestedMonitorInterval > _bondMonitorInterval) ? suggestedMonitorInterval : _bondMonitorInterval)); - } - // TODO: Clarify and generalize this logic - if (_linkMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC) { - _shouldCollectPathStatistics = true; - } - - // Memoize oft-used properties in the packet ingress/egress logic path - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { - // Required for real-time balancing - _shouldCollectPathStatistics = true; - } - if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_BETTER) { - // Required for judging suitability of primary link after recovery - _shouldCollectPathStatistics = true; - } - if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { - // Required for judging suitability of new candidate primary - _shouldCollectPathStatistics = true; - } - } - if ((now - _lastCheckUserPreferences) > 1000) { - _lastCheckUserPreferences = now; - applyUserPrefs(); - } + Mutex::Lock _l(_paths_m); curateBond(now, false); if ((now - _lastQualityEstimation) > _qualityEstimationInterval) { _lastQualityEstimation = now; estimatePathQuality(now); } - dumpInfo(now); + dumpInfo(now, false); - // Send QOS/ACK packets as needed - if (_shouldCollectPathStatistics) { - for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (_paths[i] && _paths[i]->allowed()) { - if (_paths[i]->needsToSendQoS(now, _qosSendInterval)) { - sendQOS_MEASUREMENT(tPtr, _paths[i], _paths[i]->localSocket(), _paths[i]->address(), now); - } - if (_paths[i]->needsToSendAck(now, _ackSendInterval)) { - sendACK(tPtr, _paths[i], _paths[i]->localSocket(), _paths[i]->address(), now); + // Send ambient monitoring traffic + for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p && _paths[i].allowed()) { + // ECHO (this is our bond's heartbeat) + if ((_monitorInterval > 0) && ((now - _paths[i].p->_lastOut) >= _monitorInterval)) { + if ((_peer->remoteVersionProtocol() >= 5) && (! ((_peer->remoteVersionMajor() == 1) && (_peer->remoteVersionMinor() == 1) && (_peer->remoteVersionRevision() == 0)))) { + Packet outp(_peer->address(), RR->identity.address(), Packet::VERB_ECHO); + outp.armor(_peer->key(), true, _peer->aesKeysIfSupported()); + RR->node->expectReplyTo(outp.packetId()); + RR->node->putPacket(tPtr, _paths[i].p->localSocket(), _paths[i].p->address(), outp.data(), outp.size()); + _overheadBytes += outp.size(); + char pathStr[64] = { 0 }; + _paths[i].p->address().toString(pathStr); + // log("send HELLO via link %s/%s (len=%d)", getLink(_paths[i].p)->ifname().c_str(), pathStr, outp.size()); } } + // QOS + if (_paths[i].needsToSendQoS(now, _qosSendInterval)) { + sendQOS_MEASUREMENT(tPtr, i, _paths[i].p->localSocket(), _paths[i].p->address(), now); + } } } // Perform periodic background tasks unique to each bonding policy - switch (_bondingPolicy) { - case ZT_BONDING_POLICY_ACTIVE_BACKUP: + switch (_policy) { + case ZT_BOND_POLICY_ACTIVE_BACKUP: processActiveBackupTasks(tPtr, now); break; - case ZT_BONDING_POLICY_BROADCAST: + case ZT_BOND_POLICY_BROADCAST: break; - case ZT_BONDING_POLICY_BALANCE_RR: - case ZT_BONDING_POLICY_BALANCE_XOR: - case ZT_BONDING_POLICY_BALANCE_AWARE: + case ZT_BOND_POLICY_BALANCE_RR: + case ZT_BOND_POLICY_BALANCE_XOR: + case ZT_BOND_POLICY_BALANCE_AWARE: processBalanceTasks(now); break; default: break; } // Check whether or not a path negotiation needs to be performed - if (((now - _lastPathNegotiationCheck) > ZT_PATH_NEGOTIATION_CHECK_INTERVAL) && _allowPathNegotiation) { + if (((now - _lastPathNegotiationCheck) > ZT_BOND_OPTIMIZE_INTERVAL) && _allowPathNegotiation) { _lastPathNegotiationCheck = now; pathNegotiationCheck(tPtr, now); } } -void Bond::applyUserPrefs() +void Bond::curateBond(int64_t now, bool rebuildBond) { - for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (! _paths[i]) { - continue; - } - SharedPtr sl = getLink(_paths[i]); - if (sl) { - if (sl->monitorInterval() == 0) { // If no interval was specified for this link, use more generic bond-wide interval - sl->setMonitorInterval(_bondMonitorInterval); - } - RR->bc->setMinReqPathMonitorInterval((sl->monitorInterval() < RR->bc->minReqPathMonitorInterval()) ? sl->monitorInterval() : RR->bc->minReqPathMonitorInterval()); - bool bFoundCommonLink = false; - SharedPtr commonLink = RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); - for (unsigned int j = 0; j < ZT_MAX_PEER_NETWORK_PATHS; ++j) { - if (_paths[j] && _paths[j].ptr() != _paths[i].ptr()) { - if (RR->bc->getLinkBySocket(_policyAlias, _paths[j]->localSocket()) == commonLink) { - bFoundCommonLink = true; - } - } - } - _paths[i]->_monitorInterval = sl->monitorInterval(); - _paths[i]->_upDelay = sl->upDelay() ? sl->upDelay() : _upDelay; - _paths[i]->_downDelay = sl->downDelay() ? sl->downDelay() : _downDelay; - _paths[i]->_ipvPref = sl->ipvPref(); - _paths[i]->_mode = sl->mode(); - _paths[i]->_enabled = sl->enabled(); - _paths[i]->_onlyPathOnLink = ! bFoundCommonLink; - } - } - if (_peer) { - _peer->_shouldCollectPathStatistics = _shouldCollectPathStatistics; - _peer->_bondingPolicy = _bondingPolicy; - } -} - -void Bond::curateBond(const int64_t now, bool rebuildBond) -{ - char traceMsg[256]; - char pathStr[128]; + char pathStr[64] = { 0 }; uint8_t tmpNumAliveLinks = 0; uint8_t tmpNumTotalLinks = 0; /** - * Update path states + * Update path state variables. State variables are used so that critical + * blocks that perform fast packet processing won't need to make as many + * function calls or computations. */ for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (! _paths[i]) { + if (! _paths[i].p) { continue; } tmpNumTotalLinks++; - if (_paths[i]->alive(now, true)) { + if (_paths[i].eligible) { tmpNumAliveLinks++; } - bool currEligibility = _paths[i]->eligible(now, _ackSendInterval); - if (currEligibility != _paths[i]->_lastEligibilityState) { - _paths[i]->address().toString(pathStr); - char traceMsg[256]; - _paths[i]->address().toString(pathStr); - sprintf( - traceMsg, - "%s (bond) Eligibility of link %s/%s to peer %llx has changed from %d to %d", - OSUtils::humanReadableTimestamp().c_str(), - getLink(_paths[i])->ifname().c_str(), - pathStr, - (unsigned long long)(_peer->_id.address().toInt()), - _paths[i]->_lastEligibilityState, - currEligibility); - RR->t->bondStateMessage(NULL, traceMsg); + + /** + * Determine alive-ness + */ + _paths[i].alive = (now - _paths[i].p->_lastIn) < _failoverInterval; + + /** + * Determine current eligibility + */ + bool currEligibility = false; + // Simple RX age (driven by packets of any type and gratuitous VERB_HELLOs) + bool acceptableAge = _paths[i].p->age(now) < (_failoverInterval + _downDelay); + // Whether we've waited long enough since the link last came online + bool satisfiedUpDelay = (now - _paths[i].lastAliveToggle) >= _upDelay; + // Whether this path is still in its trial period + bool inTrial = (now - _paths[i].whenNominated) < ZT_BOND_OPTIMIZE_INTERVAL; + // if (includeRefractoryPeriod && _paths[i].refractoryPeriod) { + // As long as the refractory period value has not fully drained this path is not eligible + // currEligibility = false; + //} + currEligibility = _paths[i].allowed() && ((acceptableAge && satisfiedUpDelay) || inTrial); + // log("[%d] allowed=%d, acceptableAge=%d, satisfiedUpDelay=%d, inTrial=%d ==== %d", i, _paths[i].allowed(), acceptableAge, satisfiedUpDelay, inTrial, currEligibility); + + /** + * Note eligibility state change (if any) and take appropriate action + */ + if (currEligibility != _paths[i].eligible) { + _paths[i].p->address().toString(pathStr); + if (currEligibility == 0) { + log("link %s/%s is no longer eligible", getLink(_paths[i].p)->ifname().c_str(), pathStr); + } + if (currEligibility == 1) { + log("link %s/%s is eligible", getLink(_paths[i].p)->ifname().c_str(), pathStr); + } + dumpPathStatus(now, i); if (currEligibility) { rebuildBond = true; } if (! currEligibility) { - _paths[i]->adjustRefractoryPeriod(now, _defaultPathRefractoryPeriod, ! currEligibility); - if (_paths[i]->bonded()) { - char pathStr[128]; - _paths[i]->address().toString(pathStr); - sprintf( - traceMsg, - "%s (bond) Link %s/%s to peer %llx was bonded, reallocation of its flows will occur soon", - OSUtils::humanReadableTimestamp().c_str(), - getLink(_paths[i])->ifname().c_str(), - pathStr, - (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); - rebuildBond = true; - _paths[i]->_shouldReallocateFlows = _paths[i]->bonded(); - _paths[i]->setBonded(false); - } - else { - sprintf( - traceMsg, - "%s (bond) Link %s/%s to peer %llx was not bonded, no allocation consequences", - OSUtils::humanReadableTimestamp().c_str(), - getLink(_paths[i])->ifname().c_str(), - pathStr, - (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); + _paths[i].adjustRefractoryPeriod(now, _defaultPathRefractoryPeriod, ! currEligibility); + if (_paths[i].bonded) { + _paths[i].bonded = false; + if (_allowFlowHashing) { + _paths[i].p->address().toString(pathStr); + log("link %s/%s was bonded, flow reallocation will occur soon", getLink(_paths[i].p)->ifname().c_str(), pathStr); + rebuildBond = true; + _paths[i].shouldReallocateFlows = _paths[i].bonded; + } } } } if (currEligibility) { - _paths[i]->adjustRefractoryPeriod(now, _defaultPathRefractoryPeriod, false); + _paths[i].adjustRefractoryPeriod(now, _defaultPathRefractoryPeriod, false); } - _paths[i]->_lastEligibilityState = currEligibility; + _paths[i].eligible = currEligibility; } + + /** + * Determine health status to report to user + */ _numAliveLinks = tmpNumAliveLinks; _numTotalLinks = tmpNumTotalLinks; - - /* Determine health status to report to user */ - bool tmpHealthStatus = true; - if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { + if (_policy == ZT_BOND_POLICY_ACTIVE_BACKUP) { if (_numAliveLinks < 2) { - // Considered healthy if there is at least one failover link + // Considered healthy if there is at least one backup link tmpHealthStatus = false; } } - if (_bondingPolicy == ZT_BONDING_POLICY_BROADCAST) { + if (_policy == ZT_BOND_POLICY_BROADCAST) { if (_numAliveLinks < 1) { // Considered healthy if we're able to send frames at all tmpHealthStatus = false; } } - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) { + if ((_policy == ZT_BOND_POLICY_BALANCE_RR) || (_policy == ZT_BOND_POLICY_BALANCE_XOR) || (_policy == ZT_BOND_POLICY_BALANCE_AWARE)) { if (_numAliveLinks < _numTotalLinks) { - // Considered healthy if all known paths are alive, this should be refined to account for user bond config settings - tmpHealthStatus = false; - } - } - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR) { - if (_numAliveLinks < _numTotalLinks) { - // Considered healthy if all known paths are alive, this should be refined to account for user bond config settings - tmpHealthStatus = false; - } - } - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { - if (_numAliveLinks < _numTotalLinks) { - // Considered healthy if all known paths are alive, this should be refined to account for user bond config settings tmpHealthStatus = false; } } @@ -968,152 +942,177 @@ void Bond::curateBond(const int64_t now, bool rebuildBond) else { healthStatusStr = "DEGRADED"; } - sprintf(traceMsg, "%s (bond) Bond to peer %llx is in a %s state (%d/%d links)", OSUtils::humanReadableTimestamp().c_str(), (unsigned long long)(_peer->_id.address().toInt()), healthStatusStr.c_str(), _numAliveLinks, _numTotalLinks); - RR->t->bondStateMessage(NULL, traceMsg); + log("bond is in a %s state (links: %d/%d)", healthStatusStr.c_str(), _numAliveLinks, _numTotalLinks); + dumpInfo(now, true); } _isHealthy = tmpHealthStatus; /** - * Curate the set of paths that are part of the bond proper. Selects a single path + * Curate the set of paths that are part of the bond proper. Select a set of paths * per logical link according to eligibility and user-specified constraints. */ - if ((_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) || (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR) || (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE)) { + + if ((_policy == ZT_BOND_POLICY_BALANCE_RR) || (_policy == ZT_BOND_POLICY_BALANCE_XOR) || (_policy == ZT_BOND_POLICY_BALANCE_AWARE)) { if (! _numBondedPaths) { rebuildBond = true; } - // TODO: Optimize if (rebuildBond) { + log("rebuilding bond"); + // TODO: Obey blacklisting int updatedBondedPathCount = 0; - std::map, int> linkMap; + // Build map associating paths with local physical links. Will be selected from in next step + std::map, std::vector > linkMap; for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (_paths[i] && _paths[i]->allowed() && (_paths[i]->eligible(now, _ackSendInterval) || ! _numBondedPaths)) { - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); - if (! linkMap.count(link)) { - linkMap[link] = i; - } - else { - bool overriden = false; - _paths[i]->address().toString(pathStr); - // fprintf(stderr, " link representative path already exists! (%s %s)\n", getLink(_paths[i])->ifname().c_str(), pathStr); - if (_paths[i]->preferred() && ! _paths[linkMap[link]]->preferred()) { - // Override previous choice if preferred - if (_paths[linkMap[link]]->_assignedFlowCount) { - _paths[linkMap[link]]->_deprecated = true; - } - else { - _paths[linkMap[link]]->_deprecated = true; - _paths[linkMap[link]]->setBonded(false); - } - linkMap[link] = i; - overriden = true; - } - if ((_paths[i]->preferred() && _paths[linkMap[link]]->preferred()) || (! _paths[i]->preferred() && ! _paths[linkMap[link]]->preferred())) { - if (_paths[i]->preferenceRank() > _paths[linkMap[link]]->preferenceRank()) { - // Override if higher preference - if (_paths[linkMap[link]]->_assignedFlowCount) { - _paths[linkMap[link]]->_deprecated = true; - } - else { - _paths[linkMap[link]]->_deprecated = true; - _paths[linkMap[link]]->setBonded(false); - } - linkMap[link] = i; - } - } - } + if (_paths[i].p) { + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); + linkMap[link].push_back(i); } } - std::map, int>::iterator it = linkMap.begin(); - for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (! _paths[i]) { - continue; + // Re-form bond from link<->path map + std::map, std::vector >::iterator it = linkMap.begin(); + while (it != linkMap.end()) { + SharedPtr link = it->first; + int ipvPref = link->ipvPref(); + + // If user has no address type preference, then use every path we find on a link + if (ipvPref == 0) { + for (int j = 0; j < it->second.size(); j++) { + int idx = it->second.at(j); + if (! _paths[idx].p || ! _paths[idx].allowed()) { + continue; + } + addPathToBond(idx, updatedBondedPathCount); + ++updatedBondedPathCount; + _paths[idx].p->address().toString(pathStr); + log("add %s/%s (no user addr preference)", link->ifname().c_str(), pathStr); + } } - _bondedIdx[i] = ZT_MAX_PEER_NETWORK_PATHS; - if (it != linkMap.end()) { - _bondedIdx[i] = it->second; - _paths[_bondedIdx[i]]->setBonded(true); - ++it; - ++updatedBondedPathCount; - _paths[_bondedIdx[i]]->address().toString(pathStr); - // fprintf(stderr, "setting i=%d, _bondedIdx[%d]=%d to bonded (%s %s)\n", i, i, _bondedIdx[i], getLink(_paths[_bondedIdx[i]])->ifname().c_str(), pathStr); + // If the user prefers to only use one address type (IPv4 or IPv6) + if (ipvPref == 4 || ipvPref == 6) { + for (int j = 0; j < it->second.size(); j++) { + int idx = it->second.at(j); + if (! _paths[idx].p) { + continue; + } + if (! _paths[idx].allowed()) { + _paths[idx].p->address().toString(pathStr); + log("did not add %s/%s (user addr preference %d)", link->ifname().c_str(), pathStr, ipvPref); + continue; + } + if (! _paths[idx].eligible) { + continue; + } + addPathToBond(idx, updatedBondedPathCount); + ++updatedBondedPathCount; + _paths[idx].p->address().toString(pathStr); + log("add path %s/%s (user addr preference %d)", link->ifname().c_str(), pathStr, ipvPref); + } } + // If the users prefers one address type to another, try to find at least + // one path of that type before considering others. + if (ipvPref == 46 || ipvPref == 64) { + bool foundPreferredPath = false; + // Search for preferred paths + for (int j = 0; j < it->second.size(); j++) { + int idx = it->second.at(j); + if (! _paths[idx].p || ! _paths[idx].eligible) { + continue; + } + if (_paths[idx].preferred() && _paths[idx].allowed()) { + addPathToBond(idx, updatedBondedPathCount); + ++updatedBondedPathCount; + _paths[idx].p->address().toString(pathStr); + log("add %s/%s (user addr preference %d)", link->ifname().c_str(), pathStr, ipvPref); + foundPreferredPath = true; + } + } + // Unable to find a path that matches user preference, settle for another address type + if (! foundPreferredPath) { + log("did not find first-choice path type on link %s (user preference %d)", link->ifname().c_str(), ipvPref); + for (int j = 0; j < it->second.size(); j++) { + int idx = it->second.at(j); + if (! _paths[idx].p || ! _paths[idx].eligible) { + continue; + } + addPathToBond(idx, updatedBondedPathCount); + ++updatedBondedPathCount; + _paths[idx].p->address().toString(pathStr); + log("add %s/%s (user addr preference %d)", link->ifname().c_str(), pathStr, ipvPref); + foundPreferredPath = true; + } + } + } + ++it; // Next link } _numBondedPaths = updatedBondedPathCount; - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) { - // Cause a RR reset since the currently used index might no longer be valid + if (_policy == ZT_BOND_POLICY_BALANCE_RR) { + // Cause a RR reset since the current index might no longer be valid _rrPacketsSentOnCurrLink = _packetsPerLink; } } } } -void Bond::estimatePathQuality(const int64_t now) +void Bond::estimatePathQuality(int64_t now) { uint32_t totUserSpecifiedLinkSpeed = 0; if (_numBondedPaths) { // Compute relative user-specified speeds of links for (unsigned int i = 0; i < _numBondedPaths; ++i) { - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); - if (_paths[i] && _paths[i]->allowed()) { + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); + if (_paths[i].p && _paths[i].allowed()) { totUserSpecifiedLinkSpeed += link->speed(); } } for (unsigned int i = 0; i < _numBondedPaths; ++i) { - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); - if (_paths[i] && _paths[i]->allowed()) { + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); + if (_paths[i].p && _paths[i].allowed()) { link->setRelativeSpeed((uint8_t)round(((float)link->speed() / (float)totUserSpecifiedLinkSpeed) * 255)); } } } - float lat[ZT_MAX_PEER_NETWORK_PATHS]; - float pdv[ZT_MAX_PEER_NETWORK_PATHS]; - float plr[ZT_MAX_PEER_NETWORK_PATHS]; - float per[ZT_MAX_PEER_NETWORK_PATHS]; + float lat[ZT_MAX_PEER_NETWORK_PATHS] = { 0 }; + float pdv[ZT_MAX_PEER_NETWORK_PATHS] = { 0 }; + float plr[ZT_MAX_PEER_NETWORK_PATHS] = { 0 }; + float per[ZT_MAX_PEER_NETWORK_PATHS] = { 0 }; float maxLAT = 0; float maxPDV = 0; float maxPLR = 0; float maxPER = 0; - float quality[ZT_MAX_PEER_NETWORK_PATHS]; - uint8_t alloc[ZT_MAX_PEER_NETWORK_PATHS]; + float quality[ZT_MAX_PEER_NETWORK_PATHS] = { 0 }; + uint8_t alloc[ZT_MAX_PEER_NETWORK_PATHS] = { 0 }; float totQuality = 0.0f; - memset(&lat, 0, sizeof(lat)); - memset(&pdv, 0, sizeof(pdv)); - memset(&plr, 0, sizeof(plr)); - memset(&per, 0, sizeof(per)); - memset(&quality, 0, sizeof(quality)); - memset(&alloc, 0, sizeof(alloc)); - // Compute initial summary statistics for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (! _paths[i] || ! _paths[i]->allowed()) { + if (! _paths[i].p || ! _paths[i].allowed()) { continue; } // Compute/Smooth average of real-world observations - _paths[i]->_latencyMean = _paths[i]->latencySamples.mean(); - _paths[i]->_latencyVariance = _paths[i]->latencySamples.stddev(); - _paths[i]->_packetErrorRatio = 1.0 - (_paths[i]->packetValiditySamples.count() ? _paths[i]->packetValiditySamples.mean() : 1.0); + _paths[i].latencyMean = _paths[i].latencySamples.mean(); + _paths[i].latencyVariance = _paths[i].latencySamples.stddev(); + _paths[i].packetErrorRatio = 1.0 - (_paths[i].packetValiditySamples.count() ? _paths[i].packetValiditySamples.mean() : 1.0); if (userHasSpecifiedLinkSpeeds()) { // Use user-reported metrics - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); if (link) { - _paths[i]->_throughputMean = link->speed(); - _paths[i]->_throughputVariance = 0; + _paths[i].throughputMean = link->speed(); + _paths[i].throughputVariance = 0; } } // Drain unacknowledged QoS records - std::map::iterator it = _paths[i]->qosStatsOut.begin(); + std::map::iterator it = _paths[i].qosStatsOut.begin(); uint64_t currentLostRecords = 0; - while (it != _paths[i]->qosStatsOut.end()) { - int qosRecordTimeout = 5000; //_paths[i]->monitorInterval() * ZT_MULTIPATH_QOS_ACK_INTERVAL_MULTIPLIER * 8; + while (it != _paths[i].qosStatsOut.end()) { + int qosRecordTimeout = 5000; //_paths[i].p->monitorInterval() * ZT_BOND_QOS_ACK_INTERVAL_MULTIPLIER * 8; if ((now - it->second) >= qosRecordTimeout) { // Packet was lost - it = _paths[i]->qosStatsOut.erase(it); + it = _paths[i].qosStatsOut.erase(it); ++currentLostRecords; } else { @@ -1124,10 +1123,10 @@ void Bond::estimatePathQuality(const int64_t now) quality[i] = 0; totQuality = 0; // Normalize raw observations according to sane limits and/or user specified values - lat[i] = 1.0 / expf(4 * Utils::normalize(_paths[i]->_latencyMean, 0, _maxAcceptableLatency, 0, 1)); - pdv[i] = 1.0 / expf(4 * Utils::normalize(_paths[i]->_latencyVariance, 0, _maxAcceptablePacketDelayVariance, 0, 1)); - plr[i] = 1.0 / expf(4 * Utils::normalize(_paths[i]->_packetLossRatio, 0, _maxAcceptablePacketLossRatio, 0, 1)); - per[i] = 1.0 / expf(4 * Utils::normalize(_paths[i]->_packetErrorRatio, 0, _maxAcceptablePacketErrorRatio, 0, 1)); + lat[i] = 1.0 / expf(4 * Utils::normalize(_paths[i].latencyMean, 0, _maxAcceptableLatency, 0, 1)); + pdv[i] = 1.0 / expf(4 * Utils::normalize(_paths[i].latencyVariance, 0, _maxAcceptablePacketDelayVariance, 0, 1)); + plr[i] = 1.0 / expf(4 * Utils::normalize(_paths[i].packetLossRatio, 0, _maxAcceptablePacketLossRatio, 0, 1)); + per[i] = 1.0 / expf(4 * Utils::normalize(_paths[i].packetErrorRatio, 0, _maxAcceptablePacketErrorRatio, 0, 1)); // Record bond-wide maximums to determine relative values maxLAT = lat[i] > maxLAT ? lat[i] : maxLAT; maxPDV = pdv[i] > maxPDV ? pdv[i] : maxPDV; @@ -1136,34 +1135,33 @@ void Bond::estimatePathQuality(const int64_t now) } // Convert metrics to relative quantities and apply contribution weights for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (_paths[i] && _paths[i]->bonded()) { - quality[i] += ((maxLAT > 0.0f ? lat[i] / maxLAT : 0.0f) * _qualityWeights[ZT_QOS_LAT_IDX]); - quality[i] += ((maxPDV > 0.0f ? pdv[i] / maxPDV : 0.0f) * _qualityWeights[ZT_QOS_PDV_IDX]); - quality[i] += ((maxPLR > 0.0f ? plr[i] / maxPLR : 0.0f) * _qualityWeights[ZT_QOS_PLR_IDX]); - quality[i] += ((maxPER > 0.0f ? per[i] / maxPER : 0.0f) * _qualityWeights[ZT_QOS_PER_IDX]); + if (_paths[i].p && _paths[i].bonded) { + quality[i] += ((maxLAT > 0.0f ? lat[i] / maxLAT : 0.0f) * _qw[ZT_QOS_LAT_IDX]); + quality[i] += ((maxPDV > 0.0f ? pdv[i] / maxPDV : 0.0f) * _qw[ZT_QOS_PDV_IDX]); + quality[i] += ((maxPLR > 0.0f ? plr[i] / maxPLR : 0.0f) * _qw[ZT_QOS_PLR_IDX]); + quality[i] += ((maxPER > 0.0f ? per[i] / maxPER : 0.0f) * _qw[ZT_QOS_PER_IDX]); totQuality += quality[i]; } } // Normalize to 8-bit allocation values for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (_paths[i] && _paths[i]->bonded()) { + if (_paths[i].p && _paths[i].bonded) { alloc[i] = (uint8_t)(std::ceil((quality[i] / totQuality) * (float)255)); - _paths[i]->_allocation = alloc[i]; + _paths[i].allocation = alloc[i]; } } } -void Bond::processBalanceTasks(const int64_t now) +void Bond::processBalanceTasks(int64_t now) { - char curPathStr[128]; - // TODO: Generalize + char pathStr[64] = { 0 }; int totalAllocation = 0; for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (! _paths[i]) { + if (! _paths[i].p) { continue; } - if (_paths[i] && _paths[i]->bonded() && _paths[i]->eligible(now, _ackSendInterval)) { - totalAllocation += _paths[i]->_allocation; + if (_paths[i].p && _paths[i].bonded && _paths[i].eligible) { + totalAllocation += _paths[i].allocation; } } unsigned char minimumAllocationValue = (uint8_t)(0.33 * ((float)totalAllocation / (float)_numBondedPaths)); @@ -1172,51 +1170,38 @@ void Bond::processBalanceTasks(const int64_t now) /** * Clean up and reset flows if necessary */ - if ((now - _lastFlowExpirationCheck) > ZT_MULTIPATH_FLOW_CHECK_INTERVAL) { + if ((now - _lastFlowExpirationCheck) > ZT_PEER_PATH_EXPIRATION) { Mutex::Lock _l(_flows_m); - forgetFlowsWhenNecessary(ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL, false, now); - _lastFlowExpirationCheck = now; - } - if ((now - _lastFlowStatReset) > ZT_FLOW_STATS_RESET_INTERVAL) { - Mutex::Lock _l(_flows_m); - _lastFlowStatReset = now; + forgetFlowsWhenNecessary(ZT_PEER_PATH_EXPIRATION, false, now); std::map >::iterator it = _flows.begin(); while (it != _flows.end()) { it->second->resetByteCounts(); ++it; } + _lastFlowExpirationCheck = now; } /** * Re-allocate flows from dead paths */ - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { + if (_policy == ZT_BOND_POLICY_BALANCE_XOR || _policy == ZT_BOND_POLICY_BALANCE_AWARE) { Mutex::Lock _l(_flows_m); for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (! _paths[i]) { + if (! _paths[i].p) { continue; } - if (! _paths[i]->eligible(now, _ackSendInterval) && _paths[i]->_shouldReallocateFlows) { - char traceMsg[256]; - char pathStr[128]; - _paths[i]->address().toString(pathStr); - sprintf( - traceMsg, - "%s (balance-*) Reallocating flows to peer %llx from dead link %s/%s to surviving links", - OSUtils::humanReadableTimestamp().c_str(), - (unsigned long long)(_peer->_id.address().toInt()), - getLink(_paths[i])->ifname().c_str(), - pathStr); - RR->t->bondStateMessage(NULL, traceMsg); + if (! _paths[i].eligible && _paths[i].shouldReallocateFlows) { + _paths[i].p->address().toString(pathStr); + log("reallocate flows from dead link %s/%s", getLink(_paths[i].p)->ifname().c_str(), pathStr); std::map >::iterator flow_it = _flows.begin(); while (flow_it != _flows.end()) { - if (flow_it->second->assignedPath() == _paths[i]) { + if (_paths[flow_it->second->assignedPath].p == _paths[i].p) { if (assignFlowToBondedPath(flow_it->second, now)) { - _paths[i]->_assignedFlowCount--; + _paths[i].assignedFlowCount--; } } ++flow_it; } - _paths[i]->_shouldReallocateFlows = false; + _paths[i].shouldReallocateFlows = false; } } } @@ -1224,185 +1209,91 @@ void Bond::processBalanceTasks(const int64_t now) * Re-allocate flows from under-performing * NOTE: This could be part of the above block but was kept separate for clarity. */ - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { + if (_policy == ZT_BOND_POLICY_BALANCE_XOR || _policy == ZT_BOND_POLICY_BALANCE_AWARE) { Mutex::Lock _l(_flows_m); for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (! _paths[i]) { + if (! _paths[i].p) { continue; } - if (_paths[i] && _paths[i]->bonded() && _paths[i]->eligible(now, _ackSendInterval) && (_paths[i]->_allocation < minimumAllocationValue) && _paths[i]->_assignedFlowCount) { - _paths[i]->address().toString(curPathStr); - char traceMsg[256]; - char pathStr[128]; - _paths[i]->address().toString(pathStr); - sprintf( - traceMsg, - "%s (balance-*) Reallocating flows to peer %llx from under-performing link %s/%s\n", - OSUtils::humanReadableTimestamp().c_str(), - (unsigned long long)(_peer->_id.address().toInt()), - getLink(_paths[i])->ifname().c_str(), - pathStr); - RR->t->bondStateMessage(NULL, traceMsg); + if (_paths[i].p && _paths[i].bonded && _paths[i].eligible && (_paths[i].allocation < minimumAllocationValue) && _paths[i].assignedFlowCount) { + _paths[i].p->address().toString(pathStr); + log("reallocate flows from under-performing link %s/%s\n", getLink(_paths[i].p)->ifname().c_str(), pathStr); std::map >::iterator flow_it = _flows.begin(); while (flow_it != _flows.end()) { - if (flow_it->second->assignedPath() == _paths[i]) { + if (flow_it->second->assignedPath == _paths[i].p) { if (assignFlowToBondedPath(flow_it->second, now)) { - _paths[i]->_assignedFlowCount--; + _paths[i].assignedFlowCount--; } } ++flow_it; } - _paths[i]->_shouldReallocateFlows = false; + _paths[i].shouldReallocateFlows = false; } } } } - /** - * Tasks specific to (Balance Round Robin) - */ - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) { - // Nothing - } - /** - * Tasks specific to (Balance XOR) - */ - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR) { - // Nothing - } - /** - * Tasks specific to (Balance Aware) - */ - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { - if (_allowFlowHashing) { - Mutex::Lock _l(_flows_m); - if (_flowRebalanceStrategy == ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_PASSIVE) { - // Do nothing here, this is taken care of in the more general case above. - } - if (_flowRebalanceStrategy == ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_OPPORTUNISTIC) { - // If the flow is temporarily inactive we should take this opportunity to re-assign the flow if needed. - } - if (_flowRebalanceStrategy == ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_AGGRESSIVE) { - /** - * Return flows to the original path if it has once again become available - */ - if ((now - _lastFlowRebalance) > ZT_FLOW_REBALANCE_INTERVAL) { - std::map >::iterator flow_it = _flows.begin(); - while (flow_it != _flows.end()) { - if (flow_it->second->_previouslyAssignedPath && flow_it->second->_previouslyAssignedPath->eligible(now, _ackSendInterval) && (flow_it->second->_previouslyAssignedPath->_allocation >= (minimumAllocationValue * 2))) { - // fprintf(stderr, "moving flow back onto its previous path assignment (based on eligibility)\n"); - (flow_it->second->_assignedPath->_assignedFlowCount)--; - flow_it->second->assignPath(flow_it->second->_previouslyAssignedPath, now); - (flow_it->second->_previouslyAssignedPath->_assignedFlowCount)++; - } - ++flow_it; - } - _lastFlowRebalance = now; - } - /** - * Return flows to the original path if it has once again become (performant) - */ - if ((now - _lastFlowRebalance) > ZT_FLOW_REBALANCE_INTERVAL) { - std::map >::iterator flow_it = _flows.begin(); - while (flow_it != _flows.end()) { - if (flow_it->second->_previouslyAssignedPath && flow_it->second->_previouslyAssignedPath->eligible(now, _ackSendInterval) && (flow_it->second->_previouslyAssignedPath->_allocation >= (minimumAllocationValue * 2))) { - // fprintf(stderr, "moving flow back onto its previous path assignment (based on performance)\n"); - (flow_it->second->_assignedPath->_assignedFlowCount)--; - flow_it->second->assignPath(flow_it->second->_previouslyAssignedPath, now); - (flow_it->second->_previouslyAssignedPath->_assignedFlowCount)++; - } - ++flow_it; - } - _lastFlowRebalance = now; - } - } - } - else if (! _allowFlowHashing) { - // Nothing - } - } } -void Bond::dequeueNextActiveBackupPath(const uint64_t now) +void Bond::dequeueNextActiveBackupPath(uint64_t now) { if (_abFailoverQueue.empty()) { return; } - _abPath = _abFailoverQueue.front(); + _abPathIdx = _abFailoverQueue.front(); _abFailoverQueue.pop_front(); _lastActiveBackupPathChange = now; for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (_paths[i]) { - _paths[i]->resetPacketCounts(); + if (_paths[i].p) { + _paths[i].resetPacketCounts(); } } } bool Bond::abForciblyRotateLink() { - char traceMsg[256]; - char prevPathStr[128]; - char curPathStr[128]; - if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - SharedPtr prevPath = _abPath; - _abPath->address().toString(prevPathStr); + char prevPathStr[64]; + char curPathStr[64]; + if (_policy == ZT_BOND_POLICY_ACTIVE_BACKUP) { + int prevPathIdx = _abPathIdx; + _paths[_abPathIdx].p->address().toString(prevPathStr); dequeueNextActiveBackupPath(RR->node->now()); - _abPath->address().toString(curPathStr); - sprintf( - traceMsg, - "%s (active-backup) Forcibly rotating peer %llx link from %s/%s to %s/%s", - OSUtils::humanReadableTimestamp().c_str(), - (unsigned long long)(_peer->_id.address().toInt()), - getLink(prevPath)->ifname().c_str(), - prevPathStr, - getLink(_abPath)->ifname().c_str(), - curPathStr); - RR->t->bondStateMessage(NULL, traceMsg); + _paths[_abPathIdx].p->address().toString(curPathStr); + log("forcibly rotate link from %s/%s to %s/%s", getLink(_paths[prevPathIdx].p)->ifname().c_str(), prevPathStr, getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr); return true; } return false; } -void Bond::processActiveBackupTasks(void* tPtr, const int64_t now) +void Bond::processActiveBackupTasks(void* tPtr, int64_t now) { - char traceMsg[256]; - char pathStr[128]; - char prevPathStr[128]; - char curPathStr[128]; - - SharedPtr prevActiveBackupPath = _abPath; - SharedPtr nonPreferredPath; + char pathStr[64] = { 0 }; + char prevPathStr[64]; + char curPathStr[64]; + int prevActiveBackupPathIdx = _abPathIdx; + int nonPreferredPathIdx; bool bFoundPrimaryLink = false; /** * Generate periodic status report */ - if ((now - _lastBondStatusLog) > ZT_MULTIPATH_BOND_STATUS_INTERVAL) { + if ((now - _lastBondStatusLog) > ZT_BOND_STATUS_INTERVAL) { _lastBondStatusLog = now; - if (_abPath) { - _abPath->address().toString(curPathStr); - sprintf( - traceMsg, - "%s (active-backup) Active link to peer %llx is %s/%s, failover queue size is %zu", - OSUtils::humanReadableTimestamp().c_str(), - (unsigned long long)(_peer->_id.address().toInt()), - getLink(_abPath)->ifname().c_str(), - curPathStr, - _abFailoverQueue.size()); - RR->t->bondStateMessage(NULL, traceMsg); + if (_abPathIdx == ZT_MAX_PEER_NETWORK_PATHS) { + log("no active link"); } - else { - sprintf(traceMsg, "%s (active-backup) No active link to peer %llx", OSUtils::humanReadableTimestamp().c_str(), (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); + else if (_paths[_abPathIdx].p) { + _paths[_abPathIdx].p->address().toString(curPathStr); + log("active link is %s/%s, failover queue size is %zu", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr, _abFailoverQueue.size()); } if (_abFailoverQueue.empty()) { - sprintf(traceMsg, "%s (active-backup) Failover queue is empty, bond to peer %llx is NOT currently fault-tolerant", OSUtils::humanReadableTimestamp().c_str(), (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); + log("failover queue is empty, no longer fault-tolerant"); } } + /** * Select initial "active" active-backup link */ - if (! _abPath) { + if (_abPathIdx == ZT_MAX_PEER_NETWORK_PATHS) { /** * [Automatic mode] * The user has not explicitly specified links or their failover schedule, @@ -1412,24 +1303,16 @@ void Bond::processActiveBackupTasks(void* tPtr, const int64_t now) * simply find the next eligible path. */ if (! userHasSpecifiedLinks()) { - sprintf(traceMsg, "%s (active-backup) No links to peer %llx specified. Searching...", OSUtils::humanReadableTimestamp().c_str(), (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); + log("no user-specified links"); for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (_paths[i] && _paths[i]->eligible(now, _ackSendInterval)) { - _paths[i]->address().toString(curPathStr); - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); + if (_paths[i].p && _paths[i].eligible) { + _paths[i].p->address().toString(curPathStr); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); if (link) { - sprintf( - traceMsg, - "%s (active-backup) Found eligible link %s/%s to peer %llx", - OSUtils::humanReadableTimestamp().c_str(), - getLink(_paths[i])->ifname().c_str(), - curPathStr, - (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); + log("found eligible link %s/%s", getLink(_paths[i].p)->ifname().c_str(), curPathStr); + _abPathIdx = i; + break; } - _abPath = _paths[i]; - break; } } } @@ -1439,359 +1322,280 @@ void Bond::processActiveBackupTasks(void* tPtr, const int64_t now) */ else if (userHasSpecifiedLinks()) { if (userHasSpecifiedPrimaryLink()) { - // sprintf(traceMsg, "%s (active-backup) Checking local.conf for user-specified primary link\n", OSUtils::humanReadableTimestamp().c_str()); for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (! _paths[i]) { + if (! _paths[i].p) { continue; } - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); - if (_paths[i]->eligible(now, _ackSendInterval) && link->primary()) { - if (! _paths[i]->preferred()) { - _paths[i]->address().toString(curPathStr); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); + if (_paths[i].eligible && link->primary()) { + if (! _paths[i].preferred()) { + _paths[i].p->address().toString(curPathStr); // Found path on primary link, take note in case we don't find a preferred path - nonPreferredPath = _paths[i]; + nonPreferredPathIdx = i; bFoundPrimaryLink = true; } - if (_paths[i]->preferred()) { - _abPath = _paths[i]; - _abPath->address().toString(curPathStr); - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); + if (_paths[i].preferred()) { + _abPathIdx = i; + _paths[_abPathIdx].p->address().toString(curPathStr); bFoundPrimaryLink = true; - break; // Found preferred path %s on primary link + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[_abPathIdx].p->localSocket()); + if (link) { + log("found preferred primary link %s/%s", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr); + } + break; // Found preferred path on primary link } } } - if (_abPath) { - _abPath->address().toString(curPathStr); - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _abPath->localSocket()); - if (link) { - sprintf( - traceMsg, - "%s (active-backup) Found preferred primary link %s/%s to peer %llx", - OSUtils::humanReadableTimestamp().c_str(), - getLink(_abPath)->ifname().c_str(), - curPathStr, - (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); - } + if (bFoundPrimaryLink && nonPreferredPathIdx) { + log("found non-preferred primary link"); + _abPathIdx = nonPreferredPathIdx; } - else { - if (bFoundPrimaryLink && nonPreferredPath) { - sprintf(traceMsg, "%s (active-backup) Found non-preferred primary link to peer %llx", OSUtils::humanReadableTimestamp().c_str(), (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); - _abPath = nonPreferredPath; - } - } - if (! _abPath) { - sprintf(traceMsg, "%s (active-backup) Designated primary link to peer %llx is not yet ready", OSUtils::humanReadableTimestamp().c_str(), (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); + if (_abPathIdx == ZT_MAX_PEER_NETWORK_PATHS) { + log("user-designated primary link is not yet ready"); // TODO: Should wait for some time (failover interval?) and then switch to spare link } } else if (! userHasSpecifiedPrimaryLink()) { - int _abIdx = ZT_MAX_PEER_NETWORK_PATHS; - sprintf(traceMsg, "%s (active-backup) User did not specify a primary link to peer %llx, selecting first available link", OSUtils::humanReadableTimestamp().c_str(), (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); + log("user did not specify a primary link, select first available link"); for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (_paths[i] && _paths[i]->eligible(now, _ackSendInterval)) { - _abIdx = i; + if (_paths[i].p && _paths[i].eligible) { + _abPathIdx = i; break; } } - if (_abIdx == ZT_MAX_PEER_NETWORK_PATHS) { - // Unable to find a candidate next-best, no change - } - else { - _abPath = _paths[_abIdx]; - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _abPath->localSocket()); + if (_abPathIdx != ZT_MAX_PEER_NETWORK_PATHS) { + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[_abPathIdx].p->localSocket()); if (link) { - _abPath->address().toString(curPathStr); - sprintf( - traceMsg, - "%s (active-backup) Selected non-primary link %s/%s to peer %llx", - OSUtils::humanReadableTimestamp().c_str(), - getLink(_abPath)->ifname().c_str(), - curPathStr, - (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); + _paths[_abPathIdx].p->address().toString(curPathStr); + log("select non-primary link %s/%s", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr); } } } } } + + // Short-circuit if we don't have an active link yet + if (_abPathIdx == ZT_MAX_PEER_NETWORK_PATHS) { + return; + } + + // Remove ineligible paths from the failover link queue + for (std::deque::iterator it(_abFailoverQueue.begin()); it != _abFailoverQueue.end();) { + if (_paths[(*it)].p && ! _paths[(*it)].eligible) { + _paths[(*it)].p->address().toString(curPathStr); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[(*it)].p->localSocket()); + it = _abFailoverQueue.erase(it); + if (link) { + log("link %s/%s is now ineligible, removing from failover queue (%zu links in queue)", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr, _abFailoverQueue.size()); + } + } + else { + ++it; + } + } + /** + * Failover instructions were provided by user, build queue according those as well as IPv + * preference, disregarding performance. + */ + if (userHasSpecifiedFailoverInstructions()) { + /** + * Clear failover scores + */ + for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p) { + _paths[i].failoverScore = 0; + } + } + // Follow user-specified failover instructions + for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (! _paths[i].p || ! _paths[i].allowed() || ! _paths[i].eligible) { + continue; + } + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); + _paths[i].p->address().toString(pathStr); + + int failoverScoreHandicap = _paths[i].failoverScore; + if (_paths[i].preferred()) { + failoverScoreHandicap += ZT_BOND_FAILOVER_HANDICAP_PREFERRED; + } + if (link->primary()) { + // If using "optimize" primary re-select mode, ignore user link designations + failoverScoreHandicap += ZT_BOND_FAILOVER_HANDICAP_PRIMARY; + } + if (! _paths[i].failoverScore) { + // If we didn't inherit a failover score from a "parent" that wants to use this path as a failover + int newHandicap = failoverScoreHandicap ? failoverScoreHandicap : _paths[i].allocation; + _paths[i].failoverScore = newHandicap; + } + SharedPtr failoverLink; + if (link->failoverToLink().length()) { + failoverLink = RR->bc->getLinkByName(_policyAlias, link->failoverToLink()); + } + if (failoverLink) { + for (int j = 0; j < ZT_MAX_PEER_NETWORK_PATHS; j++) { + if (_paths[j].p && getLink(_paths[j].p) == failoverLink.ptr()) { + _paths[j].p->address().toString(pathStr); + int inheritedHandicap = failoverScoreHandicap - 10; + int newHandicap = _paths[j].failoverScore > inheritedHandicap ? _paths[j].failoverScore : inheritedHandicap; + if (! _paths[j].preferred()) { + newHandicap--; + } + _paths[j].failoverScore = newHandicap; + } + } + } + if (_paths[i].p.ptr() != _paths[_abPathIdx].p.ptr()) { + bool bFoundPathInQueue = false; + for (std::deque::iterator it(_abFailoverQueue.begin()); it != _abFailoverQueue.end(); ++it) { + if (_paths[i].p.ptr() == _paths[(*it)].p.ptr()) { + bFoundPathInQueue = true; + } + } + if (! bFoundPathInQueue) { + _abFailoverQueue.push_front(i); + _paths[i].p->address().toString(curPathStr); + log("add link %s/%s to failover queue (%zu links in queue)", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr, _abFailoverQueue.size()); + addPathToBond(0, i); + } + } + } + } /** - * Update and maintain the active-backup failover queue + * No failover instructions provided by user, build queue according to performance + * and IPv preference. */ - if (_abPath) { - // Don't worry about the failover queue until we have an active link - // Remove ineligible paths from the failover link queue - for (std::list >::iterator it(_abFailoverQueue.begin()); it != _abFailoverQueue.end();) { - if ((*it) && ! (*it)->eligible(now, _ackSendInterval)) { - (*it)->address().toString(curPathStr); - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, (*it)->localSocket()); - it = _abFailoverQueue.erase(it); - if (link) { - sprintf( - traceMsg, - "%s (active-backup) Link %s/%s to peer %llx is now ineligible, removing from failover queue, there are %zu links in the queue", - OSUtils::humanReadableTimestamp().c_str(), - getLink(_abPath)->ifname().c_str(), - curPathStr, - (unsigned long long)(_peer->_id.address().toInt()), - _abFailoverQueue.size()); - RR->t->bondStateMessage(NULL, traceMsg); - } + else if (! userHasSpecifiedFailoverInstructions()) { + for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (! _paths[i].p || ! _paths[i].allowed() || ! _paths[i].eligible) { + continue; + } + int failoverScoreHandicap = 0; + if (_paths[i].preferred()) { + failoverScoreHandicap = ZT_BOND_FAILOVER_HANDICAP_PREFERRED; + } + if (! _paths[i].eligible) { + failoverScoreHandicap = -10000; + } + if (getLink(_paths[i].p)->primary() && _abLinkSelectMethod != ZT_BOND_RESELECTION_POLICY_OPTIMIZE) { + // If using "optimize" primary re-select mode, ignore user link designations + failoverScoreHandicap = ZT_BOND_FAILOVER_HANDICAP_PRIMARY; + } + if (_paths[i].p.ptr() == _paths[negotiatedPathIdx].p.ptr()) { + _paths[i].negotiated = true; + failoverScoreHandicap = ZT_BOND_FAILOVER_HANDICAP_NEGOTIATED; } else { - ++it; + _paths[i].negotiated = false; } - } - /** - * Failover instructions were provided by user, build queue according those as well as IPv - * preference, disregarding performance. - */ - if (userHasSpecifiedFailoverInstructions()) { - /** - * Clear failover scores - */ - for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (_paths[i]) { - _paths[i]->_failoverScore = 0; - } - } - // Follow user-specified failover instructions - for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (! _paths[i] || ! _paths[i]->allowed() || ! _paths[i]->eligible(now, _ackSendInterval)) { - continue; - } - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); - _paths[i]->address().toString(pathStr); - - int failoverScoreHandicap = _paths[i]->_failoverScore; - if (_paths[i]->preferred()) { - failoverScoreHandicap += ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED; - } - if (link->primary()) { - // If using "optimize" primary reselect mode, ignore user link designations - failoverScoreHandicap += ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY; - } - if (! _paths[i]->_failoverScore) { - // If we didn't inherit a failover score from a "parent" that wants to use this path as a failover - int newHandicap = failoverScoreHandicap ? failoverScoreHandicap : _paths[i]->_allocation; - _paths[i]->_failoverScore = newHandicap; - } - SharedPtr failoverLink; - if (link->failoverToLink().length()) { - failoverLink = RR->bc->getLinkByName(_policyAlias, link->failoverToLink()); - } - if (failoverLink) { - for (int j = 0; j < ZT_MAX_PEER_NETWORK_PATHS; j++) { - if (_paths[j] && getLink(_paths[j]) == failoverLink.ptr()) { - _paths[j]->address().toString(pathStr); - int inheritedHandicap = failoverScoreHandicap - 10; - int newHandicap = _paths[j]->_failoverScore > inheritedHandicap ? _paths[j]->_failoverScore : inheritedHandicap; - if (! _paths[j]->preferred()) { - newHandicap--; - } - _paths[j]->_failoverScore = newHandicap; - } + _paths[i].failoverScore = _paths[i].allocation + failoverScoreHandicap; + if (_paths[i].p.ptr() != _paths[_abPathIdx].p.ptr()) { + bool bFoundPathInQueue = false; + for (std::deque::iterator it(_abFailoverQueue.begin()); it != _abFailoverQueue.end(); ++it) { + if (_paths[i].p.ptr() == _paths[(*it)].p.ptr()) { + bFoundPathInQueue = true; } } - if (_paths[i].ptr() != _abPath.ptr()) { - bool bFoundPathInQueue = false; - for (std::list >::iterator it(_abFailoverQueue.begin()); it != _abFailoverQueue.end(); ++it) { - if (_paths[i].ptr() == (*it).ptr()) { - bFoundPathInQueue = true; - } - } - if (! bFoundPathInQueue) { - _abFailoverQueue.push_front(_paths[i]); - _paths[i]->address().toString(curPathStr); - sprintf( - traceMsg, - "%s (active-backup) Added link %s/%s to peer %llx to failover queue, there are %zu links in the queue", - OSUtils::humanReadableTimestamp().c_str(), - getLink(_abPath)->ifname().c_str(), - curPathStr, - (unsigned long long)(_peer->_id.address().toInt()), - _abFailoverQueue.size()); - RR->t->bondStateMessage(NULL, traceMsg); - } + if (! bFoundPathInQueue) { + _abFailoverQueue.push_front(i); + _paths[i].p->address().toString(curPathStr); + log("add link %s/%s to failover queue (%zu links in queue)", getLink(_paths[i].p)->ifname().c_str(), curPathStr, _abFailoverQueue.size()); + addPathToBond(0, i); } } } - /** - * No failover instructions provided by user, build queue according to performance - * and IPv preference. - */ - else if (! userHasSpecifiedFailoverInstructions()) { - for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { - if (! _paths[i] || ! _paths[i]->allowed() || ! _paths[i]->eligible(now, _ackSendInterval)) { - continue; - } - int failoverScoreHandicap = 0; - if (_paths[i]->preferred()) { - failoverScoreHandicap = ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED; - } - bool includeRefractoryPeriod = true; - if (! _paths[i]->eligible(now, includeRefractoryPeriod)) { - failoverScoreHandicap = -10000; - } - if (getLink(_paths[i])->primary() && _abLinkSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { - // If using "optimize" primary reselect mode, ignore user link designations - failoverScoreHandicap = ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY; - } - if (_paths[i].ptr() == negotiatedPath.ptr()) { - _paths[i]->_negotiated = true; - failoverScoreHandicap = ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED; - } - else { - _paths[i]->_negotiated = false; - } - _paths[i]->_failoverScore = _paths[i]->_allocation + failoverScoreHandicap; - if (_paths[i].ptr() != _abPath.ptr()) { - bool bFoundPathInQueue = false; - for (std::list >::iterator it(_abFailoverQueue.begin()); it != _abFailoverQueue.end(); ++it) { - if (_paths[i].ptr() == (*it).ptr()) { - bFoundPathInQueue = true; - } - } - if (! bFoundPathInQueue) { - _abFailoverQueue.push_front(_paths[i]); - _paths[i]->address().toString(curPathStr); - sprintf( - traceMsg, - "%s (active-backup) Added link %s/%s to peer %llx to failover queue, there are %zu links in the queue", - OSUtils::humanReadableTimestamp().c_str(), - getLink(_paths[i])->ifname().c_str(), - curPathStr, - (unsigned long long)(_peer->_id.address().toInt()), - _abFailoverQueue.size()); - RR->t->bondStateMessage(NULL, traceMsg); - } - } - } - } - _abFailoverQueue.sort(PathQualityComparator()); } + // Sort queue based on performance + if (! _abFailoverQueue.empty()) { + for (int i = 0; i < _abFailoverQueue.size(); i++) { + int value_to_insert = _abFailoverQueue[i]; + int hole_position = i; + while (hole_position > 0 && (_abFailoverQueue[hole_position - 1] > value_to_insert)) { + _abFailoverQueue[hole_position] = _abFailoverQueue[hole_position - 1]; + hole_position = hole_position - 1; + } + _abFailoverQueue[hole_position] = value_to_insert; + } + } + /** * Short-circuit if we have no queued paths */ if (_abFailoverQueue.empty()) { return; } + /** - * Fulfill primary reselect obligations + * Fulfill primary re-select obligations */ - if (_abPath && ! _abPath->eligible(now, _ackSendInterval)) { // Implicit ZT_MULTIPATH_RESELECTION_POLICY_FAILURE - _abPath->address().toString(curPathStr); - sprintf( - traceMsg, - "%s (active-backup) Link %s/%s to peer %llx has failed. Selecting new link from failover queue, there are %zu links in the queue", - OSUtils::humanReadableTimestamp().c_str(), - getLink(_abPath)->ifname().c_str(), - curPathStr, - (unsigned long long)(_peer->_id.address().toInt()), - _abFailoverQueue.size()); - RR->t->bondStateMessage(NULL, traceMsg); + if (_paths[_abPathIdx].p && ! _paths[_abPathIdx].eligible) { // Implicit ZT_BOND_RESELECTION_POLICY_FAILURE + _paths[_abPathIdx].p->address().toString(curPathStr); + log("link %s/%s has failed, select link from failover queue (%zu links in queue)", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr, _abFailoverQueue.size()); if (! _abFailoverQueue.empty()) { dequeueNextActiveBackupPath(now); - _abPath->address().toString(curPathStr); - sprintf( - traceMsg, - "%s (active-backup) Active link to peer %llx has been switched to %s/%s", - OSUtils::humanReadableTimestamp().c_str(), - (unsigned long long)(_peer->_id.address().toInt()), - getLink(_abPath)->ifname().c_str(), - curPathStr); - RR->t->bondStateMessage(NULL, traceMsg); + _paths[_abPathIdx].p->address().toString(curPathStr); + log("active link switched to %s/%s", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr); } else { - sprintf(traceMsg, "%s (active-backup) Failover queue is empty. No links to peer %llx to choose from", OSUtils::humanReadableTimestamp().c_str(), (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); + log("failover queue is empty, no links to choose from"); } } /** * Detect change to prevent flopping during later optimization step. */ - if (prevActiveBackupPath != _abPath) { + if (prevActiveBackupPathIdx != _abPathIdx) { _lastActiveBackupPathChange = now; } - if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS) { - if (_abPath && ! getLink(_abPath)->primary() && getLink(_abFailoverQueue.front())->primary()) { + if (_abLinkSelectMethod == ZT_BOND_RESELECTION_POLICY_ALWAYS) { + if (_paths[_abPathIdx].p && ! getLink(_paths[_abPathIdx].p)->primary() && getLink(_paths[_abFailoverQueue.front()].p)->primary()) { dequeueNextActiveBackupPath(now); - _abPath->address().toString(curPathStr); - sprintf( - traceMsg, - "%s (active-backup) Switching back to available primary link %s/%s to peer %llx [linkSelectionMethod = always]", - OSUtils::humanReadableTimestamp().c_str(), - getLink(_abPath)->ifname().c_str(), - curPathStr, - (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); + _paths[_abPathIdx].p->address().toString(curPathStr); + log("switch back to available primary link %s/%s (select: always)", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr); } } - if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_BETTER) { - if (_abPath && ! getLink(_abPath)->primary()) { + if (_abLinkSelectMethod == ZT_BOND_RESELECTION_POLICY_BETTER) { + if (_paths[_abPathIdx].p && ! getLink(_paths[_abPathIdx].p)->primary()) { // Active backup has switched to "better" primary link according to re-select policy. - if (getLink(_abFailoverQueue.front())->primary() && (_abFailoverQueue.front()->_failoverScore > _abPath->_failoverScore)) { + if (getLink(_paths[_abFailoverQueue.front()].p)->primary() && (_paths[_abFailoverQueue.front()].failoverScore > _paths[_abPathIdx].failoverScore)) { dequeueNextActiveBackupPath(now); - _abPath->address().toString(curPathStr); - sprintf( - traceMsg, - "%s (active-backup) Switching back to user-defined primary link %s/%s to peer %llx [linkSelectionMethod = better]", - OSUtils::humanReadableTimestamp().c_str(), - getLink(_abPath)->ifname().c_str(), - curPathStr, - (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); + _paths[_abPathIdx].p->address().toString(curPathStr); + log("switch back to user-defined primary link %s/%s (select: better)", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr); } } } - if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE && ! _abFailoverQueue.empty()) { + if (_abLinkSelectMethod == ZT_BOND_RESELECTION_POLICY_OPTIMIZE && ! _abFailoverQueue.empty()) { /** * Implement link negotiation that was previously-decided */ - if (_abFailoverQueue.front()->_negotiated) { + if (_paths[_abFailoverQueue.front()].negotiated) { dequeueNextActiveBackupPath(now); - _abPath->address().toString(prevPathStr); + _paths[_abPathIdx].p->address().toString(prevPathStr); _lastPathNegotiationCheck = now; - _abPath->address().toString(curPathStr); - sprintf( - traceMsg, - "%s (active-backup) Switching negotiated link %s/%s to peer %llx [linkSelectionMethod = optimize]", - OSUtils::humanReadableTimestamp().c_str(), - getLink(_abPath)->ifname().c_str(), - curPathStr, - (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); + _paths[_abPathIdx].p->address().toString(curPathStr); + log("switch negotiated link %s/%s (select: optimize)", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr); } else { // Try to find a better path and automatically switch to it -- not too often, though. - if ((now - _lastActiveBackupPathChange) > ZT_MULTIPATH_MIN_ACTIVE_BACKUP_AUTOFLOP_INTERVAL) { + if ((now - _lastActiveBackupPathChange) > ZT_BOND_OPTIMIZE_INTERVAL) { if (! _abFailoverQueue.empty()) { - int newFScore = _abFailoverQueue.front()->_failoverScore; - int prevFScore = _abPath->_failoverScore; + int newFScore = _paths[_abFailoverQueue.front()].failoverScore; + int prevFScore = _paths[_abPathIdx].failoverScore; // Establish a minimum switch threshold to prevent flapping - int failoverScoreDifference = _abFailoverQueue.front()->_failoverScore - _abPath->_failoverScore; - int thresholdQuantity = (int)(ZT_MULTIPATH_ACTIVE_BACKUP_OPTIMIZE_MIN_THRESHOLD * (float)_abPath->_allocation); + int failoverScoreDifference = _paths[_abFailoverQueue.front()].failoverScore - _paths[_abPathIdx].failoverScore; + int thresholdQuantity = (int)(ZT_BOND_ACTIVE_BACKUP_OPTIMIZE_MIN_THRESHOLD * (float)_paths[_abPathIdx].allocation); if ((failoverScoreDifference > 0) && (failoverScoreDifference > thresholdQuantity)) { - SharedPtr oldPath = _abPath; - _abPath->address().toString(prevPathStr); + SharedPtr oldPath = _paths[_abPathIdx].p; + _paths[_abPathIdx].p->address().toString(prevPathStr); dequeueNextActiveBackupPath(now); - _abPath->address().toString(curPathStr); - sprintf( - traceMsg, - "%s (active-backup) Switching from %s/%s (fscore=%d) to better link %s/%s (fscore=%d) for peer %llx [linkSelectionMethod = optimize]", - OSUtils::humanReadableTimestamp().c_str(), + _paths[_abPathIdx].p->address().toString(curPathStr); + log("ab", + "switch from %s/%s (score: %d) to better link %s/%s (score: %d) for peer %llx (select: optimize)", getLink(oldPath)->ifname().c_str(), prevPathStr, prevFScore, - getLink(_abPath)->ifname().c_str(), + getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr, newFScore, - (unsigned long long)(_peer->_id.address().toInt())); - RR->t->bondStateMessage(NULL, traceMsg); + _peerId); } } } @@ -1799,165 +1603,132 @@ void Bond::processActiveBackupTasks(void* tPtr, const int64_t now) } } -void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool useTemplate) +void Bond::setBondParameters(int policy, SharedPtr templateBond, bool useTemplate) { - // If invalid bonding policy, try default - int _defaultBondingPolicy = BondController::defaultBondingPolicy(); - if (policy <= ZT_BONDING_POLICY_NONE || policy > ZT_BONDING_POLICY_BALANCE_AWARE) { - // If no default set, use NONE (effectively disabling this bond) - if (_defaultBondingPolicy < ZT_BONDING_POLICY_NONE || _defaultBondingPolicy > ZT_BONDING_POLICY_BALANCE_AWARE) { - _bondingPolicy = ZT_BONDING_POLICY_NONE; - } - _bondingPolicy = _defaultBondingPolicy; - } - else { - _bondingPolicy = policy; - } + // Sanity check for policy - _freeRandomByte = 0; + _defaultPolicy = (_defaultPolicy <= ZT_BOND_POLICY_NONE || _defaultPolicy > ZT_BOND_POLICY_BALANCE_AWARE) ? ZT_BOND_POLICY_NONE : _defaultPolicy; + _policy = (policy <= ZT_BOND_POLICY_NONE || policy > ZT_BOND_POLICY_BALANCE_AWARE) ? ZT_BOND_POLICY_NONE : _defaultPolicy; + + // Flows + + _lastFlowExpirationCheck = 0; + _lastFlowRebalance = 0; + _allowFlowHashing = false; + + // Path negotiation + + _lastSentPathNegotiationRequest = 0; + _lastPathNegotiationCheck = 0; + _allowPathNegotiation = false; + _pathNegotiationCutoffCount = 0; + _lastPathNegotiationReceived = 0; + _localUtility = 0; + + // QOS Verb (and related checks) + + _qosCutoffCount = 0; + _lastQoSRateCheck = 0; + _lastQualityEstimation = 0; + + // User preferences which may override the default bonding algorithm's behavior _userHasSpecifiedPrimaryLink = false; _userHasSpecifiedFailoverInstructions = false; + _userHasSpecifiedLinkSpeeds = 0; + // Bond status + + _lastBondStatusLog = 0; + _lastSummaryDump = 0; _isHealthy = false; _numAliveLinks = 0; _numTotalLinks = 0; + _numBondedPaths = 0; + + // active-backup + + _lastActiveBackupPathChange = 0; + _abPathIdx = ZT_MAX_PEER_NETWORK_PATHS; + + // rr + + _rrPacketsSentOnCurrLink = 0; + _rrIdx = ZT_MAX_PEER_NETWORK_PATHS; + + // General parameters _downDelay = 0; _upDelay = 0; - _allowFlowHashing = false; - _bondMonitorInterval = 0; - _shouldCollectPathStatistics = false; + _monitorInterval = 0; - // Path negotiation - _allowPathNegotiation = false; - _pathNegotiationCutoffCount = 0; - _localUtility = 0; - - _numBondedPaths = 0; - _rrPacketsSentOnCurrLink = 0; - _rrIdx = 0; - - _totalBondUnderload = 0; + // (Sane?) limits _maxAcceptableLatency = 100; _maxAcceptablePacketDelayVariance = 50; _maxAcceptablePacketLossRatio = 0.10f; _maxAcceptablePacketErrorRatio = 0.10f; - _userHasSpecifiedLinkSpeeds = 0; - /* ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_PASSIVE is the most conservative strategy and is - least likely to cause unexpected behavior */ - _flowRebalanceStrategy = ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_AGGRESSIVE; + // General timers + + _lastFrame = 0; + _lastBackgroundTaskCheck = 0; + + // balance-aware + + _totalBondUnderload = 0; + + _overheadBytes = 0; /** - * Paths are actively monitored to provide a real-time quality/preference-ordered rapid failover queue. + * Policy-specific defaults */ - switch (policy) { - case ZT_BONDING_POLICY_ACTIVE_BACKUP: - _failoverInterval = 500; - _abLinkSelectMethod = ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE; - _linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; - _qualityWeights[ZT_QOS_LAT_IDX] = 0.2f; - _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_PDV_IDX] = 0.2f; - _qualityWeights[ZT_QOS_PLR_IDX] = 0.2f; - _qualityWeights[ZT_QOS_PER_IDX] = 0.2f; - _qualityWeights[ZT_QOS_THR_IDX] = 0.2f; - _qualityWeights[ZT_QOS_THM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; - _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; + switch (_policy) { + case ZT_BOND_POLICY_ACTIVE_BACKUP: + _abLinkSelectMethod = ZT_BOND_RESELECTION_POLICY_OPTIMIZE; break; - /** - * All seemingly-alive paths are used. Paths are not actively monitored. - */ - case ZT_BONDING_POLICY_BROADCAST: + case ZT_BOND_POLICY_BROADCAST: _downDelay = 30000; _upDelay = 0; break; - /** - * Paths are monitored to determine when/if one needs to be added or removed from the rotation - */ - case ZT_BONDING_POLICY_BALANCE_RR: - _failoverInterval = 3000; - _allowFlowHashing = false; - _packetsPerLink = 1024; - _linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; - _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; - _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_PDV_IDX] = 0.2f; - _qualityWeights[ZT_QOS_PLR_IDX] = 0.1f; - _qualityWeights[ZT_QOS_PER_IDX] = 0.1f; - _qualityWeights[ZT_QOS_THR_IDX] = 0.1f; - _qualityWeights[ZT_QOS_THM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; - _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; + case ZT_BOND_POLICY_BALANCE_RR: + _packetsPerLink = 64; break; - /** - * Path monitoring is used to determine the capacity of each - * path and where to place the next flow. - */ - case ZT_BONDING_POLICY_BALANCE_XOR: - _failoverInterval = 3000; - _upDelay = _bondMonitorInterval * 2; + case ZT_BOND_POLICY_BALANCE_XOR: _allowFlowHashing = true; - _linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; - _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; - _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_PDV_IDX] = 0.2f; - _qualityWeights[ZT_QOS_PLR_IDX] = 0.1f; - _qualityWeights[ZT_QOS_PER_IDX] = 0.1f; - _qualityWeights[ZT_QOS_THR_IDX] = 0.1f; - _qualityWeights[ZT_QOS_THM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; - _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; break; - /** - * Path monitoring is used to determine the capacity of each - * path and where to place the next flow. Additionally, re-shuffling - * of flows may take place. - */ - case ZT_BONDING_POLICY_BALANCE_AWARE: - _failoverInterval = 3000; + case ZT_BOND_POLICY_BALANCE_AWARE: _allowFlowHashing = true; - _linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; - _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; - _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_PDV_IDX] = 0.4f; - _qualityWeights[ZT_QOS_PLR_IDX] = 0.2f; - _qualityWeights[ZT_QOS_PER_IDX] = 0.0f; - _qualityWeights[ZT_QOS_THR_IDX] = 0.0f; - _qualityWeights[ZT_QOS_THM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; - _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; break; default: break; } - /* If a user has specified custom parameters for this bonding policy, overlay - them onto the defaults that were previously set */ + _qw[ZT_QOS_LAT_IDX] = 0.3f; + _qw[ZT_QOS_LTM_IDX] = 0.1f; + _qw[ZT_QOS_PDV_IDX] = 0.3f; + _qw[ZT_QOS_PLR_IDX] = 0.1f; + _qw[ZT_QOS_PER_IDX] = 0.1f; + _qw[ZT_QOS_SCP_IDX] = 0.1f; + + _failoverInterval = ZT_BOND_FAILOVER_DEFAULT_INTERVAL; + + /* If a user has specified custom parameters for this bonding policy, overlay them onto the defaults */ if (useTemplate) { _policyAlias = templateBond->_policyAlias; - _failoverInterval = templateBond->_failoverInterval >= 250 ? templateBond->_failoverInterval : _failoverInterval; + _failoverInterval = templateBond->_failoverInterval >= ZT_BOND_FAILOVER_MIN_INTERVAL ? templateBond->_failoverInterval : ZT_BOND_FAILOVER_MIN_INTERVAL; _downDelay = templateBond->_downDelay; _upDelay = templateBond->_upDelay; - if (templateBond->_linkMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE && templateBond->_failoverInterval != 0) { - // fprintf(stderr, "warning: passive path monitoring was specified, this will prevent failovers from happening in a timely manner.\n"); - } _abLinkSelectMethod = templateBond->_abLinkSelectMethod; - memcpy(_qualityWeights, templateBond->_qualityWeights, ZT_QOS_WEIGHT_SIZE * sizeof(float)); + memcpy(_qw, templateBond->_qw, ZT_QOS_WEIGHT_SIZE * sizeof(float)); } - /* Set timer geometries */ - _bondMonitorInterval = _failoverInterval / 3; - BondController::setMinReqPathMonitorInterval(_bondMonitorInterval); - _ackSendInterval = _failoverInterval; + + // Timer geometry + + _monitorInterval = _failoverInterval / ZT_BOND_ECHOS_PER_FAILOVER_INTERVAL; _qualityEstimationInterval = _failoverInterval * 2; - _dynamicPathMonitorInterval = 0; - _ackCutoffCount = 0; - _qosSendInterval = _bondMonitorInterval * 4; + _qosSendInterval = _failoverInterval * 2; _qosCutoffCount = 0; - throughputMeasurementInterval = _ackSendInterval * 2; _defaultPathRefractoryPeriod = 8000; } @@ -1969,24 +1740,51 @@ void Bond::setUserQualityWeights(float weights[], int len) weightTotal += weights[i]; } if (weightTotal > 0.99 && weightTotal < 1.01) { - memcpy(_qualityWeights, weights, len * sizeof(float)); + memcpy(_qw, weights, len * sizeof(float)); } } } -bool Bond::relevant() -{ - return false; -} - SharedPtr Bond::getLink(const SharedPtr& path) { return RR->bc->getLinkBySocket(_policyAlias, path->localSocket()); } -void Bond::dumpInfo(const int64_t now) +void Bond::dumpPathStatus(int64_t now, int pathIdx) { - // Omitted + char pathStr[64] = { 0 }; + _paths[pathIdx].p->address().toString(pathStr); + log("path status: [%2d] alive:%d, eli:%d, bonded:%d, flows:%6d, lat:%10.3f, jitter:%10.3f, error:%6.4f, loss:%6.4f, age:%6d --- (%s/%s)", + pathIdx, + _paths[pathIdx].alive, + _paths[pathIdx].eligible, + _paths[pathIdx].bonded, + _paths[pathIdx].assignedFlowCount, + _paths[pathIdx].latencyMean, + _paths[pathIdx].latencyVariance, + _paths[pathIdx].packetErrorRatio, + _paths[pathIdx].packetLossRatio, + _paths[pathIdx].p->age(now), + getLink(_paths[pathIdx].p)->ifname().c_str(), + pathStr); +} + +void Bond::dumpInfo(int64_t now, bool force) +{ + uint64_t timeSinceLastDump = now - _lastSummaryDump; + if (! force && timeSinceLastDump < ZT_BOND_STATUS_INTERVAL) { + return; + } + _lastSummaryDump = now; + float overhead = (_overheadBytes / (timeSinceLastDump / 1000.0f) / 1000.0f); + _overheadBytes = 0; + log("bond status: bp: %d, fi: %d, mi: %d, ud: %d, dd: %d, flows: %lu, ambient: %f KB/s", _policy, _failoverInterval, _monitorInterval, _upDelay, _downDelay, (unsigned long)_flows.size(), overhead); + for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p) { + dumpPathStatus(now, i); + } + } + fprintf(stderr, "\n\n\n"); } } // namespace ZeroTier diff --git a/node/Bond.hpp b/node/Bond.hpp index 5a837a7c0..e8088896e 100644 --- a/node/Bond.hpp +++ b/node/Bond.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2013-2020 ZeroTier, Inc. + * Copyright (c)2013-2021 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2025-01-01 + * Change Date: 2026-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -14,45 +14,521 @@ #ifndef ZT_BOND_HPP #define ZT_BOND_HPP -#include "Flow.hpp" +#include "../osdep/Phy.hpp" #include "Packet.hpp" #include "Path.hpp" -#include "Peer.hpp" +#include "RuntimeEnvironment.hpp" +#include "Trace.hpp" +#include +#include #include #include +/** + * Indices for the path quality weight vector + */ +enum ZT_BondQualityWeightIndex { ZT_QOS_LAT_IDX, ZT_QOS_LTM_IDX, ZT_QOS_PDV_IDX, ZT_QOS_PLR_IDX, ZT_QOS_PER_IDX, ZT_QOS_THR_IDX, ZT_QOS_THM_IDX, ZT_QOS_THV_IDX, ZT_QOS_AGE_IDX, ZT_QOS_SCP_IDX, ZT_QOS_WEIGHT_SIZE }; + +/** + * Multipath bonding policy + */ +enum ZT_BondBondingPolicy { + /** + * Normal operation. No fault tolerance, no load balancing + */ + ZT_BOND_POLICY_NONE = 0, + + /** + * Sends traffic out on only one path at a time. Configurable immediate + * fail-over. + */ + ZT_BOND_POLICY_ACTIVE_BACKUP = 1, + + /** + * Sends traffic out on all paths + */ + ZT_BOND_POLICY_BROADCAST = 2, + + /** + * Stripes packets across all paths + */ + ZT_BOND_POLICY_BALANCE_RR = 3, + + /** + * Packets destined for specific peers will always be sent over the same + * path. + */ + ZT_BOND_POLICY_BALANCE_XOR = 4, + + /** + * Balances flows among all paths according to path performance + */ + ZT_BOND_POLICY_BALANCE_AWARE = 5 +}; + +/** + * Multipath active re-selection policy (linkSelectMethod) + */ +enum ZT_BondLinkSelectMethod { + /** + * Primary link regains status as active link whenever it comes back up + * (default when links are explicitly specified) + */ + ZT_BOND_RESELECTION_POLICY_ALWAYS = 0, + + /** + * Primary link regains status as active link when it comes back up and + * (if) it is better than the currently-active link. + */ + ZT_BOND_RESELECTION_POLICY_BETTER = 1, + + /** + * Primary link regains status as active link only if the currently-active + * link fails. + */ + ZT_BOND_RESELECTION_POLICY_FAILURE = 2, + + /** + * The primary link can change if a superior path is detected. + * (default if user provides no fail-over guidance) + */ + ZT_BOND_RESELECTION_POLICY_OPTIMIZE = 3 +}; + +/** + * Mode of multipath link interface + */ +enum ZT_BondLinkMode { ZT_BOND_SLAVE_MODE_PRIMARY = 0, ZT_BOND_SLAVE_MODE_SPARE = 1 }; + +#include "../node/AtomicCounter.hpp" +#include "../node/SharedPtr.hpp" + +#include + namespace ZeroTier { -class RuntimeEnvironment; +class Link { + friend class SharedPtr; + + public: + /** + * + * @param ifnameStr + * @param ipvPref + * @param speed + * @param enabled + * @param mode + * @param failoverToLinkStr + * @param userSpecifiedAlloc + */ + Link(std::string ifnameStr, uint8_t ipvPref, uint32_t speed, bool enabled, uint8_t mode, std::string failoverToLinkStr, float userSpecifiedAlloc) + : _ifnameStr(ifnameStr) + , _ipvPref(ipvPref) + , _speed(speed) + , _relativeSpeed(0) + , _enabled(enabled) + , _mode(mode) + , _failoverToLinkStr(failoverToLinkStr) + , _userSpecifiedAlloc(userSpecifiedAlloc) + , _isUserSpecified(false) + { + } + + /** + * @return The string representation of this link's underlying interface's system name. + */ + inline std::string ifname() + { + return _ifnameStr; + } + + /** + * @return Whether this link is designated as a primary. + */ + inline bool primary() + { + return _mode == ZT_BOND_SLAVE_MODE_PRIMARY; + } + + /** + * @return Whether this link is designated as a spare. + */ + inline bool spare() + { + return _mode == ZT_BOND_SLAVE_MODE_SPARE; + } + + /** + * @return The name of the link interface that should be used in the event of a failure. + */ + inline std::string failoverToLink() + { + return _failoverToLinkStr; + } + + /** + * @return Whether this link interface was specified by the user or auto-detected. + */ + inline bool isUserSpecified() + { + return _isUserSpecified; + } + + /** + * Signify that this link was specified by the user and not the result of auto-detection. + * + * @param isUserSpecified + */ + inline void setAsUserSpecified(bool isUserSpecified) + { + _isUserSpecified = isUserSpecified; + } + + /** + * @return Whether or not the user has specified failover instructions. + */ + inline bool userHasSpecifiedFailoverInstructions() + { + return _failoverToLinkStr.length(); + } + + /** + * @return The speed of the link relative to others in the bond. + */ + inline uint8_t relativeSpeed() + { + return _relativeSpeed; + } + + /** + * Sets the speed of the link relative to others in the bond. + * + * @param relativeSpeed The speed relative to the rest of the link. + */ + inline void setRelativeSpeed(uint8_t relativeSpeed) + { + _relativeSpeed = relativeSpeed; + } + + /** + * @return The absolute speed of the link (as specified by the user.) + */ + inline uint32_t speed() + { + return _speed; + } + + /** + * @return The address preference for this link (as specified by the user.) + */ + inline uint8_t ipvPref() + { + return _ipvPref; + } + + /** + * @return The mode (e.g. primary/spare) for this link (as specified by the user.) + */ + inline uint8_t mode() + { + return _mode; + } + + /** + * @return Whether this link is enabled or disabled + */ + inline uint8_t enabled() + { + return _enabled; + } + + private: + /** + * String representation of underlying interface's system name + */ + std::string _ifnameStr; + + /** + * What preference (if any) a user has for IP protocol version used in + * path aggregations. Preference is expressed in the order of the digits: + * + * 0: no preference + * 4: IPv4 only + * 6: IPv6 only + * 46: IPv4 over IPv6 + * 64: IPv6 over IPv4 + */ + uint8_t _ipvPref; + + /** + * User-specified speed of this link + */ + uint32_t _speed; + + /** + * Speed relative to other specified links (computed by Bond) + */ + uint8_t _relativeSpeed; + + /** + * Whether this link is enabled, or (disabled (possibly bad config)) + */ + uint8_t _enabled; + + /** + * Whether this link is designated as a primary, a spare, or no preference. + */ + uint8_t _mode; + + /** + * The specific name of the link to be used in the event that this + * link fails. + */ + std::string _failoverToLinkStr; + + /** + * User-specified allocation + */ + float _userSpecifiedAlloc; + + /** + * Whether or not this link was created as a result of manual user specification. This is + * important to know because certain policy decisions are dependent on whether the user + * intents to use a specific set of interfaces. + */ + bool _isUserSpecified; + + AtomicCounter __refCount; +}; + class Link; class Peer; class Bond { + public: + /** + * @return Whether this link is permitted to become a member of a bond. + */ + static bool linkAllowed(std::string& policyAlias, SharedPtr link); + + /** + * @return The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements. + */ + static int minReqMonitorInterval() + { + return _minReqMonitorInterval; + } + + /** + * @return Whether the bonding layer is currently set up to be used. + */ + static bool inUse() + { + return ! _bondPolicyTemplates.empty() || _defaultPolicy; + } + + /** + * @param basePolicyName Bonding policy name (See ZeroTierOne.h) + * @return The bonding policy code for a given human-readable bonding policy name + */ + static int getPolicyCodeByStr(const std::string& basePolicyName) + { + if (basePolicyName == "active-backup") { + return 1; + } + if (basePolicyName == "broadcast") { + return 2; + } + if (basePolicyName == "balance-rr") { + return 3; + } + if (basePolicyName == "balance-xor") { + return 4; + } + if (basePolicyName == "balance-aware") { + return 5; + } + return 0; // "none" + } + + /** + * @param policy Bonding policy code (See ZeroTierOne.h) + * @return The human-readable name for the given bonding policy code + */ + static std::string getPolicyStrByCode(int policy) + { + if (policy == 1) { + return "active-backup"; + } + if (policy == 2) { + return "broadcast"; + } + if (policy == 3) { + return "balance-rr"; + } + if (policy == 4) { + return "balance-xor"; + } + if (policy == 5) { + return "balance-aware"; + } + return "none"; + } + + /** + * Sets the default bonding policy for new or undefined bonds. + * + * @param bp Bonding policy + */ + static void setBondingLayerDefaultPolicy(uint8_t bp) + { + _defaultPolicy = bp; + } + + /** + * Sets the default (custom) bonding policy for new or undefined bonds. + * + * @param alias Human-readable string alias for bonding policy + */ + static void setBondingLayerDefaultPolicyStr(std::string alias) + { + _defaultPolicyStr = alias; + } + + /** + * Add a user-defined link to a given bonding policy. + * + * @param policyAlias User-defined custom name for variant of bonding policy + * @param link Pointer to new link definition + */ + static void addCustomLink(std::string& policyAlias, SharedPtr link); + + /** + * Add a user-defined bonding policy that is based on one of the standard types. + * + * @param newBond Pointer to custom Bond object + * @return Whether a uniquely-named custom policy was successfully added + */ + static bool addCustomPolicy(const SharedPtr& newBond); + + /** + * Assigns a specific bonding policy + * + * @param identity + * @param policyAlias + * @return + */ + static bool assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias); + + /** + * Get pointer to bond by a given peer ID + * + * @param peer Remote peer ID + * @return A pointer to the Bond + */ + static SharedPtr getBondByPeerId(int64_t identity); + + /** + * Add a new bond to the bond controller. + * + * @param renv Runtime environment + * @param peer Remote peer that this bond services + * @return A pointer to the newly created Bond + */ + static SharedPtr createTransportTriggeredBond(const RuntimeEnvironment* renv, const SharedPtr& peer); + + /** + * Periodically perform maintenance tasks for the bonding layer. + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param now Current time + */ + static void processBackgroundTasks(void* tPtr, int64_t now); + + /** + * Gets a reference to a physical link definition given a policy alias and a local socket. + * + * @param policyAlias Policy in use + * @param localSocket Local source socket + * @return Physical link definition + */ + static SharedPtr getLinkBySocket(const std::string& policyAlias, uint64_t localSocket); + + /** + * Gets a reference to a physical link definition given its human-readable system name. + * + * @param policyAlias Policy in use + * @param ifname Alphanumeric human-readable name + * @return Physical link definition + */ + static SharedPtr getLinkByName(const std::string& policyAlias, const std::string& ifname); + + private: + static Phy* _phy; + + static Mutex _bonds_m; + static Mutex _links_m; + + /** + * The minimum required monitoring interval among all bonds + */ + static int _minReqMonitorInterval; + + /** + * The default bonding policy used for new bonds unless otherwise specified. + */ + static uint8_t _defaultPolicy; + + /** + * The default bonding policy used for new bonds unless otherwise specified. + */ + static std::string _defaultPolicyStr; + + /** + * All currently active bonds. + */ + static std::map > _bonds; + + /** + * Map of peers to custom bonding policies + */ + static std::map _policyTemplateAssignments; + + /** + * User-defined bonding policies (can be assigned to a peer) + */ + static std::map > _bondPolicyTemplates; + + /** + * Set of links defined for a given bonding policy + */ + static std::map > > _linkDefinitions; + + /** + * Set of link objects mapped to their physical interfaces + */ + static std::map > > _interfaceToLinkMap; + + struct NominatedPath; + struct Flow; + friend class SharedPtr; friend class Peer; - friend class BondController; - - struct PathQualityComparator { - bool operator()(const SharedPtr& a, const SharedPtr& b) - { - if (a->_failoverScore == b->_failoverScore) { - return a < b; - } - return a->_failoverScore > b->_failoverScore; - } - }; public: - // TODO: Remove - bool _header; - int64_t _lastLogTS; - int64_t _lastPrintTS; - void dumpInfo(const int64_t now); - bool relevant(); + void dumpInfo(int64_t now, bool force); + void dumpPathStatus(int64_t now, int pathIdx); SharedPtr getLink(const SharedPtr& path); + /** + * Constructor + * + * + */ + Bond(const RuntimeEnvironment* renv); + /** * Constructor. Creates a bond based off of ZT defaults * @@ -96,12 +572,16 @@ class Bond { * @param path Newly-learned Path which should now be handled by the Bond * @param now Current time */ - void nominatePath(const SharedPtr& path, int64_t now); + void nominatePathToBond(const SharedPtr& path, int64_t now); /** - * Propagate and memoize often-used bonding preferences for each path + * Add a nominated path to the bond. This merely maps the index from the nominated set + * to a smaller set and sets the path's bonded flag to true. + * + * @param nominatedIdx The index in the nominated set + * @param bondedIdx The index in the bonded set (subset of nominated) */ - void applyUserPrefs(); + void addPathToBond(int nominatedIdx, int bondedIdx); /** * Check path states and perform bond rebuilds if needed. @@ -109,7 +589,7 @@ class Bond { * @param now Current time * @param rebuild Whether or not the bond should be reconstructed. */ - void curateBond(const int64_t now, bool rebuild); + void curateBond(int64_t now, bool rebuild); /** * Periodically perform statistical summaries of quality metrics for all paths. @@ -149,15 +629,6 @@ class Bond { */ void receivedQoS(const SharedPtr& path, int64_t now, int count, uint64_t* rx_id, uint16_t* rx_ts); - /** - * Process the contents of an inbound VERB_ACK to gather path quality observations. - * - * @param path Path over which packet was received - * @param now Current time - * @param ackedBytes Number of bytes ACKed by this VERB_ACK - */ - void receivedAck(const SharedPtr& path, int64_t now, int32_t ackedBytes); - /** * Generate the contents of a VERB_QOS_MEASUREMENT packet. * @@ -165,7 +636,7 @@ class Bond { * @param qosBuffer destination buffer * @return Size of payload */ - int32_t generateQoSPacket(const SharedPtr& path, int64_t now, char* qosBuffer); + int32_t generateQoSPacket(int pathIdx, int64_t now, char* qosBuffer); /** * Record statistics for an inbound packet. @@ -192,13 +663,13 @@ class Bond { /** * Creates a new flow record * - * @param path Path over which flow shall be handled + * @param np Path over which flow shall be handled * @param flowId Flow ID * @param entropy A byte of entropy to be used by the bonding algorithm * @param now Current time * @return Pointer to newly-created Flow */ - SharedPtr createFlow(const SharedPtr& path, int32_t flowId, unsigned char entropy, int64_t now); + SharedPtr createFlow(int pathIdx, int32_t flowId, unsigned char entropy, int64_t now); /** * Removes flow records that are past a certain age limit. @@ -235,7 +706,7 @@ class Bond { * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call * @param now Current time */ - void pathNegotiationCheck(void* tPtr, const int64_t now); + void pathNegotiationCheck(void* tPtr, int64_t now); /** * Sends a VERB_ACK to the remote peer. @@ -246,7 +717,7 @@ class Bond { * @param atAddress * @param now Current time */ - void sendACK(void* tPtr, const SharedPtr& path, int64_t localSocket, const InetAddress& atAddress, int64_t now); + void sendACK(void* tPtr, int pathIdx, int64_t localSocket, const InetAddress& atAddress, int64_t now); /** * Sends a VERB_QOS_MEASUREMENT to the remote peer. @@ -257,7 +728,7 @@ class Bond { * @param atAddress * @param now Current time */ - void sendQOS_MEASUREMENT(void* tPtr, const SharedPtr& path, int64_t localSocket, const InetAddress& atAddress, int64_t now); + void sendQOS_MEASUREMENT(void* tPtr, int pathIdx, int64_t localSocket, const InetAddress& atAddress, int64_t now); /** * Sends a VERB_PATH_NEGOTIATION_REQUEST to the remote peer. @@ -265,7 +736,7 @@ class Bond { * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call * @param path Path over which packet should be sent */ - void sendPATH_NEGOTIATION_REQUEST(void* tPtr, const SharedPtr& path); + void sendPATH_NEGOTIATION_REQUEST(void* tPtr, int pathIdx); /** * @@ -296,7 +767,7 @@ class Bond { * @param policy Bonding policy * @param templateBond */ - void setReasonableDefaults(int policy, SharedPtr templateBond, bool useTemplate); + void setBondParameters(int policy, SharedPtr templateBond, bool useTemplate); /** * Check and assign user-specified quality weights to this bond. @@ -392,27 +863,7 @@ class Bond { * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call * @param now Current time */ - void processBackgroundTasks(void* tPtr, int64_t now); - - /** - * Rate limit gate for VERB_ACK - * - * @param now Current time - * @return Whether the incoming packet should be rate-gated - */ - inline bool rateGateACK(const int64_t now) - { - _ackCutoffCount++; - int numToDrain = _lastAckRateCheck ? (now - _lastAckRateCheck) / ZT_ACK_DRAINAGE_DIVISOR : _ackCutoffCount; - _lastAckRateCheck = now; - if (_ackCutoffCount > numToDrain) { - _ackCutoffCount -= numToDrain; - } - else { - _ackCutoffCount = 0; - } - return (_ackCutoffCount < ZT_ACK_CUTOFF_LIMIT); - } + void processBackgroundBondTasks(void* tPtr, int64_t now); /** * Rate limit gate for VERB_QOS_MEASUREMENT @@ -420,18 +871,21 @@ class Bond { * @param now Current time * @return Whether the incoming packet should be rate-gated */ - inline bool rateGateQoS(const int64_t now) + inline bool rateGateQoS(int64_t now, SharedPtr& path) { - _qosCutoffCount++; - int numToDrain = (now - _lastQoSRateCheck) / ZT_QOS_DRAINAGE_DIVISOR; - _lastQoSRateCheck = now; - if (_qosCutoffCount > numToDrain) { - _qosCutoffCount -= numToDrain; + // TODO: Verify before production + char pathStr[64] = { 0 }; + path->address().toString(pathStr); + int diff = now - _lastQoSRateCheck; + if ((diff) <= (_qosSendInterval / ZT_MAX_PEER_NETWORK_PATHS)) { + ++_qosCutoffCount; } else { _qosCutoffCount = 0; } - return (_qosCutoffCount < ZT_QOS_CUTOFF_LIMIT); + _lastQoSRateCheck = now; + // fprintf(stderr, "rateGateQoS (count=%d, send_interval=%d, diff=%d, path=%s)\n", _qosCutoffCount, _qosSendInterval, diff, pathStr); + return (_qosCutoffCount < (ZT_MAX_PEER_NETWORK_PATHS * 2)); } /** @@ -440,14 +894,21 @@ class Bond { * @param now Current time * @return Whether the incoming packet should be rate-gated */ - inline bool rateGatePathNegotiation(const int64_t now) + inline bool rateGatePathNegotiation(int64_t now, SharedPtr& path) { - if ((now - _lastPathNegotiationReceived) <= ZT_PATH_NEGOTIATION_CUTOFF_TIME) + // TODO: Verify before production + char pathStr[64] = { 0 }; + path->address().toString(pathStr); + int diff = now - _lastPathNegotiationReceived; + if ((diff) <= (ZT_PATH_NEGOTIATION_CUTOFF_TIME / ZT_MAX_PEER_NETWORK_PATHS)) { ++_pathNegotiationCutoffCount; - else + } + else { _pathNegotiationCutoffCount = 0; + } _lastPathNegotiationReceived = now; - return (_pathNegotiationCutoffCount < ZT_PATH_NEGOTIATION_CUTOFF_LIMIT); + // fprintf(stderr, "rateGateNeg (count=%d, send_interval=%d, diff=%d, path=%s)\n", _pathNegotiationCutoffCount, (ZT_PATH_NEGOTIATION_CUTOFF_TIME / ZT_MAX_PEER_NETWORK_PATHS), diff, pathStr); + return (_pathNegotiationCutoffCount < (ZT_MAX_PEER_NETWORK_PATHS * 2)); } /** @@ -482,14 +943,6 @@ class Bond { _linkMonitorStrategy = strategy; } - /** - * @param abOverflowEnabled Whether "overflow" mode is enabled for this active-backup bond - */ - inline void setOverflowMode(bool abOverflowEnabled) - { - _abOverflowEnabled = abOverflowEnabled; - } - /** * @return the current up delay parameter */ @@ -527,11 +980,11 @@ class Bond { } /** - * @return the current monitoring interval for the bond (can be overridden with intervals specific to certain links.) + * @return The current monitoring interval for the bond */ - inline uint16_t getBondMonitorInterval() + inline int monitorInterval() { - return _bondMonitorInterval; + return _monitorInterval; } /** @@ -541,23 +994,24 @@ class Bond { */ inline void setBondMonitorInterval(uint16_t interval) { - _bondMonitorInterval = interval; + _monitorInterval = interval; } /** * @param policy Bonding policy for this bond */ + /* inline void setPolicy(uint8_t policy) { - _bondingPolicy = policy; + _policy = policy; } - +*/ /** * @return the current bonding policy */ - inline uint8_t getPolicy() + inline uint8_t policy() { - return _bondingPolicy; + return _policy; } /** @@ -661,54 +1115,303 @@ class Bond { */ bool abForciblyRotateLink(); - SharedPtr getPeer() + /** + * @param now Current time + * @return All known paths to this peer + */ + inline std::vector > paths(const int64_t now) const { - return _peer; + std::vector > pp; + Mutex::Lock _l(_paths_m); + for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (! _paths[i].p) + break; + pp.push_back(_paths[i].p); + } + return pp; + } + + /** + * Emit message to tracing system but with added timestamp and subsystem info + * + * TODO: Will be replaced when better logging facilities exist in Trace.hpp + */ + void log(const char* fmt, ...) + { + // TODO: remove + if (_peerId != 0xe421efd4b9 && _peerId != 0xa03bcb43bb) { + return; + } + time_t rawtime; + struct tm* timeinfo; + char timestamp[80]; + time(&rawtime); + timeinfo = localtime(&rawtime); + strftime(timestamp, 80, "%F %T", timeinfo); +#define MAX_BOND_MSG_LEN 1024 + char traceMsg[MAX_BOND_MSG_LEN]; + char userMsg[MAX_BOND_MSG_LEN]; + va_list args; + va_start(args, fmt); + if (vsnprintf(userMsg, sizeof(userMsg), fmt, args) < 0) { + fprintf(stderr, "Encountered format encoding error while writing to trace log\n"); + return; + } + snprintf(traceMsg, MAX_BOND_MSG_LEN, "%s (%llx/%s) %s", timestamp, _peerId, _policyAlias.c_str(), userMsg); + va_end(args); + RR->t->bondStateMessage(NULL, traceMsg); +#undef MAX_MSG_LEN } private: + struct NominatedPath { + NominatedPath() + : lastQoSMeasurement(0) + , lastThroughputEstimation(0) + , lastRefractoryUpdate(0) + , lastAliveToggle(0) + , alive(false) + , eligible(true) + , whenNominated(0) + , refractoryPeriod(0) + , ipvPref(0) + , mode(0) + , onlyPathOnLink(false) + , bonded(false) + , negotiated(false) + , shouldReallocateFlows(false) + , assignedFlowCount(0) + , latencyMean(0) + , latencyVariance(0) + , packetLossRatio(0) + , packetErrorRatio(0) + , throughputMean(0) + , throughputMax(0) + , throughputVariance(0) + , allocation(0) + , byteLoad(0) + , relativeByteLoad(0) + , affinity(0) + , failoverScore(0) + , packetsReceivedSinceLastQoS(0) + , packetsIn(0) + , packetsOut(0) + { + } + + /** + * Set or update a refractory period for the path. + * + * @param punishment How much a path should be punished + * @param pathFailure Whether this call is the result of a recent path failure + */ + inline void adjustRefractoryPeriod(int64_t now, uint32_t punishment, bool pathFailure) + { + if (pathFailure) { + unsigned int suggestedRefractoryPeriod = refractoryPeriod ? punishment + (refractoryPeriod * 2) : punishment; + refractoryPeriod = std::min(suggestedRefractoryPeriod, (unsigned int)ZT_BOND_MAX_REFRACTORY_PERIOD); + lastRefractoryUpdate = 0; + } + else { + uint32_t drainRefractory = 0; + if (lastRefractoryUpdate) { + drainRefractory = (now - lastRefractoryUpdate); + } + else { + drainRefractory = (now - lastAliveToggle); + } + lastRefractoryUpdate = now; + if (refractoryPeriod > drainRefractory) { + refractoryPeriod -= drainRefractory; + } + else { + refractoryPeriod = 0; + lastRefractoryUpdate = 0; + } + } + } + + /** + * @return True if a path is permitted to be used in a bond (according to user pref.) + */ + inline bool allowed() + { + return (! ipvPref || ((p->_addr.isV4() && (ipvPref == 4 || ipvPref == 46 || ipvPref == 64)) || ((p->_addr.isV6() && (ipvPref == 6 || ipvPref == 46 || ipvPref == 64))))); + } + + /** + * @return True if a path is preferred over another on the same physical link (according to user pref.) + */ + inline bool preferred() + { + return onlyPathOnLink || (p->_addr.isV4() && (ipvPref == 4 || ipvPref == 46)) || (p->_addr.isV6() && (ipvPref == 6 || ipvPref == 64)); + } + + /** + * @param now Current time + * @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time + */ + inline bool needsToSendQoS(int64_t now, int qosSendInterval) + { + // fprintf(stderr, "QOS table (%d / %d)\n", packetsReceivedSinceLastQoS, ZT_QOS_TABLE_SIZE); + return ((packetsReceivedSinceLastQoS >= ZT_QOS_TABLE_SIZE) || ((now - lastQoSMeasurement) > qosSendInterval)) && packetsReceivedSinceLastQoS; + } + + /** + * Reset packet counters + */ + inline void resetPacketCounts() + { + packetsIn = 0; + packetsOut = 0; + } + + std::map qosStatsOut; // id:egress_time + std::map qosStatsIn; // id:now + + RingBuffer qosRecordSize; + RingBuffer qosRecordLossSamples; + RingBuffer throughputSamples; + RingBuffer packetValiditySamples; + RingBuffer throughputVarianceSamples; + RingBuffer latencySamples; + + uint64_t lastQoSMeasurement; // Last time that a VERB_QOS_MEASUREMENT was sent out on this path. + uint64_t lastThroughputEstimation; // Last time that the path's throughput was estimated. + uint64_t lastRefractoryUpdate; // The last time that the refractory period was updated. + uint64_t lastAliveToggle; // The last time that the path was marked as "alive". + bool alive; + bool eligible; // State of eligibility at last check. Used for determining state changes. + uint64_t whenNominated; // Timestamp indicating when this path's trial period began. + uint32_t refractoryPeriod; // Amount of time that this path will be prevented from becoming a member of a bond. + uint8_t ipvPref; // IP version preference inherited from the physical link. + uint8_t mode; // Mode inherited from the physical link. + bool onlyPathOnLink; // IP version preference inherited from the physical link. + bool enabled; // Enabled state inherited from the physical link. + bool bonded; // Whether this path is currently part of a bond. + bool negotiated; // Whether this path was intentionally negotiated by either peer. + bool shouldReallocateFlows; // Whether flows should be moved from this path. Current traffic flows will be re-allocated immediately. + uint16_t assignedFlowCount; // The number of flows currently assigned to this path. + float latencyMean; // The mean latency (computed from a sliding window.) + float latencyVariance; // Packet delay variance (computed from a sliding window.) + float packetLossRatio; // The ratio of lost packets to received packets. + float packetErrorRatio; // The ratio of packets that failed their MAC/CRC checks to those that did not. + uint64_t throughputMean; // The estimated mean throughput of this path. + uint64_t throughputMax; // The maximum observed throughput of this path. + float throughputVariance; // The variance in the estimated throughput of this path. + uint8_t allocation; // The relative quality of this path to all others in the bond, [0-255]. + uint64_t byteLoad; // How much load this path is under. + uint8_t relativeByteLoad; // How much load this path is under (relative to other paths in the bond.) + uint8_t affinity; // Relative value expressing how "deserving" this path is of new traffic. + uint32_t failoverScore; // Score that indicates to what degree this path is preferred over others that are available to the bonding policy. (specifically for active-backup) + int32_t packetsReceivedSinceLastQoS; // Number of packets received since the last VERB_QOS_MEASUREMENT was sent to the remote peer. + + /** + * Counters used for tracking path load. + */ + int packetsIn; + int packetsOut; + + AtomicCounter __refCount; + + SharedPtr p; + void set(uint64_t now, const SharedPtr& path) + { + p = path; + whenNominated = now; + p->_bondingMetricPtr = (void*)this; + } + }; + + /** + * Paths nominated to the bond (may or may not actually be bonded) + */ + NominatedPath _paths[ZT_MAX_PEER_NETWORK_PATHS]; + + inline int getNominatedPathIdx(const SharedPtr& path) + { + for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p == path) { + return i; + } + } + return ZT_MAX_PEER_NETWORK_PATHS; + } + + /** + * A protocol flow that is identified by the origin and destination port. + */ + struct Flow { + /** + * @param flowId Given flow ID + * @param now Current time + */ + Flow(int32_t flowId, int64_t now) : id(flowId), bytesIn(0), bytesOut(0), lastActivity(now), lastPathReassignment(0), assignedPath(ZT_MAX_PEER_NETWORK_PATHS) + { + } + + /** + * Reset flow statistics + */ + inline void resetByteCounts() + { + bytesIn = 0; + bytesOut = 0; + } + + /** + * How long since a packet was sent or received in this flow + * + * @param now Current time + * @return The age of the flow in terms of last recorded activity + */ + int64_t age(int64_t now) + { + return now - lastActivity; + } + + /** + * @param path Assigned path over which this flow should be handled + */ + inline void assignPath(int pathIdx, int64_t now) + { + assignedPath = pathIdx; + lastPathReassignment = now; + } + + AtomicCounter __refCount; + + int32_t id; // Flow ID used for hashing and path selection + uint64_t bytesIn; // Used for tracking flow size + uint64_t bytesOut; // Used for tracking flow size + int64_t lastActivity; // The last time that this flow handled traffic + int64_t lastPathReassignment; // Time of last path assignment. Used for anti-flapping + int assignedPath; // Index of path to which this flow is assigned + }; + const RuntimeEnvironment* RR; AtomicCounter __refCount; - /** - * Custom name given by the user to this bond type. - */ - std::string _policyAlias; - - /** - * Paths that this bond has been made aware of but that are not necessarily - * part of the bond proper. - */ - SharedPtr _paths[ZT_MAX_PEER_NETWORK_PATHS]; + std::string _policyAlias; // Custom name given by the user to this bond type. /** * Set of indices corresponding to paths currently included in the bond proper. This * may only be updated during a call to curateBond(). The reason for this is so that * we can simplify the high frequency packet egress logic. */ - int _bondedIdx[ZT_MAX_PEER_NETWORK_PATHS]; + int _bondIdxMap[ZT_MAX_PEER_NETWORK_PATHS]; + int _numBondedPaths; // Number of paths currently included in the _bondIdxMap set. + std::map > _flows; // Flows hashed according to port and protocol + float _qw[ZT_QOS_WEIGHT_SIZE]; // How much each factor contributes to the "quality" score of a path. - /** - * Number of paths currently included in the _bondedIdx set. - */ - int _numBondedPaths; - - /** - * Flows hashed according to port and protocol - */ - std::map > _flows; - - float _qualityWeights[ZT_QOS_WEIGHT_SIZE]; // How much each factor contributes to the "quality" score of a path. - - uint8_t _bondingPolicy; + uint8_t _policy; uint32_t _upDelay; uint32_t _downDelay; // active-backup - SharedPtr _abPath; // current active path - std::list > _abFailoverQueue; + int _abPathIdx; // current active path + std::deque _abFailoverQueue; uint8_t _abLinkSelectMethod; // link re-selection policy for the primary link in active-backup - bool _abOverflowEnabled; // balance-rr uint8_t _rrIdx; // index to path currently in use during Round Robin operation @@ -726,13 +1429,11 @@ class Bond { // dynamic link monitoring uint8_t _linkMonitorStrategy; - uint32_t _dynamicPathMonitorInterval; // path negotiation int16_t _localUtility; - SharedPtr negotiatedPath; + int negotiatedPathIdx; uint8_t _numSentPathNegotiationRequests; - unsigned int _pathNegotiationCutoffCount; bool _allowPathNegotiation; /** @@ -766,80 +1467,50 @@ class Bond { * drains linearly. For each eligibility change the remaining punishment is doubled. */ uint32_t _defaultPathRefractoryPeriod; + unsigned char _freeRandomByte; // Free byte of entropy that is updated on every packet egress event. + SharedPtr _peer; // Remote peer that this bond services + unsigned long long _peerId; // ID of the peer that this bond services /** - * Whether the current bonding policy requires computation of path statistics - */ - bool _shouldCollectPathStatistics; - - /** - * Free byte of entropy that is updated on every packet egress event. - */ - unsigned char _freeRandomByte; - - /** - * Remote peer that this bond services - */ - SharedPtr _peer; - - /** - * Rate-limit cutoffs + * Rate-limiting */ uint16_t _qosCutoffCount; - uint16_t _ackCutoffCount; + uint64_t _lastQoSRateCheck; + uint16_t _pathNegotiationCutoffCount; + uint64_t _lastPathNegotiationReceived; /** * Recent event timestamps */ - uint64_t _lastAckRateCheck; - uint64_t _lastQoSRateCheck; + uint64_t _lastSummaryDump; + uint64_t _lastQualityEstimation; - uint64_t _lastCheckUserPreferences; uint64_t _lastBackgroundTaskCheck; uint64_t _lastBondStatusLog; - uint64_t _lastPathNegotiationReceived; uint64_t _lastPathNegotiationCheck; uint64_t _lastSentPathNegotiationRequest; - uint64_t _lastFlowStatReset; uint64_t _lastFlowExpirationCheck; uint64_t _lastFlowRebalance; uint64_t _lastFrame; uint64_t _lastActiveBackupPathChange; Mutex _paths_m; + Mutex _flows_m; - /** - * Whether the user has specified links for this bond. - */ - bool _userHasSpecifiedLinks; - - /** - * Whether the user has specified a primary link for this bond. - */ - bool _userHasSpecifiedPrimaryLink; - - /** - * Whether the user has specified failover instructions for this bond. - */ - bool _userHasSpecifiedFailoverInstructions; - - /** - * Whether the user has specified links speeds for this bond. - */ - bool _userHasSpecifiedLinkSpeeds; - + bool _userHasSpecifiedLinks; // Whether the user has specified links for this bond. + bool _userHasSpecifiedPrimaryLink; // Whether the user has specified a primary link for this bond. + bool _userHasSpecifiedFailoverInstructions; // Whether the user has specified failover instructions for this bond. + bool _userHasSpecifiedLinkSpeeds; // Whether the user has specified links speeds for this bond. /** * How frequently (in ms) a VERB_ECHO is sent to a peer to verify that a * path is still active. A value of zero (0) will disable active path * monitoring; as result, all monitoring will be a function of traffic. */ - uint16_t _bondMonitorInterval; + int _monitorInterval; + bool _allowFlowHashing; // Whether or not flow hashing is allowed. - /** - * Whether or not flow hashing is allowed. - */ - bool _allowFlowHashing; + uint64_t _overheadBytes; }; } // namespace ZeroTier diff --git a/node/BondController.cpp b/node/BondController.cpp deleted file mode 100644 index 3b310ac35..000000000 --- a/node/BondController.cpp +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Copyright (c)2013-2020 ZeroTier, Inc. - * - * Use of this software is governed by the Business Source License included - * in the LICENSE.TXT file in the project's root directory. - * - * Change Date: 2025-01-01 - * - * On the date above, in accordance with the Business Source License, use - * of this software will be governed by version 2.0 of the Apache License. - */ -/****/ - -#include "BondController.hpp" - -#include "../osdep/OSUtils.hpp" -#include "Bond.hpp" -#include "Node.hpp" -#include "RuntimeEnvironment.hpp" - -namespace ZeroTier { - -int BondController::_minReqPathMonitorInterval; -uint8_t BondController::_defaultBondingPolicy; - -BondController::BondController(const RuntimeEnvironment* renv) : RR(renv) -{ - bondStartTime = RR->node->now(); - _defaultBondingPolicy = ZT_BONDING_POLICY_NONE; -} - -bool BondController::linkAllowed(std::string& policyAlias, SharedPtr link) -{ - bool foundInDefinitions = false; - if (_linkDefinitions.count(policyAlias)) { - auto it = _linkDefinitions[policyAlias].begin(); - while (it != _linkDefinitions[policyAlias].end()) { - if (link->ifname() == (*it)->ifname()) { - foundInDefinitions = true; - break; - } - ++it; - } - } - return _linkDefinitions[policyAlias].empty() || foundInDefinitions; -} - -void BondController::addCustomLink(std::string& policyAlias, SharedPtr link) -{ - Mutex::Lock _l(_links_m); - _linkDefinitions[policyAlias].push_back(link); - auto search = _interfaceToLinkMap[policyAlias].find(link->ifname()); - if (search == _interfaceToLinkMap[policyAlias].end()) { - link->setAsUserSpecified(true); - _interfaceToLinkMap[policyAlias].insert(std::pair >(link->ifname(), link)); - } -} - -bool BondController::addCustomPolicy(const SharedPtr& newBond) -{ - Mutex::Lock _l(_bonds_m); - if (! _bondPolicyTemplates.count(newBond->policyAlias())) { - _bondPolicyTemplates[newBond->policyAlias()] = newBond; - return true; - } - return false; -} - -bool BondController::assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias) -{ - Mutex::Lock _l(_bonds_m); - if (! _policyTemplateAssignments.count(identity)) { - _policyTemplateAssignments[identity] = policyAlias; - return true; - } - return false; -} - -SharedPtr BondController::getBondByPeerId(int64_t identity) -{ - Mutex::Lock _l(_bonds_m); - return _bonds.count(identity) ? _bonds[identity] : SharedPtr(); -} - -SharedPtr BondController::createTransportTriggeredBond(const RuntimeEnvironment* renv, const SharedPtr& peer) -{ - Mutex::Lock _l(_bonds_m); - int64_t identity = peer->identity().address().toInt(); - Bond* bond = nullptr; - char traceMsg[128]; - if (! _bonds.count(identity)) { - std::string policyAlias; - if (! _policyTemplateAssignments.count(identity)) { - if (_defaultBondingPolicy) { - sprintf(traceMsg, "%s (bond) Creating new default %s bond to peer %llx", OSUtils::humanReadableTimestamp().c_str(), getPolicyStrByCode(_defaultBondingPolicy).c_str(), (unsigned long long)identity); - RR->t->bondStateMessage(NULL, traceMsg); - bond = new Bond(renv, _defaultBondingPolicy, peer); - } - if (! _defaultBondingPolicy && _defaultBondingPolicyStr.length()) { - sprintf(traceMsg, "%s (bond) Creating new default custom %s bond to peer %llx", OSUtils::humanReadableTimestamp().c_str(), _defaultBondingPolicyStr.c_str(), (unsigned long long)identity); - RR->t->bondStateMessage(NULL, traceMsg); - bond = new Bond(renv, _bondPolicyTemplates[_defaultBondingPolicyStr].ptr(), peer); - } - } - else { - if (! _bondPolicyTemplates[_policyTemplateAssignments[identity]]) { - sprintf( - traceMsg, - "%s (bond) Creating new bond. Assignment for peer %llx was specified as %s but the bond definition was not found. Using default %s", - OSUtils::humanReadableTimestamp().c_str(), - (unsigned long long)identity, - _policyTemplateAssignments[identity].c_str(), - getPolicyStrByCode(_defaultBondingPolicy).c_str()); - RR->t->bondStateMessage(NULL, traceMsg); - bond = new Bond(renv, _defaultBondingPolicy, peer); - } - else { - sprintf(traceMsg, "%s (bond) Creating new default bond %s to peer %llx", OSUtils::humanReadableTimestamp().c_str(), _defaultBondingPolicyStr.c_str(), (unsigned long long)identity); - RR->t->bondStateMessage(NULL, traceMsg); - bond = new Bond(renv, _bondPolicyTemplates[_policyTemplateAssignments[identity]].ptr(), peer); - } - } - } - if (bond) { - _bonds[identity] = bond; - /** - * Determine if user has specified anything that could affect the bonding policy's decisions - */ - if (_interfaceToLinkMap.count(bond->policyAlias())) { - std::map >::iterator it = _interfaceToLinkMap[bond->policyAlias()].begin(); - while (it != _interfaceToLinkMap[bond->policyAlias()].end()) { - if (it->second->isUserSpecified()) { - bond->_userHasSpecifiedLinks = true; - } - if (it->second->isUserSpecified() && it->second->primary()) { - bond->_userHasSpecifiedPrimaryLink = true; - } - if (it->second->isUserSpecified() && it->second->userHasSpecifiedFailoverInstructions()) { - bond->_userHasSpecifiedFailoverInstructions = true; - } - if (it->second->isUserSpecified() && (it->second->speed() > 0)) { - bond->_userHasSpecifiedLinkSpeeds = true; - } - ++it; - } - } - return bond; - } - return SharedPtr(); -} - -SharedPtr BondController::getLinkBySocket(const std::string& policyAlias, uint64_t localSocket) -{ - Mutex::Lock _l(_links_m); - char ifname[16]; - _phy->getIfName((PhySocket*)((uintptr_t)localSocket), ifname, 16); - std::string ifnameStr(ifname); - auto search = _interfaceToLinkMap[policyAlias].find(ifnameStr); - if (search == _interfaceToLinkMap[policyAlias].end()) { - SharedPtr s = new Link(ifnameStr, 0, 0, 0, 0, 0, true, ZT_MULTIPATH_SLAVE_MODE_SPARE, "", 0.0); - _interfaceToLinkMap[policyAlias].insert(std::pair >(ifnameStr, s)); - return s; - } - else { - return search->second; - } -} - -SharedPtr BondController::getLinkByName(const std::string& policyAlias, const std::string& ifname) -{ - Mutex::Lock _l(_links_m); - auto search = _interfaceToLinkMap[policyAlias].find(ifname); - if (search != _interfaceToLinkMap[policyAlias].end()) { - return search->second; - } - return SharedPtr(); -} - -bool BondController::allowedToBind(const std::string& ifname) -{ - return true; - /* - if (!_defaultBondingPolicy) { - return true; // no restrictions - } - Mutex::Lock _l(_links_m); - if (_interfaceToLinkMap.empty()) { - return true; // no restrictions - } - std::map > >::iterator policyItr = _interfaceToLinkMap.begin(); - while (policyItr != _interfaceToLinkMap.end()) { - std::map >::iterator linkItr = policyItr->second.begin(); - while (linkItr != policyItr->second.end()) { - if (linkItr->first == ifname) { - return true; - } - ++linkItr; - } - ++policyItr; - } - return false; - */ -} - -void BondController::processBackgroundTasks(void* tPtr, const int64_t now) -{ - Mutex::Lock _l(_bonds_m); - std::map >::iterator bondItr = _bonds.begin(); - while (bondItr != _bonds.end()) { - bondItr->second->processBackgroundTasks(tPtr, now); - ++bondItr; - } -} - -} // namespace ZeroTier diff --git a/node/BondController.hpp b/node/BondController.hpp deleted file mode 100644 index 1e96a0f40..000000000 --- a/node/BondController.hpp +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Copyright (c)2013-2020 ZeroTier, Inc. - * - * Use of this software is governed by the Business Source License included - * in the LICENSE.TXT file in the project's root directory. - * - * Change Date: 2025-01-01 - * - * On the date above, in accordance with the Business Source License, use - * of this software will be governed by version 2.0 of the Apache License. - */ -/****/ - -#ifndef ZT_BONDCONTROLLER_HPP -#define ZT_BONDCONTROLLER_HPP - -#include "../osdep/Link.hpp" -#include "../osdep/Phy.hpp" -#include "SharedPtr.hpp" - -#include -#include - -namespace ZeroTier { - -class RuntimeEnvironment; -class Bond; -class Peer; -class Mutex; - -class BondController { - friend class Bond; - - public: - BondController(const RuntimeEnvironment* renv); - - /** - * @return Whether this link is permitted to become a member of a bond. - */ - bool linkAllowed(std::string& policyAlias, SharedPtr link); - - /** - * @return The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements. - */ - int minReqPathMonitorInterval() - { - return _minReqPathMonitorInterval; - } - - /** - * @param minReqPathMonitorInterval The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements. - */ - static void setMinReqPathMonitorInterval(int minReqPathMonitorInterval) - { - _minReqPathMonitorInterval = minReqPathMonitorInterval; - } - - /** - * @return Whether the bonding layer is currently set up to be used. - */ - bool inUse() - { - return ! _bondPolicyTemplates.empty() || _defaultBondingPolicy; - } - - /** - * @param basePolicyName Bonding policy name (See ZeroTierOne.h) - * @return The bonding policy code for a given human-readable bonding policy name - */ - static int getPolicyCodeByStr(const std::string& basePolicyName) - { - if (basePolicyName == "active-backup") { - return 1; - } - if (basePolicyName == "broadcast") { - return 2; - } - if (basePolicyName == "balance-rr") { - return 3; - } - if (basePolicyName == "balance-xor") { - return 4; - } - if (basePolicyName == "balance-aware") { - return 5; - } - return 0; // "none" - } - - /** - * @param policy Bonding policy code (See ZeroTierOne.h) - * @return The human-readable name for the given bonding policy code - */ - static std::string getPolicyStrByCode(int policy) - { - if (policy == 1) { - return "active-backup"; - } - if (policy == 2) { - return "broadcast"; - } - if (policy == 3) { - return "balance-rr"; - } - if (policy == 4) { - return "balance-xor"; - } - if (policy == 5) { - return "balance-aware"; - } - return "none"; - } - - /** - * Sets the default bonding policy for new or undefined bonds. - * - * @param bp Bonding policy - */ - void setBondingLayerDefaultPolicy(uint8_t bp) - { - _defaultBondingPolicy = bp; - } - - /** - * Sets the default (custom) bonding policy for new or undefined bonds. - * - * @param alias Human-readable string alias for bonding policy - */ - void setBondingLayerDefaultPolicyStr(std::string alias) - { - _defaultBondingPolicyStr = alias; - } - - /** - * @return The default bonding policy - */ - static int defaultBondingPolicy() - { - return _defaultBondingPolicy; - } - - /** - * Add a user-defined link to a given bonding policy. - * - * @param policyAlias User-defined custom name for variant of bonding policy - * @param link Pointer to new link definition - */ - void addCustomLink(std::string& policyAlias, SharedPtr link); - - /** - * Add a user-defined bonding policy that is based on one of the standard types. - * - * @param newBond Pointer to custom Bond object - * @return Whether a uniquely-named custom policy was successfully added - */ - bool addCustomPolicy(const SharedPtr& newBond); - - /** - * Assigns a specific bonding policy - * - * @param identity - * @param policyAlias - * @return - */ - bool assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias); - - /** - * Get pointer to bond by a given peer ID - * - * @param peer Remote peer ID - * @return A pointer to the Bond - */ - SharedPtr getBondByPeerId(int64_t identity); - - /** - * Add a new bond to the bond controller. - * - * @param renv Runtime environment - * @param peer Remote peer that this bond services - * @return A pointer to the newly created Bond - */ - SharedPtr createTransportTriggeredBond(const RuntimeEnvironment* renv, const SharedPtr& peer); - - /** - * Periodically perform maintenance tasks for the bonding layer. - * - * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call - * @param now Current time - */ - void processBackgroundTasks(void* tPtr, int64_t now); - - /** - * Gets a reference to a physical link definition given a policy alias and a local socket. - * - * @param policyAlias Policy in use - * @param localSocket Local source socket - * @return Physical link definition - */ - SharedPtr getLinkBySocket(const std::string& policyAlias, uint64_t localSocket); - - /** - * Gets a reference to a physical link definition given its human-readable system name. - * - * @param policyAlias Policy in use - * @param ifname Alphanumeric human-readable name - * @return Physical link definition - */ - SharedPtr getLinkByName(const std::string& policyAlias, const std::string& ifname); - - /** - * @param ifname Name of interface that we want to know if we can bind to - */ - bool allowedToBind(const std::string& ifname); - - uint64_t getBondStartTime() - { - return bondStartTime; - } - - private: - Phy* _phy; - const RuntimeEnvironment* RR; - - Mutex _bonds_m; - Mutex _links_m; - - /** - * The last time that the bond controller updated the set of bonds. - */ - uint64_t _lastBackgroundBondControlTaskCheck; - - /** - * The minimum monitoring interval among all paths in this bond. - */ - static int _minReqPathMonitorInterval; - - /** - * The default bonding policy used for new bonds unless otherwise specified. - */ - static uint8_t _defaultBondingPolicy; - - /** - * The default bonding policy used for new bonds unless otherwise specified. - */ - std::string _defaultBondingPolicyStr; - - /** - * All currently active bonds. - */ - std::map > _bonds; - - /** - * Map of peers to custom bonding policies - */ - std::map _policyTemplateAssignments; - - /** - * User-defined bonding policies (can be assigned to a peer) - */ - std::map > _bondPolicyTemplates; - - /** - * Set of links defined for a given bonding policy - */ - std::map > > _linkDefinitions; - - /** - * Set of link objects mapped to their physical interfaces - */ - std::map > > _interfaceToLinkMap; - - // TODO: Remove - uint64_t bondStartTime; -}; - -} // namespace ZeroTier - -#endif diff --git a/node/Constants.hpp b/node/Constants.hpp index 400976c13..859fc2b66 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -361,7 +361,7 @@ /** * Maximum number of outgoing packets we monitor for QoS information */ -#define ZT_QOS_MAX_OUTSTANDING_RECORDS (1024*16) +#define ZT_QOS_MAX_OUTSTANDING_RECORDS (1024 * 16) /** * Interval used for rate-limiting the computation of path quality estimates. @@ -403,117 +403,11 @@ /** * All unspecified traffic is put in this bucket. Anything in a bucket with a - * smaller value is deprioritized. Anything in a bucket with a higher value is + * smaller value is de-prioritized. Anything in a bucket with a higher value is prioritized over other traffic. */ #define ZT_AQM_DEFAULT_BUCKET 0 -/** - * How often we emit a one-liner bond summary for each peer - */ -#define ZT_MULTIPATH_BOND_STATUS_INTERVAL 60000 - -/** - * How long before we consider a path to be dead in the general sense. This is - * used while searching for default or alternative paths to try in the absence - * of direct guidance from the user or a selection policy. - */ -#define ZT_MULTIPATH_DEFAULT_FAILOVER_INTERVAL 10000 - -/** - * How often flows are evaluated - */ -#define ZT_MULTIPATH_FLOW_CHECK_INTERVAL 10000 - -/** - * How long before we consider a flow to be dead and remove it from the - * policy's list. - */ -#define ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL (60000 * 5) - -/** - * How often a flow's statistical counters are reset - */ -#define ZT_FLOW_STATS_RESET_INTERVAL ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL - -/** - * Maximum number of flows allowed before we start forcibly forgetting old ones - */ -#define ZT_FLOW_MAX_COUNT (1024*64) - -/** - * How often flows are rebalanced across link (if at all) - */ -#define ZT_FLOW_MIN_REBALANCE_INTERVAL 5000 - -/** - * How often flows are rebalanced across link (if at all) - */ -#define ZT_FLOW_REBALANCE_INTERVAL 5000 - -/** - * A defensive timer to prevent path quality metrics from being - * processed too often. - */ -#define ZT_BOND_BACKGROUND_TASK_MIN_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY - -/** - * How often a bonding policy's background tasks are processed, - * some need more frequent attention than others. - */ -#define ZT_MULTIPATH_ACTIVE_BACKUP_CHECK_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY - -/** - * Minimum amount of time (since a previous transition) before the active-backup bonding - * policy is allowed to transition to a different link. Only valid for active-backup. - */ -#define ZT_MULTIPATH_MIN_ACTIVE_BACKUP_AUTOFLOP_INTERVAL 10000 - -/** - * How often a peer checks that incoming (and outgoing) traffic on a bonded link is - * appropriately paired. - */ -#define ZT_PATH_NEGOTIATION_CHECK_INTERVAL 15000 - -/** - * Time horizon for path negotiation paths cutoff - */ -#define ZT_PATH_NEGOTIATION_CUTOFF_TIME 60000 - -/** - * Maximum number of path negotiations within cutoff time - * - * This limits response to PATH_NEGOTIATION to CUTOFF_LIMIT responses - * per CUTOFF_TIME milliseconds per peer to prevent this from being - * useful for DOS amplification attacks. - */ -#define ZT_PATH_NEGOTIATION_CUTOFF_LIMIT 8 - -/** - * How many times a peer will attempt to petition another peer to synchronize its - * traffic to the same path before giving up and surrendering to the other peer's preference. - */ -#define ZT_PATH_NEGOTIATION_TRY_COUNT 3 - -/** - * How much greater the quality of a path should be before an - * optimization procedure triggers a switch. - */ -#define ZT_MULTIPATH_ACTIVE_BACKUP_OPTIMIZE_MIN_THRESHOLD 0.10 - -/** - * Artificially inflates the failover score for paths which meet - * certain non-performance-related policy ranking criteria. - */ -#define ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED 500 -#define ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY 1000 -#define ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED 5000 - -/** - * An indicator that no flow is to be associated with the given packet - */ -#define ZT_QOS_NO_FLOW -1 - /** * Timeout for overall peer activity (measured from last receive) */ @@ -604,8 +498,8 @@ #define ZT_ACK_CUTOFF_LIMIT 128 #define ZT_ACK_DRAINAGE_DIVISOR (1000 / ZT_ACK_CUTOFF_LIMIT) -#define ZT_MULTIPATH_DEFAULT_REFRCTORY_PERIOD 8000 -#define ZT_MULTIPATH_MAX_REFRACTORY_PERIOD 600000 +#define ZT_BOND_DEFAULT_REFRCTORY_PERIOD 8000 +#define ZT_BOND_MAX_REFRACTORY_PERIOD 600000 /** * Maximum number of direct path pushes within cutoff time @@ -641,6 +535,92 @@ */ #define ZT_PEER_GENERAL_RATE_LIMIT 1000 + +/** + * Minimum allowed amount of time between flow/path optimizations (anti-flapping) + */ +#define ZT_BOND_OPTIMIZE_INTERVAL 15000 + +/** + * Maximum number of flows allowed before we start forcibly forgetting old ones + */ +#define ZT_FLOW_MAX_COUNT (1024 * 64) + +/** + * How often we emit a bond summary for each bond + */ +#define ZT_BOND_STATUS_INTERVAL 3000 + +/** + * How long before we consider a path to be dead in the general sense. This is + * used while searching for default or alternative paths to try in the absence + * of direct guidance from the user or a selection policy. + */ +#define ZT_BOND_FAILOVER_DEFAULT_INTERVAL 5000 + +/** + * Anything below this value gets into thrashing territory since we divide + * this value by ZT_BOND_ECHOS_PER_FAILOVER_INTERVAL to send ECHOs often. + */ +#define ZT_BOND_FAILOVER_MIN_INTERVAL 250 + +/** + * How many times per failover interval that an ECHO is sent. This should be + * at least 2. Anything more then 4 starts to increase overhead significantly. + */ +#define ZT_BOND_ECHOS_PER_FAILOVER_INTERVAL 4 + +/** + * A defensive timer to prevent path quality metrics from being + * processed too often. + */ +#define ZT_BOND_BACKGROUND_TASK_MIN_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY + +/** + * How often a bonding policy's background tasks are processed, + * some need more frequent attention than others. + */ +#define ZT_BOND_ACTIVE_BACKUP_CHECK_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY + +/** + * Time horizon for path negotiation paths cutoff + */ +#define ZT_PATH_NEGOTIATION_CUTOFF_TIME 60000 + +/** + * Maximum number of path negotiations within cutoff time + * + * This limits response to PATH_NEGOTIATION to CUTOFF_LIMIT responses + * per CUTOFF_TIME milliseconds per peer to prevent this from being + * useful for DOS amplification attacks. + */ +#define ZT_PATH_NEGOTIATION_CUTOFF_LIMIT 8 + +/** + * How many times a peer will attempt to petition another peer to synchronize its + * traffic to the same path before giving up and surrendering to the other peer's preference. + */ +#define ZT_PATH_NEGOTIATION_TRY_COUNT 3 + +/** + * How much greater the quality of a path should be before an + * optimization procedure triggers a switch. + */ +#define ZT_BOND_ACTIVE_BACKUP_OPTIMIZE_MIN_THRESHOLD 0.10 + +/** + * Artificially inflates the failover score for paths which meet + * certain non-performance-related policy ranking criteria. + */ +#define ZT_BOND_FAILOVER_HANDICAP_PREFERRED 500 +#define ZT_BOND_FAILOVER_HANDICAP_PRIMARY 1000 +#define ZT_BOND_FAILOVER_HANDICAP_NEGOTIATED 5000 + +/** + * An indicator that no flow is to be associated with the given packet + */ +#define ZT_QOS_NO_FLOW -1 + /** * Don't do expensive identity validation more often than this * diff --git a/node/Flow.hpp b/node/Flow.hpp deleted file mode 100644 index 91986ddee..000000000 --- a/node/Flow.hpp +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c)2013-2020 ZeroTier, Inc. - * - * Use of this software is governed by the Business Source License included - * in the LICENSE.TXT file in the project's root directory. - * - * Change Date: 2025-01-01 - * - * On the date above, in accordance with the Business Source License, use - * of this software will be governed by version 2.0 of the Apache License. - */ -/****/ - -#ifndef ZT_FLOW_HPP -#define ZT_FLOW_HPP - -#include "Path.hpp" -#include "SharedPtr.hpp" - -namespace ZeroTier { - -/** - * A protocol flow that is identified by the origin and destination port. - */ -struct Flow { - /** - * @param flowId Given flow ID - * @param now Current time - */ - Flow(int32_t flowId, int64_t now) : _flowId(flowId), _bytesInPerUnitTime(0), _bytesOutPerUnitTime(0), _lastActivity(now), _lastPathReassignment(0), _assignedPath(SharedPtr()) - { - } - - /** - * Reset flow statistics - */ - void resetByteCounts() - { - _bytesInPerUnitTime = 0; - _bytesOutPerUnitTime = 0; - } - - /** - * @return The Flow's ID - */ - int32_t id() - { - return _flowId; - } - - /** - * @return Number of incoming bytes processed on this flow per unit time - */ - int64_t bytesInPerUnitTime() - { - return _bytesInPerUnitTime; - } - - /** - * Record number of incoming bytes on this flow - * - * @param bytes Number of incoming bytes - */ - void recordIncomingBytes(uint64_t bytes) - { - _bytesInPerUnitTime += bytes; - } - - /** - * @return Number of outgoing bytes processed on this flow per unit time - */ - int64_t bytesOutPerUnitTime() - { - return _bytesOutPerUnitTime; - } - - /** - * Record number of outgoing bytes on this flow - * - * @param bytes - */ - void recordOutgoingBytes(uint64_t bytes) - { - _bytesOutPerUnitTime += bytes; - } - - /** - * @return The total number of bytes processed on this flow - */ - uint64_t totalBytes() - { - return _bytesInPerUnitTime + _bytesOutPerUnitTime; - } - - /** - * How long since a packet was sent or received in this flow - * - * @param now Current time - * @return The age of the flow in terms of last recorded activity - */ - int64_t age(int64_t now) - { - return now - _lastActivity; - } - - /** - * Record that traffic was processed on this flow at the given time. - * - * @param now Current time - */ - void updateActivity(int64_t now) - { - _lastActivity = now; - } - - /** - * @return Path assigned to this flow - */ - SharedPtr assignedPath() - { - return _assignedPath; - } - - /** - * @param path Assigned path over which this flow should be handled - */ - void assignPath(const SharedPtr& path, int64_t now) - { - _assignedPath = path; - _lastPathReassignment = now; - } - - AtomicCounter __refCount; - - int32_t _flowId; - uint64_t _bytesInPerUnitTime; - uint64_t _bytesOutPerUnitTime; - int64_t _lastActivity; - int64_t _lastPathReassignment; - SharedPtr _assignedPath; - SharedPtr _previouslyAssignedPath; -}; - -} // namespace ZeroTier - -#endif \ No newline at end of file diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index bcde85cc4..5a2a94642 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -88,7 +88,6 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr,int32_t f peer->received(tPtr,_path,hops(),packetId(),payloadLength(),v,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); break; case Packet::VERB_HELLO: r = _doHELLO(RR,tPtr,true); break; - case Packet::VERB_ACK: r = _doACK(RR,tPtr,peer); break; case Packet::VERB_QOS_MEASUREMENT: r = _doQOS_MEASUREMENT(RR,tPtr,peer); break; case Packet::VERB_ERROR: r = _doERROR(RR,tPtr,peer); break; case Packet::VERB_OK: r = _doOK(RR,tPtr,peer); break; @@ -222,35 +221,12 @@ bool IncomingPacket::_doERROR(const RuntimeEnvironment *RR,void *tPtr,const Shar return true; } -bool IncomingPacket::_doACK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) -{ - SharedPtr bond = peer->bond(); - if (!bond || !bond->rateGateACK(RR->node->now())) { - return true; - } - /* Dissect incoming ACK packet. From this we can estimate current throughput of the path, establish known - * maximums and detect packet loss. */ - int32_t ackedBytes; - if (payloadLength() != sizeof(ackedBytes)) { - return true; // ignore - } - memcpy(&ackedBytes, payload(), sizeof(ackedBytes)); - if (bond) { - bond->receivedAck(_path, RR->node->now(), Utils::ntoh(ackedBytes)); - } - return true; -} - bool IncomingPacket::_doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) { SharedPtr bond = peer->bond(); - /* TODO: Fix rate gate issue - if (!bond || !bond->rateGateQoS(RR->node->now())) { + if (!bond || !bond->rateGateQoS(RR->node->now(), _path)) { return true; } - */ - /* Dissect incoming QoS packet. From this we can compute latency values and their variance. - * The latency variance is used as a measure of "jitter". */ if (payloadLength() > ZT_QOS_MAX_PACKET_SIZE || payloadLength() < ZT_QOS_MIN_PACKET_SIZE) { return true; // ignore } @@ -1329,7 +1305,7 @@ bool IncomingPacket::_doPATH_NEGOTIATION_REQUEST(const RuntimeEnvironment *RR,vo { uint64_t now = RR->node->now(); SharedPtr bond = peer->bond(); - if (!bond || !bond->rateGatePathNegotiation(now)) { + if (!bond || !bond->rateGatePathNegotiation(now, _path)) { return true; } if (payloadLength() != sizeof(int16_t)) { diff --git a/node/IncomingPacket.hpp b/node/IncomingPacket.hpp index 134b5b3d0..95785795a 100644 --- a/node/IncomingPacket.hpp +++ b/node/IncomingPacket.hpp @@ -112,7 +112,6 @@ private: // been authenticated, decrypted, decompressed, and classified. bool _doERROR(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doHELLO(const RuntimeEnvironment *RR,void *tPtr,const bool alreadyAuthenticated); - bool _doACK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); diff --git a/node/Node.cpp b/node/Node.cpp index 0c443f828..5b0fa8cc5 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -103,7 +103,7 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64 const unsigned long mcs = sizeof(Multicaster) + (((sizeof(Multicaster) & 0xf) != 0) ? (16 - (sizeof(Multicaster) & 0xf)) : 0); const unsigned long topologys = sizeof(Topology) + (((sizeof(Topology) & 0xf) != 0) ? (16 - (sizeof(Topology) & 0xf)) : 0); const unsigned long sas = sizeof(SelfAwareness) + (((sizeof(SelfAwareness) & 0xf) != 0) ? (16 - (sizeof(SelfAwareness) & 0xf)) : 0); - const unsigned long bc = sizeof(BondController) + (((sizeof(BondController) & 0xf) != 0) ? (16 - (sizeof(BondController) & 0xf)) : 0); + const unsigned long bc = sizeof(Bond) + (((sizeof(Bond) & 0xf) != 0) ? (16 - (sizeof(Bond) & 0xf)) : 0); m = reinterpret_cast(::malloc(16 + ts + sws + mcs + topologys + sas + bc)); if (!m) @@ -121,14 +121,14 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64 m += topologys; RR->sa = new (m) SelfAwareness(RR); m += sas; - RR->bc = new (m) BondController(RR); + RR->bc = new (m) Bond(RR); } catch ( ... ) { if (RR->sa) RR->sa->~SelfAwareness(); if (RR->topology) RR->topology->~Topology(); if (RR->mc) RR->mc->~Multicaster(); if (RR->sw) RR->sw->~Switch(); if (RR->t) RR->t->~Trace(); - if (RR->bc) RR->bc->~BondController(); + if (RR->bc) RR->bc->~Bond(); ::free(m); throw; } @@ -147,7 +147,7 @@ Node::~Node() if (RR->mc) RR->mc->~Multicaster(); if (RR->sw) RR->sw->~Switch(); if (RR->t) RR->t->~Trace(); - if (RR->bc) RR->bc->~BondController(); + if (RR->bc) RR->bc->~Bond(); ::free(RR->rtmem); } @@ -252,18 +252,14 @@ ZT_ResultCode Node::processBackgroundTasks(void *tptr,int64_t now,volatile int64 _now = now; Mutex::Lock bl(_backgroundTasksLock); - - unsigned long bondCheckInterval = ZT_CORE_TIMER_TASK_GRANULARITY; + // Process background bond tasks + unsigned long bondCheckInterval = ZT_PING_CHECK_INVERVAL; if (RR->bc->inUse()) { - // Gratuitously ping active peers so that QoS metrics have enough data to work with (if active path monitoring is enabled) - bondCheckInterval = std::min(std::max(RR->bc->minReqPathMonitorInterval(), ZT_CORE_TIMER_TASK_GRANULARITY), ZT_PING_CHECK_INVERVAL); - if ((now - _lastGratuitousPingCheck) >= bondCheckInterval) { - Hashtable< Address,std::vector > alwaysContact; - _PingPeersThatNeedPing pfunc(RR,tptr,alwaysContact,now); - RR->topology->eachPeer<_PingPeersThatNeedPing &>(pfunc); + bondCheckInterval = std::max(RR->bc->minReqMonitorInterval(), ZT_CORE_TIMER_TASK_GRANULARITY); + if ((now - _lastGratuitousPingCheck) >= ZT_CORE_TIMER_TASK_GRANULARITY) { _lastGratuitousPingCheck = now; + RR->bc->processBackgroundTasks(tptr, now); } - RR->bc->processBackgroundTasks(tptr, now); } unsigned long timeUntilNextPingCheck = ZT_PING_CHECK_INVERVAL; @@ -512,7 +508,7 @@ ZT_PeerList *Node::peers() const } if (pi->second->bond()) { p->isBonded = pi->second->bond(); - p->bondingPolicy = pi->second->bond()->getPolicy(); + p->bondingPolicy = pi->second->bond()->policy(); p->isHealthy = pi->second->bond()->isHealthy(); p->numAliveLinks = pi->second->bond()->getNumAliveLinks(); p->numTotalLinks = pi->second->bond()->getNumTotalLinks(); diff --git a/node/Node.hpp b/node/Node.hpp index 913bc7142..52506ed9e 100644 --- a/node/Node.hpp +++ b/node/Node.hpp @@ -34,7 +34,7 @@ #include "Salsa20.hpp" #include "NetworkController.hpp" #include "Hashtable.hpp" -#include "BondController.hpp" +#include "Bond.hpp" // Bit mask for "expecting reply" hash #define ZT_EXPECTING_REPLIES_BUCKET_MASK1 255 @@ -187,7 +187,7 @@ public: inline const Identity &identity() const { return _RR.identity; } - inline BondController *bondController() const { return _RR.bc; } + inline Bond *bondController() const { return _RR.bc; } /** * Register that we are expecting a reply to a packet ID diff --git a/node/Path.hpp b/node/Path.hpp index 0839158af..753bf0ab2 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -29,8 +29,6 @@ #include "Packet.hpp" #include "RingBuffer.hpp" -#include "../osdep/Link.hpp" - /** * Maximum return value of preferenceRank() */ @@ -88,46 +86,7 @@ public: _localSocket(-1), _latency(0xffff), _addr(), - _ipScope(InetAddress::IP_SCOPE_NONE), - _lastAckReceived(0), - _lastAckSent(0), - _lastQoSMeasurement(0), - _lastThroughputEstimation(0), - _lastRefractoryUpdate(0), - _lastAliveToggle(0), - _lastEligibilityState(false), - _lastTrialBegin(0), - _refractoryPeriod(0), - _monitorInterval(0), - _upDelay(0), - _downDelay(0), - _ipvPref(0), - _mode(0), - _onlyPathOnLink(false), - _enabled(false), - _bonded(false), - _negotiated(false), - _deprecated(false), - _shouldReallocateFlows(false), - _assignedFlowCount(0), - _latencyMean(0), - _latencyVariance(0), - _packetLossRatio(0), - _packetErrorRatio(0), - _throughputMean(0), - _throughputMax(0), - _throughputVariance(0), - _allocation(0), - _byteLoad(0), - _relativeByteLoad(0), - _affinity(0), - _failoverScore(0), - _unackedBytes(0), - _packetsReceivedSinceLastAck(0), - _packetsReceivedSinceLastQoS(0), - _bytesAckedSinceLastThroughputEstimation(0), - _packetsIn(0), - _packetsOut(0) + _ipScope(InetAddress::IP_SCOPE_NONE) {} Path(const int64_t localSocket,const InetAddress &addr) : @@ -137,46 +96,7 @@ public: _localSocket(localSocket), _latency(0xffff), _addr(addr), - _ipScope(addr.ipScope()), - _lastAckReceived(0), - _lastAckSent(0), - _lastQoSMeasurement(0), - _lastThroughputEstimation(0), - _lastRefractoryUpdate(0), - _lastAliveToggle(0), - _lastEligibilityState(false), - _lastTrialBegin(0), - _refractoryPeriod(0), - _monitorInterval(0), - _upDelay(0), - _downDelay(0), - _ipvPref(0), - _mode(0), - _onlyPathOnLink(false), - _enabled(false), - _bonded(false), - _negotiated(false), - _deprecated(false), - _shouldReallocateFlows(false), - _assignedFlowCount(0), - _latencyMean(0), - _latencyVariance(0), - _packetLossRatio(0), - _packetErrorRatio(0), - _throughputMean(0), - _throughputMax(0), - _throughputVariance(0), - _allocation(0), - _byteLoad(0), - _relativeByteLoad(0), - _affinity(0), - _failoverScore(0), - _unackedBytes(0), - _packetsReceivedSinceLastAck(0), - _packetsReceivedSinceLastQoS(0), - _bytesAckedSinceLastThroughputEstimation(0), - _packetsIn(0), - _packetsOut(0) + _ipScope(addr.ipScope()) {} /** @@ -186,9 +106,6 @@ public: */ inline void received(const uint64_t t) { - if (!alive(t,_bonded)) { - _lastAliveToggle = _lastIn; - } _lastIn = t; } @@ -317,21 +234,11 @@ public: return (((age < (ZT_PATH_HEARTBEAT_PERIOD + 5000)) ? l : (l + 0xffff + age)) * (long)((ZT_INETADDRESS_MAX_SCOPE - _ipScope) + 1)); } - /** - * @param bonded Whether this path is part of a bond. - */ - inline void setBonded(bool bonded) { _bonded = bonded; } - - /** - * @return True if this path is currently part of a bond. - */ - inline bool bonded() { return _bonded; } - /** * @return True if this path is alive (receiving heartbeats) */ - inline bool alive(const int64_t now, bool bondingEnabled = false) const { - return (bondingEnabled && _monitorInterval) ? ((now - _lastIn) < (_monitorInterval * 3)) : ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); + inline bool alive(const int64_t now) const { + return (now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000); } /** @@ -339,11 +246,6 @@ public: */ inline bool needsHeartbeat(const int64_t now) const { return ((now - _lastOut) >= ZT_PATH_HEARTBEAT_PERIOD); } - /** - * @return True if this path needs a heartbeat in accordance to the user-specified path monitor frequency - */ - inline bool needsGratuitousHeartbeat(const int64_t now) { return allowed() && (_monitorInterval > 0) && ((now - _lastOut) >= _monitorInterval); } - /** * @return Last time we sent something */ @@ -364,134 +266,7 @@ public: */ inline int64_t lastTrustEstablishedPacketReceived() const { return _lastTrustEstablishedPacketReceived; } - /** - * @return Time since last VERB_ACK was received - */ - inline int64_t ackAge(int64_t now) { return _lastAckReceived ? now - _lastAckReceived : 0; } - - /** - * Set or update a refractory period for the path. - * - * @param punishment How much a path should be punished - * @param pathFailure Whether this call is the result of a recent path failure - */ - inline void adjustRefractoryPeriod(int64_t now, uint32_t punishment, bool pathFailure) { - if (pathFailure) { - unsigned int suggestedRefractoryPeriod = _refractoryPeriod ? punishment + (_refractoryPeriod * 2) : punishment; - _refractoryPeriod = std::min(suggestedRefractoryPeriod, (unsigned int)ZT_MULTIPATH_MAX_REFRACTORY_PERIOD); - _lastRefractoryUpdate = 0; - } else { - uint32_t drainRefractory = 0; - if (_lastRefractoryUpdate) { - drainRefractory = (now - _lastRefractoryUpdate); - } else { - drainRefractory = (now - _lastAliveToggle); - } - _lastRefractoryUpdate = now; - if (_refractoryPeriod > drainRefractory) { - _refractoryPeriod -= drainRefractory; - } else { - _refractoryPeriod = 0; - _lastRefractoryUpdate = 0; - } - } - } - - /** - * Determine the current state of eligibility of the path. - * - * @param includeRefractoryPeriod Whether current punishment should be taken into consideration - * @return True if this path can be used in a bond at the current time - */ - inline bool eligible(uint64_t now, int ackSendInterval, bool includeRefractoryPeriod = false) { - if (includeRefractoryPeriod && _refractoryPeriod) { - return false; - } - bool acceptableAge = age(now) < ((_monitorInterval * 4) + _downDelay); // Simple RX age (driven by packets of any type and gratuitous VERB_HELLOs) - bool acceptableAckAge = ackAge(now) < (ackSendInterval); // Whether the remote peer is actually responding to our outgoing traffic or simply sending stuff to us - bool notTooEarly = (now - _lastAliveToggle) >= _upDelay; // Whether we've waited long enough since the link last came online - bool inTrial = (now - _lastTrialBegin) < _upDelay; // Whether this path is still in its trial period - bool currEligibility = allowed() && (((acceptableAge || acceptableAckAge) && notTooEarly) || inTrial); - return currEligibility; - } - - /** - * Record when this path first entered the bond. Each path is given a trial period where it is admitted - * to the bond without requiring observations to prove its performance or reliability. - */ - inline void startTrial(uint64_t now) { _lastTrialBegin = now; } - - /** - * @return True if a path is permitted to be used in a bond (according to user pref.) - */ - inline bool allowed() { - return _enabled - && (!_ipvPref - || ((_addr.isV4() && (_ipvPref == 4 || _ipvPref == 46 || _ipvPref == 64)) - || ((_addr.isV6() && (_ipvPref == 6 || _ipvPref == 46 || _ipvPref == 64))))); - } - - /** - * @return True if a path is preferred over another on the same physical link (according to user pref.) - */ - inline bool preferred() { - return _onlyPathOnLink - || (_addr.isV4() && (_ipvPref == 4 || _ipvPref == 46)) - || (_addr.isV6() && (_ipvPref == 6 || _ipvPref == 64)); - } - - /** - * @param now Current time - * @return Whether an ACK (VERB_ACK) packet needs to be emitted at this time - */ - inline bool needsToSendAck(int64_t now, int ackSendInterval) { - return ((now - _lastAckSent) >= ackSendInterval || - (_packetsReceivedSinceLastAck == ZT_QOS_TABLE_SIZE)) && _packetsReceivedSinceLastAck; - } - - /** - * @param now Current time - * @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time - */ - inline bool needsToSendQoS(int64_t now, int qosSendInterval) { - return ((_packetsReceivedSinceLastQoS >= ZT_QOS_TABLE_SIZE) || - ((now - _lastQoSMeasurement) > qosSendInterval)) && _packetsReceivedSinceLastQoS; - } - - /** - * Reset packet counters - */ - inline void resetPacketCounts() - { - _packetsIn = 0; - _packetsOut = 0; - } - - - /** - * The mean latency (computed from a sliding window.) - */ - float latencyMean() { return _latencyMean; } - - /** - * Packet delay variance (computed from a sliding window.) - */ - float latencyVariance() { return _latencyVariance; } - - /** - * The ratio of lost packets to received packets. - */ - float packetLossRatio() { return _packetLossRatio; } - - /** - * The ratio of packets that failed their MAC/CRC checks to those that did not. - */ - float packetErrorRatio() { return _packetErrorRatio; } - - /** - * - */ - uint8_t allocation() { return _allocation; } + void *_bondingMetricPtr; private: @@ -503,212 +278,6 @@ private: InetAddress _addr; InetAddress::IpScope _ipScope; // memoize this since it's a computed value checked often AtomicCounter __refCount; - - std::map qosStatsOut; // id:egress_time - std::map qosStatsIn; // id:now - std::map ackStatsIn; // id:len - - RingBuffer qosRecordSize; - RingBuffer qosRecordLossSamples; - RingBuffer throughputSamples; - RingBuffer packetValiditySamples; - RingBuffer _throughputVarianceSamples; - RingBuffer latencySamples; - - /** - * Last time that a VERB_ACK was received on this path. - */ - uint64_t _lastAckReceived; - - /** - * Last time that a VERB_ACK was sent out on this path. - */ - uint64_t _lastAckSent; - - /** - * Last time that a VERB_QOS_MEASUREMENT was sent out on this path. - */ - uint64_t _lastQoSMeasurement; - - /** - * Last time that the path's throughput was estimated. - */ - uint64_t _lastThroughputEstimation; - - /** - * The last time that the refractory period was updated. - */ - uint64_t _lastRefractoryUpdate; - - /** - * The last time that the path was marked as "alive". - */ - uint64_t _lastAliveToggle; - - /** - * State of eligibility at last check. Used for determining state changes. - */ - bool _lastEligibilityState; - - /** - * Timestamp indicating when this path's trial period began. - */ - uint64_t _lastTrialBegin; - - /** - * Amount of time that this path will be prevented from becoming a member of a bond. - */ - uint32_t _refractoryPeriod; - - /** - * Monitor interval specific to this path or that was inherited from the bond controller. - */ - int32_t _monitorInterval; - - /** - * Up delay interval specific to this path or that was inherited from the bond controller. - */ - uint32_t _upDelay; - - /** - * Down delay interval specific to this path or that was inherited from the bond controller. - */ - uint32_t _downDelay; - - /** - * IP version preference inherited from the physical link. - */ - uint8_t _ipvPref; - - /** - * Mode inherited from the physical link. - */ - uint8_t _mode; - - /** - * IP version preference inherited from the physical link. - */ - bool _onlyPathOnLink; - - /** - * Enabled state inherited from the physical link. - */ - bool _enabled; - - /** - * Whether this path is currently part of a bond. - */ - bool _bonded; - - /** - * Whether this path was intentionally negotiated by either peer. - */ - bool _negotiated; - - /** - * Whether this path has been deprecated due to performance issues. Current traffic flows - * will be re-allocated to other paths in the most non-disruptive manner (if possible), - * and new traffic will not be allocated to this path. - */ - bool _deprecated; - - /** - * Whether flows should be moved from this path. Current traffic flows will be re-allocated - * immediately. - */ - bool _shouldReallocateFlows; - - /** - * The number of flows currently assigned to this path. - */ - uint16_t _assignedFlowCount; - - /** - * The mean latency (computed from a sliding window.) - */ - float _latencyMean; - - /** - * Packet delay variance (computed from a sliding window.) - */ - float _latencyVariance; - - /** - * The ratio of lost packets to received packets. - */ - float _packetLossRatio; - - /** - * The ratio of packets that failed their MAC/CRC checks to those that did not. - */ - float _packetErrorRatio; - - /** - * The estimated mean throughput of this path. - */ - uint64_t _throughputMean; - - /** - * The maximum observed throughput of this path. - */ - uint64_t _throughputMax; - - /** - * The variance in the estimated throughput of this path. - */ - float _throughputVariance; - - /** - * The relative quality of this path to all others in the bond, [0-255]. - */ - uint8_t _allocation; - - /** - * How much load this path is under. - */ - uint64_t _byteLoad; - - /** - * How much load this path is under (relative to other paths in the bond.) - */ - uint8_t _relativeByteLoad; - - /** - * Relative value expressing how "deserving" this path is of new traffic. - */ - uint8_t _affinity; - - /** - * Score that indicates to what degree this path is preferred over others that - * are available to the bonding policy. (specifically for active-backup) - */ - uint32_t _failoverScore; - - /** - * Number of bytes thus far sent that have not been acknowledged by the remote peer. - */ - int64_t _unackedBytes; - - /** - * Number of packets received since the last VERB_ACK was sent to the remote peer. - */ - int32_t _packetsReceivedSinceLastAck; - - /** - * Number of packets received since the last VERB_QOS_MEASUREMENT was sent to the remote peer. - */ - int32_t _packetsReceivedSinceLastQoS; - - /** - * Bytes acknowledged via incoming VERB_ACK since the last estimation of throughput. - */ - uint64_t _bytesAckedSinceLastThroughputEstimation; - - /** - * Counters used for tracking path load. - */ - int _packetsIn; - int _packetsOut; }; } // namespace ZeroTier diff --git a/node/Peer.cpp b/node/Peer.cpp index fb405ad92..833304bbd 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -50,12 +50,7 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _directPathPushCutoffCount(0), _credentialsCutoffCount(0), _echoRequestCutoffCount(0), - _uniqueAlivePathCount(0), _localMultipathSupported(false), - _remoteMultipathSupported(false), - _canUseMultipath(false), - _shouldCollectPathStatistics(0), - _bondingPolicy(0), _lastComputedAggregateMeanLatency(0) { if (!myIdentity.agree(peerIdentity,_key)) @@ -229,7 +224,8 @@ void Peer::received( SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired, int32_t flowId) { - if (!_bondToPeer) { + Mutex::Lock _l(_bond_m); + if (!_bond) { Mutex::Lock _l(_paths_m); unsigned int bestPath = ZT_MAX_PEER_NETWORK_PATHS; /** @@ -253,7 +249,7 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired, int32 } return SharedPtr(); } - return _bondToPeer->getAppropriatePath(now, flowId); + return _bond->getAppropriatePath(now, flowId); } void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr &other) const @@ -444,39 +440,22 @@ void Peer::tryMemorizedPath(void *tPtr,int64_t now) void Peer::performMultipathStateCheck(void *tPtr, int64_t now) { + Mutex::Lock _l(_bond_m); /** * Check for conditions required for multipath bonding and create a bond * if allowed. */ _localMultipathSupported = ((RR->bc->inUse()) && (ZT_PROTO_VERSION > 9)); - if (_localMultipathSupported) { - int currAlivePathCount = 0; - int duplicatePathsFound = 0; - for (unsigned int i=0;iaddress().ipsEqual2(_paths[j].p->address()) && i != j) { - duplicatePathsFound+=1; - break; - } - } - } - } - _uniqueAlivePathCount = (currAlivePathCount - (duplicatePathsFound / 2)); - _remoteMultipathSupported = _vProto > 9; - _canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1); - } - if (_canUseMultipath && !_bondToPeer) { + if (_localMultipathSupported && !_bond) { if (RR->bc) { - _bondToPeer = RR->bc->createTransportTriggeredBond(RR, this); + _bond = RR->bc->createTransportTriggeredBond(RR, this); /** * Allow new bond to retroactively learn all paths known to this peer */ - if (_bondToPeer) { + if (_bond) { for (unsigned int i=0;inominatePath(_paths[i].p, now); + _bond->nominatePathToBond(_paths[i].p, now); } } } @@ -510,8 +489,7 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) if (_paths[i].p) { // Clean expired and reduced priority paths if ( ((now - _paths[i].lr) < ZT_PEER_PATH_EXPIRATION) && (_paths[i].priority == maxPriority) ) { - if ((sendFullHello)||(_paths[i].p->needsHeartbeat(now)) - || (_canUseMultipath && _paths[i].p->needsGratuitousHeartbeat(now))) { + if ((sendFullHello)||(_paths[i].p->needsHeartbeat(now))) { attemptToContactAt(tPtr,_paths[i].p->localSocket(),_paths[i].p->address(),now,sendFullHello); _paths[i].p->sent(now); sent |= (_paths[i].p->address().ss_family == AF_INET) ? 0x1 : 0x2; @@ -591,27 +569,27 @@ void Peer::resetWithinScope(void *tPtr,InetAddress::IpScope scope,int inetAddres void Peer::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) { - if (!_shouldCollectPathStatistics || !_bondToPeer) { + if (!_shouldCollectPathStatistics || !_bond) { return; } - _bondToPeer->recordOutgoingPacket(path, packetId, payloadLength, verb, flowId, now); + _bond->recordOutgoingPacket(path, packetId, payloadLength, verb, flowId, now); } void Peer::recordIncomingInvalidPacket(const SharedPtr& path) { - if (!_shouldCollectPathStatistics || !_bondToPeer) { + if (!_shouldCollectPathStatistics || !_bond) { return; } - _bondToPeer->recordIncomingInvalidPacket(path); + _bond->recordIncomingInvalidPacket(path); } void Peer::recordIncomingPacket(const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) { - if (!_shouldCollectPathStatistics || !_bondToPeer) { + if (!_shouldCollectPathStatistics || !_bond) { return; } - _bondToPeer->recordIncomingPacket(path, packetId, payloadLength, verb, flowId, now); + _bond->recordIncomingPacket(path, packetId, payloadLength, verb, flowId, now); } } // namespace ZeroTier diff --git a/node/Peer.hpp b/node/Peer.hpp index 9d012cdbe..cceae3ed4 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -33,7 +33,6 @@ #include "Hashtable.hpp" #include "Mutex.hpp" #include "Bond.hpp" -#include "BondController.hpp" #include "AES.hpp" #define ZT_PEER_MAX_SERIALIZED_STATE_SIZE (sizeof(Peer) + 32 + (sizeof(Path) * 2)) @@ -305,12 +304,13 @@ public: */ inline unsigned int latency(const int64_t now) { - if (_canUseMultipath) { + if (_localMultipathSupported) { return (int)_lastComputedAggregateMeanLatency; } else { SharedPtr bp(getAppropriatePath(now,false)); - if (bp) + if (bp) { return bp->latency(); + } return 0xffff; } } @@ -503,16 +503,20 @@ public: } /** - * - * @return + * @return The bonding policy used to reach this peer */ - SharedPtr bond() { return _bondToPeer; } + SharedPtr bond() { return _bond; } /** - * - * @return + * @return The bonding policy used to reach this peer */ - inline int8_t bondingPolicy() { return _bondingPolicy; } + inline int8_t bondingPolicy() { + Mutex::Lock _l(_paths_m); + if (_bond) { + return _bond->policy(); + } + return ZT_BOND_POLICY_NONE; + } //inline const AES *aesKeysIfSupported() const //{ return (const AES *)0; } @@ -562,6 +566,7 @@ private: _PeerPath _paths[ZT_MAX_PEER_NETWORK_PATHS]; Mutex _paths_m; + Mutex _bond_m; Identity _id; @@ -571,18 +576,13 @@ private: AtomicCounter __refCount; - bool _remotePeerMultipathEnabled; - int _uniqueAlivePathCount; bool _localMultipathSupported; - bool _remoteMultipathSupported; - bool _canUseMultipath; volatile bool _shouldCollectPathStatistics; - volatile int8_t _bondingPolicy; int32_t _lastComputedAggregateMeanLatency; - SharedPtr _bondToPeer; + SharedPtr _bond; }; } // namespace ZeroTier diff --git a/node/RuntimeEnvironment.hpp b/node/RuntimeEnvironment.hpp index 4603afa0f..019645513 100644 --- a/node/RuntimeEnvironment.hpp +++ b/node/RuntimeEnvironment.hpp @@ -30,7 +30,7 @@ class Multicaster; class NetworkController; class SelfAwareness; class Trace; -class BondController; +class Bond; /** * Holds global state for an instance of ZeroTier::Node @@ -76,7 +76,7 @@ public: Multicaster *mc; Topology *topology; SelfAwareness *sa; - BondController *bc; + Bond *bc; // This node's identity and string representations thereof Identity identity; diff --git a/node/Switch.cpp b/node/Switch.cpp index 3bcaecdd9..2721cf92f 100644 --- a/node/Switch.cpp +++ b/node/Switch.cpp @@ -1003,14 +1003,12 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId) const SharedPtr peer(RR->topology->getPeer(tPtr,destination)); if (peer) { - if ((peer->bondingPolicy() == ZT_BONDING_POLICY_BROADCAST) + if ((peer->bondingPolicy() == ZT_BOND_POLICY_BROADCAST) && (packet.verb() == Packet::VERB_FRAME || packet.verb() == Packet::VERB_EXT_FRAME)) { const SharedPtr relay(RR->topology->getUpstreamPeer()); Mutex::Lock _l(peer->_paths_m); for(int i=0;i_paths[i].p && peer->_paths[i].p->alive(now)) { - char pathStr[128]; - peer->_paths[i].p->address().toString(pathStr); _sendViaSpecificPath(tPtr,peer,peer->_paths[i].p,now,packet,encrypt,flowId); } } diff --git a/objects.mk b/objects.mk index cc6f96ee2..61df844bc 100644 --- a/objects.mk +++ b/objects.mk @@ -28,8 +28,7 @@ CORE_OBJS=\ node/Topology.o \ node/Trace.o \ node/Utils.o \ - node/Bond.o \ - node/BondController.o + node/Bond.o ONE_OBJS=\ controller/EmbeddedNetworkController.o \ diff --git a/one.cpp b/one.cpp index a319b495e..26bcb8cdf 100644 --- a/one.cpp +++ b/one.cpp @@ -84,7 +84,7 @@ #include "osdep/Http.hpp" #include "osdep/Thread.hpp" -#include "node/BondController.hpp" +#include "node/Bond.hpp" #include "service/OneService.hpp" @@ -496,7 +496,7 @@ static int cli(int argc,char **argv) return 1; } } else if (command == "bond") { - /* zerotier-cli bond */ + /* zerotier-cli bond */ if (arg1.empty()) { printf("(bond) command is missing required arguments" ZT_EOL_S); return 2; @@ -541,8 +541,8 @@ static int cli(int argc,char **argv) healthStr = "DEGRADED"; } std::string policyStr = "none"; - if (bondingPolicy >= ZT_BONDING_POLICY_NONE && bondingPolicy <= ZT_BONDING_POLICY_BALANCE_AWARE) { - policyStr = BondController::getPolicyStrByCode(bondingPolicy); + if (bondingPolicy >= ZT_BOND_POLICY_NONE && bondingPolicy <= ZT_BOND_POLICY_BALANCE_AWARE) { + policyStr = Bond::getPolicyStrByCode(bondingPolicy); } printf("%10s %32s %8s %d/%d" ZT_EOL_S, OSUtils::jsonString(p ["address"],"-").c_str(), @@ -563,11 +563,7 @@ static int cli(int argc,char **argv) return 1; } } - else if (arg1.length() == 10) { /* zerotier-cli bond enable */ - if (arg2 == "enable") { - fprintf(stderr, "zerotier-cli bond enable\n"); - return 0; - } + else if (arg1.length() == 10) { if (arg2 == "rotate") { /* zerotier-cli bond rotate */ fprintf(stderr, "zerotier-cli bond rotate\n"); requestHeaders["Content-Type"] = "application/json"; @@ -631,7 +627,7 @@ static int cli(int argc,char **argv) int numTotalLinks = OSUtils::jsonInt(j["numTotalLinks"],0); printf("Peer : %s\n", arg1.c_str()); printf("Bond : %s\n", OSUtils::jsonString(j["bondingPolicy"],"-").c_str()); - //if (bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { + //if (bondingPolicy == ZT_BOND_POLICY_ACTIVE_BACKUP) { printf("Link Select Method : %d\n", (int)OSUtils::jsonInt(j["linkSelectMethod"],0)); //} printf("Status : %s\n", healthStr.c_str()); @@ -728,8 +724,8 @@ static int cli(int argc,char **argv) healthStr = "Degraded"; } std::string policyStr = "none"; - if (bondingPolicy >= ZT_BONDING_POLICY_NONE && bondingPolicy <= ZT_BONDING_POLICY_BALANCE_AWARE) { - policyStr = BondController::getPolicyStrByCode(bondingPolicy); + if (bondingPolicy >= ZT_BOND_POLICY_NONE && bondingPolicy <= ZT_BOND_POLICY_BALANCE_AWARE) { + policyStr = Bond::getPolicyStrByCode(bondingPolicy); } printf("%10s %32s %8s %d/%d" ZT_EOL_S, diff --git a/osdep/Binder.hpp b/osdep/Binder.hpp index d321d3955..f02900325 100644 --- a/osdep/Binder.hpp +++ b/osdep/Binder.hpp @@ -414,27 +414,6 @@ class Binder { } } - // Generate set of unique interface names (used for formation of logical link set in multipath code) - // TODO: Could be gated not to run if multipath is not enabled. - for (std::map::const_iterator ii(localIfAddrs.begin()); ii != localIfAddrs.end(); ++ii) { - linkIfNames.insert(ii->second); - } - for (std::set::iterator si(linkIfNames.begin()); si != linkIfNames.end();) { - bool bFoundMatch = false; - for (std::map::const_iterator ii(localIfAddrs.begin()); ii != localIfAddrs.end(); ++ii) { - if (ii->second == *si) { - bFoundMatch = true; - break; - } - } - if (! bFoundMatch) { - linkIfNames.erase(si++); - } - else { - ++si; - } - } - // Create new bindings for those not already bound for (std::map::const_iterator ii(localIfAddrs.begin()); ii != localIfAddrs.end(); ++ii) { unsigned int bi = 0; @@ -535,14 +514,7 @@ class Binder { return false; } - inline std::set getLinkInterfaceNames() - { - Mutex::Lock _l(_lock); - return linkIfNames; - } - private: - std::set linkIfNames; _Binding _bindings[ZT_BINDER_MAX_BINDINGS]; std::atomic _bindingCount; Mutex _lock; diff --git a/osdep/Link.hpp b/osdep/Link.hpp deleted file mode 100644 index d66bbd9f7..000000000 --- a/osdep/Link.hpp +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright (c)2013-2020 ZeroTier, Inc. - * - * Use of this software is governed by the Business Source License included - * in the LICENSE.TXT file in the project's root directory. - * - * Change Date: 2025-01-01 - * - * On the date above, in accordance with the Business Source License, use - * of this software will be governed by version 2.0 of the Apache License. - */ -/****/ - -#ifndef ZT_LINK_HPP -#define ZT_LINK_HPP - -#include "../node/AtomicCounter.hpp" -#include "../node/SharedPtr.hpp" - -#include - -namespace ZeroTier { - -class Link { - friend class SharedPtr; - - public: - /** - * - * @param ifnameStr - * @param ipvPref - * @param speed - * @param enabled - * @param mode - * @param failoverToLinkStr - * @param userSpecifiedAlloc - */ - Link(std::string& ifnameStr, uint8_t ipvPref, uint32_t speed, uint32_t linkMonitorInterval, uint32_t upDelay, uint32_t downDelay, bool enabled, uint8_t mode, std::string failoverToLinkStr, float userSpecifiedAlloc) - : _ifnameStr(ifnameStr) - , _ipvPref(ipvPref) - , _speed(speed) - , _relativeSpeed(0) - , _linkMonitorInterval(linkMonitorInterval) - , _upDelay(upDelay) - , _downDelay(downDelay) - , _enabled(enabled) - , _mode(mode) - , _failoverToLinkStr(failoverToLinkStr) - , _userSpecifiedAlloc(userSpecifiedAlloc) - , _isUserSpecified(false) - { - } - - /** - * @return The string representation of this link's underlying interface's system name. - */ - inline std::string ifname() - { - return _ifnameStr; - } - - /** - * @return Whether this link is designated as a primary. - */ - inline bool primary() - { - return _mode == ZT_MULTIPATH_SLAVE_MODE_PRIMARY; - } - - /** - * @return Whether this link is designated as a spare. - */ - inline bool spare() - { - return _mode == ZT_MULTIPATH_SLAVE_MODE_SPARE; - } - - /** - * @return The name of the link interface that should be used in the event of a failure. - */ - inline std::string failoverToLink() - { - return _failoverToLinkStr; - } - - /** - * @return Whether this link interface was specified by the user or auto-detected. - */ - inline bool isUserSpecified() - { - return _isUserSpecified; - } - - /** - * Signify that this link was specified by the user and not the result of auto-detection. - * - * @param isUserSpecified - */ - inline void setAsUserSpecified(bool isUserSpecified) - { - _isUserSpecified = isUserSpecified; - } - - /** - * @return Whether or not the user has specified failover instructions. - */ - inline bool userHasSpecifiedFailoverInstructions() - { - return _failoverToLinkStr.length(); - } - - /** - * @return The speed of the link relative to others in the bond. - */ - inline uint8_t relativeSpeed() - { - return _relativeSpeed; - } - - /** - * Sets the speed of the link relative to others in the bond. - * - * @param relativeSpeed The speed relative to the rest of the link. - */ - inline void setRelativeSpeed(uint8_t relativeSpeed) - { - _relativeSpeed = relativeSpeed; - } - - /** - * Sets the speed of the link relative to others in the bond. - * - * @param relativeSpeed - */ - inline void setMonitorInterval(uint32_t interval) - { - _linkMonitorInterval = interval; - } - - /** - * @return The absolute speed of the link (as specified by the user.) - */ - inline uint32_t monitorInterval() - { - return _linkMonitorInterval; - } - - /** - * @return The absolute speed of the link (as specified by the user.) - */ - inline uint32_t speed() - { - return _speed; - } - - /** - * @return The address preference for this link (as specified by the user.) - */ - inline uint8_t ipvPref() - { - return _ipvPref; - } - - /** - * @return The mode (e.g. primary/spare) for this link (as specified by the user.) - */ - inline uint8_t mode() - { - return _mode; - } - - /** - * @return The upDelay parameter for all paths on this link. - */ - inline uint32_t upDelay() - { - return _upDelay; - } - - /** - * @return The downDelay parameter for all paths on this link. - */ - inline uint32_t downDelay() - { - return _downDelay; - } - - /** - * @return Whether this link is enabled or disabled - */ - inline uint8_t enabled() - { - return _enabled; - } - - private: - /** - * String representation of underlying interface's system name - */ - std::string _ifnameStr; - - /** - * What preference (if any) a user has for IP protocol version used in - * path aggregations. Preference is expressed in the order of the digits: - * - * 0: no preference - * 4: IPv4 only - * 6: IPv6 only - * 46: IPv4 over IPv6 - * 64: IPv6 over IPv4 - */ - uint8_t _ipvPref; - - /** - * User-specified speed of this link - */ - uint32_t _speed; - - /** - * Speed relative to other specified links (computed by Bond) - */ - uint8_t _relativeSpeed; - - /** - * User-specified interval for monitoring paths on this specific link - * instead of using the more generic interval specified for the entire - * bond. - */ - uint32_t _linkMonitorInterval; - - /** - * How long before a path is considered to be usable after coming online. (when using policies that - * support fail-over events). - */ - uint32_t _upDelay; - - /** - * How long before a path is considered to be dead (when using policies that - * support fail-over events). - */ - uint32_t _downDelay; - - /** - * Whether this link is enabled, or (disabled (possibly bad config)) - */ - uint8_t _enabled; - - /** - * Whether this link is designated as a primary, a spare, or no preference. - */ - uint8_t _mode; - - /** - * The specific name of the link to be used in the event that this - * link fails. - */ - std::string _failoverToLinkStr; - - /** - * User-specified allocation - */ - float _userSpecifiedAlloc; - - /** - * Whether or not this link was created as a result of manual user specification. This is - * important to know because certain policy decisions are dependent on whether the user - * intents to use a specific set of interfaces. - */ - bool _isUserSpecified; - - AtomicCounter __refCount; -}; - -} // namespace ZeroTier - -#endif diff --git a/osdep/OSUtils.hpp b/osdep/OSUtils.hpp index dc5563491..e9b70d234 100644 --- a/osdep/OSUtils.hpp +++ b/osdep/OSUtils.hpp @@ -195,20 +195,6 @@ public: */ static std::vector resolve(const char *name); - /** - * @return Current time in a human-readable format - */ - static inline std::string humanReadableTimestamp() - { - time_t rawtime; - struct tm * timeinfo; - char buffer [80]; - time (&rawtime); - timeinfo = localtime (&rawtime); - strftime (buffer,80,"%F %T",timeinfo); - return std::string(buffer); - } - /** * @return Current time in milliseconds since epoch */ diff --git a/osdep/Phy.hpp b/osdep/Phy.hpp index 8c38d2fae..e8c7a5bf0 100644 --- a/osdep/Phy.hpp +++ b/osdep/Phy.hpp @@ -145,7 +145,7 @@ private: ZT_PHY_SOCKFD_TYPE sock; void *uptr; // user-settable pointer ZT_PHY_SOCKADDR_STORAGE_TYPE saddr; // remote for TCP_OUT and TCP_IN, local for TCP_LISTEN, RAW, and UDP - char ifname[16]; + char ifname[32]; }; std::list _socks; diff --git a/service/OneService.cpp b/service/OneService.cpp index 758904be0..99c35d553 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -49,7 +49,6 @@ #include "../osdep/Binder.hpp" #include "../osdep/ManagedRoute.hpp" #include "../osdep/BlockingQueue.hpp" -#include "../osdep/Link.hpp" #include "OneService.hpp" #include "SoftwareUpdater.hpp" @@ -306,9 +305,9 @@ static void _bondToJson(nlohmann::json &pj, SharedPtr &bond) { uint64_t now = OSUtils::now(); - int bondingPolicy = bond->getPolicy(); - pj["bondingPolicy"] = BondController::getPolicyStrByCode(bondingPolicy); - if (bondingPolicy == ZT_BONDING_POLICY_NONE) { + int bondingPolicy = bond->policy(); + pj["bondingPolicy"] = Bond::getPolicyStrByCode(bondingPolicy); + if (bondingPolicy == ZT_BOND_POLICY_NONE) { return; } @@ -318,15 +317,15 @@ static void _bondToJson(nlohmann::json &pj, SharedPtr &bond) pj["failoverInterval"] = bond->getFailoverInterval(); pj["downDelay"] = bond->getDownDelay(); pj["upDelay"] = bond->getUpDelay(); - if (bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) { + if (bondingPolicy == ZT_BOND_POLICY_BALANCE_RR) { pj["packetsPerLink"] = bond->getPacketsPerLink(); } - if (bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { + if (bondingPolicy == ZT_BOND_POLICY_ACTIVE_BACKUP) { pj["linkSelectMethod"] = bond->getLinkSelectMethod(); } nlohmann::json pa = nlohmann::json::array(); - std::vector< SharedPtr > paths = bond->getPeer()->paths(now); + std::vector< SharedPtr > paths = bond->paths(now); for(unsigned int i=0;i &bond) nlohmann::json j; j["ifname"] = bond->getLink(paths[i])->ifname(); j["path"] = pathStr; + /* j["alive"] = paths[i]->alive(now,true); j["bonded"] = paths[i]->bonded(); j["latencyMean"] = paths[i]->latencyMean(); @@ -343,6 +343,7 @@ static void _bondToJson(nlohmann::json &pj, SharedPtr &bond) j["packetErrorRatio"] = paths[i]->packetErrorRatio(); j["givenLinkSpeed"] = 1000; j["allocation"] = paths[i]->allocation(); + */ pa.push_back(j); } pj["links"] = pa; @@ -1762,11 +1763,11 @@ public: if (basePolicyStr.empty()) { fprintf(stderr, "error: no base policy was specified for custom policy (%s)\n", customPolicyStr.c_str()); } - if (_node->bondController()->getPolicyCodeByStr(basePolicyStr) == ZT_BONDING_POLICY_NONE) { + if (_node->bondController()->getPolicyCodeByStr(basePolicyStr) == ZT_BOND_POLICY_NONE) { fprintf(stderr, "error: custom policy (%s) is invalid, unknown base policy (%s).\n", customPolicyStr.c_str(), basePolicyStr.c_str()); continue; - } if (_node->bondController()->getPolicyCodeByStr(customPolicyStr) != ZT_BONDING_POLICY_NONE) { + } if (_node->bondController()->getPolicyCodeByStr(customPolicyStr) != ZT_BOND_POLICY_NONE) { fprintf(stderr, "error: custom policy (%s) will be ignored, cannot use standard policy names for custom policies.\n", customPolicyStr.c_str()); continue; @@ -1795,20 +1796,12 @@ public: newTemplateBond->setUserQualityWeights(weights,ZT_QOS_WEIGHT_SIZE); } // Bond-specific properties - newTemplateBond->setOverflowMode(OSUtils::jsonInt(customPolicy["overflow"],false)); newTemplateBond->setUpDelay(OSUtils::jsonInt(customPolicy["upDelay"],-1)); newTemplateBond->setDownDelay(OSUtils::jsonInt(customPolicy["downDelay"],-1)); newTemplateBond->setFlowRebalanceStrategy(OSUtils::jsonInt(customPolicy["flowRebalanceStrategy"],(uint64_t)0)); newTemplateBond->setFailoverInterval(OSUtils::jsonInt(customPolicy["failoverInterval"],(uint64_t)0)); newTemplateBond->setPacketsPerLink(OSUtils::jsonInt(customPolicy["packetsPerLink"],-1)); - std::string linkMonitorStrategyStr(OSUtils::jsonString(customPolicy["linkMonitorStrategy"],"")); - uint8_t linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DEFAULT; - if (linkMonitorStrategyStr == "passive") { linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE; } - if (linkMonitorStrategyStr == "active") { linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_ACTIVE; } - if (linkMonitorStrategyStr == "dynamic") { linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; } - newTemplateBond->setLinkMonitorStrategy(linkMonitorStrategy); - // Policy-Specific link set json &links = customPolicy["links"]; for (json::iterator linkItr = links.begin(); linkItr != links.end();++linkItr) { @@ -1824,40 +1817,40 @@ public: speed, alloc, linkNameStr.c_str()); enabled = false; } - uint32_t upDelay = OSUtils::jsonInt(link["upDelay"],-1); - uint32_t downDelay = OSUtils::jsonInt(link["downDelay"],-1); + //uint32_t upDelay = OSUtils::jsonInt(link["upDelay"],-1); + //uint32_t downDelay = OSUtils::jsonInt(link["downDelay"],-1); uint8_t ipvPref = OSUtils::jsonInt(link["ipvPref"],0); - uint32_t linkMonitorInterval = OSUtils::jsonInt(link["monitorInterval"],(uint64_t)0); + //uint32_t linkMonitorInterval = OSUtils::jsonInt(link["monitorInterval"],(uint64_t)0); std::string failoverToStr(OSUtils::jsonString(link["failoverTo"],"")); // Mode std::string linkModeStr(OSUtils::jsonString(link["mode"],"spare")); - uint8_t linkMode = ZT_MULTIPATH_SLAVE_MODE_SPARE; - if (linkModeStr == "primary") { linkMode = ZT_MULTIPATH_SLAVE_MODE_PRIMARY; } - if (linkModeStr == "spare") { linkMode = ZT_MULTIPATH_SLAVE_MODE_SPARE; } + uint8_t linkMode = ZT_BOND_SLAVE_MODE_SPARE; + if (linkModeStr == "primary") { linkMode = ZT_BOND_SLAVE_MODE_PRIMARY; } + if (linkModeStr == "spare") { linkMode = ZT_BOND_SLAVE_MODE_SPARE; } // ipvPref if ((ipvPref != 0) && (ipvPref != 4) && (ipvPref != 6) && (ipvPref != 46) && (ipvPref != 64)) { fprintf(stderr, "error: invalid ipvPref value (%d), link disabled.\n", ipvPref); enabled = false; } - if (linkMode == ZT_MULTIPATH_SLAVE_MODE_SPARE && failoverToStr.length()) { + if (linkMode == ZT_BOND_SLAVE_MODE_SPARE && failoverToStr.length()) { fprintf(stderr, "error: cannot specify failover links for spares, link disabled.\n"); failoverToStr = ""; enabled = false; } - _node->bondController()->addCustomLink(customPolicyStr, new Link(linkNameStr,ipvPref,speed,linkMonitorInterval,upDelay,downDelay,enabled,linkMode,failoverToStr,alloc)); + _node->bondController()->addCustomLink(customPolicyStr, new Link(linkNameStr,ipvPref,speed,enabled,linkMode,failoverToStr,alloc)); } std::string linkSelectMethodStr(OSUtils::jsonString(customPolicy["activeReselect"],"optimize")); if (linkSelectMethodStr == "always") { - newTemplateBond->setLinkSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS); + newTemplateBond->setLinkSelectMethod(ZT_BOND_RESELECTION_POLICY_ALWAYS); } if (linkSelectMethodStr == "better") { - newTemplateBond->setLinkSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_BETTER); + newTemplateBond->setLinkSelectMethod(ZT_BOND_RESELECTION_POLICY_BETTER); } if (linkSelectMethodStr == "failure") { - newTemplateBond->setLinkSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_FAILURE); + newTemplateBond->setLinkSelectMethod(ZT_BOND_RESELECTION_POLICY_FAILURE); } if (linkSelectMethodStr == "optimize") { - newTemplateBond->setLinkSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE); + newTemplateBond->setLinkSelectMethod(ZT_BOND_RESELECTION_POLICY_OPTIMIZE); } if (newTemplateBond->getLinkSelectMethod() < 0 || newTemplateBond->getLinkSelectMethod() > 3) { fprintf(stderr, "warning: invalid value (%s) for linkSelectMethod, assuming mode: always\n", linkSelectMethodStr.c_str()); @@ -3094,9 +3087,6 @@ public: if (!strncmp(p->c_str(),ifname,p->length())) return false; } - if (!_node->bondController()->allowedToBind(std::string(ifname))) { - return false; - } } { // Check global blacklists