From 48ce7632fa39faedd7ac46c267e0d333ab5f7ffd Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Tue, 21 Sep 2021 11:20:15 -0400 Subject: [PATCH] Backport guts of 1.8 to 1.6 tree so we can point release without waiting for UI quirks to be fixed. --- controller/ConnectionPool.hpp | 161 ++ controller/DB.cpp | 18 +- controller/DB.hpp | 12 +- controller/DBMirrorSet.cpp | 65 +- controller/DBMirrorSet.hpp | 8 + controller/EmbeddedNetworkController.cpp | 134 +- controller/EmbeddedNetworkController.hpp | 3 + controller/FileDB.cpp | 3 +- controller/PostgreSQL.cpp | 1693 +++++++--------- controller/PostgreSQL.hpp | 79 +- include/ZeroTierOne.h | 173 +- node/Bond.cpp | 2246 +++++++++++----------- node/Bond.hpp | 1210 +++++++++--- node/BondController.cpp | 212 -- node/BondController.hpp | 239 --- node/CertificateOfMembership.cpp | 192 +- node/CertificateOfMembership.hpp | 50 +- node/Constants.hpp | 205 +- node/Flow.hpp | 124 -- node/Identity.hpp | 12 + node/IncomingPacket.cpp | 51 +- node/IncomingPacket.hpp | 1 - node/Membership.hpp | 7 +- node/Network.cpp | 16 +- node/Network.hpp | 15 +- node/NetworkConfig.cpp | 22 + node/NetworkConfig.hpp | 28 +- node/NetworkController.hpp | 10 +- node/Node.cpp | 39 +- node/Node.hpp | 6 +- node/Packet.hpp | 11 +- node/Path.hpp | 441 +---- node/Peer.cpp | 67 +- node/Peer.hpp | 60 +- node/RuntimeEnvironment.hpp | 4 +- node/Switch.cpp | 5 +- node/Trace.cpp | 2 +- node/Utils.cpp | 2 + objects.mk | 3 +- one.cpp | 81 +- selftest.cpp | 16 +- service/OneService.cpp | 181 +- 42 files changed, 3754 insertions(+), 4153 deletions(-) create mode 100644 controller/ConnectionPool.hpp delete mode 100644 node/BondController.cpp delete mode 100644 node/BondController.hpp delete mode 100644 node/Flow.hpp diff --git a/controller/ConnectionPool.hpp b/controller/ConnectionPool.hpp new file mode 100644 index 000000000..8ffc1645c --- /dev/null +++ b/controller/ConnectionPool.hpp @@ -0,0 +1,161 @@ +/* + * Copyright (c)2021 ZeroTier, Inc. 
+ * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file in the project's root directory. + * + * Change Date: 2025-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2.0 of the Apache License. + */ +/****/ + +#ifndef ZT_CONNECTION_POOL_H_ +#define ZT_CONNECTION_POOL_H_ + + +#ifndef _DEBUG + #define _DEBUG(x) +#endif + +#include +#include +#include +#include +#include +#include + +namespace ZeroTier { + +struct ConnectionUnavailable : std::exception { + char const* what() const throw() { + return "Unable to allocate connection"; + }; +}; + + +class Connection { +public: + virtual ~Connection() {}; +}; + +class ConnectionFactory { +public: + virtual ~ConnectionFactory() {}; + virtual std::shared_ptr create()=0; +}; + +struct ConnectionPoolStats { + size_t pool_size; + size_t borrowed_size; +}; + +template +class ConnectionPool { +public: + ConnectionPool(size_t max_pool_size, size_t min_pool_size, std::shared_ptr factory) + : m_maxPoolSize(max_pool_size) + , m_minPoolSize(min_pool_size) + , m_factory(factory) + { + while(m_pool.size() < m_minPoolSize){ + m_pool.push_back(m_factory->create()); + } + }; + + ConnectionPoolStats get_stats() { + std::unique_lock lock(m_poolMutex); + + ConnectionPoolStats stats; + stats.pool_size = m_pool.size(); + stats.borrowed_size = m_borrowed.size(); + + return stats; + }; + + ~ConnectionPool() { + }; + + /** + * Borrow + * + * Borrow a connection for temporary use + * + * When done, either (a) call unborrow() to return it, or (b) (if it's bad) just let it go out of scope. This will cause it to automatically be replaced. 
+ * @retval a shared_ptr to the connection object + */ + std::shared_ptr borrow() { + std::unique_lock l(m_poolMutex); + + while((m_pool.size() + m_borrowed.size()) < m_minPoolSize) { + std::shared_ptr conn = m_factory->create(); + m_pool.push_back(conn); + } + + if(m_pool.size()==0){ + + if ((m_pool.size() + m_borrowed.size()) <= m_maxPoolSize) { + try { + std::shared_ptr conn = m_factory->create(); + m_borrowed.insert(conn); + return std::static_pointer_cast(conn); + } catch (std::exception &e) { + throw ConnectionUnavailable(); + } + } else { + for(auto it = m_borrowed.begin(); it != m_borrowed.end(); ++it){ + if((*it).unique()) { + // This connection has been abandoned! Destroy it and create a new connection + try { + // If we are able to create a new connection, return it + _DEBUG("Creating new connection to replace discarded connection"); + std::shared_ptr conn = m_factory->create(); + m_borrowed.erase(it); + m_borrowed.insert(conn); + return std::static_pointer_cast(conn); + } catch(std::exception& e) { + // Error creating a replacement connection + throw ConnectionUnavailable(); + } + } + } + // Nothing available + throw ConnectionUnavailable(); + } + } + + // Take one off the front + std::shared_ptr conn = m_pool.front(); + m_pool.pop_front(); + // Add it to the borrowed list + m_borrowed.insert(conn); + return std::static_pointer_cast(conn); + }; + + /** + * Unborrow a connection + * + * Only call this if you are returning a working connection. If the connection was bad, just let it go out of scope (so the connection manager can replace it). 
+ * @param the connection + */ + void unborrow(std::shared_ptr conn) { + // Lock + std::unique_lock lock(m_poolMutex); + m_borrowed.erase(conn); + if ((m_pool.size() + m_borrowed.size()) < m_maxPoolSize) { + m_pool.push_back(conn); + } + }; +protected: + size_t m_maxPoolSize; + size_t m_minPoolSize; + std::shared_ptr m_factory; + std::deque > m_pool; + std::set > m_borrowed; + std::mutex m_poolMutex; +}; + +} + +#endif \ No newline at end of file diff --git a/controller/DB.cpp b/controller/DB.cpp index 8a86ae376..27578bf77 100644 --- a/controller/DB.cpp +++ b/controller/DB.cpp @@ -49,6 +49,9 @@ void DB::initNetwork(nlohmann::json &network) }}; } if (!network.count("dns")) network["dns"] = nlohmann::json::array(); + if (!network.count("ssoEnabled")) network["ssoEnabled"] = false; + if (!network.count("clientId")) network["clientId"] = ""; + if (!network.count("authorizationEndpoint")) network["authorizationEndpoint"] = ""; network["objtype"] = "network"; } @@ -56,6 +59,7 @@ void DB::initNetwork(nlohmann::json &network) void DB::initMember(nlohmann::json &member) { if (!member.count("authorized")) member["authorized"] = false; + if (!member.count("ssoExempt")) member["ssoExempt"] = false; if (!member.count("ipAssignments")) member["ipAssignments"] = nlohmann::json::array(); if (!member.count("activeBridge")) member["activeBridge"] = false; if (!member.count("tags")) member["tags"] = nlohmann::json::array(); @@ -67,6 +71,7 @@ void DB::initMember(nlohmann::json &member) if (!member.count("lastAuthorizedTime")) member["lastAuthorizedTime"] = 0ULL; if (!member.count("lastAuthorizedCredentialType")) member["lastAuthorizedCredentialType"] = nlohmann::json(); if (!member.count("lastAuthorizedCredential")) member["lastAuthorizedCredential"] = nlohmann::json(); + if (!member.count("authenticationExpiryTime")) member["authenticationExpiryTime"] = 0LL; if (!member.count("vMajor")) member["vMajor"] = -1; if (!member.count("vMinor")) member["vMinor"] = -1; if 
(!member.count("vRev")) member["vRev"] = -1; @@ -92,6 +97,8 @@ void DB::cleanMember(nlohmann::json &member) member.erase("recentLog"); member.erase("lastModified"); member.erase("lastRequestMetaData"); + member.erase("authenticationURL"); // computed + member.erase("authenticationClientID"); // computed } DB::DB() {} @@ -174,8 +181,9 @@ bool DB::get(const uint64_t networkId,nlohmann::json &network,std::vector l2(nw->lock); network = nw->config; - for(auto m=nw->members.begin();m!=nw->members.end();++m) + for(auto m=nw->members.begin();m!=nw->members.end();++m) { members.push_back(m->second); + } } return true; } @@ -188,6 +196,14 @@ void DB::networks(std::set &networks) networks.insert(n->first); } +void DB::networkMemberSSOHasExpired(uint64_t nwid, int64_t now) { + std::lock_guard l(_networks_l); + auto nw = _networks.find(nwid); + if (nw != _networks.end()) { + nw->second->mostRecentDeauthTime = now; + } +} + void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool notifyListeners) { uint64_t memberId = 0; diff --git a/controller/DB.hpp b/controller/DB.hpp index 6a6906eff..0a5d784c2 100644 --- a/controller/DB.hpp +++ b/controller/DB.hpp @@ -31,9 +31,12 @@ #include #include #include +#include #include "../ext/json/json.hpp" +#define ZT_MEMBER_AUTH_TIMEOUT_NOTIFY_BEFORE 25000 + namespace ZeroTier { @@ -101,12 +104,13 @@ public: } virtual bool save(nlohmann::json &record,bool notifyListeners) = 0; - virtual void eraseNetwork(const uint64_t networkId) = 0; virtual void eraseMember(const uint64_t networkId,const uint64_t memberId) = 0; - virtual void nodeIsOnline(const uint64_t networkId,const uint64_t memberId,const InetAddress &physicalAddress) = 0; + virtual std::string getSSOAuthURL(const nlohmann::json &member, const std::string &redirectURL) { return ""; } + virtual void networkMemberSSOHasExpired(uint64_t nwid, int64_t ts); + inline void addListener(DB::ChangeListener *const listener) { std::lock_guard l(_changeListeners_l); @@ -148,8 
+152,8 @@ protected: std::mutex lock; }; - void _memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool notifyListeners); - void _networkChanged(nlohmann::json &old,nlohmann::json &networkConfig,bool notifyListeners); + virtual void _memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool notifyListeners); + virtual void _networkChanged(nlohmann::json &old,nlohmann::json &networkConfig,bool notifyListeners); void _fillSummaryInfo(const std::shared_ptr<_Network> &nw,NetworkSummaryInfo &info); std::vector _changeListeners; diff --git a/controller/DBMirrorSet.cpp b/controller/DBMirrorSet.cpp index f19741bb3..de7ebefe1 100644 --- a/controller/DBMirrorSet.cpp +++ b/controller/DBMirrorSet.cpp @@ -36,7 +36,7 @@ DBMirrorSet::DBMirrorSet(DB::ChangeListener *listener) : } for(auto db=dbs.begin();db!=dbs.end();++db) { - (*db)->each([this,&dbs,&db](uint64_t networkId,const nlohmann::json &network,uint64_t memberId,const nlohmann::json &member) { + (*db)->each([&dbs,&db](uint64_t networkId,const nlohmann::json &network,uint64_t memberId,const nlohmann::json &member) { try { if (network.is_object()) { if (memberId == 0) { @@ -125,6 +125,26 @@ bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network,std::vect return false; } +std::string DBMirrorSet::getSSOAuthURL(const nlohmann::json &member, const std::string &redirectURL) +{ + std::lock_guard l(_dbs_l); + for(auto d=_dbs.begin();d!=_dbs.end();++d) { + std::string url = (*d)->getSSOAuthURL(member, redirectURL); + if (!url.empty()) { + return url; + } + } + return ""; +} + +void DBMirrorSet::networkMemberSSOHasExpired(uint64_t nwid, int64_t ts) +{ + std::lock_guard l(_dbs_l); + for(auto d=_dbs.begin();d!=_dbs.end();++d) { + (*d)->networkMemberSSOHasExpired(nwid, ts); + } +} + void DBMirrorSet::networks(std::set &networks) { std::lock_guard l(_dbs_l); @@ -228,4 +248,47 @@ void DBMirrorSet::onNetworkMemberDeauthorize(const void *db,uint64_t networkId,u 
_listener->onNetworkMemberDeauthorize(this,networkId,memberId); } +void DBMirrorSet::membersExpiring(std::set< std::pair > &soon, std::set< std::pair > &expired) +{ + std::unique_lock l(_membersExpiringSoon_l); + int64_t now = OSUtils::now(); + for(auto next=_membersExpiringSoon.begin();next!=_membersExpiringSoon.end();) { + if (next->first > now) { + const uint64_t nwid = next->second.first; + const uint64_t memberId = next->second.second; + nlohmann::json network, member; + if (this->get(nwid, network, memberId, member)) { + try { + const bool authorized = member["authorized"]; + const bool ssoExempt = member["ssoExempt"]; + const int64_t authenticationExpiryTime = member["authenticationExpiryTime"]; + if ((authenticationExpiryTime == next->first)&&(authorized)&&(!ssoExempt)) { + if ((authenticationExpiryTime - now) > ZT_MEMBER_AUTH_TIMEOUT_NOTIFY_BEFORE) { + // Stop when we get to entries too far in the future. + break; + } else { + const bool ssoEnabled = network["ssoEnabled"]; + if (ssoEnabled) + soon.insert(std::pair(nwid, memberId)); + } + } else { + // Obsolete entry, no longer authorized, or SSO exempt. + } + } catch ( ... ) { + // Invalid member object, erase. + } + } else { + // Not found. 
+ } + } + _membersExpiringSoon.erase(next++); + } +} + +void DBMirrorSet::memberWillExpire(int64_t expTime, uint64_t nwid, uint64_t memberId) +{ + std::unique_lock l(_membersExpiringSoon_l); + _membersExpiringSoon.insert(std::pair< int64_t, std::pair< uint64_t, uint64_t > >(expTime, std::pair< uint64_t, uint64_t >(nwid, memberId))); +} + } // namespace ZeroTier diff --git a/controller/DBMirrorSet.hpp b/controller/DBMirrorSet.hpp index 967cd9360..4ce962740 100644 --- a/controller/DBMirrorSet.hpp +++ b/controller/DBMirrorSet.hpp @@ -51,6 +51,9 @@ public: virtual void onNetworkMemberUpdate(const void *db,uint64_t networkId,uint64_t memberId,const nlohmann::json &member); virtual void onNetworkMemberDeauthorize(const void *db,uint64_t networkId,uint64_t memberId); + std::string getSSOAuthURL(const nlohmann::json &member, const std::string &redirectURL); + void networkMemberSSOHasExpired(uint64_t nwid, int64_t ts); + inline void addDB(const std::shared_ptr &db) { db->addListener(this); @@ -58,12 +61,17 @@ public: _dbs.push_back(db); } + void membersExpiring(std::set< std::pair > &soon, std::set< std::pair > &expired); + void memberWillExpire(int64_t expTime, uint64_t nwid, uint64_t memberId); + private: DB::ChangeListener *const _listener; std::atomic_bool _running; std::thread _syncCheckerThread; std::vector< std::shared_ptr< DB > > _dbs; mutable std::mutex _dbs_l; + std::set< std::pair< int64_t, std::pair > > _membersExpiringSoon; + mutable std::mutex _membersExpiringSoon_l; }; } // namespace ZeroTier diff --git a/controller/EmbeddedNetworkController.cpp b/controller/EmbeddedNetworkController.cpp index 1b2da4c56..ea70cb3ae 100644 --- a/controller/EmbeddedNetworkController.cpp +++ b/controller/EmbeddedNetworkController.cpp @@ -28,6 +28,9 @@ #include #include #include +#include +#include +#include #include "../include/ZeroTierOne.h" #include "../version.h" @@ -60,6 +63,29 @@ namespace ZeroTier { namespace { +std::string url_encode(const std::string &value) { + 
std::ostringstream escaped; + escaped.fill('0'); + escaped << std::hex; + + for (std::string::const_iterator i = value.begin(), n = value.end(); i != n; ++i) { + std::string::value_type c = (*i); + + // Keep alphanumeric and other accepted characters intact + if (isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') { + escaped << c; + continue; + } + + // Any other characters are percent-encoded + escaped << std::uppercase; + escaped << '%' << std::setw(2) << int((unsigned char) c); + escaped << std::nouppercase; + } + + return escaped.str(); +} + static json _renderRule(ZT_VirtualNetworkRule &rule) { char tmp[128]; @@ -476,6 +502,10 @@ EmbeddedNetworkController::~EmbeddedNetworkController() t->join(); } +void EmbeddedNetworkController::setSSORedirectURL(const std::string &url) { + _ssoRedirectURL = url_encode(url); +} + void EmbeddedNetworkController::init(const Identity &signingId,Sender *sender) { char tmp[64]; @@ -688,8 +718,10 @@ unsigned int EmbeddedNetworkController::handleControlPlaneHttpPOST( DB::initMember(member); try { - if (b.count("activeBridge")) member["activeBridge"] = OSUtils::jsonBool(b["activeBridge"],false); - if (b.count("noAutoAssignIps")) member["noAutoAssignIps"] = OSUtils::jsonBool(b["noAutoAssignIps"],false); + if (b.count("activeBridge")) member["activeBridge"] = OSUtils::jsonBool(b["activeBridge"], false); + if (b.count("noAutoAssignIps")) member["noAutoAssignIps"] = OSUtils::jsonBool(b["noAutoAssignIps"], false); + if (b.count("authenticationExpiryTime")) member["authenticationExpiryTime"] = (uint64_t)OSUtils::jsonInt(b["authenticationExpiryTime"], 0ULL); + if (b.count("authenticationURL")) member["authenticationURL"] = OSUtils::jsonString(b["authenticationURL"], ""); if (b.count("remoteTraceTarget")) { const std::string rtt(OSUtils::jsonString(b["remoteTraceTarget"],"")); @@ -1248,7 +1280,7 @@ void EmbeddedNetworkController::_request( Utils::hex(nwid,nwids); _db.get(nwid,network,identity.address().toInt(),member,ns); if 
((!network.is_object())||(network.empty())) { - _sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_OBJECT_NOT_FOUND); + _sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_OBJECT_NOT_FOUND, nullptr, 0); return; } const bool newMember = ((!member.is_object())||(member.empty())); @@ -1262,11 +1294,11 @@ void EmbeddedNetworkController::_request( // known member. try { if (Identity(haveIdStr.c_str()) != identity) { - _sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_ACCESS_DENIED); + _sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_ACCESS_DENIED, nullptr, 0); return; } } catch ( ... ) { - _sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_ACCESS_DENIED); + _sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_ACCESS_DENIED, nullptr, 0); return; } } else { @@ -1323,6 +1355,38 @@ void EmbeddedNetworkController::_request( member["lastAuthorizedCredential"] = autoAuthCredential; } + // Should we check SSO Stuff? + // If network is configured with SSO, and the member is not marked exempt: yes + // Otherwise no, we use standard auth logic. 
+ bool networkSSOEnabled = OSUtils::jsonBool(network["ssoEnabled"], false); + bool memberSSOExempt = OSUtils::jsonBool(member["ssoExempt"], false); + std::string authenticationURL; + if (networkSSOEnabled && !memberSSOExempt) { + authenticationURL = _db.getSSOAuthURL(member, _ssoRedirectURL); + std::string memberId = member["id"]; + //fprintf(stderr, "ssoEnabled && !ssoExempt %s-%s\n", nwids, memberId.c_str()); + uint64_t authenticationExpiryTime = (int64_t)OSUtils::jsonInt(member["authenticationExpiryTime"], 0); + //fprintf(stderr, "authExpiryTime: %lld\n", authenticationExpiryTime); + if (authenticationExpiryTime < now) { + if (!authenticationURL.empty()) { + _db.networkMemberSSOHasExpired(nwid, now); + onNetworkMemberDeauthorize(&_db, nwid, identity.address().toInt()); + + Dictionary<3072> authInfo; + authInfo.add("aU", authenticationURL.c_str()); + //fprintf(stderr, "sending auth URL: %s\n", authenticationURL.c_str()); + + DB::cleanMember(member); + _db.save(member,true); + + _sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_AUTHENTICATION_REQUIRED, authInfo.data(), authInfo.sizeBytes()); + return; + } + } else if (authorized) { + _db.memberWillExpire(authenticationExpiryTime, nwid, identity.address().toInt()); + } + } + if (authorized) { // Update version info and meta-data if authorized and if this is a genuine request if (requestPacketId) { @@ -1347,17 +1411,18 @@ void EmbeddedNetworkController::_request( ms.lastRequestMetaData = metaData; ms.identity = identity; } - } + } } else { + // If they are not authorized, STOP! 
DB::cleanMember(member); _db.save(member,true); - _sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_ACCESS_DENIED); + _sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_ACCESS_DENIED, nullptr, 0); return; } // ------------------------------------------------------------------------- - // If we made it this far, they are authorized. + // If we made it this far, they are authorized (and authenticated). // ------------------------------------------------------------------------- int64_t credentialtmd = ZT_NETWORKCONFIG_DEFAULT_CREDENTIAL_TIME_MAX_MAX_DELTA; @@ -1386,7 +1451,11 @@ void EmbeddedNetworkController::_request( nc->mtu = std::max(std::min((unsigned int)OSUtils::jsonInt(network["mtu"],ZT_DEFAULT_MTU),(unsigned int)ZT_MAX_MTU),(unsigned int)ZT_MIN_MTU); nc->multicastLimit = (unsigned int)OSUtils::jsonInt(network["multicastLimit"],32ULL); - + nc->ssoEnabled = OSUtils::jsonBool(network["ssoEnabled"], false); + nc->authenticationExpiryTime = OSUtils::jsonInt(member["authenticationExpiryTime"], 0LL); + if (!authenticationURL.empty()) + Utils::scopy(nc->authenticationURL, sizeof(nc->authenticationURL), authenticationURL.c_str()); + std::string rtt(OSUtils::jsonString(member["remoteTraceTarget"],"")); if (rtt.length() == 10) { nc->remoteTraceTarget = Address(Utils::hexStrToU64(rtt.c_str())); @@ -1415,6 +1484,8 @@ void EmbeddedNetworkController::_request( json &memberTags = member["tags"]; json &dns = network["dns"]; + //fprintf(stderr, "IP Assignment Pools for Network %s: %s\n", nwids, OSUtils::jsonDump(ipAssignmentPools, 2).c_str()); + if (metaData.getUI(ZT_NETWORKCONFIG_REQUEST_METADATA_KEY_RULES_ENGINE_REV,0) <= 0) { // Old versions with no rules engine support get an allow everything rule. 
// Since rules are enforced bidirectionally, newer versions *will* still @@ -1730,11 +1801,11 @@ void EmbeddedNetworkController::_request( nc->certificateOfOwnershipCount = 1; } - CertificateOfMembership com(now,credentialtmd,nwid,identity.address()); + CertificateOfMembership com(now,credentialtmd,nwid,identity); if (com.sign(_signingId)) { nc->com = com; } else { - _sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_INTERNAL_SERVER_ERROR); + _sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_INTERNAL_SERVER_ERROR, nullptr, 0); return; } @@ -1753,17 +1824,44 @@ void EmbeddedNetworkController::_startThreads() _threads.emplace_back([this]() { for(;;) { _RQEntry *qe = (_RQEntry *)0; - if (!_queue.get(qe)) + auto timedWaitResult = _queue.get(qe, 1000); + if (timedWaitResult == BlockingQueue<_RQEntry *>::STOP) { break; - try { + } else if (timedWaitResult == BlockingQueue<_RQEntry *>::OK) { if (qe) { - _request(qe->nwid,qe->fromAddr,qe->requestPacketId,qe->identity,qe->metaData); + try { + _request(qe->nwid,qe->fromAddr,qe->requestPacketId,qe->identity,qe->metaData); + } catch (std::exception &e) { + fprintf(stderr,"ERROR: exception in controller request handling thread: %s" ZT_EOL_S,e.what()); + } catch ( ... ) { + fprintf(stderr,"ERROR: exception in controller request handling thread: unknown exception" ZT_EOL_S); + } delete qe; } - } catch (std::exception &e) { - fprintf(stderr,"ERROR: exception in controller request handling thread: %s" ZT_EOL_S,e.what()); - } catch ( ... 
) { - fprintf(stderr,"ERROR: exception in controller request handling thread: unknown exception" ZT_EOL_S); + } + + std::set< std::pair > soon; + std::set< std::pair > expired; + _db.membersExpiring(soon, expired); + + for(auto s=soon.begin();s!=soon.end();++s) { + Identity identity; + Dictionary lastMetaData; + { + std::unique_lock ll(_memberStatus_l); + auto ms = _memberStatus.find(_MemberStatusKey(s->first, s->second)); + if (ms != _memberStatus.end()) { + lastMetaData = ms->second.lastRequestMetaData; + identity = ms->second.identity; + } + } + if (identity) { + request(s->first,InetAddress(),0,identity,lastMetaData); + } + } + + for(auto e=expired.begin();e!=expired.end();++e) { + onNetworkMemberDeauthorize(nullptr, e->first, e->second); } } }); diff --git a/controller/EmbeddedNetworkController.hpp b/controller/EmbeddedNetworkController.hpp index e499dd647..fc18cbded 100644 --- a/controller/EmbeddedNetworkController.hpp +++ b/controller/EmbeddedNetworkController.hpp @@ -57,6 +57,8 @@ public: virtual void init(const Identity &signingId,Sender *sender); + void setSSORedirectURL(const std::string &url); + virtual void request( uint64_t nwid, const InetAddress &fromAddr, @@ -151,6 +153,7 @@ private: std::mutex _memberStatus_l; RedisConfig *_rc; + std::string _ssoRedirectURL; }; } // namespace ZeroTier diff --git a/controller/FileDB.cpp b/controller/FileDB.cpp index bf573f3bf..d454e93e1 100644 --- a/controller/FileDB.cpp +++ b/controller/FileDB.cpp @@ -140,8 +140,7 @@ void FileDB::eraseNetwork(const uint64_t networkId) void FileDB::eraseMember(const uint64_t networkId,const uint64_t memberId) { nlohmann::json network,member,nullJson; - get(networkId,network); - get(memberId,member); + get(networkId,network,memberId,member); char p[4096]; OSUtils::ztsnprintf(p,sizeof(p),"%s" ZT_PATH_SEPARATOR_S "%.16llx" ZT_PATH_SEPARATOR_S "member" ZT_PATH_SEPARATOR_S "%.10llx.json",_networksPath.c_str(),networkId,memberId); OSUtils::rm(p); diff --git a/controller/PostgreSQL.cpp 
b/controller/PostgreSQL.cpp index a031c1ff0..5f71a3699 100644 --- a/controller/PostgreSQL.cpp +++ b/controller/PostgreSQL.cpp @@ -16,6 +16,7 @@ #ifdef ZT_CONTROLLER_USE_LIBPQ #include "../node/Constants.hpp" +#include "../node/SHA512.hpp" #include "EmbeddedNetworkController.hpp" #include "../version.h" #include "Redis.hpp" @@ -23,16 +24,20 @@ #include #include #include +#include +// #define ZT_TRACE 1 + using json = nlohmann::json; namespace { -static const int DB_MINIMUM_VERSION = 5; +static const int DB_MINIMUM_VERSION = 20; static const char *_timestr() { + time_t t = time(0); char *ts = ctime(&t); char *p = ts; @@ -65,16 +70,73 @@ std::string join(const std::vector &elements, const char * const se } */ +std::vector split(std::string str, char delim){ + std::istringstream iss(str); + std::vector tokens; + std::string item; + while(std::getline(iss, item, delim)) { + tokens.push_back(item); + } + return tokens; +} + + } // anonymous namespace using namespace ZeroTier; + +MemberNotificationReceiver::MemberNotificationReceiver(PostgreSQL *p, pqxx::connection &c, const std::string &channel) + : pqxx::notification_receiver(c, channel) + , _psql(p) +{ + fprintf(stderr, "initialize MemberNotificaitonReceiver\n"); +} + + +void MemberNotificationReceiver::operator() (const std::string &payload, int packend_pid) { + fprintf(stderr, "Member Notification received: %s\n", payload.c_str()); + json tmp(json::parse(payload)); + json &ov = tmp["old_val"]; + json &nv = tmp["new_val"]; + json oldConfig, newConfig; + if (ov.is_object()) oldConfig = ov; + if (nv.is_object()) newConfig = nv; + if (oldConfig.is_object() || newConfig.is_object()) { + _psql->_memberChanged(oldConfig,newConfig,(_psql->_ready>=2)); + fprintf(stderr, "payload sent\n"); + } +} + + +NetworkNotificationReceiver::NetworkNotificationReceiver(PostgreSQL *p, pqxx::connection &c, const std::string &channel) + : pqxx::notification_receiver(c, channel) + , _psql(p) +{ + fprintf(stderr, "initialize 
NetworkNotificationReceiver\n"); +} + +void NetworkNotificationReceiver::operator() (const std::string &payload, int packend_pid) { + fprintf(stderr, "Network Notificaiton received: %s\n", payload.c_str()); + json tmp(json::parse(payload)); + json &ov = tmp["old_val"]; + json &nv = tmp["new_val"]; + json oldConfig, newConfig; + if (ov.is_object()) oldConfig = ov; + if (nv.is_object()) newConfig = nv; + if (oldConfig.is_object() || newConfig.is_object()) { + _psql->_networkChanged(oldConfig,newConfig,(_psql->_ready>=2)); + fprintf(stderr, "payload sent\n"); + } +} + using Attrs = std::vector>; using Item = std::pair; using ItemStream = std::vector; PostgreSQL::PostgreSQL(const Identity &myId, const char *path, int listenPort, RedisConfig *rc) : DB() + , _pool() , _myId(myId) , _myAddress(myId.address()) , _ready(0) @@ -88,34 +150,35 @@ PostgreSQL::PostgreSQL(const Identity &myId, const char *path, int listenPort, R { char myAddress[64]; _myAddressStr = myId.address().toString(myAddress); - _connString = std::string(path) + " application_name=controller_" + _myAddressStr; - - // Database Schema Version Check - PGconn *conn = getPgConn(); - if (PQstatus(conn) != CONNECTION_OK) { - fprintf(stderr, "Bad Database Connection: %s", PQerrorMessage(conn)); - exit(1); + _connString = std::string(path); + auto f = std::make_shared(_connString); + _pool = std::make_shared >( + 15, 5, std::static_pointer_cast(f)); + + memset(_ssoPsk, 0, sizeof(_ssoPsk)); + char *const ssoPskHex = getenv("ZT_SSO_PSK"); +#ifdef ZT_TRACE + fprintf(stderr, "ZT_SSO_PSK: %s\n", ssoPskHex); +#endif + if (ssoPskHex) { + // SECURITY: note that ssoPskHex will always be null-terminated if libc actually + // returns something non-NULL. If the hex encodes something shorter than 48 bytes, + // it will be padded at the end with zeroes. If longer, it'll be truncated. 
+ Utils::unhex(ssoPskHex, _ssoPsk, sizeof(_ssoPsk)); } - PGresult *res = PQexec(conn, "SELECT version FROM ztc_database"); - if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Error determining database version"); - exit(1); - } + auto c = _pool->borrow(); + pqxx::work txn{*c->c}; - if (PQntuples(res) != 1) { - fprintf(stderr, "Invalid number of db version tuples returned."); - exit(1); - } - - int dbVersion = std::stoi(PQgetvalue(res, 0, 0)); + pqxx::row r{txn.exec1("SELECT version FROM ztc_database")}; + int dbVersion = r[0].as(); + txn.commit(); if (dbVersion < DB_MINIMUM_VERSION) { fprintf(stderr, "Central database schema version too low. This controller version requires a minimum schema version of %d. Please upgrade your Central instance", DB_MINIMUM_VERSION); exit(1); } - PQclear(res); - res = NULL; + _pool->unborrow(c); if (_rc != NULL) { sw::redis::ConnectionOptions opts; @@ -139,11 +202,8 @@ PostgreSQL::PostgreSQL(const Identity &myId, const char *path, int listenPort, R fprintf(stderr, "[%s] NOTICE: %.10llx controller PostgreSQL waiting for initial data download..." 
ZT_EOL_S, ::_timestr(), (unsigned long long)_myAddress.toInt()); _waitNoticePrinted = true; - initializeNetworks(conn); - initializeMembers(conn); - - PQfinish(conn); - conn = NULL; + initializeNetworks(); + initializeMembers(); _heartbeatThread = std::thread(&PostgreSQL::heartbeat, this); _membersDbWatcher = std::thread(&PostgreSQL::membersDbWatcher, this); @@ -188,10 +248,13 @@ bool PostgreSQL::save(nlohmann::json &record,bool notifyListeners) { bool modified = false; try { - if (!record.is_object()) + if (!record.is_object()) { + fprintf(stderr, "record is not an object?!?\n"); return false; + } const std::string objtype = record["objtype"]; if (objtype == "network") { + //fprintf(stderr, "network save\n"); const uint64_t nwid = OSUtils::jsonIntHex(record["id"],0ULL); if (nwid) { nlohmann::json old; @@ -203,17 +266,25 @@ bool PostgreSQL::save(nlohmann::json &record,bool notifyListeners) } } } else if (objtype == "member") { + std::string networkId = record["nwid"]; + std::string memberId = record["id"]; const uint64_t nwid = OSUtils::jsonIntHex(record["nwid"],0ULL); const uint64_t id = OSUtils::jsonIntHex(record["id"],0ULL); + //fprintf(stderr, "member save %s-%s\n", networkId.c_str(), memberId.c_str()); if ((id)&&(nwid)) { nlohmann::json network,old; get(nwid,network,id,old); if ((!old.is_object())||(!_compareRecords(old,record))) { + //fprintf(stderr, "commit queue post\n"); record["revision"] = OSUtils::jsonInt(record["revision"],0ULL) + 1ULL; _commitQueue.post(std::pair(record,notifyListeners)); modified = true; + } else { + //fprintf(stderr, "no change\n"); } } + } else { + fprintf(stderr, "uhh waaat\n"); } } catch (std::exception &e) { fprintf(stderr, "Error on PostgreSQL::save: %s\n", e.what()); @@ -225,6 +296,7 @@ bool PostgreSQL::save(nlohmann::json &record,bool notifyListeners) void PostgreSQL::eraseNetwork(const uint64_t networkId) { + fprintf(stderr, "PostgreSQL::eraseNetwork\n"); char tmp2[24]; waitForReady(); Utils::hex(networkId, tmp2); @@ -239,6 
+311,7 @@ void PostgreSQL::eraseNetwork(const uint64_t networkId) void PostgreSQL::eraseMember(const uint64_t networkId, const uint64_t memberId) { + fprintf(stderr, "PostgreSQL::eraseMember\n"); char tmp2[24]; waitForReady(); std::pair tmp, nw; @@ -263,196 +336,219 @@ void PostgreSQL::nodeIsOnline(const uint64_t networkId, const uint64_t memberId, } } -void PostgreSQL::initializeNetworks(PGconn *conn) +std::string PostgreSQL::getSSOAuthURL(const nlohmann::json &member, const std::string &redirectURL) +{ + // NONCE is just a random character string. no semantic meaning + // state = HMAC SHA384 of Nonce based on shared sso key + // + // need nonce timeout in database? make sure it's used within X time + // X is 5 minutes for now. Make configurable later? + // + // how do we tell when a nonce is used? if auth_expiration_time is set + std::string networkId = member["nwid"]; + std::string memberId = member["id"]; + char authenticationURL[4096] = {0}; + + //fprintf(stderr, "PostgreSQL::updateMemberOnLoad: %s-%s\n", networkId.c_str(), memberId.c_str()); + bool have_auth = false; + try { + auto c = _pool->borrow(); + pqxx::work w(*c->c); + + char nonceBytes[16] = {0}; + std::string nonce = ""; + + // check if the member exists first. + pqxx::row count = w.exec_params1("SELECT count(id) FROM ztc_member WHERE id = $1 AND network_id = $2 AND deleted = false", memberId, networkId); + if (count[0].as() == 1) { + // find an unused nonce, if one exists. + pqxx::result r = w.exec_params("SELECT nonce FROM ztc_sso_expiry " + "WHERE network_id = $1 AND member_id = $2 " + "AND authentication_expiry_time IS NULL AND ((NOW() AT TIME ZONE 'UTC') <= nonce_expiration)", + networkId, memberId); + + if (r.size() == 1) { + // we have an existing nonce. 
Use it + nonce = r.at(0)[0].as(); + Utils::unhex(nonce.c_str(), nonceBytes, sizeof(nonceBytes)); + } else if (r.empty()) { + // create a nonce + Utils::getSecureRandom(nonceBytes, 16); + char nonceBuf[64] = {0}; + Utils::hex(nonceBytes, sizeof(nonceBytes), nonceBuf); + nonce = std::string(nonceBuf); + + pqxx::result ir = w.exec_params0("INSERT INTO ztc_sso_expiry " + "(nonce, nonce_expiration, network_id, member_id) VALUES " + "($1, TO_TIMESTAMP($2::double precision/1000), $3, $4)", + nonce, OSUtils::now() + 300000, networkId, memberId); + + w.commit(); + } else { + // > 1 ?!? Thats an error! + fprintf(stderr, "> 1 unused nonce!\n"); + exit(6); + } + + r = w.exec_params("SELECT org.client_id, org.authorization_endpoint " + "FROM ztc_network AS nw, ztc_org AS org " + "WHERE nw.id = $1 AND nw.sso_enabled = true AND org.owner_id = nw.owner_id", networkId); + + std::string client_id = ""; + std::string authorization_endpoint = ""; + + if (r.size() == 1) { + client_id = r.at(0)[0].as(); + authorization_endpoint = r.at(0)[1].as(); + } else if (r.size() > 1) { + fprintf(stderr, "ERROR: More than one auth endpoint for an organization?!?!? NetworkID: %s\n", networkId.c_str()); + } else { + fprintf(stderr, "No client or auth endpoint?!?\n"); + } + + // no catch all else because we don't actually care if no records exist here. just continue as normal. 
+ if ((!client_id.empty())&&(!authorization_endpoint.empty())) { + have_auth = true; + + uint8_t state[48]; + HMACSHA384(_ssoPsk, nonceBytes, sizeof(nonceBytes), state); + char state_hex[256]; + Utils::hex(state, 48, state_hex); + + OSUtils::ztsnprintf(authenticationURL, sizeof(authenticationURL), + "%s?response_type=id_token&response_mode=form_post&scope=openid+email+profile&redirect_uri=%s&nonce=%s&state=%s&client_id=%s", + authorization_endpoint.c_str(), + redirectURL.c_str(), + nonce.c_str(), + state_hex, + client_id.c_str()); + } else { + fprintf(stderr, "client_id: %s\nauthorization_endpoint: %s\n", client_id.c_str(), authorization_endpoint.c_str()); + } + } + + _pool->unborrow(c); + } catch (std::exception &e) { + fprintf(stderr, "ERROR: Error updating member on load: %s\n", e.what()); + } + + return std::string(authenticationURL); +} + +void PostgreSQL::initializeNetworks() { try { - if (PQstatus(conn) != CONNECTION_OK) { - fprintf(stderr, "Bad Database Connection: %s", PQerrorMessage(conn)); - exit(1); - } - std::string setKey = "networks:{" + _myAddressStr + "}"; - - // if (_rc != NULL) { - // try { - // if (_rc->clusterMode) { - // _cluster->del(setKey); - // } else { - // _redis->del(setKey); - // } - // } catch (sw::redis::Error &e) { - // // del can throw an error if the key doesn't exist - // // swallow it and move along - // } - // } - std::unordered_set networkSet; - - const char *params[1] = { - _myAddressStr.c_str() - }; - fprintf(stderr, "Initializing Networks...\n"); - PGresult *res = PQexecParams(conn, "SELECT id, EXTRACT(EPOCH FROM creation_time AT TIME ZONE 'UTC')*1000, capabilities, " - "enable_broadcast, EXTRACT(EPOCH FROM last_modified AT TIME ZONE 'UTC')*1000, mtu, multicast_limit, name, private, remote_trace_level, " - "remote_trace_target, revision, rules, tags, v4_assign_mode, v6_assign_mode FROM ztc_network " - "WHERE deleted = false AND controller_id = $1", - 1, - NULL, - params, - NULL, - NULL, - 0); + char qbuf[2048] = {0}; + 
sprintf(qbuf, "SELECT n.id, (EXTRACT(EPOCH FROM n.creation_time AT TIME ZONE 'UTC')*1000)::bigint as creation_time, n.capabilities, " + "n.enable_broadcast, (EXTRACT(EPOCH FROM n.last_modified AT TIME ZONE 'UTC')*1000)::bigint AS last_modified, n.mtu, n.multicast_limit, n.name, n.private, n.remote_trace_level, " + "n.remote_trace_target, n.revision, n.rules, n.tags, n.v4_assign_mode, n.v6_assign_mode, n.sso_enabled, (CASE WHEN n.sso_enabled THEN o.client_id ELSE NULL END) as client_id, " + "(CASE WHEN n.sso_enabled THEN o.authorization_endpoint ELSE NULL END) as authorization_endpoint, d.domain, d.servers, " + "ARRAY(SELECT CONCAT(host(ip_range_start),'|', host(ip_range_end)) FROM ztc_network_assignment_pool WHERE network_id = n.id) AS assignment_pool, " + "ARRAY(SELECT CONCAT(host(address),'/',bits::text,'|',COALESCE(host(via), 'NULL'))FROM ztc_network_route WHERE network_id = n.id) AS routes " + "FROM ztc_network n " + "LEFT OUTER JOIN ztc_org o " + " ON o.owner_id = n.owner_id " + "LEFT OUTER JOIN ztc_network_dns d " + " ON d.network_id = n.id " + "WHERE deleted = false AND controller_id = '%s'", _myAddressStr.c_str()); + auto c = _pool->borrow(); + auto c2 = _pool->borrow(); + pqxx::work w{*c->c}; + + auto stream = pqxx::stream_from::query(w, qbuf); - if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Networks Initialization Failed: %s", PQerrorMessage(conn)); - PQclear(res); - exit(1); - } + std::tuple< + std::string // network ID + , std::optional // creationTime + , std::optional // capabilities + , std::optional // enableBroadcast + , std::optional // lastModified + , std::optional // mtu + , std::optional // multicastLimit + , std::optional // name + , bool // private + , std::optional // remoteTraceLevel + , std::optional // remoteTraceTarget + , std::optional // revision + , std::optional // rules + , std::optional // tags + , std::optional // v4AssignMode + , std::optional // v6AssignMode + , std::optional // ssoEnabled + , std::optional 
// clientId + , std::optional // authorizationEndpoint + , std::optional // domain + , std::optional // servers + , std::string // assignmentPoolString + , std::string // routeString + > row; + + uint64_t count = 0; + auto tmp = std::chrono::high_resolution_clock::now(); + uint64_t total = 0; + while (stream >> row) { + auto start = std::chrono::high_resolution_clock::now(); - int numRows = PQntuples(res); - for (int i = 0; i < numRows; ++i) { json empty; json config; - const char *nwidparam[1] = { - PQgetvalue(res, i, 0) - }; - std::string nwid = PQgetvalue(res, i, 0); + initNetwork(config); + + std::string nwid = std::get<0>(row); + std::optional creationTime = std::get<1>(row); + std::optional capabilities = std::get<2>(row); + std::optional enableBroadcast = std::get<3>(row); + std::optional lastModified = std::get<4>(row); + std::optional mtu = std::get<5>(row); + std::optional multicastLimit = std::get<6>(row); + std::optional name = std::get<7>(row); + bool isPrivate = std::get<8>(row); + std::optional remoteTraceLevel = std::get<9>(row); + std::optional remoteTraceTarget = std::get<10>(row); + std::optional revision = std::get<11>(row); + std::optional rules = std::get<12>(row); + std::optional tags = std::get<13>(row); + std::optional v4AssignMode = std::get<14>(row); + std::optional v6AssignMode = std::get<15>(row); + std::optional ssoEnabled = std::get<16>(row); + std::optional clientId = std::get<17>(row); + std::optional authorizationEndpoint = std::get<18>(row); + std::optional dnsDomain = std::get<19>(row); + std::optional dnsServers = std::get<20>(row); + std::string assignmentPoolString = std::get<21>(row); + std::string routesString = std::get<22>(row); - networkSet.insert(nwid); + config["id"] = nwid; + config["nwid"] = nwid; + config["creationTime"] = creationTime.value_or(0); + config["capabilities"] = json::parse(capabilities.value_or("[]")); + config["enableBroadcast"] = enableBroadcast.value_or(false); + config["lastModified"] = 
lastModified.value_or(0); + config["mtu"] = mtu.value_or(2800); + config["multicastLimit"] = multicastLimit.value_or(64); + config["name"] = name.value_or(""); + config["private"] = isPrivate; + config["remoteTraceLevel"] = remoteTraceLevel.value_or(0); + config["remoteTraceTarget"] = remoteTraceTarget.value_or(""); + config["revision"] = revision.value_or(0); + config["rules"] = json::parse(rules.value_or("[]")); + config["tags"] = json::parse(tags.value_or("[]")); + config["v4AssignMode"] = json::parse(v4AssignMode.value_or("{}")); + config["v6AssignMode"] = json::parse(v6AssignMode.value_or("{}")); + config["ssoEnabled"] = ssoEnabled.value_or(false); + config["objtype"] = "network"; + config["ipAssignmentPools"] = json::array(); + config["routes"] = json::array(); + config["clientId"] = clientId.value_or(""); + config["authorizationEndpoint"] = authorizationEndpoint.value_or(""); - config["id"] = nwid; - config["nwid"] = nwid; - try { - config["creationTime"] = std::stoull(PQgetvalue(res, i, 1)); - } catch (std::exception &e) { - config["creationTime"] = 0ULL; - //fprintf(stderr, "Error converting creation time: %s\n", PQgetvalue(res, i, 1)); - } - config["capabilities"] = json::parse(PQgetvalue(res, i, 2)); - config["enableBroadcast"] = (strcmp(PQgetvalue(res, i, 3),"t")==0); - try { - config["lastModified"] = std::stoull(PQgetvalue(res, i, 4)); - } catch (std::exception &e) { - config["lastModified"] = 0ULL; - //fprintf(stderr, "Error converting last modified: %s\n", PQgetvalue(res, i, 4)); - } - try { - config["mtu"] = std::stoi(PQgetvalue(res, i, 5)); - } catch (std::exception &e) { - config["mtu"] = 2800; - } - try { - config["multicastLimit"] = std::stoi(PQgetvalue(res, i, 6)); - } catch (std::exception &e) { - config["multicastLimit"] = 64; - } - config["name"] = PQgetvalue(res, i, 7); - config["private"] = (strcmp(PQgetvalue(res, i, 8),"t")==0); - try { - config["remoteTraceLevel"] = std::stoi(PQgetvalue(res, i, 9)); - } catch (std::exception &e) { - 
config["remoteTraceLevel"] = 0; - } - config["remoteTraceTarget"] = PQgetvalue(res, i, 10); - try { - config["revision"] = std::stoull(PQgetvalue(res, i, 11)); - } catch (std::exception &e) { - config["revision"] = 0ULL; - //fprintf(stderr, "Error converting revision: %s\n", PQgetvalue(res, i, 11)); - } - config["rules"] = json::parse(PQgetvalue(res, i, 12)); - config["tags"] = json::parse(PQgetvalue(res, i, 13)); - config["v4AssignMode"] = json::parse(PQgetvalue(res, i, 14)); - config["v6AssignMode"] = json::parse(PQgetvalue(res, i, 15)); - config["objtype"] = "network"; - config["ipAssignmentPools"] = json::array(); - config["routes"] = json::array(); - - PGresult *r2 = PQexecParams(conn, - "SELECT host(ip_range_start), host(ip_range_end) FROM ztc_network_assignment_pool WHERE network_id = $1", - 1, - NULL, - nwidparam, - NULL, - NULL, - 0); - - if (PQresultStatus(r2) != PGRES_TUPLES_OK) { - fprintf(stderr, "ERROR: Error retreiving IP pools for network: %s\n", PQresultErrorMessage(r2)); - PQclear(r2); - PQclear(res); - exit(1); - } - - int n = PQntuples(r2); - for (int j = 0; j < n; ++j) { - json ip; - ip["ipRangeStart"] = PQgetvalue(r2, j, 0); - ip["ipRangeEnd"] = PQgetvalue(r2, j, 1); - - config["ipAssignmentPools"].push_back(ip); - } - - PQclear(r2); - - r2 = PQexecParams(conn, - "SELECT host(address), bits, host(via) FROM ztc_network_route WHERE network_id = $1", - 1, - NULL, - nwidparam, - NULL, - NULL, - 0); - - if (PQresultStatus(r2) != PGRES_TUPLES_OK) { - fprintf(stderr, "ERROR: Error retreiving routes for network: %s\n", PQresultErrorMessage(r2)); - PQclear(r2); - PQclear(res); - exit(1); - } - - n = PQntuples(r2); - for (int j = 0; j < n; ++j) { - std::string addr = PQgetvalue(r2, j, 0); - std::string bits = PQgetvalue(r2, j, 1); - std::string via = PQgetvalue(r2, j, 2); - json route; - route["target"] = addr + "/" + bits; - - if (via == "NULL") { - route["via"] = nullptr; - } else { - route["via"] = via; - } - config["routes"].push_back(route); - } - 
- r2 = PQexecParams(conn, - "SELECT domain, servers FROM ztc_network_dns WHERE network_id = $1", - 1, - NULL, - nwidparam, - NULL, - NULL, - 0); - - if (PQresultStatus(r2) != PGRES_TUPLES_OK) { - fprintf(stderr, "ERROR: Error retrieving DNS settings for network: %s\n", PQresultErrorMessage(r2)); - PQclear(r2); - PQclear(res); - exit(1); - } - - n = PQntuples(r2); - if (n > 1) { - fprintf(stderr, "ERROR: invalid number of DNS configurations for network %s. Must be 0 or 1\n", nwid.c_str()); - } else if (n == 1) { + if (dnsDomain.has_value()) { + std::string serverList = dnsServers.value(); json obj; - std::string domain = PQgetvalue(r2, 0, 0); - std::string serverList = PQgetvalue(r2, 0, 1); auto servers = json::array(); if (serverList.rfind("{",0) != std::string::npos) { serverList = serverList.substr(1, serverList.size()-2); @@ -463,29 +559,56 @@ void PostgreSQL::initializeNetworks(PGconn *conn) servers.push_back(server); } } - obj["domain"] = domain; + obj["domain"] = dnsDomain.value(); obj["servers"] = servers; config["dns"] = obj; } - PQclear(r2); + config["ipAssignmentPools"] = json::array(); + if (assignmentPoolString != "{}") { + std::string tmp = assignmentPoolString.substr(1, assignmentPoolString.size()-2); + std::vector assignmentPools = split(tmp, ','); + for (auto it = assignmentPools.begin(); it != assignmentPools.end(); ++it) { + std::vector r = split(*it, '|'); + json ip; + ip["ipRangeStart"] = r[0]; + ip["ipRangeEnd"] = r[1]; + config["ipAssignmentPools"].push_back(ip); + } + } - _networkChanged(empty, config, false); + config["routes"] = json::array(); + if (routesString != "{}") { + std::string tmp = routesString.substr(1, routesString.size()-2); + std::vector routes = split(tmp, ','); + for (auto it = routes.begin(); it != routes.end(); ++it) { + std::vector r = split(*it, '|'); + json route; + route["target"] = r[0]; + route["via"] = ((r[1] == "NULL")?
nullptr : r[1]); + config["routes"].push_back(route); + } + } + + _networkChanged(empty, config, false); + + auto end = std::chrono::high_resolution_clock::now(); + auto dur = std::chrono::duration_cast(end - start);; + total += dur.count(); + ++count; + if (count > 0 && count % 10000 == 0) { + fprintf(stderr, "Averaging %llu us per network\n", (total/count)); + } } - PQclear(res); + if (count > 0) { + fprintf(stderr, "Took %llu us per network to load\n", (total/count)); + } + stream.complete(); - // if(!networkSet.empty()) { - // if (_rc && _rc->clusterMode) { - // auto tx = _cluster->transaction(_myAddressStr, true); - // tx.sadd(setKey, networkSet.begin(), networkSet.end()); - // tx.exec(); - // } else if (_rc && !_rc->clusterMode) { - // auto tx = _redis->transaction(true); - // tx.sadd(setKey, networkSet.begin(), networkSet.end()); - // tx.exec(); - // } - // } + w.commit(); + _pool->unborrow(c2); + _pool->unborrow(c); if (++this->_ready == 2) { if (_waitNoticePrinted) { @@ -502,206 +625,147 @@ void PostgreSQL::initializeNetworks(PGconn *conn) } } -void PostgreSQL::initializeMembers(PGconn *conn) +void PostgreSQL::initializeMembers() { + std::string memberId; + std::string networkId; try { - if (PQstatus(conn) != CONNECTION_OK) { - fprintf(stderr, "Bad Database Connection: %s", PQerrorMessage(conn)); - exit(1); - } - // std::string setKeyBase = "network-nodes-all:{" + _myAddressStr + "}:"; - - // if (_rc != NULL) { - // std::lock_guard l(_networks_l); - // std::unordered_set deletes; - // for ( auto it : _networks) { - // uint64_t nwid_i = it.first; - // char nwidTmp[64] = {0}; - // OSUtils::ztsnprintf(nwidTmp, sizeof(nwidTmp), "%.16llx", nwid_i); - // std::string nwid(nwidTmp); - // std::string key = setKeyBase + nwid; - // deletes.insert(key); - // } - - // if (!deletes.empty()) { - // if (_rc->clusterMode) { - // auto tx = _cluster->transaction(_myAddressStr, true); - // for (std::string k : deletes) { - // tx.del(k); - // } - // tx.exec(); - // } else { - 
// auto tx = _redis->transaction(true); - // for (std::string k : deletes) { - // tx.del(k); - // } - // tx.exec(); - // } - // } - // } - - const char *params[1] = { - _myAddressStr.c_str() - }; - std::unordered_map networkMembers; - fprintf(stderr, "Initializing Members...\n"); - PGresult *res = PQexecParams(conn, - "SELECT m.id, m.network_id, m.active_bridge, m.authorized, m.capabilities, EXTRACT(EPOCH FROM m.creation_time AT TIME ZONE 'UTC')*1000, m.identity, " - " EXTRACT(EPOCH FROM m.last_authorized_time AT TIME ZONE 'UTC')*1000, " - " EXTRACT(EPOCH FROM m.last_deauthorized_time AT TIME ZONE 'UTC')*1000, " + + char qbuf[2048]; + sprintf(qbuf, "SELECT m.id, m.network_id, m.active_bridge, m.authorized, m.capabilities, (EXTRACT(EPOCH FROM m.creation_time AT TIME ZONE 'UTC')*1000)::bigint, m.identity, " + " (EXTRACT(EPOCH FROM m.last_authorized_time AT TIME ZONE 'UTC')*1000)::bigint, " + " (EXTRACT(EPOCH FROM m.last_deauthorized_time AT TIME ZONE 'UTC')*1000)::bigint, " " m.remote_trace_level, m.remote_trace_target, m.tags, m.v_major, m.v_minor, m.v_rev, m.v_proto, " - " m.no_auto_assign_ips, m.revision " + " m.no_auto_assign_ips, m.revision, sso_exempt, " + " (SELECT (EXTRACT(EPOCH FROM e.authentication_expiry_time)*1000)::bigint " + " FROM ztc_sso_expiry e " + " INNER JOIN ztc_network n1 " + " ON n1.id = e.network_id " + " WHERE e.network_id = m.network_id AND e.member_id = m.id AND n1.sso_enabled = TRUE AND e.authentication_expiry_time IS NOT NULL " + " ORDER BY e.authentication_expiry_time DESC LIMIT 1) AS authentication_expiry_time, " + " ARRAY(SELECT DISTINCT address FROM ztc_member_ip_assignment WHERE member_id = m.id AND network_id = m.network_id) AS assigned_addresses " "FROM ztc_member m " "INNER JOIN ztc_network n " " ON n.id = m.network_id " - "WHERE n.controller_id = $1 AND m.deleted = false", - 1, - NULL, - params, - NULL, - NULL, - 0); + "WHERE n.controller_id = '%s' AND m.deleted = false", _myAddressStr.c_str()); + auto c = _pool->borrow(); + auto
c2 = _pool->borrow(); + pqxx::work w{*c->c}; + + auto stream = pqxx::stream_from::query(w, qbuf); - if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Member Initialization Failed: %s", PQerrorMessage(conn)); - PQclear(res); - exit(1); - } + std::tuple< + std::string // memberId + , std::string // memberId + , std::optional // activeBridge + , std::optional // authorized + , std::optional // capabilities + , std::optional // creationTime + , std::optional // identity + , std::optional // lastAuthorizedTime + , std::optional // lastDeauthorizedTime + , std::optional // remoteTraceLevel + , std::optional // remoteTraceTarget + , std::optional // tags + , std::optional // vMajor + , std::optional // vMinor + , std::optional // vRev + , std::optional // vProto + , std::optional // noAutoAssignIps + , std::optional // revision + , std::optional // ssoExempt + , std::optional // authenticationExpiryTime + , std::string // assignedAddresses + > row; - int numRows = PQntuples(res); - for (int i = 0; i < numRows; ++i) { + uint64_t count = 0; + auto tmp = std::chrono::high_resolution_clock::now(); + uint64_t total = 0; + while (stream >> row) { + auto start = std::chrono::high_resolution_clock::now(); json empty; json config; + + initMember(config); - std::string memberId(PQgetvalue(res, i, 0)); - std::string networkId(PQgetvalue(res, i, 1)); + memberId = std::get<0>(row); + networkId = std::get<1>(row); + std::optional activeBridge = std::get<2>(row); + std::optional authorized = std::get<3>(row); + std::optional capabilities = std::get<4>(row); + std::optional creationTime = std::get<5>(row); + std::optional identity = std::get<6>(row); + std::optional lastAuthorizedTime = std::get<7>(row); + std::optional lastDeauthorizedTime = std::get<8>(row); + std::optional remoteTraceLevel = std::get<9>(row); + std::optional remoteTraceTarget = std::get<10>(row); + std::optional tags = std::get<11>(row); + std::optional vMajor = std::get<12>(row); + std::optional vMinor 
= std::get<13>(row); + std::optional vRev = std::get<14>(row); + std::optional vProto = std::get<15>(row); + std::optional noAutoAssignIps = std::get<16>(row); + std::optional revision = std::get<17>(row); + std::optional ssoExempt = std::get<18>(row); + std::optional authenticationExpiryTime = std::get<19>(row); + std::string assignedAddresses = std::get<20>(row); - // networkMembers.insert(std::pair(setKeyBase+networkId, memberId)); - - std::string ctime = PQgetvalue(res, i, 5); config["id"] = memberId; config["nwid"] = networkId; - config["activeBridge"] = (strcmp(PQgetvalue(res, i, 2), "t") == 0); - config["authorized"] = (strcmp(PQgetvalue(res, i, 3), "t") == 0); - try { - config["capabilities"] = json::parse(PQgetvalue(res, i, 4)); - } catch (std::exception &e) { - config["capabilities"] = json::array(); - } - try { - config["creationTime"] = std::stoull(PQgetvalue(res, i, 5)); - } catch (std::exception &e) { - config["creationTime"] = 0ULL; - //fprintf(stderr, "Error upding creation time (member): %s\n", PQgetvalue(res, i, 5)); - } - config["identity"] = PQgetvalue(res, i, 6); - try { - config["lastAuthorizedTime"] = std::stoull(PQgetvalue(res, i, 7)); - } catch(std::exception &e) { - config["lastAuthorizedTime"] = 0ULL; - //fprintf(stderr, "Error updating last auth time (member): %s\n", PQgetvalue(res, i, 7)); - } - try { - config["lastDeauthorizedTime"] = std::stoull(PQgetvalue(res, i, 8)); - } catch( std::exception &e) { - config["lastDeauthorizedTime"] = 0ULL; - //fprintf(stderr, "Error updating last deauth time (member): %s\n", PQgetvalue(res, i, 8)); - } - try { - config["remoteTraceLevel"] = std::stoi(PQgetvalue(res, i, 9)); - } catch (std::exception &e) { - config["remoteTraceLevel"] = 0; - } - config["remoteTraceTarget"] = PQgetvalue(res, i, 10); - try { - config["tags"] = json::parse(PQgetvalue(res, i, 11)); - } catch (std::exception &e) { - config["tags"] = json::array(); - } - try { - config["vMajor"] = std::stoi(PQgetvalue(res, i, 12)); - } 
catch(std::exception &e) { - config["vMajor"] = -1; - } - try { - config["vMinor"] = std::stoi(PQgetvalue(res, i, 13)); - } catch (std::exception &e) { - config["vMinor"] = -1; - } - try { - config["vRev"] = std::stoi(PQgetvalue(res, i, 14)); - } catch (std::exception &e) { - config["vRev"] = -1; - } - try { - config["vProto"] = std::stoi(PQgetvalue(res, i, 15)); - } catch (std::exception &e) { - config["vProto"] = -1; - } - config["noAutoAssignIps"] = (strcmp(PQgetvalue(res, i, 16), "t") == 0); - try { - config["revision"] = std::stoull(PQgetvalue(res, i, 17)); - } catch (std::exception &e) { - config["revision"] = 0ULL; - //fprintf(stderr, "Error updating revision (member): %s\n", PQgetvalue(res, i, 17)); - } + config["activeBridge"] = activeBridge.value_or(false); + config["authorized"] = authorized.value_or(false); + config["capabilities"] = json::parse(capabilities.value_or("[]")); + config["creationTime"] = creationTime.value_or(0); + config["identity"] = identity.value_or(""); + config["lastAuthorizedTime"] = lastAuthorizedTime.value_or(0); + config["lastDeauthorizedTime"] = lastDeauthorizedTime.value_or(0); + config["remoteTraceLevel"] = remoteTraceLevel.value_or(0); + config["remoteTraceTarget"] = remoteTraceTarget.value_or(""); + config["tags"] = json::parse(tags.value_or("[]")); + config["vMajor"] = vMajor.value_or(-1); + config["vMinor"] = vMinor.value_or(-1); + config["vRev"] = vRev.value_or(-1); + config["vProto"] = vProto.value_or(-1); + config["noAutoAssignIps"] = noAutoAssignIps.value_or(false); + config["revision"] = revision.value_or(0); + config["ssoExempt"] = ssoExempt.value_or(false); + config["authenticationExpiryTime"] = authenticationExpiryTime.value_or(0); config["objtype"] = "member"; config["ipAssignments"] = json::array(); - const char *p2[2] = { - memberId.c_str(), - networkId.c_str() - }; - PGresult *r2 = PQexecParams(conn, - "SELECT DISTINCT address FROM ztc_member_ip_assignment WHERE member_id = $1 AND network_id = $2", - 2, - NULL, 
- p2, - NULL, - NULL, - 0); - - if (PQresultStatus(r2) != PGRES_TUPLES_OK) { - fprintf(stderr, "Member Initialization Failed: %s", PQerrorMessage(conn)); - PQclear(r2); - PQclear(res); - exit(1); - } - - int n = PQntuples(r2); - for (int j = 0; j < n; ++j) { - std::string ipaddr = PQgetvalue(r2, j, 0); - std::size_t pos = ipaddr.find('/'); - if (pos != std::string::npos) { - ipaddr = ipaddr.substr(0, pos); + if (assignedAddresses != "{}") { + std::string tmp = assignedAddresses.substr(1, assignedAddresses.size()-2); + std::vector addrs = split(tmp, ','); + for (auto it = addrs.begin(); it != addrs.end(); ++it) { + config["ipAssignments"].push_back(*it); } - config["ipAssignments"].push_back(ipaddr); } _memberChanged(empty, config, false); + + memberId = ""; + networkId = ""; + + auto end = std::chrono::high_resolution_clock::now(); + auto dur = std::chrono::duration_cast(end - start);; + total += dur.count(); + ++count; + if (count > 0 && count % 10000 == 0) { + fprintf(stderr, "Averaging %llu us per member\n", (total/count)); + } + } + if (count > 0) { + fprintf(stderr, "Took %llu us per member to load\n", (total/count)); } - PQclear(res); + stream.complete(); + + w.commit(); + _pool->unborrow(c2); + _pool->unborrow(c); - // if (!networkMembers.empty()) { - // if (_rc != NULL) { - // if (_rc->clusterMode) { - // auto tx = _cluster->transaction(_myAddressStr, true); - // for (auto it : networkMembers) { - // tx.sadd(it.first, it.second); - // } - // tx.exec(); - // } else { - // auto tx = _redis->transaction(true); - // for (auto it : networkMembers) { - // tx.sadd(it.first, it.second); - // } - // tx.exec(); - // } - // } - // } if (++this->_ready == 2) { if (_waitNoticePrinted) { fprintf(stderr,"[%s] NOTICE: %.10llx controller PostgreSQL data download complete." 
ZT_EOL_S,_timestr(),(unsigned long long)_myAddress.toInt()); @@ -711,7 +775,7 @@ void PostgreSQL::initializeMembers(PGconn *conn) } catch (sw::redis::Error &e) { fprintf(stderr, "ERROR: Error initializing members (redis): %s\n", e.what()); } catch (std::exception &e) { - fprintf(stderr, "ERROR: Error initializing members: %s\n", e.what()); + fprintf(stderr, "ERROR: Error initializing member: %s-%s %s\n", networkId.c_str(), memberId.c_str(), e.what()); exit(-1); } } @@ -735,92 +799,52 @@ void PostgreSQL::heartbeat() const char *publicIdentity = publicId; const char *hostname = hostnameTmp; - PGconn *conn = getPgConn(); - if (PQstatus(conn) == CONNECTION_BAD) { - fprintf(stderr, "Connection to database failed: %s\n", PQerrorMessage(conn)); - PQfinish(conn); - exit(1); - } while (_run == 1) { - if(PQstatus(conn) != CONNECTION_OK) { - fprintf(stderr, "%s heartbeat thread lost connection to Database\n", _myAddressStr.c_str()); - PQfinish(conn); - exit(6); - } + // fprintf(stderr, "%s: heartbeat\n", controllerId); + auto c = _pool->borrow(); int64_t ts = OSUtils::now(); - if (conn) { + + if(c->c) { + pqxx::work w{*c->c}; + std::string major = std::to_string(ZEROTIER_ONE_VERSION_MAJOR); std::string minor = std::to_string(ZEROTIER_ONE_VERSION_MINOR); std::string rev = std::to_string(ZEROTIER_ONE_VERSION_REVISION); std::string build = std::to_string(ZEROTIER_ONE_VERSION_BUILD); std::string now = std::to_string(ts); std::string host_port = std::to_string(_listenPort); - std::string use_redis = "false"; // (_rc != NULL) ? 
"true" : "false"; - const char *values[10] = { - controllerId, - hostname, - now.c_str(), - publicIdentity, - major.c_str(), - minor.c_str(), - rev.c_str(), - build.c_str(), - host_port.c_str(), - use_redis.c_str() - }; - - PGresult *res = PQexecParams(conn, - "INSERT INTO ztc_controller (id, cluster_host, last_alive, public_identity, v_major, v_minor, v_rev, v_build, host_port, use_redis) " - "VALUES ($1, $2, TO_TIMESTAMP($3::double precision/1000), $4, $5, $6, $7, $8, $9, $10) " + std::string use_redis = (_rc != NULL) ? "true" : "false"; + + try { + pqxx::result res = w.exec0("INSERT INTO ztc_controller (id, cluster_host, last_alive, public_identity, v_major, v_minor, v_rev, v_build, host_port, use_redis) " + "VALUES ("+w.quote(controllerId)+", "+w.quote(hostname)+", TO_TIMESTAMP("+now+"::double precision/1000), "+ + w.quote(publicIdentity)+", "+major+", "+minor+", "+rev+", "+build+", "+host_port+", "+use_redis+") " "ON CONFLICT (id) DO UPDATE SET cluster_host = EXCLUDED.cluster_host, last_alive = EXCLUDED.last_alive, " "public_identity = EXCLUDED.public_identity, v_major = EXCLUDED.v_major, v_minor = EXCLUDED.v_minor, " "v_rev = EXCLUDED.v_rev, v_build = EXCLUDED.v_rev, host_port = EXCLUDED.host_port, " - "use_redis = EXCLUDED.use_redis", - 10, // number of parameters - NULL, // oid field. ignore - values, // values for substitution - NULL, // lengths in bytes of each value - NULL, // binary? 
- 0); - - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "Heartbeat Update Failed: %s\n", PQresultErrorMessage(res)); - } - PQclear(res); + "use_redis = EXCLUDED.use_redis"); + } catch (std::exception &e) { + fprintf(stderr, "Heartbeat update failed: %s\n", e.what()); + w.abort(); + _pool->unborrow(c); + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + continue; + } + w.commit(); } - // if (_rc != NULL) { - // if (_rc->clusterMode) { - // _cluster->zadd("controllers", controllerId, ts); - // } else { - // _redis->zadd("controllers", controllerId, ts); - // } - // } + _pool->unborrow(c); std::this_thread::sleep_for(std::chrono::milliseconds(1000)); } - - PQfinish(conn); - conn = NULL; fprintf(stderr, "Exited heartbeat thread\n"); } void PostgreSQL::membersDbWatcher() { - PGconn *conn = getPgConn(NO_OVERRIDE); - if (PQstatus(conn) == CONNECTION_BAD) { - fprintf(stderr, "Connection to database failed: %s\n", PQerrorMessage(conn)); - PQfinish(conn); - exit(1); - } - if (_rc) { - PQfinish(conn); - conn = NULL; _membersWatcher_Redis(); } else { - _membersWatcher_Postgres(conn); - PQfinish(conn); - conn = NULL; + _membersWatcher_Postgres(); } if (_run == 1) { @@ -830,46 +854,19 @@ void PostgreSQL::membersDbWatcher() fprintf(stderr, "Exited membersDbWatcher\n"); } -void PostgreSQL::_membersWatcher_Postgres(PGconn *conn) { - char buf[11] = {0}; - std::string cmd = "LISTEN member_" + std::string(_myAddress.toString(buf)); - fprintf(stderr, "Listening to member stream: %s\n", cmd.c_str()); - PGresult *res = PQexec(conn, cmd.c_str()); - if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "LISTEN command failed: %s\n", PQresultErrorMessage(res)); - PQclear(res); - PQfinish(conn); - exit(1); - } +void PostgreSQL::_membersWatcher_Postgres() { + auto c = _pool->borrow(); - PQclear(res); res = NULL; + std::string stream = "member_" + _myAddressStr; + + fprintf(stderr, "Listening to member stream: %s\n", stream.c_str()); + 
MemberNotificationReceiver m(this, *c->c, stream); while(_run == 1) { - if (PQstatus(conn) != CONNECTION_OK) { - fprintf(stderr, "ERROR: Member Watcher lost connection to Postgres."); - exit(-1); - } - PGnotify *notify = NULL; - PQconsumeInput(conn); - while ((notify = PQnotifies(conn)) != NULL) { - //fprintf(stderr, "ASYNC NOTIFY of '%s' id:%s received\n", notify->relname, notify->extra); - - try { - json tmp(json::parse(notify->extra)); - json &ov = tmp["old_val"]; - json &nv = tmp["new_val"]; - json oldConfig, newConfig; - if (ov.is_object()) oldConfig = ov; - if (nv.is_object()) newConfig = nv; - if (oldConfig.is_object() || newConfig.is_object()) { - _memberChanged(oldConfig,newConfig,(this->_ready>=2)); - } - } catch (...) {} // ignore bad records - - free(notify); - } - std::this_thread::sleep_for(std::chrono::milliseconds(10)); + c->c->await_notification(5, 0); } + + _pool->unborrow(c); } void PostgreSQL::_membersWatcher_Redis() { @@ -932,21 +929,10 @@ void PostgreSQL::_membersWatcher_Redis() { void PostgreSQL::networksDbWatcher() { - PGconn *conn = getPgConn(NO_OVERRIDE); - if (PQstatus(conn) == CONNECTION_BAD) { - fprintf(stderr, "Connection to database failed: %s\n", PQerrorMessage(conn)); - PQfinish(conn); - exit(1); - } - if (_rc) { - PQfinish(conn); - conn = NULL; _networksWatcher_Redis(); } else { - _networksWatcher_Postgres(conn); - PQfinish(conn); - conn = NULL; + _networksWatcher_Postgres(); } if (_run == 1) { @@ -956,42 +942,17 @@ void PostgreSQL::networksDbWatcher() fprintf(stderr, "Exited networksDbWatcher\n"); } -void PostgreSQL::_networksWatcher_Postgres(PGconn *conn) { - char buf[11] = {0}; - std::string cmd = "LISTEN network_" + std::string(_myAddress.toString(buf)); - PGresult *res = PQexec(conn, cmd.c_str()); - if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "LISTEN command failed: %s\n", PQresultErrorMessage(res)); - PQclear(res); - PQfinish(conn); - exit(1); - } +void PostgreSQL::_networksWatcher_Postgres() { + 
std::string stream = "network_" + _myAddressStr; - PQclear(res); res = NULL; + fprintf(stderr, "Listening to member stream: %s\n", stream.c_str()); + + auto c = _pool->borrow(); + + NetworkNotificationReceiver n(this, *c->c, stream); while(_run == 1) { - if (PQstatus(conn) != CONNECTION_OK) { - fprintf(stderr, "ERROR: Network Watcher lost connection to Postgres."); - exit(-1); - } - PGnotify *notify = NULL; - PQconsumeInput(conn); - while ((notify = PQnotifies(conn)) != NULL) { - //fprintf(stderr, "ASYNC NOTIFY of '%s' id:%s received\n", notify->relname, notify->extra); - try { - json tmp(json::parse(notify->extra)); - json &ov = tmp["old_val"]; - json &nv = tmp["new_val"]; - json oldConfig, newConfig; - if (ov.is_object()) oldConfig = ov; - if (nv.is_object()) newConfig = nv; - if (oldConfig.is_object()||newConfig.is_object()) { - _networkChanged(oldConfig,newConfig,(this->_ready >= 2)); - } - } catch (...) {} // ignore bad records - free(notify); - } - std::this_thread::sleep_for(std::chrono::milliseconds(10)); + c->c->await_notification(5,0); } } @@ -1056,78 +1017,33 @@ void PostgreSQL::_networksWatcher_Redis() { void PostgreSQL::commitThread() { - PGconn *conn = getPgConn(); - if (PQstatus(conn) == CONNECTION_BAD) { - fprintf(stderr, "ERROR: Connection to database failed: %s\n", PQerrorMessage(conn)); - PQfinish(conn); - exit(1); - } - + fprintf(stderr, "%s: commitThread start\n", _myAddressStr.c_str()); std::pair qitem; while(_commitQueue.get(qitem)&(_run == 1)) { + //fprintf(stderr, "commitThread tick\n"); if (!qitem.first.is_object()) { + fprintf(stderr, "not an object\n"); continue; } - if (PQstatus(conn) == CONNECTION_BAD) { - fprintf(stderr, "ERROR: Connection to database failed: %s\n", PQerrorMessage(conn)); - PQfinish(conn); - exit(1); - } + try { nlohmann::json *config = &(qitem.first); const std::string objtype = (*config)["objtype"]; if (objtype == "member") { + // fprintf(stderr, "%s: commitThread: member\n", _myAddressStr.c_str()); try { + auto c = 
_pool->borrow(); + pqxx::work w(*c->c); + std::string memberId = (*config)["id"]; std::string networkId = (*config)["nwid"]; - std::string identity = (*config)["identity"]; std::string target = "NULL"; - if (!(*config)["remoteTraceTarget"].is_null()) { target = (*config)["remoteTraceTarget"]; } - - std::string caps = OSUtils::jsonDump((*config)["capabilities"], -1); - std::string lastAuthTime = std::to_string((long long)(*config)["lastAuthorizedTime"]); - std::string lastDeauthTime = std::to_string((long long)(*config)["lastDeauthorizedTime"]); - std::string rtraceLevel = std::to_string((int)(*config)["remoteTraceLevel"]); - std::string rev = std::to_string((unsigned long long)(*config)["revision"]); - std::string tags = OSUtils::jsonDump((*config)["tags"], -1); - std::string vmajor = std::to_string((int)(*config)["vMajor"]); - std::string vminor = std::to_string((int)(*config)["vMinor"]); - std::string vrev = std::to_string((int)(*config)["vRev"]); - std::string vproto = std::to_string((int)(*config)["vProto"]); - const char *values[19] = { - memberId.c_str(), - networkId.c_str(), - ((*config)["activeBridge"] ? "true" : "false"), - ((*config)["authorized"] ? "true" : "false"), - caps.c_str(), - identity.c_str(), - lastAuthTime.c_str(), - lastDeauthTime.c_str(), - ((*config)["noAutoAssignIps"] ? "true" : "false"), - rtraceLevel.c_str(), - (target == "NULL") ? 
NULL : target.c_str(), - rev.c_str(), - tags.c_str(), - vmajor.c_str(), - vminor.c_str(), - vrev.c_str(), - vproto.c_str() - }; - - PGresult *res = PQexec(conn, "BEGIN"); - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "ERROR: Error beginning update transaction: %s\n", PQresultErrorMessage(res)); - PQclear(res); - delete config; - config = nullptr; - continue; - } - - res = PQexecParams(conn, + + pqxx::result res = w.exec_params0( "INSERT INTO ztc_member (id, network_id, active_bridge, authorized, capabilities, " "identity, last_authorized_time, last_deauthorized_time, no_auto_assign_ips, " "remote_trace_level, remote_trace_target, revision, tags, v_major, v_minor, v_rev, v_proto) " @@ -1140,49 +1056,27 @@ void PostgreSQL::commitThread() "remote_trace_level = EXCLUDED.remote_trace_level, remote_trace_target = EXCLUDED.remote_trace_target, " "revision = EXCLUDED.revision+1, tags = EXCLUDED.tags, v_major = EXCLUDED.v_major, " "v_minor = EXCLUDED.v_minor, v_rev = EXCLUDED.v_rev, v_proto = EXCLUDED.v_proto", - 17, - NULL, - values, - NULL, - NULL, - 0); + memberId, + networkId, + (bool)(*config)["activeBridge"], + (bool)(*config)["authorized"], + OSUtils::jsonDump((*config)["capabilities"], -1), + OSUtils::jsonString((*config)["identity"], ""), + (uint64_t)(*config)["lastAuthorizedTime"], + (uint64_t)(*config)["lastDeauthorizedTime"], + (bool)(*config)["noAutoAssignIps"], + (int)(*config)["remoteTraceLevel"], + target, + (uint64_t)(*config)["revision"], + OSUtils::jsonDump((*config)["tags"], -1), + (int)(*config)["vMajor"], + (int)(*config)["vMinor"], + (int)(*config)["vRev"], + (int)(*config)["vProto"]); - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "ERROR: Error updating member: %s\n", PQresultErrorMessage(res)); - fprintf(stderr, "%s", OSUtils::jsonDump(*config, 2).c_str()); - PQclear(res); - PQclear(PQexec(conn, "ROLLBACK")); - delete config; - config = nullptr; - continue; - } - PQclear(res); - - const char *v2[2] = { - 
memberId.c_str(), - networkId.c_str() - }; - - res = PQexecParams(conn, - "DELETE FROM ztc_member_ip_assignment WHERE member_id = $1 AND network_id = $2", - 2, - NULL, - v2, - NULL, - NULL, - 0); - - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "ERROR: Error updating IP address assignments: %s\n", PQresultErrorMessage(res)); - PQclear(res); - PQclear(PQexec(conn, "ROLLBACK"));; - delete config; - config = nullptr; - continue; - } - - PQclear(res); + res = w.exec_params0("DELETE FROM ztc_member_ip_assignment WHERE member_id = $1 AND network_id = $2", + memberId, networkId); std::vector assignments; bool ipAssignError = false; @@ -1193,46 +1087,21 @@ void PostgreSQL::commitThread() continue; } - const char *v3[3] = { - memberId.c_str(), - networkId.c_str(), - addr.c_str() - }; - - res = PQexecParams(conn, + res = w.exec_params0( "INSERT INTO ztc_member_ip_assignment (member_id, network_id, address) VALUES ($1, $2, $3) ON CONFLICT (network_id, member_id, address) DO NOTHING", - 3, - NULL, - v3, - NULL, - NULL, - 0); + memberId, networkId, addr); - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "ERROR: Error setting IP addresses for member: %s\n", PQresultErrorMessage(res)); - PQclear(res); - PQclear(PQexec(conn, "ROLLBACK")); - ipAssignError = true; - break; - } - PQclear(res); assignments.push_back(addr); } if (ipAssignError) { + fprintf(stderr, "%s: ipAssignError\n", _myAddressStr.c_str()); delete config; config = nullptr; continue; } - res = PQexec(conn, "COMMIT"); - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "ERROR: Error committing member transaction: %s\n", PQresultErrorMessage(res)); - PQclear(res); - PQclear(PQexec(conn, "ROLLBACK")); - delete config; - config = nullptr; - continue; - } + w.commit(); + _pool->unborrow(c); const uint64_t nwidInt = OSUtils::jsonIntHex((*config)["nwid"], 0ULL); const uint64_t memberidInt = OSUtils::jsonIntHex((*config)["id"], 0ULL); @@ -1246,66 +1115,27 @@ void 
PostgreSQL::commitThread() _memberChanged(memOrig, memNew, qitem.second); } else { - fprintf(stderr, "Can't notify of change. Error parsing nwid or memberid: %llu-%llu\n", (unsigned long long)nwidInt, (unsigned long long)memberidInt); + fprintf(stderr, "%s: Can't notify of change. Error parsing nwid or memberid: %llu-%llu\n", _myAddressStr.c_str(), (unsigned long long)nwidInt, (unsigned long long)memberidInt); } } catch (std::exception &e) { - fprintf(stderr, "ERROR: Error updating member: %s\n", e.what()); + fprintf(stderr, "%s ERROR: Error updating member: %s\n", _myAddressStr.c_str(), e.what()); } } else if (objtype == "network") { try { + // fprintf(stderr, "%s: commitThread: network\n", _myAddressStr.c_str()); + auto c = _pool->borrow(); + pqxx::work w(*c->c); + std::string id = (*config)["id"]; - std::string controllerId = _myAddressStr.c_str(); - std::string name = (*config)["name"]; - std::string remoteTraceTarget("NULL"); - if (!(*config)["remoteTraceTarget"].is_null()) { + std::string remoteTraceTarget = ""; + if(!(*config)["remoteTraceTarget"].is_null()) { remoteTraceTarget = (*config)["remoteTraceTarget"]; } - std::string rulesSource; + std::string rulesSource = ""; if ((*config)["rulesSource"].is_string()) { rulesSource = (*config)["rulesSource"]; } - std::string caps = OSUtils::jsonDump((*config)["capabilitles"], -1); - std::string now = std::to_string(OSUtils::now()); - std::string mtu = std::to_string((int)(*config)["mtu"]); - std::string mcastLimit = std::to_string((int)(*config)["multicastLimit"]); - std::string rtraceLevel = std::to_string((int)(*config)["remoteTraceLevel"]); - std::string rules = OSUtils::jsonDump((*config)["rules"], -1); - std::string tags = OSUtils::jsonDump((*config)["tags"], -1); - std::string v4mode = OSUtils::jsonDump((*config)["v4AssignMode"],-1); - std::string v6mode = OSUtils::jsonDump((*config)["v6AssignMode"], -1); - bool enableBroadcast = (*config)["enableBroadcast"]; - bool isPrivate = (*config)["private"]; - - 
const char *values[16] = { - id.c_str(), - controllerId.c_str(), - caps.c_str(), - enableBroadcast ? "true" : "false", - now.c_str(), - mtu.c_str(), - mcastLimit.c_str(), - name.c_str(), - isPrivate ? "true" : "false", - rtraceLevel.c_str(), - (remoteTraceTarget == "NULL" ? NULL : remoteTraceTarget.c_str()), - rules.c_str(), - rulesSource.c_str(), - tags.c_str(), - v4mode.c_str(), - v6mode.c_str(), - }; - - PGresult *res = PQexec(conn, "BEGIN"); - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "ERROR: Error beginnning transaction: %s\n", PQresultErrorMessage(res)); - PQclear(res); - delete config; - config = nullptr; - continue; - } - - PQclear(res); // This ugly query exists because when we want to mirror networks to/from // another data store (e.g. FileDB or LFDB) it is possible to get a network @@ -1313,15 +1143,15 @@ void PostgreSQL::commitThread() // the owner_id to the "first" global admin in the user DB if the record // did not previously exist. If the record already exists owner_id is left // unchanged, so owner_id should be left out of the update clause. 
- res = PQexecParams(conn, + pqxx::result res = w.exec_params0( "INSERT INTO ztc_network (id, creation_time, owner_id, controller_id, capabilities, enable_broadcast, " "last_modified, mtu, multicast_limit, name, private, " "remote_trace_level, remote_trace_target, rules, rules_source, " - "tags, v4_assign_mode, v6_assign_mode) VALUES (" + "tags, v4_assign_mode, v6_assign_mode, sso_enabled) VALUES (" "$1, TO_TIMESTAMP($5::double precision/1000), " "(SELECT user_id AS owner_id FROM ztc_global_permissions WHERE authorize = true AND del = true AND modify = true AND read = true LIMIT 1)," "$2, $3, $4, TO_TIMESTAMP($5::double precision/1000), " - "$6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16) " + "$6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17) " "ON CONFLICT (id) DO UPDATE set controller_id = EXCLUDED.controller_id, " "capabilities = EXCLUDED.capabilities, enable_broadcast = EXCLUDED.enable_broadcast, " "last_modified = EXCLUDED.last_modified, mtu = EXCLUDED.mtu, " @@ -1329,101 +1159,40 @@ void PostgreSQL::commitThread() "private = EXCLUDED.private, remote_trace_level = EXCLUDED.remote_trace_level, " "remote_trace_target = EXCLUDED.remote_trace_target, rules = EXCLUDED.rules, " "rules_source = EXCLUDED.rules_source, tags = EXCLUDED.tags, " - "v4_assign_mode = EXCLUDED.v4_assign_mode, v6_assign_mode = EXCLUDED.v6_assign_mode", - 16, - NULL, - values, - NULL, - NULL, - 0); + "v4_assign_mode = EXCLUDED.v4_assign_mode, v6_assign_mode = EXCLUDED.v6_assign_mode, " + "sso_enabled = EXCLUDED.sso_enabled", + id, + _myAddressStr, + OSUtils::jsonDump((*config)["capabilitles"], -1), + (bool)(*config)["enableBroadcast"], + OSUtils::now(), + (int)(*config)["mtu"], + (int)(*config)["multicastLimit"], + OSUtils::jsonString((*config)["name"],""), + (bool)(*config)["private"], + (int)(*config)["remoteTraceLevel"], + remoteTraceTarget, + OSUtils::jsonDump((*config)["rules"], -1), + rulesSource, + OSUtils::jsonDump((*config)["tags"], -1), + 
OSUtils::jsonDump((*config)["v4AssignMode"],-1), + OSUtils::jsonDump((*config)["v6AssignMode"], -1), + OSUtils::jsonBool((*config)["ssoEnabled"], false)); - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "ERROR: Error updating network record: %s\n", PQresultErrorMessage(res)); - PQclear(res); - PQclear(PQexec(conn, "ROLLBACK")); - delete config; - config = nullptr; - continue; - } - - PQclear(res); - - const char *params[1] = { - id.c_str() - }; - res = PQexecParams(conn, - "DELETE FROM ztc_network_assignment_pool WHERE network_id = $1", - 1, - NULL, - params, - NULL, - NULL, - 0); - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "ERROR: Error updating assignment pool: %s\n", PQresultErrorMessage(res)); - PQclear(res); - PQclear(PQexec(conn, "ROLLBACK")); - delete config; - config = nullptr; - continue; - } - - PQclear(res); + res = w.exec_params0("DELETE FROM ztc_network_assignment_pool WHERE network_id = $1", id); auto pool = (*config)["ipAssignmentPools"]; bool err = false; for (auto i = pool.begin(); i != pool.end(); ++i) { std::string start = (*i)["ipRangeStart"]; std::string end = (*i)["ipRangeEnd"]; - const char *p[3] = { - id.c_str(), - start.c_str(), - end.c_str() - }; - res = PQexecParams(conn, + res = w.exec_params0( "INSERT INTO ztc_network_assignment_pool (network_id, ip_range_start, ip_range_end) " - "VALUES ($1, $2, $3)", - 3, - NULL, - p, - NULL, - NULL, - 0); - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "ERROR: Error updating assignment pool: %s\n", PQresultErrorMessage(res)); - PQclear(res); - err = true; - break; - } - PQclear(res); - } - if (err) { - PQclear(res); - PQclear(PQexec(conn, "ROLLBACK")); - delete config; - config = nullptr; - continue; - } - - res = PQexecParams(conn, - "DELETE FROM ztc_network_route WHERE network_id = $1", - 1, - NULL, - params, - NULL, - NULL, - 0); - - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "ERROR: Error updating routes: %s\n", 
PQresultErrorMessage(res)); - PQclear(res); - PQclear(PQexec(conn, "ROLLBACK")); - delete config; - config = nullptr; - continue; + "VALUES ($1, $2, $3)", id, start, end); } + res = w.exec_params0("DELETE FROM ztc_network_route WHERE network_id = $1", id); auto routes = (*config)["routes"]; err = false; @@ -1445,36 +1214,18 @@ void PostgreSQL::commitThread() via = (*i)["via"]; } - const char *p[4] = { - id.c_str(), - targetAddr.c_str(), - targetBits.c_str(), - (via == "NULL" ? NULL : via.c_str()), - }; - - res = PQexecParams(conn, - "INSERT INTO ztc_network_route (network_id, address, bits, via) VALUES ($1, $2, $3, $4)", - 4, - NULL, - p, - NULL, - NULL, - 0); - - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "ERROR: Error updating routes: %s\n", PQresultErrorMessage(res)); - PQclear(res); - err = true; - break; - } - PQclear(res); + res = w.exec_params0("INSERT INTO ztc_network_route (network_id, address, bits, via) VALUES ($1, $2, $3, $4)", + id, targetAddr, targetBits, (via == "NULL" ? 
NULL : via.c_str())); } if (err) { - PQclear(PQexec(conn, "ROLLBACK")); + fprintf(stderr, "%s: route add error\n", _myAddressStr.c_str()); + w.abort(); + _pool->unborrow(c); delete config; config = nullptr; continue; } + auto dns = (*config)["dns"]; std::string domain = dns["domain"]; std::stringstream servers; @@ -1487,38 +1238,13 @@ void PostgreSQL::commitThread() } servers << "}"; - const char *p[3] = { - id.c_str(), - domain.c_str(), - servers.str().c_str() - }; + std::string s = servers.str(); - res = PQexecParams(conn, "INSERT INTO ztc_network_dns (network_id, domain, servers) VALUES ($1, $2, $3) ON CONFLICT (network_id) DO UPDATE SET domain = EXCLUDED.domain, servers = EXCLUDED.servers", - 3, - NULL, - p, - NULL, - NULL, - 0); - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "ERROR: Error updating DNS: %s\n", PQresultErrorMessage(res)); - PQclear(res); - PQclear(PQexec(conn, "ROLLBACK")); - err = true; - break; - } - PQclear(res); + res = w.exec_params0("INSERT INTO ztc_network_dns (network_id, domain, servers) VALUES ($1, $2, $3) ON CONFLICT (network_id) DO UPDATE SET domain = EXCLUDED.domain, servers = EXCLUDED.servers", + id, domain, s); - res = PQexec(conn, "COMMIT"); - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "ERROR: Error committing network update: %s\n", PQresultErrorMessage(res)); - PQclear(res); - PQclear(PQexec(conn, "ROLLBACK")); - delete config; - config = nullptr; - continue; - } - PQclear(res); + w.commit(); + _pool->unborrow(c); const uint64_t nwidInt = OSUtils::jsonIntHex((*config)["nwid"], 0ULL); if (nwidInt) { @@ -1529,207 +1255,119 @@ void PostgreSQL::commitThread() _networkChanged(nwOrig, nwNew, qitem.second); } else { - fprintf(stderr, "Can't notify network changed: %llu\n", (unsigned long long)nwidInt); + fprintf(stderr, "%s: Can't notify network changed: %llu\n", _myAddressStr.c_str(), (unsigned long long)nwidInt); } } catch (std::exception &e) { - fprintf(stderr, "ERROR: Error updating member: 
%s\n", e.what()); + fprintf(stderr, "%s ERROR: Error updating network: %s\n", _myAddressStr.c_str(), e.what()); } - // if (_rc != NULL) { - // try { - // std::string id = (*config)["id"]; - // std::string controllerId = _myAddressStr.c_str(); - // std::string key = "networks:{" + controllerId + "}"; - // if (_rc->clusterMode) { - // _cluster->sadd(key, id); - // } else { - // _redis->sadd(key, id); - // } - // } catch (sw::redis::Error &e) { - // fprintf(stderr, "ERROR: Error adding network to Redis: %s\n", e.what()); - // } - // } } else if (objtype == "_delete_network") { + // fprintf(stderr, "%s: commitThread: delete network\n", _myAddressStr.c_str()); try { + auto c = _pool->borrow(); + pqxx::work w(*c->c); + std::string networkId = (*config)["nwid"]; - const char *values[1] = { - networkId.c_str() - }; - PGresult * res = PQexecParams(conn, - "UPDATE ztc_network SET deleted = true WHERE id = $1", - 1, - NULL, - values, - NULL, - NULL, - 0); - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "ERROR: Error deleting network: %s\n", PQresultErrorMessage(res)); - } + pqxx::result res = w.exec_params0("UPDATE ztc_network SET deleted = true WHERE id = $1", + networkId); - PQclear(res); + w.commit(); + _pool->unborrow(c); } catch (std::exception &e) { - fprintf(stderr, "ERROR: Error deleting network: %s\n", e.what()); + fprintf(stderr, "%s ERROR: Error deleting network: %s\n", _myAddressStr.c_str(), e.what()); } - // if (_rc != NULL) { - // try { - // std::string id = (*config)["id"]; - // std::string controllerId = _myAddressStr.c_str(); - // std::string key = "networks:{" + controllerId + "}"; - // if (_rc->clusterMode) { - // _cluster->srem(key, id); - // _cluster->del("network-nodes-online:{"+controllerId+"}:"+id); - // } else { - // _redis->srem(key, id); - // _redis->del("network-nodes-online:{"+controllerId+"}:"+id); - // } - // } catch (sw::redis::Error &e) { - // fprintf(stderr, "ERROR: Error adding network to Redis: %s\n", e.what()); - // } - 
// } + } else if (objtype == "_delete_member") { + // fprintf(stderr, "%s commitThread: delete member\n", _myAddressStr.c_str()); try { + auto c = _pool->borrow(); + pqxx::work w(*c->c); + std::string memberId = (*config)["id"]; std::string networkId = (*config)["nwid"]; - const char *values[2] = { - memberId.c_str(), - networkId.c_str() - }; - - PGresult *res = PQexecParams(conn, + pqxx::result res = w.exec_params0( "UPDATE ztc_member SET hidden = true, deleted = true WHERE id = $1 AND network_id = $2", - 2, - NULL, - values, - NULL, - NULL, - 0); + memberId, networkId); - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "ERROR: Error deleting member: %s\n", PQresultErrorMessage(res)); - } - - PQclear(res); + w.commit(); + _pool->unborrow(c); } catch (std::exception &e) { - fprintf(stderr, "ERROR: Error deleting member: %s\n", e.what()); + fprintf(stderr, "%s ERROR: Error deleting member: %s\n", _myAddressStr.c_str(), e.what()); } - // if (_rc != NULL) { - // try { - // std::string memberId = (*config)["id"]; - // std::string networkId = (*config)["nwid"]; - // std::string controllerId = _myAddressStr.c_str(); - // std::string key = "network-nodes-all:{" + controllerId + "}:" + networkId; - // if (_rc->clusterMode) { - // _cluster->srem(key, memberId); - // _cluster->del("member:{"+controllerId+"}:"+networkId+":"+memberId); - // } else { - // _redis->srem(key, memberId); - // _redis->del("member:{"+controllerId+"}:"+networkId+":"+memberId); - // } - // } catch (sw::redis::Error &e) { - // fprintf(stderr, "ERROR: Error deleting member from Redis: %s\n", e.what()); - // } - // } } else { - fprintf(stderr, "ERROR: unknown objtype"); + fprintf(stderr, "%s ERROR: unknown objtype\n", _myAddressStr.c_str()); } } catch (std::exception &e) { - fprintf(stderr, "ERROR: Error getting objtype: %s\n", e.what()); + fprintf(stderr, "%s ERROR: Error getting objtype: %s\n", _myAddressStr.c_str(), e.what()); } - 
std::this_thread::sleep_for(std::chrono::milliseconds(100)); } - PQfinish(conn); - if (_run == 1) { - fprintf(stderr, "ERROR: %s commitThread should still be running! Exiting Controller.\n", _myAddressStr.c_str()); - exit(7); - } - fprintf(stderr, "commitThread finished\n"); + fprintf(stderr, "%s commitThread finished\n", _myAddressStr.c_str()); } void PostgreSQL::onlineNotificationThread() { waitForReady(); - - // if (_rc != NULL) { - // onlineNotification_Redis(); - // } else { - onlineNotification_Postgres(); - // } + onlineNotification_Postgres(); } void PostgreSQL::onlineNotification_Postgres() { - PGconn *conn = getPgConn(); - if (PQstatus(conn) == CONNECTION_BAD) { - fprintf(stderr, "Connection to database failed: %s\n", PQerrorMessage(conn)); - PQfinish(conn); - exit(1); - } _connected = 1; nlohmann::json jtmp1, jtmp2; while (_run == 1) { - if (PQstatus(conn) != CONNECTION_OK) { - fprintf(stderr, "ERROR: Online Notification thread lost connection to Postgres."); - PQfinish(conn); - exit(5); - } - - std::unordered_map< std::pair,std::pair,_PairHasher > lastOnline; - { - std::lock_guard l(_lastOnline_l); - lastOnline.swap(_lastOnline); - } - - PGresult *res = NULL; - - std::stringstream memberUpdate; - memberUpdate << "INSERT INTO ztc_member_status (network_id, member_id, address, last_updated) VALUES "; - bool firstRun = true; - bool memberAdded = false; - for (auto i=lastOnline.begin(); i != lastOnline.end(); ++i) { - uint64_t nwid_i = i->first.first; - char nwidTmp[64]; - char memTmp[64]; - char ipTmp[64]; - OSUtils::ztsnprintf(nwidTmp,sizeof(nwidTmp), "%.16llx", nwid_i); - OSUtils::ztsnprintf(memTmp,sizeof(memTmp), "%.10llx", i->first.second); - - if(!get(nwid_i, jtmp1, i->first.second, jtmp2)) { - continue; // skip non existent networks/members + auto c = _pool->borrow(); + try { + fprintf(stderr, "%s onlineNotification_Postgres\n", _myAddressStr.c_str()); + std::unordered_map< std::pair,std::pair,_PairHasher > lastOnline; + { + std::lock_guard 
l(_lastOnline_l); + lastOnline.swap(_lastOnline); } + + pqxx::work w(*c->c); - std::string networkId(nwidTmp); - std::string memberId(memTmp); + // using pqxx::stream_to would be a really nice alternative here, but + // unfortunately it doesn't support upserts. + // fprintf(stderr, "online notification tick\n"); + std::stringstream memberUpdate; + memberUpdate << "INSERT INTO ztc_member_status (network_id, member_id, address, last_updated) VALUES "; + bool firstRun = true; + bool memberAdded = false; + int updateCount = 0; + for (auto i=lastOnline.begin(); i != lastOnline.end(); ++i) { + updateCount += 1; + uint64_t nwid_i = i->first.first; + char nwidTmp[64]; + char memTmp[64]; + char ipTmp[64]; + OSUtils::ztsnprintf(nwidTmp,sizeof(nwidTmp), "%.16llx", nwid_i); + OSUtils::ztsnprintf(memTmp,sizeof(memTmp), "%.10llx", i->first.second); - const char *qvals[2] = { - networkId.c_str(), - memberId.c_str() - }; + if(!get(nwid_i, jtmp1, i->first.second, jtmp2)) { + continue; // skip non existent networks/members + } - res = PQexecParams(conn, - "SELECT id, network_id FROM ztc_member WHERE network_id = $1 AND id = $2", - 2, - NULL, - qvals, - NULL, - NULL, - 0); + std::string networkId(nwidTmp); + std::string memberId(memTmp); - if (PQresultStatus(res) != PGRES_TUPLES_OK) { - fprintf(stderr, "Member count failed: %s", PQerrorMessage(conn)); - PQclear(res); - continue; - } + const char *qvals[2] = { + networkId.c_str(), + memberId.c_str() + }; - int nrows = PQntuples(res); - PQclear(res); + try { + pqxx::row r = w.exec_params1("SELECT id, network_id FROM ztc_member WHERE network_id = $1 AND id = $2", + networkId, memberId); + } catch (pqxx::unexpected_rows &e) { + // fprintf(stderr, "Member count failed: %s\n", e.what()); + continue; + } - if (nrows == 1) { int64_t ts = i->second.first; std::string ipAddr = i->second.second.toIpString(ipTmp); std::string timestamp = std::to_string(ts); @@ -1748,27 +1386,27 @@ void PostgreSQL::onlineNotification_Postgres() } memberUpdate << 
"TO_TIMESTAMP(" << timestamp << "::double precision/1000))"; memberAdded = true; - } else if (nrows > 1) { - fprintf(stderr, "nrows > 1?!?"); - continue; - } else { - continue; } - } - memberUpdate << " ON CONFLICT (network_id, member_id) DO UPDATE SET address = EXCLUDED.address, last_updated = EXCLUDED.last_updated;"; + memberUpdate << " ON CONFLICT (network_id, member_id) DO UPDATE SET address = EXCLUDED.address, last_updated = EXCLUDED.last_updated;"; - if (memberAdded) { - res = PQexec(conn, memberUpdate.str().c_str()); - if (PQresultStatus(res) != PGRES_COMMAND_OK) { - fprintf(stderr, "Multiple insert failed: %s", PQerrorMessage(conn)); + if (memberAdded) { + //fprintf(stderr, "%s\n", memberUpdate.str().c_str()); + pqxx::result res = w.exec0(memberUpdate.str()); + w.commit(); } - PQclear(res); - } + fprintf(stderr, "%s: Updated online status of %d members\n", _myAddressStr.c_str(), updateCount); + } catch (std::exception &e) { + fprintf(stderr, "%s: error in onlinenotification thread: %s\n", _myAddressStr.c_str(), e.what()); + } + _pool->unborrow(c); + + ConnectionPoolStats stats = _pool->get_stats(); + fprintf(stderr, "%s pool stats: in use size: %llu, available size: %llu, total: %llu\n", + _myAddressStr.c_str(), stats.borrowed_size, stats.pool_size, (stats.borrowed_size + stats.pool_size)); std::this_thread::sleep_for(std::chrono::seconds(10)); } fprintf(stderr, "%s: Fell out of run loop in onlineNotificationThread\n", _myAddressStr.c_str()); - PQfinish(conn); if (_run == 1) { fprintf(stderr, "ERROR: %s onlineNotificationThread should still be running! 
Exiting Controller.\n", _myAddressStr.c_str()); exit(6); @@ -1864,20 +1502,5 @@ void PostgreSQL::_doRedisUpdate(sw::redis::Transaction &tx, std::string &control tx.exec(); } -PGconn *PostgreSQL::getPgConn(OverrideMode m) -{ - if (m == ALLOW_PGBOUNCER_OVERRIDE) { - char *connStr = getenv("PGBOUNCER_CONNSTR"); - if (connStr != NULL) { - fprintf(stderr, "PGBouncer Override\n"); - std::string conn(connStr); - conn += " application_name=controller-"; - conn += _myAddressStr.c_str(); - return PQconnectdb(conn.c_str()); - } - } - - return PQconnectdb(_connString.c_str()); -} #endif //ZT_CONTROLLER_USE_LIBPQ diff --git a/controller/PostgreSQL.hpp b/controller/PostgreSQL.hpp index c1d9dfd1a..637b29749 100644 --- a/controller/PostgreSQL.hpp +++ b/controller/PostgreSQL.hpp @@ -20,6 +20,9 @@ #define ZT_CENTRAL_CONTROLLER_COMMIT_THREADS 4 +#include "ConnectionPool.hpp" +#include + #include #include @@ -31,14 +34,69 @@ namespace ZeroTier { struct RedisConfig; + +class PostgresConnection : public Connection { +public: + virtual ~PostgresConnection() { + } + + std::shared_ptr c; + int a; +}; + + +class PostgresConnFactory : public ConnectionFactory { +public: + PostgresConnFactory(std::string &connString) + : m_connString(connString) + { + } + + virtual std::shared_ptr create() { + auto c = std::shared_ptr(new PostgresConnection()); + c->c = std::make_shared(m_connString); + return std::static_pointer_cast(c); + } +private: + std::string m_connString; +}; + +class PostgreSQL; + +class MemberNotificationReceiver : public pqxx::notification_receiver { +public: + MemberNotificationReceiver(PostgreSQL *p, pqxx::connection &c, const std::string &channel); + virtual ~MemberNotificationReceiver() { + fprintf(stderr, "MemberNotificationReceiver destroyed\n"); + } + + virtual void operator() (const std::string &payload, int backendPid); +private: + PostgreSQL *_psql; +}; + +class NetworkNotificationReceiver : public pqxx::notification_receiver { +public: + 
NetworkNotificationReceiver(PostgreSQL *p, pqxx::connection &c, const std::string &channel); + virtual ~NetworkNotificationReceiver() { + fprintf(stderr, "NetworkNotificationReceiver destroyed\n"); + }; + + virtual void operator() (const std::string &payload, int packend_pid); +private: + PostgreSQL *_psql; +}; + /** * A controller database driver that talks to PostgreSQL * * This is for use with ZeroTier Central. Others are free to build and use it - * but be aware taht we might change it at any time. + * but be aware that we might change it at any time. */ class PostgreSQL : public DB { + friend class MemberNotificationReceiver; + friend class NetworkNotificationReceiver; public: PostgreSQL(const Identity &myId, const char *path, int listenPort, RedisConfig *rc); virtual ~PostgreSQL(); @@ -49,21 +107,29 @@ public: virtual void eraseNetwork(const uint64_t networkId); virtual void eraseMember(const uint64_t networkId, const uint64_t memberId); virtual void nodeIsOnline(const uint64_t networkId, const uint64_t memberId, const InetAddress &physicalAddress); + virtual std::string getSSOAuthURL(const nlohmann::json &member, const std::string &redirectURL); protected: struct _PairHasher { inline std::size_t operator()(const std::pair &p) const { return (std::size_t)(p.first ^ p.second); } }; + virtual void _memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool notifyListeners) { + DB::_memberChanged(old, memberConfig, notifyListeners); + } + + virtual void _networkChanged(nlohmann::json &old,nlohmann::json &networkConfig,bool notifyListeners) { + DB::_networkChanged(old, networkConfig, notifyListeners); + } private: - void initializeNetworks(PGconn *conn); - void initializeMembers(PGconn *conn); + void initializeNetworks(); + void initializeMembers(); void heartbeat(); void membersDbWatcher(); - void _membersWatcher_Postgres(PGconn *conn); + void _membersWatcher_Postgres(); void networksDbWatcher(); - void _networksWatcher_Postgres(PGconn *conn); + void 
_networksWatcher_Postgres(); void _membersWatcher_Redis(); void _networksWatcher_Redis(); @@ -80,7 +146,7 @@ private: NO_OVERRIDE = 1 }; - PGconn * getPgConn( OverrideMode m = ALLOW_PGBOUNCER_OVERRIDE ); + std::shared_ptr > _pool; const Identity _myId; const Address _myAddress; @@ -103,6 +169,7 @@ private: mutable volatile bool _waitNoticePrinted; int _listenPort; + uint8_t _ssoPsk[48]; RedisConfig *_rc; std::shared_ptr _redis; diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index 83c4a4787..2bdea6474 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -420,157 +420,6 @@ enum ZT_ResultCode */ #define ZT_ResultCode_isFatal(x) ((((int)(x)) >= 100)&&(((int)(x)) < 1000)) - -/** - * Multipath bonding policy - */ -enum ZT_MultipathBondingPolicy -{ - /** - * Normal operation. No fault tolerance, no load balancing - */ - ZT_BONDING_POLICY_NONE = 0, - - /** - * Sends traffic out on only one path at a time. Configurable immediate - * fail-over. - */ - ZT_BONDING_POLICY_ACTIVE_BACKUP = 1, - - /** - * Sends traffic out on all paths - */ - ZT_BONDING_POLICY_BROADCAST = 2, - - /** - * Stripes packets across all paths - */ - ZT_BONDING_POLICY_BALANCE_RR = 3, - - /** - * Packets destined for specific peers will always be sent over the same - * path. - */ - ZT_BONDING_POLICY_BALANCE_XOR = 4, - - /** - * Balances flows among all paths according to path performance - */ - ZT_BONDING_POLICY_BALANCE_AWARE = 5 -}; - -/** - * Multipath active re-selection policy (linkSelectMethod) - */ -enum ZT_MultipathLinkSelectMethod -{ - /** - * Primary link regains status as active link whenever it comes back up - * (default when links are explicitly specified) - */ - ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS = 0, - - /** - * Primary link regains status as active link when it comes back up and - * (if) it is better than the currently-active link. 
- */ - ZT_MULTIPATH_RESELECTION_POLICY_BETTER = 1, - - /** - * Primary link regains status as active link only if the currently-active - * link fails. - */ - ZT_MULTIPATH_RESELECTION_POLICY_FAILURE = 2, - - /** - * The primary link can change if a superior path is detected. - * (default if user provides no fail-over guidance) - */ - ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE = 3 -}; - -/** - * Mode of multipath link interface - */ -enum ZT_MultipathLinkMode -{ - ZT_MULTIPATH_SLAVE_MODE_PRIMARY = 0, - ZT_MULTIPATH_SLAVE_MODE_SPARE = 1 -}; - -/** - * Strategy for path monitoring - */ -enum ZT_MultipathMonitorStrategy -{ - /** - * Use bonding policy's default strategy - */ - ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DEFAULT = 0, - - /** - * Does not actively send probes to judge aliveness, will rely - * on conventional traffic and summary statistics. - */ - ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE = 1, - - /** - * Sends probes at a constant rate to judge aliveness. - */ - ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_ACTIVE = 2, - - /** - * Sends probes at varying rates which correlate to native - * traffic loads to judge aliveness. - */ - ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC = 3 -}; - -/** - * Strategy for re-balancing protocol flows - */ -enum ZT_MultipathFlowRebalanceStrategy -{ - /** - * Flows will only be re-balanced among links during - * assignment or failover. This minimizes the possibility - * of sequence reordering and is thus the default setting. - */ - ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_PASSIVE = 0, - - /** - * Flows that are active may be re-assigned to a new more - * suitable link if it can be done without disrupting the flow. - * This setting can sometimes cause sequence re-ordering. - */ - ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_OPPORTUNISTIC = 0, - - /** - * Flows will be continuously re-assigned the most suitable link - * in order to maximize "balance". This can often cause sequence - * reordering and is thus only reccomended for protocols like UDP. 
- */ - ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_AGGRESSIVE = 2 -}; - -/** - * Indices for the path quality weight vector - */ -enum ZT_MultipathQualityWeightIndex -{ - ZT_QOS_LAT_IDX, - ZT_QOS_LTM_IDX, - ZT_QOS_PDV_IDX, - ZT_QOS_PLR_IDX, - ZT_QOS_PER_IDX, - ZT_QOS_THR_IDX, - ZT_QOS_THM_IDX, - ZT_QOS_THV_IDX, - ZT_QOS_AGE_IDX, - ZT_QOS_SCP_IDX, - ZT_QOS_WEIGHT_SIZE -}; - /** * Status codes sent to status update callback when things happen */ @@ -820,7 +669,12 @@ enum ZT_VirtualNetworkStatus /** * ZeroTier core version too old */ - ZT_NETWORK_STATUS_CLIENT_TOO_OLD = 5 + ZT_NETWORK_STATUS_CLIENT_TOO_OLD = 5, + + /** + * External authentication is required (e.g. SSO) + */ + ZT_NETWORK_STATUS_AUTHENTICATION_REQUIRED = 6 }; /** @@ -1339,6 +1193,21 @@ typedef struct * Network specific DNS configuration */ ZT_VirtualNetworkDNS dns; + + /** + * sso enabled + */ + bool ssoEnabled; + + /** + * If the status us AUTHENTICATION_REQUIRED, this may contain a URL for authentication. + */ + char authenticationURL[2048]; + + /** + * Time that current authentication expires. only valid if ssoEnabled is true + */ + uint64_t authenticationExpiryTime; } ZT_VirtualNetworkConfig; /** diff --git a/node/Bond.cpp b/node/Bond.cpp index d6a16ad86..cf4b4eabd 100644 --- a/node/Bond.cpp +++ b/node/Bond.cpp @@ -1,102 +1,267 @@ /* - * Copyright (c)2013-2020 ZeroTier, Inc. + * Copyright (c)2013-2021 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2025-01-01 + * Change Date: 2026-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. 
*/ /****/ -#include - -#include "../osdep/OSUtils.hpp" - -#include "Peer.hpp" #include "Bond.hpp" #include "Switch.hpp" -#include "Flow.hpp" -#include "Path.hpp" + +#include +#include namespace ZeroTier { -Bond::Bond(const RuntimeEnvironment *renv, int policy, const SharedPtr& peer) : - RR(renv), - _peer(peer), - _qosCutoffCount(0), - _ackCutoffCount(0), - _lastAckRateCheck(0), - _lastQoSRateCheck(0), - _lastQualityEstimation(0), - _lastCheckUserPreferences(0), - _lastBackgroundTaskCheck(0), - _lastBondStatusLog(0), - _lastPathNegotiationReceived(0), - _lastPathNegotiationCheck(0), - _lastSentPathNegotiationRequest(0), - _lastFlowStatReset(0), - _lastFlowExpirationCheck(0), - _lastFlowRebalance(0), - _lastFrame(0), - _lastActiveBackupPathChange(0) +static unsigned char s_freeRandomByteCounter = 0; + +int Bond::_minReqMonitorInterval = ZT_BOND_FAILOVER_DEFAULT_INTERVAL; +uint8_t Bond::_defaultPolicy = ZT_BOND_POLICY_NONE; + +Phy* Bond::_phy; + +Mutex Bond::_bonds_m; +Mutex Bond::_links_m; + +std::string Bond::_defaultPolicyStr; +std::map > Bond::_bonds; +std::map Bond::_policyTemplateAssignments; +std::map > Bond::_bondPolicyTemplates; +std::map > > Bond::_linkDefinitions; +std::map > > Bond::_interfaceToLinkMap; + +bool Bond::linkAllowed(std::string& policyAlias, SharedPtr link) { - setReasonableDefaults(policy, SharedPtr(), false); - _policyAlias = BondController::getPolicyStrByCode(policy); + bool foundInDefinitions = false; + if (_linkDefinitions.count(policyAlias)) { + auto it = _linkDefinitions[policyAlias].begin(); + while (it != _linkDefinitions[policyAlias].end()) { + if (link->ifname() == (*it)->ifname()) { + foundInDefinitions = true; + break; + } + ++it; + } + } + return _linkDefinitions[policyAlias].empty() || foundInDefinitions; } -Bond::Bond(const RuntimeEnvironment *renv, std::string& basePolicy, std::string& policyAlias, const SharedPtr& peer) : - RR(renv), - _policyAlias(policyAlias), - _peer(peer) +void Bond::addCustomLink(std::string& 
policyAlias, SharedPtr link) { - setReasonableDefaults(BondController::getPolicyCodeByStr(basePolicy), SharedPtr(), false); + Mutex::Lock _l(_links_m); + _linkDefinitions[policyAlias].push_back(link); + auto search = _interfaceToLinkMap[policyAlias].find(link->ifname()); + if (search == _interfaceToLinkMap[policyAlias].end()) { + link->setAsUserSpecified(true); + _interfaceToLinkMap[policyAlias].insert(std::pair >(link->ifname(), link)); + } } -Bond::Bond(const RuntimeEnvironment *renv, SharedPtr originalBond, const SharedPtr& peer) : - RR(renv), - _peer(peer), - _lastAckRateCheck(0), - _lastQoSRateCheck(0), - _lastQualityEstimation(0), - _lastCheckUserPreferences(0), - _lastBackgroundTaskCheck(0), - _lastBondStatusLog(0), - _lastPathNegotiationReceived(0), - _lastPathNegotiationCheck(0), - _lastFlowStatReset(0), - _lastFlowExpirationCheck(0), - _lastFlowRebalance(0), - _lastFrame(0) +bool Bond::addCustomPolicy(const SharedPtr& newBond) { - setReasonableDefaults(originalBond->_bondingPolicy, originalBond, true); + Mutex::Lock _l(_bonds_m); + if (! _bondPolicyTemplates.count(newBond->policyAlias())) { + _bondPolicyTemplates[newBond->policyAlias()] = newBond; + return true; + } + return false; } -void Bond::nominatePath(const SharedPtr& path, int64_t now) +bool Bond::assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias) { - char traceMsg[256]; char pathStr[128]; path->address().toString(pathStr); + Mutex::Lock _l(_bonds_m); + if (! _policyTemplateAssignments.count(identity)) { + _policyTemplateAssignments[identity] = policyAlias; + return true; + } + return false; +} + +SharedPtr Bond::getBondByPeerId(int64_t identity) +{ + Mutex::Lock _l(_bonds_m); + return _bonds.count(identity) ? _bonds[identity] : SharedPtr(); +} + +SharedPtr Bond::createTransportTriggeredBond(const RuntimeEnvironment* renv, const SharedPtr& peer) +{ + Mutex::Lock _l(_bonds_m); + int64_t identity = peer->identity().address().toInt(); + Bond* bond = nullptr; + if (! 
_bonds.count(identity)) { + std::string policyAlias; + if (! _policyTemplateAssignments.count(identity)) { + if (_defaultPolicy) { + bond = new Bond(renv, _defaultPolicy, peer); + bond->log("new default bond"); + } + if (! _defaultPolicy && _defaultPolicyStr.length()) { + bond = new Bond(renv, _bondPolicyTemplates[_defaultPolicyStr].ptr(), peer); + bond->log("new default custom bond"); + } + } + else { + if (! _bondPolicyTemplates[_policyTemplateAssignments[identity]]) { + bond = new Bond(renv, _defaultPolicy, peer); + bond->log("peer-specific bond, was specified as %s but the bond definition was not found, using default %s", _policyTemplateAssignments[identity].c_str(), getPolicyStrByCode(_defaultPolicy).c_str()); + } + else { + bond = new Bond(renv, _bondPolicyTemplates[_policyTemplateAssignments[identity]].ptr(), peer); + bond->log("new default bond"); + } + } + } + if (bond) { + _bonds[identity] = bond; + /** + * Determine if user has specified anything that could affect the bonding policy's decisions + */ + if (_interfaceToLinkMap.count(bond->policyAlias())) { + std::map >::iterator it = _interfaceToLinkMap[bond->policyAlias()].begin(); + while (it != _interfaceToLinkMap[bond->policyAlias()].end()) { + if (it->second->isUserSpecified()) { + bond->_userHasSpecifiedLinks = true; + } + if (it->second->isUserSpecified() && it->second->primary()) { + bond->_userHasSpecifiedPrimaryLink = true; + } + if (it->second->isUserSpecified() && it->second->userHasSpecifiedFailoverInstructions()) { + bond->_userHasSpecifiedFailoverInstructions = true; + } + if (it->second->isUserSpecified() && (it->second->speed() > 0)) { + bond->_userHasSpecifiedLinkSpeeds = true; + } + ++it; + } + } + return bond; + } + return SharedPtr(); +} + +SharedPtr Bond::getLinkBySocket(const std::string& policyAlias, uint64_t localSocket) +{ + Mutex::Lock _l(_links_m); + char ifname[32] = { 0 }; // 256 because interfaces on Windows can potentially be that long + 
_phy->getIfName((PhySocket*)((uintptr_t)localSocket), ifname, sizeof(ifname) - 1); + // fprintf(stderr, "ifname %s\n",ifname); + std::string ifnameStr(ifname); + auto search = _interfaceToLinkMap[policyAlias].find(ifnameStr); + if (search == _interfaceToLinkMap[policyAlias].end()) { + // If the link wasn't already known, add a new entry + // fprintf(stderr, "adding new link?? %s\n", ifnameStr.c_str()); + SharedPtr s = new Link(ifnameStr, 0, 0, true, ZT_BOND_SLAVE_MODE_SPARE, "", 0.0); + _interfaceToLinkMap[policyAlias].insert(std::pair >(ifnameStr, s)); + return s; + } + else { + return search->second; + } +} + +SharedPtr Bond::getLinkByName(const std::string& policyAlias, const std::string& ifname) +{ + Mutex::Lock _l(_links_m); + auto search = _interfaceToLinkMap[policyAlias].find(ifname); + if (search != _interfaceToLinkMap[policyAlias].end()) { + return search->second; + } + return SharedPtr(); +} + +void Bond::processBackgroundTasks(void* tPtr, const int64_t now) +{ + unsigned long _currMinReqMonitorInterval = ZT_BOND_FAILOVER_DEFAULT_INTERVAL; + Mutex::Lock _l(_bonds_m); + std::map >::iterator bondItr = _bonds.begin(); + while (bondItr != _bonds.end()) { + // Update Bond Controller's background processing timer + _currMinReqMonitorInterval = std::min(_currMinReqMonitorInterval, (unsigned long)(bondItr->second->monitorInterval())); + // Process bond tasks + bondItr->second->processBackgroundBondTasks(tPtr, now); + ++bondItr; + } + _minReqMonitorInterval = std::min(_currMinReqMonitorInterval, (unsigned long)ZT_BOND_FAILOVER_DEFAULT_INTERVAL); +} + +Bond::Bond(const RuntimeEnvironment* renv) : RR(renv) +{ +} + +Bond::Bond(const RuntimeEnvironment* renv, int policy, const SharedPtr& peer) : RR(renv), _freeRandomByte((unsigned char)((uintptr_t)this >> 4) ^ ++s_freeRandomByteCounter), _peer(peer), _peerId(_peer->_id.address().toInt()) +{ + setBondParameters(policy, SharedPtr(), false); + _policyAlias = getPolicyStrByCode(policy); +} + +Bond::Bond(const 
RuntimeEnvironment* renv, std::string& basePolicy, std::string& policyAlias, const SharedPtr& peer) : RR(renv), _policyAlias(policyAlias), _peer(peer) +{ + setBondParameters(getPolicyCodeByStr(basePolicy), SharedPtr(), false); +} + +Bond::Bond(const RuntimeEnvironment* renv, SharedPtr originalBond, const SharedPtr& peer) + : RR(renv) + , _freeRandomByte((unsigned char)((uintptr_t)this >> 4) ^ ++s_freeRandomByteCounter) + , _peer(peer) + , _peerId(_peer->_id.address().toInt()) +{ + setBondParameters(originalBond->_policy, originalBond, true); +} + +void Bond::nominatePathToBond(const SharedPtr& path, int64_t now) +{ + char pathStr[64] = { 0 }; + path->address().toString(pathStr); Mutex::Lock _l(_paths_m); - if (!RR->bc->linkAllowed(_policyAlias, getLink(path))) { + /** + * Ensure the link is allowed and the path is not already present + */ + if (! RR->bc->linkAllowed(_policyAlias, getLink(path))) { return; } bool alreadyPresent = false; - for (int i=0; iifname().c_str(), pathStr, _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); - _paths[i]->startTrial(now); + if (! alreadyPresent) { + /** + * Find somewhere to stick it + */ + for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (! 
_paths[i].p) { + _paths[i].set(now, path); + /** + * Set user preferences and update state variables of other paths on the same link + */ + SharedPtr sl = getLink(_paths[i].p); + if (sl) { + // Determine if there are any other paths on this link + bool bFoundCommonLink = false; + SharedPtr commonLink = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); + for (unsigned int j = 0; j < ZT_MAX_PEER_NETWORK_PATHS; ++j) { + if (_paths[j].p && _paths[j].p.ptr() != _paths[i].p.ptr()) { + if (RR->bc->getLinkBySocket(_policyAlias, _paths[j].p->localSocket()) == commonLink) { + bFoundCommonLink = true; + _paths[j].onlyPathOnLink = false; + } + } + } + _paths[i].ipvPref = sl->ipvPref(); + _paths[i].mode = sl->mode(); + _paths[i].enabled = sl->enabled(); + _paths[i].onlyPathOnLink = ! bFoundCommonLink; + } + log("nominate link %s/%s (now in trial period)", getLink(path)->ifname().c_str(), pathStr); break; } } @@ -105,85 +270,93 @@ void Bond::nominatePath(const SharedPtr& path, int64_t now) estimatePathQuality(now); } +void Bond::addPathToBond(int nominatedIdx, int bondedIdx) +{ + // Map bonded set to nominated set + _bondIdxMap[bondedIdx] = nominatedIdx; + // Tell the bonding layer that we can now use this bond for traffic + _paths[nominatedIdx].bonded = true; +} + SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) { Mutex::Lock _l(_paths_m); /** * active-backup */ - if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - if (_abPath) { - return _abPath; + if (_policy == ZT_BOND_POLICY_ACTIVE_BACKUP) { + if (_abPathIdx != ZT_MAX_PEER_NETWORK_PATHS && _paths[_abPathIdx].p) { + return _paths[_abPathIdx].p; } } /** * broadcast */ - if (_bondingPolicy == ZT_BONDING_POLICY_BROADCAST) { - return SharedPtr(); // Handled in Switch::_trySend() + if (_policy == ZT_BOND_POLICY_BROADCAST) { + return SharedPtr(); // Handled in Switch::_trySend() } - if (!_numBondedPaths) { - return SharedPtr(); // No paths assigned to bond yet, cannot balance traffic + if (! 
_numBondedPaths) { + return SharedPtr(); // No paths assigned to bond yet, cannot balance traffic } /** * balance-rr */ - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) { - if (!_allowFlowHashing) { + if (_policy == ZT_BOND_POLICY_BALANCE_RR) { + if (! _allowFlowHashing) { if (_packetsPerLink == 0) { // Randomly select a path - return _paths[_bondedIdx[_freeRandomByte % _numBondedPaths]]; // TODO: Optimize + return _paths[_bondIdxMap[_freeRandomByte % _numBondedPaths]].p; } if (_rrPacketsSentOnCurrLink < _packetsPerLink) { // Continue to use this link ++_rrPacketsSentOnCurrLink; - return _paths[_bondedIdx[_rrIdx]]; + return _paths[_bondIdxMap[_rrIdx]].p; } // Reset striping counter _rrPacketsSentOnCurrLink = 0; - if (_numBondedPaths == 1) { + if (_numBondedPaths == 1 || _rrIdx >= (ZT_MAX_PEER_NETWORK_PATHS-1)) { _rrIdx = 0; } else { int _tempIdx = _rrIdx; - for (int searchCount = 0; searchCount < (_numBondedPaths-1); searchCount++) { - _tempIdx = (_tempIdx == (_numBondedPaths-1)) ? 0 : _tempIdx+1; - if (_bondedIdx[_tempIdx] != ZT_MAX_PEER_NETWORK_PATHS) { - if (_paths[_bondedIdx[_tempIdx]] && _paths[_bondedIdx[_tempIdx]]->eligible(now,_ackSendInterval)) { + for (int searchCount = 0; searchCount < (_numBondedPaths - 1); searchCount++) { + _tempIdx = (_tempIdx == (_numBondedPaths - 1)) ? 0 : _tempIdx + 1; + if (_bondIdxMap[_tempIdx] != ZT_MAX_PEER_NETWORK_PATHS) { + if (_paths[_bondIdxMap[_tempIdx]].p && _paths[_bondIdxMap[_tempIdx]].eligible) { _rrIdx = _tempIdx; break; } } } } - if (_paths[_bondedIdx[_rrIdx]]) { - return _paths[_bondedIdx[_rrIdx]]; + if (_paths[_bondIdxMap[_rrIdx]].p) { + return _paths[_bondIdxMap[_rrIdx]].p; } } } /** * balance-xor */ - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { - if (!_allowFlowHashing || flowId == -1) { + if (_policy == ZT_BOND_POLICY_BALANCE_XOR || _policy == ZT_BOND_POLICY_BALANCE_AWARE) { + if (! 
_allowFlowHashing || flowId == -1) { // No specific path required for unclassified traffic, send on anything - return _paths[_bondedIdx[_freeRandomByte % _numBondedPaths]]; // TODO: Optimize + int m_idx = _bondIdxMap[_freeRandomByte % _numBondedPaths]; + return _paths[m_idx].p; } else if (_allowFlowHashing) { - // TODO: Optimize Mutex::Lock _l(_flows_m); SharedPtr flow; if (_flows.count(flowId)) { flow = _flows[flowId]; - flow->updateActivity(now); + flow->lastActivity = now; } else { unsigned char entropy; Utils::getSecureRandom(&entropy, 1); - flow = createFlow(SharedPtr(), flowId, entropy, now); + flow = createFlow(ZT_MAX_PEER_NETWORK_PATHS, flowId, entropy, now); } if (flow) { - return flow->assignedPath(); + return _paths[flow->assignedPath].p; } } } @@ -192,191 +365,168 @@ SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) void Bond::recordIncomingInvalidPacket(const SharedPtr& path) { - //char traceMsg[256]; char pathStr[128]; path->address().toString(pathStr); - //sprintf(traceMsg, "%s (qos) Invalid packet on link %s/%s from peer %llx", - // OSUtils::humanReadableTimestamp().c_str(), getLink(path)->ifname().c_str(), pathStr, _peer->_id.address().toInt()); - //RR->t->bondStateMessage(NULL, traceMsg); + // char pathStr[64] = { 0 }; path->address().toString(pathStr); + // log("%s (qos) Invalid packet on link %s/%s from peer %llx", + // getLink(path)->ifname().c_str(), pathStr); Mutex::Lock _l(_paths_m); - for (int i=0; ipacketValiditySamples.push(false); + for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p == path) { + _paths[i].packetValiditySamples.push(false); } } } -void Bond::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, - uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) +void Bond::recordOutgoingPacket(const SharedPtr& path, uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) { - //char traceMsg[256]; char 
pathStr[128]; path->address().toString(pathStr); - //sprintf(traceMsg, "%s (bond) Outgoing packet on link %s/%s to peer %llx", - // OSUtils::humanReadableTimestamp().c_str(), getLink(path)->ifname().c_str(), pathStr, _peer->_id.address().toInt()); - //RR->t->bondStateMessage(NULL, traceMsg); - _freeRandomByte += (unsigned char)(packetId >> 8); // Grab entropy to use in path selection logic - if (!_shouldCollectPathStatistics) { - return; + _freeRandomByte += (unsigned char)(packetId >> 8); // Grab entropy to use in path selection logic + bool isFrame = (verb == Packet::Packet::VERB_ECHO || verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME); + if (isFrame) { + // char pathStr[64] = { 0 }; + // path->address().toString(pathStr); + // int pathIdx = getNominatedPathIdx(path); + // log("outgoing packet via [%d]", pathIdx); + // log("outgoing packet via %s/%s", getLink(path)->ifname().c_str(), pathStr); } - bool isFrame = (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME); - bool shouldRecord = (packetId & (ZT_QOS_ACK_DIVISOR - 1) - && (verb != Packet::VERB_ACK) - && (verb != Packet::VERB_QOS_MEASUREMENT)); + bool shouldRecord = (packetId & (ZT_QOS_ACK_DIVISOR - 1) && (verb != Packet::VERB_ACK) && (verb != Packet::VERB_QOS_MEASUREMENT)); if (isFrame || shouldRecord) { Mutex::Lock _l(_paths_m); + int pathIdx = getNominatedPathIdx(path); + if (pathIdx == ZT_MAX_PEER_NETWORK_PATHS) { + return; + } if (isFrame) { - ++(path->_packetsOut); - _lastFrame=now; + ++(_paths[pathIdx].packetsOut); + _lastFrame = now; } if (shouldRecord) { - path->_unackedBytes += payloadLength; + //_paths[pathIdx].unackedBytes += payloadLength; // Take note that we're expecting a VERB_ACK on this path as of a specific time - if (path->qosStatsOut.size() < ZT_QOS_MAX_OUTSTANDING_RECORDS) { - path->qosStatsOut[packetId] = now; + if (_paths[pathIdx].qosStatsOut.size() < ZT_QOS_MAX_OUTSTANDING_RECORDS) { + _paths[pathIdx].qosStatsOut[packetId] = now; } } } if (_allowFlowHashing && 
(flowId != ZT_QOS_NO_FLOW)) { Mutex::Lock _l(_flows_m); if (_flows.count(flowId)) { - _flows[flowId]->recordOutgoingBytes(payloadLength); + _flows[flowId]->bytesOut += payloadLength; } } } -void Bond::recordIncomingPacket(const SharedPtr& path, uint64_t packetId, uint16_t payloadLength, - Packet::Verb verb, int32_t flowId, int64_t now) +void Bond::recordIncomingPacket(const SharedPtr& path, uint64_t packetId, uint16_t payloadLength, Packet::Verb verb, int32_t flowId, int64_t now) { - //char traceMsg[256]; char pathStr[128]; path->address().toString(pathStr); - //sprintf(traceMsg, "%s (bond) Incoming packet on link %s/%s from peer %llx [id=%llx, len=%d, verb=%d, flowId=%x]", - // OSUtils::humanReadableTimestamp().c_str(), getLink(path)->ifname().c_str(), pathStr, _peer->_id.address().toInt(), packetId, payloadLength, verb, flowId); - //RR->t->bondStateMessage(NULL, traceMsg); - bool isFrame = (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME); - bool shouldRecord = (packetId & (ZT_QOS_ACK_DIVISOR - 1) - && (verb != Packet::VERB_ACK) - && (verb != Packet::VERB_QOS_MEASUREMENT)); + bool isFrame = (verb == Packet::Packet::VERB_ECHO || verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME); + if (isFrame) { + // char pathStr[64] = { 0 }; path->address().toString(pathStr); + // int pathIdx = getNominatedPathIdx(path); + // log("incoming packet via [%d] [id=%llx, len=%d, verb=%d, flowId=%x]", pathIdx, packetId, payloadLength, verb, flowId); + // log("incoming packet via %s/%s (ls=%llx) [id=%llx, len=%d, verb=%d, flowId=%x]", getLink(path)->ifname().c_str(), pathStr, path->localSocket(), packetId, payloadLength, verb, flowId); + } + bool shouldRecord = (packetId & (ZT_QOS_ACK_DIVISOR - 1) && (verb != Packet::VERB_ACK) && (verb != Packet::VERB_QOS_MEASUREMENT)); + Mutex::Lock _l(_paths_m); + int pathIdx = getNominatedPathIdx(path); + if (pathIdx == ZT_MAX_PEER_NETWORK_PATHS) { + return; + } + // Take note of the time that this previously-dead path 
received a packet + if (! _paths[pathIdx].alive) { + _paths[pathIdx].lastAliveToggle = now; + } if (isFrame || shouldRecord) { - Mutex::Lock _l(_paths_m); - if (isFrame) { - ++(path->_packetsIn); - _lastFrame=now; - } - if (shouldRecord) { - path->ackStatsIn[packetId] = payloadLength; - ++(path->_packetsReceivedSinceLastAck); - path->qosStatsIn[packetId] = now; - ++(path->_packetsReceivedSinceLastQoS); - path->packetValiditySamples.push(true); + if (_paths[pathIdx].allowed()) { + if (isFrame) { + ++(_paths[pathIdx].packetsIn); + _lastFrame = now; + } + if (shouldRecord) { + _paths[pathIdx].qosStatsIn[packetId] = now; + ++(_paths[pathIdx].packetsReceivedSinceLastQoS); + _paths[pathIdx].packetValiditySamples.push(true); + } } } + /** * Learn new flows and pro-actively create entries for them in the bond so * that the next time we send a packet out that is part of a flow we know * which path to use. */ - if ((flowId != ZT_QOS_NO_FLOW) - && (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR - || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR - || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE)) { + if ((flowId != ZT_QOS_NO_FLOW) && (_policy == ZT_BOND_POLICY_BALANCE_RR || _policy == ZT_BOND_POLICY_BALANCE_XOR || _policy == ZT_BOND_POLICY_BALANCE_AWARE)) { Mutex::Lock _l(_flows_m); SharedPtr flow; - if (!_flows.count(flowId)) { - flow = createFlow(path, flowId, 0, now); - } else { + if (! 
_flows.count(flowId)) { + flow = createFlow(pathIdx, flowId, 0, now); + } + else { flow = _flows[flowId]; } if (flow) { - flow->recordIncomingBytes(payloadLength); + flow->bytesIn += payloadLength; } } } -void Bond::receivedQoS(const SharedPtr& path, int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts) +void Bond::receivedQoS(const SharedPtr& path, int64_t now, int count, uint64_t* rx_id, uint16_t* rx_ts) { Mutex::Lock _l(_paths_m); - //char traceMsg[256]; char pathStr[128]; path->address().toString(pathStr); - //sprintf(traceMsg, "%s (qos) Received QoS packet sampling %d frames from peer %llx via %s/%s", - // OSUtils::humanReadableTimestamp().c_str(), count, _peer->_id.address().toInt(), getLink(path)->ifname().c_str(), pathStr); - //RR->t->bondStateMessage(NULL, traceMsg); + int pathIdx = getNominatedPathIdx(path); + if (pathIdx == ZT_MAX_PEER_NETWORK_PATHS) { + return; + } + // char pathStr[64] = { 0 }; path->address().toString(pathStr); + // log("received QoS packet (sampling %d frames) via %s/%s", count, getLink(path)->ifname().c_str(), pathStr); // Look up egress times and compute latency values for each record - std::map::iterator it; - for (int j=0; jqosStatsOut.find(rx_id[j]); - if (it != path->qosStatsOut.end()) { - path->latencySamples.push(((uint16_t)(now - it->second) - rx_ts[j]) / 2); - path->qosStatsOut.erase(it); + std::map::iterator it; + for (int j = 0; j < count; j++) { + it = _paths[pathIdx].qosStatsOut.find(rx_id[j]); + if (it != _paths[pathIdx].qosStatsOut.end()) { + _paths[pathIdx].latencySamples.push(((uint16_t)(now - it->second) - rx_ts[j]) / 2); + _paths[pathIdx].qosStatsOut.erase(it); } } - path->qosRecordSize.push(count); + _paths[pathIdx].qosRecordSize.push(count); } -void Bond::receivedAck(const SharedPtr& path, int64_t now, int32_t ackedBytes) -{ - Mutex::Lock _l(_paths_m); - //char traceMsg[256]; char pathStr[128]; path->address().toString(pathStr); - //sprintf(traceMsg, "%s (qos) Received ACK packet for %d bytes from peer %llx 
via %s/%s", - // OSUtils::humanReadableTimestamp().c_str(), ackedBytes, _peer->_id.address().toInt(), getLink(path)->ifname().c_str(), pathStr); - //RR->t->bondStateMessage(NULL, traceMsg); - path->_lastAckReceived = now; - path->_unackedBytes = (ackedBytes > path->_unackedBytes) ? 0 : path->_unackedBytes - ackedBytes; - int64_t timeSinceThroughputEstimate = (now - path->_lastThroughputEstimation); - if (timeSinceThroughputEstimate >= throughputMeasurementInterval) { - // TODO: See if this floating point math can be reduced - uint64_t throughput = (uint64_t)((float)(path->_bytesAckedSinceLastThroughputEstimation) / ((float)timeSinceThroughputEstimate / (float)1000)); - throughput /= 1000; - if (throughput > 0.0) { - path->throughputSamples.push(throughput); - path->_throughputMax = throughput > path->_throughputMax ? throughput : path->_throughputMax; - } - path->_lastThroughputEstimation = now; - path->_bytesAckedSinceLastThroughputEstimation = 0; - } else { - path->_bytesAckedSinceLastThroughputEstimation += ackedBytes; - } -} - -int32_t Bond::generateQoSPacket(const SharedPtr& path, int64_t now, char *qosBuffer) +int32_t Bond::generateQoSPacket(int pathIdx, int64_t now, char* qosBuffer) { int32_t len = 0; - std::map::iterator it = path->qosStatsIn.begin(); - int i=0; - int numRecords = std::min(path->_packetsReceivedSinceLastQoS,ZT_QOS_TABLE_SIZE); - while (iqosStatsIn.end()) { + std::map::iterator it = _paths[pathIdx].qosStatsIn.begin(); + int i = 0; + int numRecords = std::min(_paths[pathIdx].packetsReceivedSinceLastQoS, ZT_QOS_TABLE_SIZE); + while (i < numRecords && it != _paths[pathIdx].qosStatsIn.end()) { uint64_t id = it->first; memcpy(qosBuffer, &id, sizeof(uint64_t)); - qosBuffer+=sizeof(uint64_t); + qosBuffer += sizeof(uint64_t); uint16_t holdingTime = (uint16_t)(now - it->second); memcpy(qosBuffer, &holdingTime, sizeof(uint16_t)); - qosBuffer+=sizeof(uint16_t); - len+=sizeof(uint64_t)+sizeof(uint16_t); - path->qosStatsIn.erase(it++); + qosBuffer += 
sizeof(uint16_t); + len += sizeof(uint64_t) + sizeof(uint16_t); + _paths[pathIdx].qosStatsIn.erase(it++); ++i; } return len; } -bool Bond::assignFlowToBondedPath(SharedPtr &flow, int64_t now) +bool Bond::assignFlowToBondedPath(SharedPtr& flow, int64_t now) { - char traceMsg[256]; - char curPathStr[128]; + char curPathStr[64] = { 0 }; unsigned int idx = ZT_MAX_PEER_NETWORK_PATHS; - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR) { - idx = abs((int)(flow->id() % (_numBondedPaths))); - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[_bondedIdx[idx]]->localSocket()); - _paths[_bondedIdx[idx]]->address().toString(curPathStr); - sprintf(traceMsg, "%s (balance-xor) Assigned outgoing flow %x to peer %llx to link %s/%s, %lu active flow(s)", - OSUtils::humanReadableTimestamp().c_str(), flow->id(), _peer->_id.address().toInt(), link->ifname().c_str(), curPathStr, _flows.size()); - RR->t->bondStateMessage(NULL, traceMsg); - flow->assignPath(_paths[_bondedIdx[idx]],now); - ++(_paths[_bondedIdx[idx]]->_assignedFlowCount); + if (_policy == ZT_BOND_POLICY_BALANCE_XOR) { + idx = abs((int)(flow->id % (_numBondedPaths))); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[_bondIdxMap[idx]].p->localSocket()); + _paths[_bondIdxMap[idx]].p->address().toString(curPathStr); + flow->assignPath(_bondIdxMap[idx], now); + ++(_paths[_bondIdxMap[idx]].assignedFlowCount); } - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { + if (_policy == ZT_BOND_POLICY_BALANCE_AWARE) { unsigned char entropy; Utils::getSecureRandom(&entropy, 1); if (_totalBondUnderload) { entropy %= _totalBondUnderload; } - if (!_numBondedPaths) { - sprintf(traceMsg, "%s (balance-aware) There are no bonded paths, cannot assign flow %x\n", - OSUtils::humanReadableTimestamp().c_str(), flow->id()); - RR->t->bondStateMessage(NULL, traceMsg); + if (! 
_numBondedPaths) { + log("unable to assign flow %x (bond has no links)\n", flow->id); return false; } /* Since there may be scenarios where a path is removed before we can re-estimate @@ -384,17 +534,17 @@ bool Bond::assignFlowToBondedPath(SharedPtr &flow, int64_t now) value that we use to randomly assign among the surviving paths, otherwise we risk not being able to find a path to assign this flow to. */ int totalIncompleteAllocation = 0; - for(unsigned int i=0;ibonded()) { - totalIncompleteAllocation += _paths[i]->_allocation; + for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p && _paths[i].bonded) { + totalIncompleteAllocation += _paths[i].allocation; } } entropy %= totalIncompleteAllocation; - for(unsigned int i=0;ibonded()) { - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); - _paths[i]->address().toString(curPathStr); - uint8_t probabilitySegment = (_totalBondUnderload > 0) ? _paths[i]->_affinity : _paths[i]->_allocation; + for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p && _paths[i].bonded) { + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); + _paths[i].p->address().toString(curPathStr); + uint8_t probabilitySegment = (_totalBondUnderload > 0) ? 
_paths[i].affinity : _paths[i].allocation; if (entropy <= probabilitySegment) { idx = i; break; @@ -403,50 +553,35 @@ bool Bond::assignFlowToBondedPath(SharedPtr &flow, int64_t now) } } if (idx < ZT_MAX_PEER_NETWORK_PATHS) { - if (flow->_assignedPath) { - flow->_previouslyAssignedPath = flow->_assignedPath; - } - flow->assignPath(_paths[idx],now); - ++(_paths[idx]->_assignedFlowCount); + flow->assignPath(idx, now); + ++(_paths[idx].assignedFlowCount); } else { - fprintf(stderr, "could not assign flow?\n"); exit(0); // TODO: Remove for production + log("unable to assign out-flow %x (unknown reason)", flow->id); return false; } } - if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - if (_abOverflowEnabled) { - flow->assignPath(_abPath, now); - } else { - sprintf(traceMsg, "%s (bond) Unable to assign outgoing flow %x to peer %llx, no active overflow link", - OSUtils::humanReadableTimestamp().c_str(), flow->id(), _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); - return false; + if (_policy == ZT_BOND_POLICY_ACTIVE_BACKUP) { + if (_abPathIdx == ZT_MAX_PEER_NETWORK_PATHS) { + log("unable to assign out-flow %x (no active backup link)", flow->id); } + flow->assignPath(_abPathIdx, now); } - flow->assignedPath()->address().toString(curPathStr); - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, flow->assignedPath()->localSocket()); - sprintf(traceMsg, "%s (bond) Assigned outgoing flow %x to peer %llx to link %s/%s, %lu active flow(s)", - OSUtils::humanReadableTimestamp().c_str(), flow->id(), _peer->_id.address().toInt(), link->ifname().c_str(), curPathStr, _flows.size()); - RR->t->bondStateMessage(NULL, traceMsg); + _paths[flow->assignedPath].p->address().toString(curPathStr); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[flow->assignedPath].p->localSocket()); + log("assign out-flow %x to link %s/%s (%lu / %lu flows)", flow->id, link->ifname().c_str(), curPathStr, _paths[flow->assignedPath].assignedFlowCount, (unsigned 
long)_flows.size()); return true; } -SharedPtr Bond::createFlow(const SharedPtr &path, int32_t flowId, unsigned char entropy, int64_t now) +SharedPtr Bond::createFlow(int pathIdx, int32_t flowId, unsigned char entropy, int64_t now) { - char traceMsg[256]; - char curPathStr[128]; - // --- - if (!_numBondedPaths) { - sprintf(traceMsg, "%s (bond) There are no bonded paths to peer %llx, cannot assign flow %x\n", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt(), flowId); - RR->t->bondStateMessage(NULL, traceMsg); + char curPathStr[64] = { 0 }; + if (! _numBondedPaths) { + log("unable to assign flow %x (bond has no links)\n", flowId); return SharedPtr(); } if (_flows.size() >= ZT_FLOW_MAX_COUNT) { - sprintf(traceMsg, "%s (bond) Maximum number of flows on bond to peer %llx reached (%d), forcibly forgetting oldest flow\n", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt(), ZT_FLOW_MAX_COUNT); - RR->t->bondStateMessage(NULL, traceMsg); + log("forget oldest flow (max flows reached: %d)\n", ZT_FLOW_MAX_COUNT); forgetFlowsWhenNecessary(0, true, now); } SharedPtr flow = new Flow(flowId, now); @@ -456,20 +591,18 @@ SharedPtr Bond::createFlow(const SharedPtr &path, int32_t flowId, un * is received on a path but no flow exists, in this case we simply assign the path * that the remote peer chose for us. 
*/ - if (path) { - flow->assignPath(path,now); - path->address().toString(curPathStr); - path->_assignedFlowCount++; - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, flow->assignedPath()->localSocket()); - sprintf(traceMsg, "%s (bond) Assigned incoming flow %x from peer %llx to link %s/%s, %lu active flow(s)", - OSUtils::humanReadableTimestamp().c_str(), flow->id(), _peer->_id.address().toInt(), link->ifname().c_str(), curPathStr, _flows.size()); - RR->t->bondStateMessage(NULL, traceMsg); + if (pathIdx != ZT_MAX_PEER_NETWORK_PATHS) { + flow->assignPath(pathIdx, now); + _paths[pathIdx].p->address().toString(curPathStr); + _paths[pathIdx].assignedFlowCount++; + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[flow->assignedPath].p->localSocket()); + log("assign in-flow %x to link %s/%s (%lu / %lu)", flow->id, link->ifname().c_str(), curPathStr, _paths[pathIdx].assignedFlowCount, (unsigned long)_flows.size()); } /** * Add a flow when no path was provided. This means that it is an outgoing packet * and that it is up to the local peer to decide how to load-balance its transmission. 
*/ - else if (!path) { + else { assignFlowToBondedPath(flow, now); } return flow; @@ -477,24 +610,22 @@ SharedPtr Bond::createFlow(const SharedPtr &path, int32_t flowId, un void Bond::forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now) { - char traceMsg[256]; - std::map >::iterator it = _flows.begin(); - std::map >::iterator oldestFlow = _flows.end(); + std::map >::iterator it = _flows.begin(); + std::map >::iterator oldestFlow = _flows.end(); SharedPtr expiredFlow; - if (age) { // Remove by specific age + if (age) { // Remove by specific age while (it != _flows.end()) { if (it->second->age(now) > age) { - sprintf(traceMsg, "%s (bond) Forgetting flow %x between this node and peer %llx, %lu active flow(s)", - OSUtils::humanReadableTimestamp().c_str(), it->first, _peer->_id.address().toInt(), (_flows.size()-1)); - RR->t->bondStateMessage(NULL, traceMsg); - it->second->assignedPath()->_assignedFlowCount--; + log("forget flow %x (age %llu) (%lu / %lu)", it->first, (unsigned long long)it->second->age(now), _paths[it->second->assignedPath].assignedFlowCount, (unsigned long)(_flows.size() - 1)); + _paths[it->second->assignedPath].assignedFlowCount--; it = _flows.erase(it); - } else { + } + else { ++it; } } } - else if (oldest) { // Remove single oldest by natural expiration + else if (oldest) { // Remove single oldest by natural expiration uint64_t maxAge = 0; while (it != _flows.end()) { if (it->second->age(now) > maxAge) { @@ -504,383 +635,302 @@ void Bond::forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now) ++it; } if (oldestFlow != _flows.end()) { - sprintf(traceMsg, "%s (bond) Forgetting oldest flow %x (of age %llu) between this node and peer %llx, %lu active flow(s)", - OSUtils::humanReadableTimestamp().c_str(), oldestFlow->first, oldestFlow->second->age(now), _peer->_id.address().toInt(), (_flows.size()-1)); - RR->t->bondStateMessage(NULL, traceMsg); - oldestFlow->second->assignedPath()->_assignedFlowCount--; + log("forget oldest flow %x 
(age %llu) (total flows: %lu)", oldestFlow->first, (unsigned long long)oldestFlow->second->age(now), (unsigned long)(_flows.size() - 1)); + _paths[oldestFlow->second->assignedPath].assignedFlowCount--; _flows.erase(oldestFlow); } } } -void Bond::processIncomingPathNegotiationRequest(uint64_t now, SharedPtr &path, int16_t remoteUtility) +void Bond::processIncomingPathNegotiationRequest(uint64_t now, SharedPtr& path, int16_t remoteUtility) { - char traceMsg[256]; - if (_abLinkSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { + char pathStr[64] = { 0 }; + if (_abLinkSelectMethod != ZT_BOND_RESELECTION_POLICY_OPTIMIZE) { return; } Mutex::Lock _l(_paths_m); - char pathStr[128]; - path->address().toString(pathStr); - if (!_lastPathNegotiationCheck) { + int pathIdx = getNominatedPathIdx(path); + if (pathIdx == ZT_MAX_PEER_NETWORK_PATHS) { return; } - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, path->localSocket()); + _paths[pathIdx].p->address().toString(pathStr); + if (! _lastPathNegotiationCheck) { + return; + } + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[pathIdx].p->localSocket()); if (remoteUtility > _localUtility) { - char pathStr[128]; path->address().toString(pathStr); - sprintf(traceMsg, "%s (bond) Peer %llx suggests using alternate link %s/%s. Remote utility (%d) is GREATER than local utility (%d), switching to said link\n", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt(), link->ifname().c_str(), pathStr, remoteUtility, _localUtility); - RR->t->bondStateMessage(NULL, traceMsg); - negotiatedPath = path; + _paths[pathIdx].p->address().toString(pathStr); + log("peer suggests alternate link %s/%s, remote utility (%d) greater than local utility (%d), switching to suggested link\n", link->ifname().c_str(), pathStr, remoteUtility, _localUtility); + negotiatedPathIdx = pathIdx; } if (remoteUtility < _localUtility) { - sprintf(traceMsg, "%s (bond) Peer %llx suggests using alternate link %s/%s. 
Remote utility (%d) is LESS than local utility (%d), not switching\n", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt(), link->ifname().c_str(), pathStr, remoteUtility, _localUtility); - RR->t->bondStateMessage(NULL, traceMsg); + log("peer suggests alternate link %s/%s, remote utility (%d) less than local utility (%d), not switching\n", link->ifname().c_str(), pathStr, remoteUtility, _localUtility); } if (remoteUtility == _localUtility) { - sprintf(traceMsg, "%s (bond) Peer %llx suggests using alternate link %s/%s. Remote utility (%d) is equal to local utility (%d)\n", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt(), link->ifname().c_str(), pathStr, remoteUtility, _localUtility); - RR->t->bondStateMessage(NULL, traceMsg); + log("peer suggests alternate link %s/%s, remote utility (%d) equal to local utility (%d)\n", link->ifname().c_str(), pathStr, remoteUtility, _localUtility); if (_peer->_id.address().toInt() > RR->node->identity().address().toInt()) { - sprintf(traceMsg, "%s (bond) Agreeing with peer %llx to use alternate link %s/%s\n", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt(), link->ifname().c_str(), pathStr); - RR->t->bondStateMessage(NULL, traceMsg); - negotiatedPath = path; - } else { - sprintf(traceMsg, "%s (bond) Ignoring petition from peer %llx to use alternate link %s/%s\n", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt(), link->ifname().c_str(), pathStr); - RR->t->bondStateMessage(NULL, traceMsg); + log("agree with peer to use alternate link %s/%s\n", link->ifname().c_str(), pathStr); + negotiatedPathIdx = pathIdx; + } + else { + log("ignore petition from peer to use alternate link %s/%s\n", link->ifname().c_str(), pathStr); } } } -void Bond::pathNegotiationCheck(void *tPtr, const int64_t now) +void Bond::pathNegotiationCheck(void* tPtr, int64_t now) { - char pathStr[128]; + char pathStr[64] = { 0 }; int maxInPathIdx = ZT_MAX_PEER_NETWORK_PATHS; 
int maxOutPathIdx = ZT_MAX_PEER_NETWORK_PATHS; uint64_t maxInCount = 0; uint64_t maxOutCount = 0; - for(unsigned int i=0;i_packetsIn > maxInCount) { - maxInCount = _paths[i]->_packetsIn; + if (_paths[i].packetsIn > maxInCount) { + maxInCount = _paths[i].packetsIn; maxInPathIdx = i; } - if (_paths[i]->_packetsOut > maxOutCount) { - maxOutCount = _paths[i]->_packetsOut; + if (_paths[i].packetsOut > maxOutCount) { + maxOutCount = _paths[i].packetsOut; maxOutPathIdx = i; } - _paths[i]->resetPacketCounts(); + _paths[i].resetPacketCounts(); } - bool _peerLinksSynchronized = ((maxInPathIdx != ZT_MAX_PEER_NETWORK_PATHS) - && (maxOutPathIdx != ZT_MAX_PEER_NETWORK_PATHS) - && (maxInPathIdx != maxOutPathIdx)) ? false : true; + bool _peerLinksSynchronized = ((maxInPathIdx != ZT_MAX_PEER_NETWORK_PATHS) && (maxOutPathIdx != ZT_MAX_PEER_NETWORK_PATHS) && (maxInPathIdx != maxOutPathIdx)) ? false : true; /** * Determine utility and attempt to petition remote peer to switch to our chosen path */ - if (!_peerLinksSynchronized) { - _localUtility = _paths[maxOutPathIdx]->_failoverScore - _paths[maxInPathIdx]->_failoverScore; - if (_paths[maxOutPathIdx]->_negotiated) { - _localUtility -= ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED; + if (! 
_peerLinksSynchronized) { + _localUtility = _paths[maxOutPathIdx].failoverScore - _paths[maxInPathIdx].failoverScore; + if (_paths[maxOutPathIdx].negotiated) { + _localUtility -= ZT_BOND_FAILOVER_HANDICAP_NEGOTIATED; } if ((now - _lastSentPathNegotiationRequest) > ZT_PATH_NEGOTIATION_CUTOFF_TIME) { - //fprintf(stderr, "BT: (sync) it's been long enough, sending more requests.\n"); + // fprintf(stderr, "BT: (sync) it's been long enough, sending more requests.\n"); _numSentPathNegotiationRequests = 0; } if (_numSentPathNegotiationRequests < ZT_PATH_NEGOTIATION_TRY_COUNT) { if (_localUtility >= 0) { - //fprintf(stderr, "BT: (sync) paths appear to be out of sync (utility=%d)\n", _localUtility); - sendPATH_NEGOTIATION_REQUEST(tPtr, _paths[maxOutPathIdx]); + // fprintf(stderr, "BT: (sync) paths appear to be out of sync (utility=%d)\n", _localUtility); + sendPATH_NEGOTIATION_REQUEST(tPtr, _paths[maxOutPathIdx].p); ++_numSentPathNegotiationRequests; _lastSentPathNegotiationRequest = now; - _paths[maxOutPathIdx]->address().toString(pathStr); - SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[maxOutPathIdx]->localSocket()); - //fprintf(stderr, "sending request to use %s on %s, ls=%llx, utility=%d\n", pathStr, link->ifname().c_str(), _paths[maxOutPathIdx]->localSocket(), _localUtility); + _paths[maxOutPathIdx].p->address().toString(pathStr); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[maxOutPathIdx].p->localSocket()); + // fprintf(stderr, "sending request to use %s on %s, ls=%llx, utility=%d\n", pathStr, link->ifname().c_str(), _paths[maxOutPathIdx].p->localSocket(), _localUtility); } } /** * Give up negotiating and consider switching */ - else if ((now - _lastSentPathNegotiationRequest) > (2 * ZT_PATH_NEGOTIATION_CHECK_INTERVAL)) { + else if ((now - _lastSentPathNegotiationRequest) > (2 * ZT_BOND_OPTIMIZE_INTERVAL)) { if (_localUtility == 0) { // There's no loss to us, just switch without sending a another request - //fprintf(stderr, "BT: 
(sync) giving up, switching to remote peer's path.\n"); - negotiatedPath = _paths[maxInPathIdx]; + // fprintf(stderr, "BT: (sync) giving up, switching to remote peer's path.\n"); + negotiatedPathIdx = maxInPathIdx; } } } } -void Bond::sendPATH_NEGOTIATION_REQUEST(void *tPtr, const SharedPtr &path) +void Bond::sendPATH_NEGOTIATION_REQUEST(void* tPtr, int pathIdx) { - char traceMsg[256]; char pathStr[128]; path->address().toString(pathStr); - sprintf(traceMsg, "%s (bond) Sending link negotiation request to peer %llx via link %s/%s, local utility is %d", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt(), getLink(path)->ifname().c_str(), pathStr, _localUtility); - RR->t->bondStateMessage(NULL, traceMsg); - if (_abLinkSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { + char pathStr[64] = { 0 }; + _paths[pathIdx].p->address().toString(pathStr); + log("send link negotiation request to peer via link %s/%s, local utility is %d", getLink(_paths[pathIdx].p)->ifname().c_str(), pathStr, _localUtility); + if (_abLinkSelectMethod != ZT_BOND_RESELECTION_POLICY_OPTIMIZE) { return; } - Packet outp(_peer->_id.address(),RR->identity.address(),Packet::VERB_PATH_NEGOTIATION_REQUEST); + Packet outp(_peer->_id.address(), RR->identity.address(), Packet::VERB_PATH_NEGOTIATION_REQUEST); outp.append(_localUtility); - if (path->address()) { - outp.armor(_peer->key(),false,_peer->aesKeysIfSupported()); - RR->node->putPacket(tPtr,path->localSocket(),path->address(),outp.data(),outp.size()); + if (_paths[pathIdx].p->address()) { + outp.armor(_peer->key(), false, _peer->aesKeysIfSupported()); + RR->node->putPacket(tPtr, _paths[pathIdx].p->localSocket(), _paths[pathIdx].p->address(), outp.data(), outp.size()); } } -void Bond::sendACK(void *tPtr, const SharedPtr &path,const int64_t localSocket, - const InetAddress &atAddress,int64_t now) +void Bond::sendQOS_MEASUREMENT(void* tPtr, int pathIdx, int64_t localSocket, const InetAddress& atAddress, int64_t now) { - 
Packet outp(_peer->_id.address(),RR->identity.address(),Packet::VERB_ACK); - int32_t bytesToAck = 0; - std::map::iterator it = path->ackStatsIn.begin(); - while (it != path->ackStatsIn.end()) { - bytesToAck += it->second; - ++it; - } - //char traceMsg[256]; char pathStr[128]; path->address().toString(pathStr); - //sprintf(traceMsg, "%s (qos) Sending ACK packet for %d bytes to peer %llx via link %s/%s", - // OSUtils::humanReadableTimestamp().c_str(), bytesToAck, _peer->_id.address().toInt(), getLink(path)->ifname().c_str(), pathStr); - //RR->t->bondStateMessage(NULL, traceMsg); - outp.append(bytesToAck); - if (atAddress) { - outp.armor(_peer->key(),false,_peer->aesKeysIfSupported()); - RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); - } else { - RR->sw->send(tPtr,outp,false); - } - path->ackStatsIn.clear(); - path->_packetsReceivedSinceLastAck = 0; - path->_lastAckSent = now; -} - -void Bond::sendQOS_MEASUREMENT(void *tPtr,const SharedPtr &path,const int64_t localSocket, - const InetAddress &atAddress,int64_t now) -{ - //char traceMsg[256]; char pathStr[128]; path->address().toString(pathStr); - //sprintf(traceMsg, "%s (qos) Sending QoS packet to peer %llx via link %s/%s", - // OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt(), getLink(path)->ifname().c_str(), pathStr); - //RR->t->bondStateMessage(NULL, traceMsg); - const int64_t _now = RR->node->now(); - Packet outp(_peer->_id.address(),RR->identity.address(),Packet::VERB_QOS_MEASUREMENT); + char pathStr[64] = { 0 }; + _paths[pathIdx].p->address().toString(pathStr); + int64_t _now = RR->node->now(); + Packet outp(_peer->_id.address(), RR->identity.address(), Packet::VERB_QOS_MEASUREMENT); char qosData[ZT_QOS_MAX_PACKET_SIZE]; - int16_t len = generateQoSPacket(path, _now,qosData); - outp.append(qosData,len); - if (atAddress) { - outp.armor(_peer->key(),false,_peer->aesKeysIfSupported()); - RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); - } else 
{ - RR->sw->send(tPtr,outp,false); + int16_t len = generateQoSPacket(pathIdx, _now, qosData); + _overheadBytes += len; + if (len) { + outp.append(qosData, len); + if (atAddress) { + outp.armor(_peer->key(), false, _peer->aesKeysIfSupported()); + RR->node->putPacket(tPtr, localSocket, atAddress, outp.data(), outp.size()); + } + else { + RR->sw->send(tPtr, outp, false); + } + _paths[pathIdx].packetsReceivedSinceLastQoS = 0; + _paths[pathIdx].lastQoSMeasurement = now; } - // Account for the fact that a VERB_QOS_MEASUREMENT was just sent. Reset timers. - path->_packetsReceivedSinceLastQoS = 0; - path->_lastQoSMeasurement = now; + // log("send QOS via link %s/%s (len=%d)", getLink(_paths[pathIdx].p)->ifname().c_str(), pathStr, len); } -void Bond::processBackgroundTasks(void *tPtr, const int64_t now) +void Bond::processBackgroundBondTasks(void* tPtr, int64_t now) { - Mutex::Lock _l(_paths_m); - if (!_peer->_canUseMultipath || (now - _lastBackgroundTaskCheck) < ZT_BOND_BACKGROUND_TASK_MIN_INTERVAL) { + if (! _peer->_localMultipathSupported || (now - _lastBackgroundTaskCheck) < ZT_BOND_BACKGROUND_TASK_MIN_INTERVAL) { return; } _lastBackgroundTaskCheck = now; + Mutex::Lock _l(_paths_m); - // Compute dynamic path monitor timer interval - if (_linkMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC) { - int suggestedMonitorInterval = (now - _lastFrame) / 100; - _dynamicPathMonitorInterval = std::min(ZT_PATH_HEARTBEAT_PERIOD, ((suggestedMonitorInterval > _bondMonitorInterval) ? 
suggestedMonitorInterval : _bondMonitorInterval)); - } - // TODO: Clarify and generalize this logic - if (_linkMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC) { - _shouldCollectPathStatistics = true; - } - - // Memoize oft-used properties in the packet ingress/egress logic path - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { - // Required for real-time balancing - _shouldCollectPathStatistics = true; - } - if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_BETTER) { - // Required for judging suitability of primary link after recovery - _shouldCollectPathStatistics = true; - } - if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { - // Required for judging suitability of new candidate primary - _shouldCollectPathStatistics = true; - } - } - if ((now - _lastCheckUserPreferences) > 1000) { - _lastCheckUserPreferences = now; - applyUserPrefs(); - } - - curateBond(now,false); + curateBond(now, false); if ((now - _lastQualityEstimation) > _qualityEstimationInterval) { _lastQualityEstimation = now; estimatePathQuality(now); } - dumpInfo(now); + dumpInfo(now, false); - // Send QOS/ACK packets as needed - if (_shouldCollectPathStatistics) { - for(unsigned int i=0;iallowed()) { - if (_paths[i]->needsToSendQoS(now,_qosSendInterval)) { - sendQOS_MEASUREMENT(tPtr, _paths[i], _paths[i]->localSocket(), _paths[i]->address(), now); - } - if (_paths[i]->needsToSendAck(now,_ackSendInterval)) { - sendACK(tPtr, _paths[i], _paths[i]->localSocket(), _paths[i]->address(), now); + // Send ambient monitoring traffic + for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p && _paths[i].allowed()) { + // ECHO (this is our bond's heartbeat) + if ((_monitorInterval > 0) && ((now - _paths[i].p->_lastOut) >= _monitorInterval)) { + if ((_peer->remoteVersionProtocol() >= 5) && (! 
((_peer->remoteVersionMajor() == 1) && (_peer->remoteVersionMinor() == 1) && (_peer->remoteVersionRevision() == 0)))) { + Packet outp(_peer->address(), RR->identity.address(), Packet::VERB_ECHO); + outp.armor(_peer->key(), true, _peer->aesKeysIfSupported()); + RR->node->expectReplyTo(outp.packetId()); + RR->node->putPacket(tPtr, _paths[i].p->localSocket(), _paths[i].p->address(), outp.data(), outp.size()); + _overheadBytes += outp.size(); + char pathStr[64] = { 0 }; + _paths[i].p->address().toString(pathStr); + // log("send HELLO via link %s/%s (len=%d)", getLink(_paths[i].p)->ifname().c_str(), pathStr, outp.size()); } } + // QOS + if (_paths[i].needsToSendQoS(now, _qosSendInterval)) { + sendQOS_MEASUREMENT(tPtr, i, _paths[i].p->localSocket(), _paths[i].p->address(), now); + } } } // Perform periodic background tasks unique to each bonding policy - switch (_bondingPolicy) - { - case ZT_BONDING_POLICY_ACTIVE_BACKUP: + switch (_policy) { + case ZT_BOND_POLICY_ACTIVE_BACKUP: processActiveBackupTasks(tPtr, now); break; - case ZT_BONDING_POLICY_BROADCAST: + case ZT_BOND_POLICY_BROADCAST: break; - case ZT_BONDING_POLICY_BALANCE_RR: - case ZT_BONDING_POLICY_BALANCE_XOR: - case ZT_BONDING_POLICY_BALANCE_AWARE: + case ZT_BOND_POLICY_BALANCE_RR: + case ZT_BOND_POLICY_BALANCE_XOR: + case ZT_BOND_POLICY_BALANCE_AWARE: processBalanceTasks(now); break; default: break; } // Check whether or not a path negotiation needs to be performed - if (((now - _lastPathNegotiationCheck) > ZT_PATH_NEGOTIATION_CHECK_INTERVAL) && _allowPathNegotiation) { + if (((now - _lastPathNegotiationCheck) > ZT_BOND_OPTIMIZE_INTERVAL) && _allowPathNegotiation) { _lastPathNegotiationCheck = now; pathNegotiationCheck(tPtr, now); } } -void Bond::applyUserPrefs() +void Bond::curateBond(int64_t now, bool rebuildBond) { - for(unsigned int i=0;i sl = getLink(_paths[i]); - if (sl) { - if (sl->monitorInterval() == 0) { // If no interval was specified for this link, use more generic bond-wide interval - 
sl->setMonitorInterval(_bondMonitorInterval); - } - RR->bc->setMinReqPathMonitorInterval((sl->monitorInterval() < RR->bc->minReqPathMonitorInterval()) ? sl->monitorInterval() : RR->bc->minReqPathMonitorInterval()); - bool bFoundCommonLink = false; - SharedPtr commonLink =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); - for(unsigned int j=0;jbc->getLinkBySocket(_policyAlias, _paths[j]->localSocket()) == commonLink) { - bFoundCommonLink = true; - } - } - } - _paths[i]->_monitorInterval = sl->monitorInterval(); - _paths[i]->_upDelay = sl->upDelay() ? sl->upDelay() : _upDelay; - _paths[i]->_downDelay = sl->downDelay() ? sl->downDelay() : _downDelay; - _paths[i]->_ipvPref = sl->ipvPref(); - _paths[i]->_mode = sl->mode(); - _paths[i]->_enabled = sl->enabled(); - _paths[i]->_onlyPathOnLink = !bFoundCommonLink; - } - } - if (_peer) { - _peer->_shouldCollectPathStatistics = _shouldCollectPathStatistics; - _peer->_bondingPolicy = _bondingPolicy; - } -} - -void Bond::curateBond(const int64_t now, bool rebuildBond) -{ - char traceMsg[256]; - char pathStr[128]; + char pathStr[64] = { 0 }; uint8_t tmpNumAliveLinks = 0; uint8_t tmpNumTotalLinks = 0; /** - * Update path states + * Update path state variables. State variables are used so that critical + * blocks that perform fast packet processing won't need to make as many + * function calls or computations. 
*/ - for(unsigned int i=0;ialive(now, true)) { + if (_paths[i].eligible) { tmpNumAliveLinks++; } - bool currEligibility = _paths[i]->eligible(now,_ackSendInterval); - if (currEligibility != _paths[i]->_lastEligibilityState) { - _paths[i]->address().toString(pathStr); - char traceMsg[256]; _paths[i]->address().toString(pathStr); - sprintf(traceMsg, "%s (bond) Eligibility of link %s/%s to peer %llx has changed from %d to %d", - OSUtils::humanReadableTimestamp().c_str(), getLink(_paths[i])->ifname().c_str(), pathStr, _peer->_id.address().toInt(), _paths[i]->_lastEligibilityState, currEligibility); - RR->t->bondStateMessage(NULL, traceMsg); + + /** + * Determine alive-ness + */ + _paths[i].alive = (now - _paths[i].p->_lastIn) < _failoverInterval; + + /** + * Determine current eligibility + */ + bool currEligibility = false; + // Simple RX age (driven by packets of any type and gratuitous VERB_HELLOs) + bool acceptableAge = _paths[i].p->age(now) < (_failoverInterval + _downDelay); + // Whether we've waited long enough since the link last came online + bool satisfiedUpDelay = (now - _paths[i].lastAliveToggle) >= _upDelay; + // Whether this path is still in its trial period + bool inTrial = (now - _paths[i].whenNominated) < ZT_BOND_OPTIMIZE_INTERVAL; + // if (includeRefractoryPeriod && _paths[i].refractoryPeriod) { + // As long as the refractory period value has not fully drained this path is not eligible + // currEligibility = false; + //} + currEligibility = _paths[i].allowed() && ((acceptableAge && satisfiedUpDelay) || inTrial); + // log("[%d] allowed=%d, acceptableAge=%d, satisfiedUpDelay=%d, inTrial=%d ==== %d", i, _paths[i].allowed(), acceptableAge, satisfiedUpDelay, inTrial, currEligibility); + + /** + * Note eligibility state change (if any) and take appropriate action + */ + if (currEligibility != _paths[i].eligible) { + _paths[i].p->address().toString(pathStr); + if (currEligibility == 0) { + log("link %s/%s is no longer eligible", 
getLink(_paths[i].p)->ifname().c_str(), pathStr); + } + if (currEligibility == 1) { + log("link %s/%s is eligible", getLink(_paths[i].p)->ifname().c_str(), pathStr); + } + dumpPathStatus(now, i); if (currEligibility) { rebuildBond = true; } - if (!currEligibility) { - _paths[i]->adjustRefractoryPeriod(now, _defaultPathRefractoryPeriod, !currEligibility); - if (_paths[i]->bonded()) { - char pathStr[128]; _paths[i]->address().toString(pathStr); - sprintf(traceMsg, "%s (bond) Link %s/%s to peer %llx was bonded, reallocation of its flows will occur soon", - OSUtils::humanReadableTimestamp().c_str(), getLink(_paths[i])->ifname().c_str(), pathStr, _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); - rebuildBond = true; - _paths[i]->_shouldReallocateFlows = _paths[i]->bonded(); - _paths[i]->setBonded(false); - } else { - sprintf(traceMsg, "%s (bond) Link %s/%s to peer %llx was not bonded, no allocation consequences", - OSUtils::humanReadableTimestamp().c_str(), getLink(_paths[i])->ifname().c_str(), pathStr, _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); + if (! currEligibility) { + _paths[i].adjustRefractoryPeriod(now, _defaultPathRefractoryPeriod, ! 
currEligibility); + if (_paths[i].bonded) { + _paths[i].bonded = false; + if (_allowFlowHashing) { + _paths[i].p->address().toString(pathStr); + log("link %s/%s was bonded, flow reallocation will occur soon", getLink(_paths[i].p)->ifname().c_str(), pathStr); + rebuildBond = true; + _paths[i].shouldReallocateFlows = _paths[i].bonded; + } } } } if (currEligibility) { - _paths[i]->adjustRefractoryPeriod(now, _defaultPathRefractoryPeriod, false); + _paths[i].adjustRefractoryPeriod(now, _defaultPathRefractoryPeriod, false); } - _paths[i]->_lastEligibilityState = currEligibility; + _paths[i].eligible = currEligibility; } + + /** + * Determine health status to report to user + */ _numAliveLinks = tmpNumAliveLinks; _numTotalLinks = tmpNumTotalLinks; - - /* Determine health status to report to user */ - bool tmpHealthStatus = true; - if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { + if (_policy == ZT_BOND_POLICY_ACTIVE_BACKUP) { if (_numAliveLinks < 2) { - // Considered healthy if there is at least one failover link + // Considered healthy if there is at least one backup link tmpHealthStatus = false; } } - if (_bondingPolicy == ZT_BONDING_POLICY_BROADCAST) { + if (_policy == ZT_BOND_POLICY_BROADCAST) { if (_numAliveLinks < 1) { - // Considerd healthy if we're able to send frames at all + // Considered healthy if we're able to send frames at all tmpHealthStatus = false; } } - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) { + if ((_policy == ZT_BOND_POLICY_BALANCE_RR) || (_policy == ZT_BOND_POLICY_BALANCE_XOR) || (_policy == ZT_BOND_POLICY_BALANCE_AWARE)) { if (_numAliveLinks < _numTotalLinks) { - // Considerd healthy if all known paths are alive, this should be refined to account for user bond config settings - tmpHealthStatus = false; - } - } - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR) { - if (_numAliveLinks < _numTotalLinks) { - // Considerd healthy if all known paths are alive, this should be refined to account for user bond config settings - 
tmpHealthStatus = false; - } - } - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { - if (_numAliveLinks < _numTotalLinks) { - // Considerd healthy if all known paths are alive, this should be refined to account for user bond config settings tmpHealthStatus = false; } } @@ -888,171 +938,195 @@ void Bond::curateBond(const int64_t now, bool rebuildBond) std::string healthStatusStr; if (tmpHealthStatus == true) { healthStatusStr = "HEALTHY"; - } else { + } + else { healthStatusStr = "DEGRADED"; } - sprintf(traceMsg, "%s (bond) Bond to peer %llx is in a %s state (%d/%d links)", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt(), healthStatusStr.c_str(), _numAliveLinks, _numTotalLinks); - RR->t->bondStateMessage(NULL, traceMsg); + log("bond is in a %s state (links: %d/%d)", healthStatusStr.c_str(), _numAliveLinks, _numTotalLinks); + dumpInfo(now, true); } _isHealthy = tmpHealthStatus; /** - * Curate the set of paths that are part of the bond proper. Selects a single path + * Curate the set of paths that are part of the bond proper. Select a set of paths * per logical link according to eligibility and user-specified constraints. */ - if ((_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) - || (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR) - || (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE)) { - if (!_numBondedPaths) { + + if ((_policy == ZT_BOND_POLICY_BALANCE_RR) || (_policy == ZT_BOND_POLICY_BALANCE_XOR) || (_policy == ZT_BOND_POLICY_BALANCE_AWARE)) { + if (! 
_numBondedPaths) { rebuildBond = true; } - // TODO: Optimize if (rebuildBond) { + log("rebuilding bond"); + // TODO: Obey blacklisting int updatedBondedPathCount = 0; - std::map,int> linkMap; - for (int i=0;iallowed() && (_paths[i]->eligible(now,_ackSendInterval) || !_numBondedPaths)) { - SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); - if (!linkMap.count(link)) { - linkMap[link] = i; - } - else { - bool overriden = false; - _paths[i]->address().toString(pathStr); - //fprintf(stderr, " link representative path already exists! (%s %s)\n", getLink(_paths[i])->ifname().c_str(), pathStr); - if (_paths[i]->preferred() && !_paths[linkMap[link]]->preferred()) { - // Override previous choice if preferred - if (_paths[linkMap[link]]->_assignedFlowCount) { - _paths[linkMap[link]]->_deprecated = true; - } - else { - _paths[linkMap[link]]->_deprecated = true; - _paths[linkMap[link]]->setBonded(false); - } - linkMap[link] = i; - overriden = true; - } - if ((_paths[i]->preferred() && _paths[linkMap[link]]->preferred()) - || (!_paths[i]->preferred() && !_paths[linkMap[link]]->preferred())) { - if (_paths[i]->preferenceRank() > _paths[linkMap[link]]->preferenceRank()) { - // Override if higher preference - if (_paths[linkMap[link]]->_assignedFlowCount) { - _paths[linkMap[link]]->_deprecated = true; - } - else { - _paths[linkMap[link]]->_deprecated = true; - _paths[linkMap[link]]->setBonded(false); - } - linkMap[link] = i; - } - } - } + // Build map associating paths with local physical links. 
Will be selected from in next step + std::map, std::vector > linkMap; + for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p) { + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); + linkMap[link].push_back(i); } } - std::map,int>::iterator it = linkMap.begin(); - for (int i=0; ipath map + std::map, std::vector >::iterator it = linkMap.begin(); + while (it != linkMap.end()) { + SharedPtr link = it->first; + int ipvPref = link->ipvPref(); + + // If user has no address type preference, then use every path we find on a link + if (ipvPref == 0) { + for (int j = 0; j < it->second.size(); j++) { + int idx = it->second.at(j); + if (! _paths[idx].p || ! _paths[idx].allowed()) { + continue; + } + addPathToBond(idx, updatedBondedPathCount); + ++updatedBondedPathCount; + _paths[idx].p->address().toString(pathStr); + log("add %s/%s (no user addr preference)", link->ifname().c_str(), pathStr); + } } - _bondedIdx[i] = ZT_MAX_PEER_NETWORK_PATHS; - if (it != linkMap.end()) { - _bondedIdx[i] = it->second; - _paths[_bondedIdx[i]]->setBonded(true); - ++it; - ++updatedBondedPathCount; - _paths[_bondedIdx[i]]->address().toString(pathStr); - //fprintf(stderr, "setting i=%d, _bondedIdx[%d]=%d to bonded (%s %s)\n", i, i, _bondedIdx[i], getLink(_paths[_bondedIdx[i]])->ifname().c_str(), pathStr); + // If the user prefers to only use one address type (IPv4 or IPv6) + if (ipvPref == 4 || ipvPref == 6) { + for (int j = 0; j < it->second.size(); j++) { + int idx = it->second.at(j); + if (! _paths[idx].p) { + continue; + } + if (! _paths[idx].allowed()) { + _paths[idx].p->address().toString(pathStr); + log("did not add %s/%s (user addr preference %d)", link->ifname().c_str(), pathStr, ipvPref); + continue; + } + if (! 
_paths[idx].eligible) { + continue; + } + addPathToBond(idx, updatedBondedPathCount); + ++updatedBondedPathCount; + _paths[idx].p->address().toString(pathStr); + log("add path %s/%s (user addr preference %d)", link->ifname().c_str(), pathStr, ipvPref); + } } + // If the users prefers one address type to another, try to find at least + // one path of that type before considering others. + if (ipvPref == 46 || ipvPref == 64) { + bool foundPreferredPath = false; + // Search for preferred paths + for (int j = 0; j < it->second.size(); j++) { + int idx = it->second.at(j); + if (! _paths[idx].p || ! _paths[idx].eligible) { + continue; + } + if (_paths[idx].preferred() && _paths[idx].allowed()) { + addPathToBond(idx, updatedBondedPathCount); + ++updatedBondedPathCount; + _paths[idx].p->address().toString(pathStr); + log("add %s/%s (user addr preference %d)", link->ifname().c_str(), pathStr, ipvPref); + foundPreferredPath = true; + } + } + // Unable to find a path that matches user preference, settle for another address type + if (! foundPreferredPath) { + log("did not find first-choice path type on link %s (user preference %d)", link->ifname().c_str(), ipvPref); + for (int j = 0; j < it->second.size(); j++) { + int idx = it->second.at(j); + if (! _paths[idx].p || ! 
_paths[idx].eligible) { + continue; + } + addPathToBond(idx, updatedBondedPathCount); + ++updatedBondedPathCount; + _paths[idx].p->address().toString(pathStr); + log("add %s/%s (user addr preference %d)", link->ifname().c_str(), pathStr, ipvPref); + foundPreferredPath = true; + } + } + } + ++it; // Next link } _numBondedPaths = updatedBondedPathCount; - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) { - // Cause a RR reset since the currently used index might no longer be valid + if (_policy == ZT_BOND_POLICY_BALANCE_RR) { + // Cause a RR reset since the current index might no longer be valid _rrPacketsSentOnCurrLink = _packetsPerLink; } } } } -void Bond::estimatePathQuality(const int64_t now) +void Bond::estimatePathQuality(int64_t now) { - char pathStr[128]; uint32_t totUserSpecifiedLinkSpeed = 0; - if (_numBondedPaths) { // Compute relative user-specified speeds of links - for(unsigned int i=0;i<_numBondedPaths;++i) { - SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); - if (_paths[i] && _paths[i]->allowed()) { + if (_numBondedPaths) { // Compute relative user-specified speeds of links + for (unsigned int i = 0; i < _numBondedPaths; ++i) { + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); + if (_paths[i].p && _paths[i].allowed()) { totUserSpecifiedLinkSpeed += link->speed(); } } - for(unsigned int i=0;i<_numBondedPaths;++i) { - SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); - if (_paths[i] && _paths[i]->allowed()) { - link->setRelativeSpeed(round( ((float)link->speed() / (float)totUserSpecifiedLinkSpeed) * 255)); + for (unsigned int i = 0; i < _numBondedPaths; ++i) { + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); + if (_paths[i].p && _paths[i].allowed()) { + link->setRelativeSpeed((uint8_t)round(((float)link->speed() / (float)totUserSpecifiedLinkSpeed) * 255)); } } } - float lat[ZT_MAX_PEER_NETWORK_PATHS]; - float 
pdv[ZT_MAX_PEER_NETWORK_PATHS]; - float plr[ZT_MAX_PEER_NETWORK_PATHS]; - float per[ZT_MAX_PEER_NETWORK_PATHS]; + float lat[ZT_MAX_PEER_NETWORK_PATHS] = { 0 }; + float pdv[ZT_MAX_PEER_NETWORK_PATHS] = { 0 }; + float plr[ZT_MAX_PEER_NETWORK_PATHS] = { 0 }; + float per[ZT_MAX_PEER_NETWORK_PATHS] = { 0 }; float maxLAT = 0; float maxPDV = 0; float maxPLR = 0; float maxPER = 0; - float quality[ZT_MAX_PEER_NETWORK_PATHS]; - uint8_t alloc[ZT_MAX_PEER_NETWORK_PATHS]; + float quality[ZT_MAX_PEER_NETWORK_PATHS] = { 0 }; + uint8_t alloc[ZT_MAX_PEER_NETWORK_PATHS] = { 0 }; float totQuality = 0.0f; - memset(&lat, 0, sizeof(lat)); - memset(&pdv, 0, sizeof(pdv)); - memset(&plr, 0, sizeof(plr)); - memset(&per, 0, sizeof(per)); - memset(&quality, 0, sizeof(quality)); - memset(&alloc, 0, sizeof(alloc)); - // Compute initial summary statistics - for(unsigned int i=0;iallowed()) { + for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (! _paths[i].p || ! _paths[i].allowed()) { continue; } // Compute/Smooth average of real-world observations - _paths[i]->_latencyMean = _paths[i]->latencySamples.mean(); - _paths[i]->_latencyVariance = _paths[i]->latencySamples.stddev(); - _paths[i]->_packetErrorRatio = 1.0 - (_paths[i]->packetValiditySamples.count() ? _paths[i]->packetValiditySamples.mean() : 1.0); + _paths[i].latencyMean = _paths[i].latencySamples.mean(); + _paths[i].latencyVariance = _paths[i].latencySamples.stddev(); + _paths[i].packetErrorRatio = 1.0 - (_paths[i].packetValiditySamples.count() ? 
_paths[i].packetValiditySamples.mean() : 1.0); if (userHasSpecifiedLinkSpeeds()) { // Use user-reported metrics - SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); if (link) { - _paths[i]->_throughputMean = link->speed(); - _paths[i]->_throughputVariance = 0; + _paths[i].throughputMean = link->speed(); + _paths[i].throughputVariance = 0; } } // Drain unacknowledged QoS records - std::map::iterator it = _paths[i]->qosStatsOut.begin(); + std::map::iterator it = _paths[i].qosStatsOut.begin(); uint64_t currentLostRecords = 0; - while (it != _paths[i]->qosStatsOut.end()) { - int qosRecordTimeout = 5000; //_paths[i]->monitorInterval() * ZT_MULTIPATH_QOS_ACK_INTERVAL_MULTIPLIER * 8; + while (it != _paths[i].qosStatsOut.end()) { + int qosRecordTimeout = 5000; //_paths[i].p->monitorInterval() * ZT_BOND_QOS_ACK_INTERVAL_MULTIPLIER * 8; if ((now - it->second) >= qosRecordTimeout) { // Packet was lost - it = _paths[i]->qosStatsOut.erase(it); + it = _paths[i].qosStatsOut.erase(it); ++currentLostRecords; - } else { ++it; } + } + else { + ++it; + } } - quality[i]=0; - totQuality=0; + quality[i] = 0; + totQuality = 0; // Normalize raw observations according to sane limits and/or user specified values - lat[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_latencyMean, 0, _maxAcceptableLatency, 0, 1)); - pdv[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_latencyVariance, 0, _maxAcceptablePacketDelayVariance, 0, 1)); - plr[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_packetLossRatio, 0, _maxAcceptablePacketLossRatio, 0, 1)); - per[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_packetErrorRatio, 0, _maxAcceptablePacketErrorRatio, 0, 1)); + lat[i] = 1.0 / expf(4 * Utils::normalize(_paths[i].latencyMean, 0, _maxAcceptableLatency, 0, 1)); + pdv[i] = 1.0 / expf(4 * Utils::normalize(_paths[i].latencyVariance, 0, _maxAcceptablePacketDelayVariance, 0, 1)); + plr[i] = 1.0 / 
expf(4 * Utils::normalize(_paths[i].packetLossRatio, 0, _maxAcceptablePacketLossRatio, 0, 1)); + per[i] = 1.0 / expf(4 * Utils::normalize(_paths[i].packetErrorRatio, 0, _maxAcceptablePacketErrorRatio, 0, 1)); // Record bond-wide maximums to determine relative values maxLAT = lat[i] > maxLAT ? lat[i] : maxLAT; maxPDV = pdv[i] > maxPDV ? pdv[i] : maxPDV; @@ -1060,81 +1134,74 @@ void Bond::estimatePathQuality(const int64_t now) maxPER = per[i] > maxPER ? per[i] : maxPER; } // Convert metrics to relative quantities and apply contribution weights - for(unsigned int i=0;ibonded()) { - quality[i] += ((maxLAT > 0.0f ? lat[i] / maxLAT : 0.0f) * _qualityWeights[ZT_QOS_LAT_IDX]); - quality[i] += ((maxPDV > 0.0f ? pdv[i] / maxPDV : 0.0f) * _qualityWeights[ZT_QOS_PDV_IDX]); - quality[i] += ((maxPLR > 0.0f ? plr[i] / maxPLR : 0.0f) * _qualityWeights[ZT_QOS_PLR_IDX]); - quality[i] += ((maxPER > 0.0f ? per[i] / maxPER : 0.0f) * _qualityWeights[ZT_QOS_PER_IDX]); + for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p && _paths[i].bonded) { + quality[i] += ((maxLAT > 0.0f ? lat[i] / maxLAT : 0.0f) * _qw[ZT_QOS_LAT_IDX]); + quality[i] += ((maxPDV > 0.0f ? pdv[i] / maxPDV : 0.0f) * _qw[ZT_QOS_PDV_IDX]); + quality[i] += ((maxPLR > 0.0f ? plr[i] / maxPLR : 0.0f) * _qw[ZT_QOS_PLR_IDX]); + quality[i] += ((maxPER > 0.0f ? 
per[i] / maxPER : 0.0f) * _qw[ZT_QOS_PER_IDX]); totQuality += quality[i]; } } // Normalize to 8-bit allocation values - for(unsigned int i=0;ibonded()) { - alloc[i] = std::ceil((quality[i] / totQuality) * (float)255); - _paths[i]->_allocation = alloc[i]; + for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p && _paths[i].bonded) { + alloc[i] = (uint8_t)(std::ceil((quality[i] / totQuality) * (float)255)); + _paths[i].allocation = alloc[i]; } } } -void Bond::processBalanceTasks(const int64_t now) +void Bond::processBalanceTasks(int64_t now) { - char curPathStr[128]; - // TODO: Generalize + char pathStr[64] = { 0 }; int totalAllocation = 0; - for (int i=0;ibonded() && _paths[i]->eligible(now,_ackSendInterval)) { - totalAllocation+=_paths[i]->_allocation; + if (_paths[i].p && _paths[i].bonded && _paths[i].eligible) { + totalAllocation += _paths[i].allocation; } } - unsigned char minimumAllocationValue = 0.33 * ((float)totalAllocation / (float)_numBondedPaths); + unsigned char minimumAllocationValue = (uint8_t)(0.33 * ((float)totalAllocation / (float)_numBondedPaths)); if (_allowFlowHashing) { /** * Clean up and reset flows if necessary */ - if ((now - _lastFlowExpirationCheck) > ZT_MULTIPATH_FLOW_CHECK_INTERVAL) { + if ((now - _lastFlowExpirationCheck) > ZT_PEER_PATH_EXPIRATION) { Mutex::Lock _l(_flows_m); - forgetFlowsWhenNecessary(ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL,false,now); - _lastFlowExpirationCheck = now; - } - if ((now - _lastFlowStatReset) > ZT_FLOW_STATS_RESET_INTERVAL) { - Mutex::Lock _l(_flows_m); - _lastFlowStatReset = now; - std::map >::iterator it = _flows.begin(); + forgetFlowsWhenNecessary(ZT_PEER_PATH_EXPIRATION, false, now); + std::map >::iterator it = _flows.begin(); while (it != _flows.end()) { it->second->resetByteCounts(); ++it; } + _lastFlowExpirationCheck = now; } /** * Re-allocate flows from dead paths */ - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { 
+ if (_policy == ZT_BOND_POLICY_BALANCE_XOR || _policy == ZT_BOND_POLICY_BALANCE_AWARE) { Mutex::Lock _l(_flows_m); - for (int i=0;ieligible(now,_ackSendInterval) && _paths[i]->_shouldReallocateFlows) { - char traceMsg[256]; char pathStr[128]; _paths[i]->address().toString(pathStr); - sprintf(traceMsg, "%s (balance-*) Reallocating flows to peer %llx from dead link %s/%s to surviving links", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt(), getLink(_paths[i])->ifname().c_str(), pathStr); - RR->t->bondStateMessage(NULL, traceMsg); - std::map >::iterator flow_it = _flows.begin(); + if (! _paths[i].eligible && _paths[i].shouldReallocateFlows) { + _paths[i].p->address().toString(pathStr); + log("reallocate flows from dead link %s/%s", getLink(_paths[i].p)->ifname().c_str(), pathStr); + std::map >::iterator flow_it = _flows.begin(); while (flow_it != _flows.end()) { - if (flow_it->second->assignedPath() == _paths[i]) { - if(assignFlowToBondedPath(flow_it->second, now)) { - _paths[i]->_assignedFlowCount--; + if (_paths[flow_it->second->assignedPath].p == _paths[i].p) { + if (assignFlowToBondedPath(flow_it->second, now)) { + _paths[i].assignedFlowCount--; } } ++flow_it; } - _paths[i]->_shouldReallocateFlows = false; + _paths[i].shouldReallocateFlows = false; } } } @@ -1142,173 +1209,91 @@ void Bond::processBalanceTasks(const int64_t now) * Re-allocate flows from under-performing * NOTE: This could be part of the above block but was kept separate for clarity. 
*/ - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR || _bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { + if (_policy == ZT_BOND_POLICY_BALANCE_XOR || _policy == ZT_BOND_POLICY_BALANCE_AWARE) { Mutex::Lock _l(_flows_m); - for (int i=0;ibonded() && _paths[i]->eligible(now,_ackSendInterval) && (_paths[i]->_allocation < minimumAllocationValue) && _paths[i]->_assignedFlowCount) { - _paths[i]->address().toString(curPathStr); - char traceMsg[256]; char pathStr[128]; _paths[i]->address().toString(pathStr); - sprintf(traceMsg, "%s (balance-*) Reallocating flows to peer %llx from under-performing link %s/%s\n", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt(), getLink(_paths[i])->ifname().c_str(), pathStr); - RR->t->bondStateMessage(NULL, traceMsg); - std::map >::iterator flow_it = _flows.begin(); + if (_paths[i].p && _paths[i].bonded && _paths[i].eligible && (_paths[i].allocation < minimumAllocationValue) && _paths[i].assignedFlowCount) { + _paths[i].p->address().toString(pathStr); + log("reallocate flows from under-performing link %s/%s\n", getLink(_paths[i].p)->ifname().c_str(), pathStr); + std::map >::iterator flow_it = _flows.begin(); while (flow_it != _flows.end()) { - if (flow_it->second->assignedPath() == _paths[i]) { - if(assignFlowToBondedPath(flow_it->second, now)) { - _paths[i]->_assignedFlowCount--; + if (flow_it->second->assignedPath == _paths[i].p) { + if (assignFlowToBondedPath(flow_it->second, now)) { + _paths[i].assignedFlowCount--; } } ++flow_it; } - _paths[i]->_shouldReallocateFlows = false; + _paths[i].shouldReallocateFlows = false; } } } } - /** - * Tasks specific to (Balance Round Robin) - */ - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) { - // Nothing - } - /** - * Tasks specific to (Balance XOR) - */ - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR) { - // Nothing - } - /** - * Tasks specific to (Balance Aware) - */ - if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { - if (_allowFlowHashing) { - 
Mutex::Lock _l(_flows_m); - if (_flowRebalanceStrategy == ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_PASSIVE) { - // Do nothing here, this is taken care of in the more general case above. - } - if (_flowRebalanceStrategy == ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_OPPORTUNISTIC) { - // If the flow is temporarily inactive we should take this opportunity to re-assign the flow if needed. - } - if (_flowRebalanceStrategy == ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_AGGRESSIVE) { - /** - * Return flows to the original path if it has once again become available - */ - if ((now - _lastFlowRebalance) > ZT_FLOW_REBALANCE_INTERVAL) { - std::map >::iterator flow_it = _flows.begin(); - while (flow_it != _flows.end()) { - if (flow_it->second->_previouslyAssignedPath && flow_it->second->_previouslyAssignedPath->eligible(now, _ackSendInterval) - && (flow_it->second->_previouslyAssignedPath->_allocation >= (minimumAllocationValue * 2))) { - //fprintf(stderr, "moving flow back onto its previous path assignment (based on eligibility)\n"); - (flow_it->second->_assignedPath->_assignedFlowCount)--; - flow_it->second->assignPath(flow_it->second->_previouslyAssignedPath,now); - (flow_it->second->_previouslyAssignedPath->_assignedFlowCount)++; - } - ++flow_it; - } - _lastFlowRebalance = now; - } - /** - * Return flows to the original path if it has once again become (performant) - */ - if ((now - _lastFlowRebalance) > ZT_FLOW_REBALANCE_INTERVAL) { - std::map >::iterator flow_it = _flows.begin(); - while (flow_it != _flows.end()) { - if (flow_it->second->_previouslyAssignedPath && flow_it->second->_previouslyAssignedPath->eligible(now, _ackSendInterval) - && (flow_it->second->_previouslyAssignedPath->_allocation >= (minimumAllocationValue * 2))) { - //fprintf(stderr, "moving flow back onto its previous path assignment (based on performance)\n"); - (flow_it->second->_assignedPath->_assignedFlowCount)--; - flow_it->second->assignPath(flow_it->second->_previouslyAssignedPath,now); - 
(flow_it->second->_previouslyAssignedPath->_assignedFlowCount)++; - } - ++flow_it; - } - _lastFlowRebalance = now; - } - } - } - else if (!_allowFlowHashing) { - // Nothing - } - } } -void Bond::dequeueNextActiveBackupPath(const uint64_t now) +void Bond::dequeueNextActiveBackupPath(uint64_t now) { if (_abFailoverQueue.empty()) { return; } - _abPath = _abFailoverQueue.front(); + _abPathIdx = _abFailoverQueue.front(); _abFailoverQueue.pop_front(); _lastActiveBackupPathChange = now; - for (int i=0; iresetPacketCounts(); + for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p) { + _paths[i].resetPacketCounts(); } } } bool Bond::abForciblyRotateLink() { - char traceMsg[256]; - char prevPathStr[128]; - char curPathStr[128]; - if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - SharedPtr prevPath = _abPath; - _abPath->address().toString(prevPathStr); + char prevPathStr[64]; + char curPathStr[64]; + if (_policy == ZT_BOND_POLICY_ACTIVE_BACKUP) { + int prevPathIdx = _abPathIdx; + _paths[_abPathIdx].p->address().toString(prevPathStr); dequeueNextActiveBackupPath(RR->node->now()); - _abPath->address().toString(curPathStr); - sprintf(traceMsg, "%s (active-backup) Forcibly rotating peer %llx link from %s/%s to %s/%s", - OSUtils::humanReadableTimestamp().c_str(), - _peer->_id.address().toInt(), - getLink(prevPath)->ifname().c_str(), - prevPathStr, - getLink(_abPath)->ifname().c_str(), - curPathStr); - RR->t->bondStateMessage(NULL, traceMsg); + _paths[_abPathIdx].p->address().toString(curPathStr); + log("forcibly rotate link from %s/%s to %s/%s", getLink(_paths[prevPathIdx].p)->ifname().c_str(), prevPathStr, getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr); return true; } return false; } -void Bond::processActiveBackupTasks(void *tPtr, const int64_t now) +void Bond::processActiveBackupTasks(void* tPtr, int64_t now) { - char traceMsg[256]; - char pathStr[128]; - char prevPathStr[128]; - char curPathStr[128]; - - SharedPtr prevActiveBackupPath = 
_abPath; - SharedPtr nonPreferredPath; + char pathStr[64] = { 0 }; + char prevPathStr[64]; + char curPathStr[64]; + int prevActiveBackupPathIdx = _abPathIdx; + int nonPreferredPathIdx; bool bFoundPrimaryLink = false; /** - * Generate periodic statuc report + * Generate periodic status report */ - if ((now - _lastBondStatusLog) > ZT_MULTIPATH_BOND_STATUS_INTERVAL) { + if ((now - _lastBondStatusLog) > ZT_BOND_STATUS_INTERVAL) { _lastBondStatusLog = now; - if (_abPath) { - _abPath->address().toString(curPathStr); - sprintf(traceMsg, "%s (active-backup) Active link to peer %llx is %s/%s, failover queue size is %zu", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt(), getLink(_abPath)->ifname().c_str(), curPathStr, _abFailoverQueue.size()); - RR->t->bondStateMessage(NULL, traceMsg); - } else { - sprintf(traceMsg, "%s (active-backup) No active link to peer %llx", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); + if (_abPathIdx == ZT_MAX_PEER_NETWORK_PATHS) { + log("no active link"); + } + else if (_paths[_abPathIdx].p) { + _paths[_abPathIdx].p->address().toString(curPathStr); + log("active link is %s/%s, failover queue size is %zu", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr, _abFailoverQueue.size()); } if (_abFailoverQueue.empty()) { - sprintf(traceMsg, "%s (active-backup) Failover queue is empty, bond to peer %llx is NOT currently fault-tolerant", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); + log("failover queue is empty, no longer fault-tolerant"); } } + /** * Select initial "active" active-backup link */ - if (!_abPath) { + if (_abPathIdx == ZT_MAX_PEER_NETWORK_PATHS) { /** * [Automatic mode] * The user has not explicitly specified links or their failover schedule, @@ -1317,20 +1302,17 @@ void Bond::processActiveBackupTasks(void *tPtr, const int64_t now) * policy will assign it as the 
new active backup path. If the path fails it will * simply find the next eligible path. */ - if (!userHasSpecifiedLinks()) { - sprintf(traceMsg, "%s (active-backup) No links to peer %llx specified. Searching...", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt()); RR->t->bondStateMessage(NULL, traceMsg); - for (int i=0; ieligible(now,_ackSendInterval)) { - _paths[i]->address().toString(curPathStr); - SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); + if (! userHasSpecifiedLinks()) { + log("no user-specified links"); + for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p && _paths[i].eligible) { + _paths[i].p->address().toString(curPathStr); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); if (link) { - sprintf(traceMsg, "%s (active-backup) Found eligible link %s/%s to peer %llx", - OSUtils::humanReadableTimestamp().c_str(), getLink(_paths[i])->ifname().c_str(), curPathStr, _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); + log("found eligible link %s/%s", getLink(_paths[i].p)->ifname().c_str(), curPathStr); + _abPathIdx = i; + break; } - _abPath = _paths[i]; - break; } } } @@ -1340,300 +1322,280 @@ void Bond::processActiveBackupTasks(void *tPtr, const int64_t now) */ else if (userHasSpecifiedLinks()) { if (userHasSpecifiedPrimaryLink()) { - //sprintf(traceMsg, "%s (active-backup) Checking local.conf for user-specified primary link\n", OSUtils::humanReadableTimestamp().c_str()); - for (int i=0; i link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); - if (_paths[i]->eligible(now,_ackSendInterval) && link->primary()) { - if (!_paths[i]->preferred()) { - _paths[i]->address().toString(curPathStr); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); + if (_paths[i].eligible && link->primary()) { + if (! 
_paths[i].preferred()) { + _paths[i].p->address().toString(curPathStr); // Found path on primary link, take note in case we don't find a preferred path - nonPreferredPath = _paths[i]; + nonPreferredPathIdx = i; bFoundPrimaryLink = true; } - if (_paths[i]->preferred()) { - _abPath = _paths[i]; - _abPath->address().toString(curPathStr); - SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); + if (_paths[i].preferred()) { + _abPathIdx = i; + _paths[_abPathIdx].p->address().toString(curPathStr); bFoundPrimaryLink = true; - break; // Found preferred path %s on primary link + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[_abPathIdx].p->localSocket()); + if (link) { + log("found preferred primary link %s/%s", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr); + } + break; // Found preferred path on primary link } } } - if (_abPath) { - _abPath->address().toString(curPathStr); - SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _abPath->localSocket()); - if (link) { - sprintf(traceMsg, "%s (active-backup) Found preferred primary link %s/%s to peer %llx", - OSUtils::humanReadableTimestamp().c_str(), getLink(_abPath)->ifname().c_str(), curPathStr, _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); - } + if (bFoundPrimaryLink && nonPreferredPathIdx) { + log("found non-preferred primary link"); + _abPathIdx = nonPreferredPathIdx; } - else { - if (bFoundPrimaryLink && nonPreferredPath) { - sprintf(traceMsg, "%s (active-backup) Found non-preferred primary link to peer %llx", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); - _abPath = nonPreferredPath; - } - } - if (!_abPath) { - sprintf(traceMsg, "%s (active-backup) Designated primary link to peer %llx is not yet ready", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); - // TODO: Should wait for some time 
(failover interval?) and then swtich to spare link + if (_abPathIdx == ZT_MAX_PEER_NETWORK_PATHS) { + log("user-designated primary link is not yet ready"); + // TODO: Should wait for some time (failover interval?) and then switch to spare link } } - else if (!userHasSpecifiedPrimaryLink()) { - int _abIdx = ZT_MAX_PEER_NETWORK_PATHS; - sprintf(traceMsg, "%s (active-backup) User did not specify a primary link to peer %llx, selecting first available link", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); - for (int i=0; ieligible(now,_ackSendInterval)) { - _abIdx = i; + else if (! userHasSpecifiedPrimaryLink()) { + log("user did not specify a primary link, select first available link"); + for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p && _paths[i].eligible) { + _abPathIdx = i; break; } } - if (_abIdx == ZT_MAX_PEER_NETWORK_PATHS) { - // Unable to find a candidate next-best, no change - } - else { - _abPath = _paths[_abIdx]; - SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _abPath->localSocket()); + if (_abPathIdx != ZT_MAX_PEER_NETWORK_PATHS) { + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[_abPathIdx].p->localSocket()); if (link) { - _abPath->address().toString(curPathStr); - sprintf(traceMsg, "%s (active-backup) Selected non-primary link %s/%s to peer %llx", - OSUtils::humanReadableTimestamp().c_str(), getLink(_abPath)->ifname().c_str(), curPathStr, _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); + _paths[_abPathIdx].p->address().toString(curPathStr); + log("select non-primary link %s/%s", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr); } } } } } + + // Short-circuit if we don't have an active link yet + if (_abPathIdx == ZT_MAX_PEER_NETWORK_PATHS) { + return; + } + + // Remove ineligible paths from the failover link queue + for (std::deque::iterator it(_abFailoverQueue.begin()); it != _abFailoverQueue.end();) 
{ + if (_paths[(*it)].p && ! _paths[(*it)].eligible) { + _paths[(*it)].p->address().toString(curPathStr); + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[(*it)].p->localSocket()); + it = _abFailoverQueue.erase(it); + if (link) { + log("link %s/%s is now ineligible, removing from failover queue (%zu links in queue)", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr, _abFailoverQueue.size()); + } + } + else { + ++it; + } + } + /** + * Failover instructions were provided by user, build queue according those as well as IPv + * preference, disregarding performance. + */ + if (userHasSpecifiedFailoverInstructions()) { + /** + * Clear failover scores + */ + for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p) { + _paths[i].failoverScore = 0; + } + } + // Follow user-specified failover instructions + for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (! _paths[i].p || ! _paths[i].allowed() || ! _paths[i].eligible) { + continue; + } + SharedPtr link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket()); + _paths[i].p->address().toString(pathStr); + + int failoverScoreHandicap = _paths[i].failoverScore; + if (_paths[i].preferred()) { + failoverScoreHandicap += ZT_BOND_FAILOVER_HANDICAP_PREFERRED; + } + if (link->primary()) { + // If using "optimize" primary re-select mode, ignore user link designations + failoverScoreHandicap += ZT_BOND_FAILOVER_HANDICAP_PRIMARY; + } + if (! _paths[i].failoverScore) { + // If we didn't inherit a failover score from a "parent" that wants to use this path as a failover + int newHandicap = failoverScoreHandicap ? 
failoverScoreHandicap : _paths[i].allocation; + _paths[i].failoverScore = newHandicap; + } + SharedPtr failoverLink; + if (link->failoverToLink().length()) { + failoverLink = RR->bc->getLinkByName(_policyAlias, link->failoverToLink()); + } + if (failoverLink) { + for (int j = 0; j < ZT_MAX_PEER_NETWORK_PATHS; j++) { + if (_paths[j].p && getLink(_paths[j].p) == failoverLink.ptr()) { + _paths[j].p->address().toString(pathStr); + int inheritedHandicap = failoverScoreHandicap - 10; + int newHandicap = _paths[j].failoverScore > inheritedHandicap ? _paths[j].failoverScore : inheritedHandicap; + if (! _paths[j].preferred()) { + newHandicap--; + } + _paths[j].failoverScore = newHandicap; + } + } + } + if (_paths[i].p.ptr() != _paths[_abPathIdx].p.ptr()) { + bool bFoundPathInQueue = false; + for (std::deque::iterator it(_abFailoverQueue.begin()); it != _abFailoverQueue.end(); ++it) { + if (_paths[i].p.ptr() == _paths[(*it)].p.ptr()) { + bFoundPathInQueue = true; + } + } + if (! bFoundPathInQueue) { + _abFailoverQueue.push_front(i); + _paths[i].p->address().toString(curPathStr); + log("add link %s/%s to failover queue (%zu links in queue)", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr, _abFailoverQueue.size()); + addPathToBond(0, i); + } + } + } + } /** - * Update and maintain the active-backup failover queue + * No failover instructions provided by user, build queue according to performance + * and IPv preference. 
*/ - if (_abPath) { - // Don't worry about the failover queue until we have an active link - // Remove ineligible paths from the failover link queue - for (std::list >::iterator it(_abFailoverQueue.begin()); it!=_abFailoverQueue.end();) { - if ((*it) && !(*it)->eligible(now,_ackSendInterval)) { - (*it)->address().toString(curPathStr); - SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, (*it)->localSocket()); - it = _abFailoverQueue.erase(it); - if (link) { - sprintf(traceMsg, "%s (active-backup) Link %s/%s to peer %llx is now ineligible, removing from failover queue, there are %zu links in the queue", - OSUtils::humanReadableTimestamp().c_str(), getLink(_abPath)->ifname().c_str(), curPathStr, _peer->_id.address().toInt(), _abFailoverQueue.size()); - RR->t->bondStateMessage(NULL, traceMsg); - } - } else { - ++it; + else if (! userHasSpecifiedFailoverInstructions()) { + for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (! _paths[i].p || ! _paths[i].allowed() || ! _paths[i].eligible) { + continue; } - } - /** - * Failover instructions were provided by user, build queue according those as well as IPv - * preference, disregarding performance. 
- */ - if (userHasSpecifiedFailoverInstructions()) { - /** - * Clear failover scores - */ - for (int i=0; i_failoverScore = 0; - } + int failoverScoreHandicap = 0; + if (_paths[i].preferred()) { + failoverScoreHandicap = ZT_BOND_FAILOVER_HANDICAP_PREFERRED; } - // Follow user-specified failover instructions - for (int i=0; iallowed() || !_paths[i]->eligible(now,_ackSendInterval)) { - continue; - } - SharedPtr link =RR->bc->getLinkBySocket(_policyAlias, _paths[i]->localSocket()); - _paths[i]->address().toString(pathStr); - - int failoverScoreHandicap = _paths[i]->_failoverScore; - if (_paths[i]->preferred()) { - failoverScoreHandicap += ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED; - } - if (link->primary()) { - // If using "optimize" primary reselect mode, ignore user link designations - failoverScoreHandicap += ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY; - } - if (!_paths[i]->_failoverScore) { - // If we didn't inherit a failover score from a "parent" that wants to use this path as a failover - int newHandicap = failoverScoreHandicap ? failoverScoreHandicap : _paths[i]->_allocation; - _paths[i]->_failoverScore = newHandicap; - } - SharedPtr failoverLink; - if (link->failoverToLink().length()) { - failoverLink = RR->bc->getLinkByName(_policyAlias, link->failoverToLink()); - } - if (failoverLink) { - for (int j=0; jaddress().toString(pathStr); - int inheritedHandicap = failoverScoreHandicap - 10; - int newHandicap = _paths[j]->_failoverScore > inheritedHandicap ? _paths[j]->_failoverScore : inheritedHandicap; - if (!_paths[j]->preferred()) { - newHandicap--; - } - _paths[j]->_failoverScore = newHandicap; - } + if (! 
_paths[i].eligible) { + failoverScoreHandicap = -10000; + } + if (getLink(_paths[i].p)->primary() && _abLinkSelectMethod != ZT_BOND_RESELECTION_POLICY_OPTIMIZE) { + // If using "optimize" primary re-select mode, ignore user link designations + failoverScoreHandicap = ZT_BOND_FAILOVER_HANDICAP_PRIMARY; + } + if (_paths[i].p.ptr() == _paths[negotiatedPathIdx].p.ptr()) { + _paths[i].negotiated = true; + failoverScoreHandicap = ZT_BOND_FAILOVER_HANDICAP_NEGOTIATED; + } + else { + _paths[i].negotiated = false; + } + _paths[i].failoverScore = _paths[i].allocation + failoverScoreHandicap; + if (_paths[i].p.ptr() != _paths[_abPathIdx].p.ptr()) { + bool bFoundPathInQueue = false; + for (std::deque::iterator it(_abFailoverQueue.begin()); it != _abFailoverQueue.end(); ++it) { + if (_paths[i].p.ptr() == _paths[(*it)].p.ptr()) { + bFoundPathInQueue = true; } } - if (_paths[i].ptr() != _abPath.ptr()) { - bool bFoundPathInQueue = false; - for (std::list >::iterator it(_abFailoverQueue.begin()); it!=_abFailoverQueue.end();++it) { - if (_paths[i].ptr() == (*it).ptr()) { - bFoundPathInQueue = true; - } - } - if (!bFoundPathInQueue) { - _abFailoverQueue.push_front(_paths[i]); - _paths[i]->address().toString(curPathStr); sprintf(traceMsg, "%s (active-backup) Added link %s/%s to peer %llx to failover queue, there are %zu links in the queue", - OSUtils::humanReadableTimestamp().c_str(), getLink(_abPath)->ifname().c_str(), curPathStr, _peer->_id.address().toInt(), _abFailoverQueue.size()); - RR->t->bondStateMessage(NULL, traceMsg); - } + if (! bFoundPathInQueue) { + _abFailoverQueue.push_front(i); + _paths[i].p->address().toString(curPathStr); + log("add link %s/%s to failover queue (%zu links in queue)", getLink(_paths[i].p)->ifname().c_str(), curPathStr, _abFailoverQueue.size()); + addPathToBond(0, i); } } } - /** - * No failover instructions provided by user, build queue according to performance - * and IPv preference. 
- */ - else if (!userHasSpecifiedFailoverInstructions()) { - for (int i=0; iallowed() - || !_paths[i]->eligible(now,_ackSendInterval)) { - continue; - } - int failoverScoreHandicap = 0; - if (_paths[i]->preferred()) { - failoverScoreHandicap = ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED; - } - bool includeRefractoryPeriod = true; - if (!_paths[i]->eligible(now,includeRefractoryPeriod)) { - failoverScoreHandicap = -10000; - } - if (getLink(_paths[i])->primary() && _abLinkSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { - // If using "optimize" primary reselect mode, ignore user link designations - failoverScoreHandicap = ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY; - } - if (_paths[i].ptr() == negotiatedPath.ptr()) { - _paths[i]->_negotiated = true; - failoverScoreHandicap = ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED; - } else { - _paths[i]->_negotiated = false; - } - _paths[i]->_failoverScore = _paths[i]->_allocation + failoverScoreHandicap; - if (_paths[i].ptr() != _abPath.ptr()) { - bool bFoundPathInQueue = false; - for (std::list >::iterator it(_abFailoverQueue.begin()); it!=_abFailoverQueue.end();++it) { - if (_paths[i].ptr() == (*it).ptr()) { - bFoundPathInQueue = true; - } - } - if (!bFoundPathInQueue) { - _abFailoverQueue.push_front(_paths[i]); - _paths[i]->address().toString(curPathStr); - sprintf(traceMsg, "%s (active-backup) Added link %s/%s to peer %llx to failover queue, there are %zu links in the queue", - OSUtils::humanReadableTimestamp().c_str(), getLink(_paths[i])->ifname().c_str(), curPathStr, _peer->_id.address().toInt(), _abFailoverQueue.size()); - RR->t->bondStateMessage(NULL, traceMsg); - } - } - } - } - _abFailoverQueue.sort(PathQualityComparator()); } + // Sort queue based on performance + if (! 
_abFailoverQueue.empty()) { + for (int i = 0; i < _abFailoverQueue.size(); i++) { + int value_to_insert = _abFailoverQueue[i]; + int hole_position = i; + while (hole_position > 0 && (_abFailoverQueue[hole_position - 1] > value_to_insert)) { + _abFailoverQueue[hole_position] = _abFailoverQueue[hole_position - 1]; + hole_position = hole_position - 1; + } + _abFailoverQueue[hole_position] = value_to_insert; + } + } + /** * Short-circuit if we have no queued paths */ if (_abFailoverQueue.empty()) { return; } + /** - * Fulfill primary reselect obligations + * Fulfill primary re-select obligations */ - if (_abPath && !_abPath->eligible(now,_ackSendInterval)) { // Implicit ZT_MULTIPATH_RESELECTION_POLICY_FAILURE - _abPath->address().toString(curPathStr); - sprintf(traceMsg, "%s (active-backup) Link %s/%s to peer %llx has failed. Selecting new link from failover queue, there are %zu links in the queue", - OSUtils::humanReadableTimestamp().c_str(), getLink(_abPath)->ifname().c_str(), curPathStr, _peer->_id.address().toInt(), _abFailoverQueue.size()); - RR->t->bondStateMessage(NULL, traceMsg); - if (!_abFailoverQueue.empty()) { + if (_paths[_abPathIdx].p && ! _paths[_abPathIdx].eligible) { // Implicit ZT_BOND_RESELECTION_POLICY_FAILURE + _paths[_abPathIdx].p->address().toString(curPathStr); + log("link %s/%s has failed, select link from failover queue (%zu links in queue)", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr, _abFailoverQueue.size()); + if (! _abFailoverQueue.empty()) { dequeueNextActiveBackupPath(now); - _abPath->address().toString(curPathStr); - sprintf(traceMsg, "%s (active-backup) Active link to peer %llx has been switched to %s/%s", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt(), getLink(_abPath)->ifname().c_str(), curPathStr); - RR->t->bondStateMessage(NULL, traceMsg); - } else { - sprintf(traceMsg, "%s (active-backup) Failover queue is empty. 
No links to peer %llx to choose from", - OSUtils::humanReadableTimestamp().c_str(), _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); + _paths[_abPathIdx].p->address().toString(curPathStr); + log("active link switched to %s/%s", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr); + } + else { + log("failover queue is empty, no links to choose from"); } } /** * Detect change to prevent flopping during later optimization step. */ - if (prevActiveBackupPath != _abPath) { + if (prevActiveBackupPathIdx != _abPathIdx) { _lastActiveBackupPathChange = now; } - if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS) { - if (_abPath && !getLink(_abPath)->primary() - && getLink(_abFailoverQueue.front())->primary()) { + if (_abLinkSelectMethod == ZT_BOND_RESELECTION_POLICY_ALWAYS) { + if (_paths[_abPathIdx].p && ! getLink(_paths[_abPathIdx].p)->primary() && getLink(_paths[_abFailoverQueue.front()].p)->primary()) { dequeueNextActiveBackupPath(now); - _abPath->address().toString(curPathStr); - sprintf(traceMsg, "%s (active-backup) Switching back to available primary link %s/%s to peer %llx [linkSelectionMethod = always]", - OSUtils::humanReadableTimestamp().c_str(), getLink(_abPath)->ifname().c_str(), curPathStr, _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); + _paths[_abPathIdx].p->address().toString(curPathStr); + log("switch back to available primary link %s/%s (select: always)", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr); } } - if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_BETTER) { - if (_abPath && !getLink(_abPath)->primary()) { + if (_abLinkSelectMethod == ZT_BOND_RESELECTION_POLICY_BETTER) { + if (_paths[_abPathIdx].p && ! getLink(_paths[_abPathIdx].p)->primary()) { // Active backup has switched to "better" primary link according to re-select policy. 
- if (getLink(_abFailoverQueue.front())->primary() - && (_abFailoverQueue.front()->_failoverScore > _abPath->_failoverScore)) { + if (getLink(_paths[_abFailoverQueue.front()].p)->primary() && (_paths[_abFailoverQueue.front()].failoverScore > _paths[_abPathIdx].failoverScore)) { dequeueNextActiveBackupPath(now); - _abPath->address().toString(curPathStr); - sprintf(traceMsg, "%s (active-backup) Switching back to user-defined primary link %s/%s to peer %llx [linkSelectionMethod = better]", - OSUtils::humanReadableTimestamp().c_str(), getLink(_abPath)->ifname().c_str(), curPathStr, _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); + _paths[_abPathIdx].p->address().toString(curPathStr); + log("switch back to user-defined primary link %s/%s (select: better)", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr); } } } - if (_abLinkSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE && !_abFailoverQueue.empty()) { + if (_abLinkSelectMethod == ZT_BOND_RESELECTION_POLICY_OPTIMIZE && ! _abFailoverQueue.empty()) { /** * Implement link negotiation that was previously-decided */ - if (_abFailoverQueue.front()->_negotiated) { + if (_paths[_abFailoverQueue.front()].negotiated) { dequeueNextActiveBackupPath(now); - _abPath->address().toString(prevPathStr); + _paths[_abPathIdx].p->address().toString(prevPathStr); _lastPathNegotiationCheck = now; - _abPath->address().toString(curPathStr); - sprintf(traceMsg, "%s (active-backup) Switching negotiated link %s/%s to peer %llx [linkSelectionMethod = optimize]", - OSUtils::humanReadableTimestamp().c_str(), getLink(_abPath)->ifname().c_str(), curPathStr, _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); + _paths[_abPathIdx].p->address().toString(curPathStr); + log("switch negotiated link %s/%s (select: optimize)", getLink(_paths[_abPathIdx].p)->ifname().c_str(), curPathStr); } else { // Try to find a better path and automatically switch to it -- not too often, though. 
- if ((now - _lastActiveBackupPathChange) > ZT_MULTIPATH_MIN_ACTIVE_BACKUP_AUTOFLOP_INTERVAL) { - if (!_abFailoverQueue.empty()) { - int newFScore = _abFailoverQueue.front()->_failoverScore; - int prevFScore = _abPath->_failoverScore; + if ((now - _lastActiveBackupPathChange) > ZT_BOND_OPTIMIZE_INTERVAL) { + if (! _abFailoverQueue.empty()) { + int newFScore = _paths[_abFailoverQueue.front()].failoverScore; + int prevFScore = _paths[_abPathIdx].failoverScore; // Establish a minimum switch threshold to prevent flapping - int failoverScoreDifference = _abFailoverQueue.front()->_failoverScore - _abPath->_failoverScore; - int thresholdQuantity = (ZT_MULTIPATH_ACTIVE_BACKUP_OPTIMIZE_MIN_THRESHOLD * (float)_abPath->_allocation); + int failoverScoreDifference = _paths[_abFailoverQueue.front()].failoverScore - _paths[_abPathIdx].failoverScore; + int thresholdQuantity = (int)(ZT_BOND_ACTIVE_BACKUP_OPTIMIZE_MIN_THRESHOLD * (float)_paths[_abPathIdx].allocation); if ((failoverScoreDifference > 0) && (failoverScoreDifference > thresholdQuantity)) { - SharedPtr oldPath = _abPath; - _abPath->address().toString(prevPathStr); + SharedPtr oldPath = _paths[_abPathIdx].p; + _paths[_abPathIdx].p->address().toString(prevPathStr); dequeueNextActiveBackupPath(now); - _abPath->address().toString(curPathStr); - sprintf(traceMsg, "%s (active-backup) Switching from %s/%s (fscore=%d) to better link %s/%s (fscore=%d) for peer %llx [linkSelectionMethod = optimize]", - OSUtils::humanReadableTimestamp().c_str(), getLink(oldPath)->ifname().c_str(), prevPathStr, prevFScore, getLink(_abPath)->ifname().c_str(), curPathStr, newFScore, _peer->_id.address().toInt()); - RR->t->bondStateMessage(NULL, traceMsg); + _paths[_abPathIdx].p->address().toString(curPathStr); + log("ab", + "switch from %s/%s (score: %d) to better link %s/%s (score: %d) for peer %llx (select: optimize)", + getLink(oldPath)->ifname().c_str(), + prevPathStr, + prevFScore, + getLink(_paths[_abPathIdx].p)->ifname().c_str(), + curPathStr, 
+ newFScore, + _peerId); } } } @@ -1641,165 +1603,132 @@ void Bond::processActiveBackupTasks(void *tPtr, const int64_t now) } } -void Bond::setReasonableDefaults(int policy, SharedPtr templateBond, bool useTemplate) +void Bond::setBondParameters(int policy, SharedPtr templateBond, bool useTemplate) { - // If invalid bonding policy, try default - int _defaultBondingPolicy = BondController::defaultBondingPolicy(); - if (policy <= ZT_BONDING_POLICY_NONE || policy > ZT_BONDING_POLICY_BALANCE_AWARE) { - // If no default set, use NONE (effectively disabling this bond) - if (_defaultBondingPolicy < ZT_BONDING_POLICY_NONE || _defaultBondingPolicy > ZT_BONDING_POLICY_BALANCE_AWARE) { - _bondingPolicy= ZT_BONDING_POLICY_NONE; - } - _bondingPolicy= _defaultBondingPolicy; - } else { - _bondingPolicy= policy; - } + // Sanity check for policy - _freeRandomByte = 0; + _defaultPolicy = (_defaultPolicy <= ZT_BOND_POLICY_NONE || _defaultPolicy > ZT_BOND_POLICY_BALANCE_AWARE) ? ZT_BOND_POLICY_NONE : _defaultPolicy; + _policy = (policy <= ZT_BOND_POLICY_NONE || policy > ZT_BOND_POLICY_BALANCE_AWARE) ? 
ZT_BOND_POLICY_NONE : _defaultPolicy; + + // Flows + + _lastFlowExpirationCheck = 0; + _lastFlowRebalance = 0; + _allowFlowHashing = false; + + // Path negotiation + + _lastSentPathNegotiationRequest = 0; + _lastPathNegotiationCheck = 0; + _allowPathNegotiation = false; + _pathNegotiationCutoffCount = 0; + _lastPathNegotiationReceived = 0; + _localUtility = 0; + + // QOS Verb (and related checks) + + _qosCutoffCount = 0; + _lastQoSRateCheck = 0; + _lastQualityEstimation = 0; + + // User preferences which may override the default bonding algorithm's behavior _userHasSpecifiedPrimaryLink = false; _userHasSpecifiedFailoverInstructions = false; + _userHasSpecifiedLinkSpeeds = 0; + // Bond status + + _lastBondStatusLog = 0; + _lastSummaryDump = 0; _isHealthy = false; _numAliveLinks = 0; _numTotalLinks = 0; + _numBondedPaths = 0; + + // active-backup + + _lastActiveBackupPathChange = 0; + _abPathIdx = ZT_MAX_PEER_NETWORK_PATHS; + + // rr + + _rrPacketsSentOnCurrLink = 0; + _rrIdx = 0; + + // General parameters _downDelay = 0; _upDelay = 0; - _allowFlowHashing=false; - _bondMonitorInterval=0; - _shouldCollectPathStatistics=false; + _monitorInterval = 0; - // Path negotiation - _allowPathNegotiation=false; - _pathNegotiationCutoffCount=0; - _localUtility=0; - - _numBondedPaths=0; - _rrPacketsSentOnCurrLink=0; - _rrIdx=0; - - _totalBondUnderload = 0; + // (Sane?) 
limits _maxAcceptableLatency = 100; _maxAcceptablePacketDelayVariance = 50; - _maxAcceptablePacketLossRatio = 0.10; - _maxAcceptablePacketErrorRatio = 0.10; - _userHasSpecifiedLinkSpeeds=0; + _maxAcceptablePacketLossRatio = 0.10f; + _maxAcceptablePacketErrorRatio = 0.10f; - /* ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_PASSIVE is the most conservative strategy and is - least likely to cause unexpected behavior */ - _flowRebalanceStrategy = ZT_MULTIPATH_FLOW_REBALANCE_STRATEGY_AGGRESSIVE; + // General timers + + _lastFrame = 0; + _lastBackgroundTaskCheck = 0; + + // balance-aware + + _totalBondUnderload = 0; + + _overheadBytes = 0; /** - * Paths are actively monitored to provide a real-time quality/preference-ordered rapid failover queue. + * Policy-specific defaults */ - switch (policy) { - case ZT_BONDING_POLICY_ACTIVE_BACKUP: - _failoverInterval = 500; - _abLinkSelectMethod = ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE; - _linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; - _qualityWeights[ZT_QOS_LAT_IDX] = 0.2f; - _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_PDV_IDX] = 0.2f; - _qualityWeights[ZT_QOS_PLR_IDX] = 0.2f; - _qualityWeights[ZT_QOS_PER_IDX] = 0.2f; - _qualityWeights[ZT_QOS_THR_IDX] = 0.2f; - _qualityWeights[ZT_QOS_THM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; - _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; + switch (_policy) { + case ZT_BOND_POLICY_ACTIVE_BACKUP: + _abLinkSelectMethod = ZT_BOND_RESELECTION_POLICY_OPTIMIZE; break; - /** - * All seemingly-alive paths are used. Paths are not actively monitored. 
- */ - case ZT_BONDING_POLICY_BROADCAST: + case ZT_BOND_POLICY_BROADCAST: _downDelay = 30000; _upDelay = 0; break; - /** - * Paths are monitored to determine when/if one needs to be added or removed from the rotation - */ - case ZT_BONDING_POLICY_BALANCE_RR: - _failoverInterval = 3000; - _allowFlowHashing = false; - _packetsPerLink = 1024; - _linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; - _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; - _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_PDV_IDX] = 0.2f; - _qualityWeights[ZT_QOS_PLR_IDX] = 0.1f; - _qualityWeights[ZT_QOS_PER_IDX] = 0.1f; - _qualityWeights[ZT_QOS_THR_IDX] = 0.1f; - _qualityWeights[ZT_QOS_THM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; - _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; + case ZT_BOND_POLICY_BALANCE_RR: + _packetsPerLink = 64; break; - /** - * Path monitoring is used to determine the capacity of each - * path and where to place the next flow. - */ - case ZT_BONDING_POLICY_BALANCE_XOR: - _failoverInterval = 3000; - _upDelay = _bondMonitorInterval * 2; + case ZT_BOND_POLICY_BALANCE_XOR: _allowFlowHashing = true; - _linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; - _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; - _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_PDV_IDX] = 0.2f; - _qualityWeights[ZT_QOS_PLR_IDX] = 0.1f; - _qualityWeights[ZT_QOS_PER_IDX] = 0.1f; - _qualityWeights[ZT_QOS_THR_IDX] = 0.1f; - _qualityWeights[ZT_QOS_THM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; - _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; break; - /** - * Path monitoring is used to determine the capacity of each - * path and where to place the next flow. Additionally, re-shuffling - * of flows may take place. 
- */ - case ZT_BONDING_POLICY_BALANCE_AWARE: - _failoverInterval = 3000; + case ZT_BOND_POLICY_BALANCE_AWARE: _allowFlowHashing = true; - _linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; - _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; - _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_PDV_IDX] = 0.4f; - _qualityWeights[ZT_QOS_PLR_IDX] = 0.2f; - _qualityWeights[ZT_QOS_PER_IDX] = 0.0f; - _qualityWeights[ZT_QOS_THR_IDX] = 0.0f; - _qualityWeights[ZT_QOS_THM_IDX] = 0.0f; - _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; - _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; break; default: break; } - /* If a user has specified custom parameters for this bonding policy, overlay - them onto the defaults that were previously set */ + _qw[ZT_QOS_LAT_IDX] = 0.3f; + _qw[ZT_QOS_LTM_IDX] = 0.1f; + _qw[ZT_QOS_PDV_IDX] = 0.3f; + _qw[ZT_QOS_PLR_IDX] = 0.1f; + _qw[ZT_QOS_PER_IDX] = 0.1f; + _qw[ZT_QOS_SCP_IDX] = 0.1f; + + _failoverInterval = ZT_BOND_FAILOVER_DEFAULT_INTERVAL; + + /* If a user has specified custom parameters for this bonding policy, overlay them onto the defaults */ if (useTemplate) { _policyAlias = templateBond->_policyAlias; - _failoverInterval = templateBond->_failoverInterval >= 250 ? templateBond->_failoverInterval : _failoverInterval; + _failoverInterval = templateBond->_failoverInterval >= ZT_BOND_FAILOVER_MIN_INTERVAL ? 
templateBond->_failoverInterval : ZT_BOND_FAILOVER_MIN_INTERVAL; _downDelay = templateBond->_downDelay; _upDelay = templateBond->_upDelay; - if (templateBond->_linkMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE - && templateBond->_failoverInterval != 0) { - //fprintf(stderr, "warning: passive path monitoring was specified, this will prevent failovers from happening in a timely manner.\n"); - } _abLinkSelectMethod = templateBond->_abLinkSelectMethod; - memcpy(_qualityWeights, templateBond->_qualityWeights, ZT_QOS_WEIGHT_SIZE * sizeof(float)); + memcpy(_qw, templateBond->_qw, ZT_QOS_WEIGHT_SIZE * sizeof(float)); } - /* Set timer geometries */ - _bondMonitorInterval = _failoverInterval / 3; - BondController::setMinReqPathMonitorInterval(_bondMonitorInterval); - _ackSendInterval = _failoverInterval; + + // Timer geometry + + _monitorInterval = _failoverInterval / ZT_BOND_ECHOS_PER_FAILOVER_INTERVAL; _qualityEstimationInterval = _failoverInterval * 2; - _dynamicPathMonitorInterval = 0; - _ackCutoffCount = 0; - _qosSendInterval = _bondMonitorInterval * 4; + _qosSendInterval = _failoverInterval * 2; _qosCutoffCount = 0; - throughputMeasurementInterval = _ackSendInterval * 2; _defaultPathRefractoryPeriod = 8000; } @@ -1807,27 +1736,54 @@ void Bond::setUserQualityWeights(float weights[], int len) { if (len == ZT_QOS_WEIGHT_SIZE) { float weightTotal = 0.0; - for (unsigned int i=0; i 0.99 && weightTotal < 1.01) { - memcpy(_qualityWeights, weights, len * sizeof(float)); + memcpy(_qw, weights, len * sizeof(float)); } } } -bool Bond::relevant() { - return false; -} - SharedPtr Bond::getLink(const SharedPtr& path) { return RR->bc->getLinkBySocket(_policyAlias, path->localSocket()); } -void Bond::dumpInfo(const int64_t now) +void Bond::dumpPathStatus(int64_t now, int pathIdx) { - // Omitted + char pathStr[64] = { 0 }; + _paths[pathIdx].p->address().toString(pathStr); + log("path status: [%2d] alive:%d, eli:%d, bonded:%d, flows:%6d, lat:%10.3f, jitter:%10.3f, 
error:%6.4f, loss:%6.4f, age:%6d --- (%s/%s)", + pathIdx, + _paths[pathIdx].alive, + _paths[pathIdx].eligible, + _paths[pathIdx].bonded, + _paths[pathIdx].assignedFlowCount, + _paths[pathIdx].latencyMean, + _paths[pathIdx].latencyVariance, + _paths[pathIdx].packetErrorRatio, + _paths[pathIdx].packetLossRatio, + _paths[pathIdx].p->age(now), + getLink(_paths[pathIdx].p)->ifname().c_str(), + pathStr); } -} // namespace ZeroTier \ No newline at end of file +void Bond::dumpInfo(int64_t now, bool force) +{ + uint64_t timeSinceLastDump = now - _lastSummaryDump; + if (! force && timeSinceLastDump < ZT_BOND_STATUS_INTERVAL) { + return; + } + _lastSummaryDump = now; + float overhead = (_overheadBytes / (timeSinceLastDump / 1000.0f) / 1000.0f); + _overheadBytes = 0; + log("bond status: bp: %d, fi: %d, mi: %d, ud: %d, dd: %d, flows: %lu, ambient: %f KB/s", _policy, _failoverInterval, _monitorInterval, _upDelay, _downDelay, (unsigned long)_flows.size(), overhead); + for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p) { + dumpPathStatus(now, i); + } + } +} + +} // namespace ZeroTier diff --git a/node/Bond.hpp b/node/Bond.hpp index 697aee676..90c80d974 100644 --- a/node/Bond.hpp +++ b/node/Bond.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2013-2020 ZeroTier, Inc. + * Copyright (c)2013-2021 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2025-01-01 + * Change Date: 2026-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. 
@@ -14,46 +14,521 @@ #ifndef ZT_BOND_HPP #define ZT_BOND_HPP +#include "../osdep/Phy.hpp" +#include "Packet.hpp" +#include "Path.hpp" +#include "RuntimeEnvironment.hpp" +#include "Trace.hpp" + +#include +#include +#include #include -#include "Path.hpp" -#include "Peer.hpp" -#include "../osdep/Link.hpp" -#include "Flow.hpp" +/** + * Indices for the path quality weight vector + */ +enum ZT_BondQualityWeightIndex { ZT_QOS_LAT_IDX, ZT_QOS_LTM_IDX, ZT_QOS_PDV_IDX, ZT_QOS_PLR_IDX, ZT_QOS_PER_IDX, ZT_QOS_THR_IDX, ZT_QOS_THM_IDX, ZT_QOS_THV_IDX, ZT_QOS_AGE_IDX, ZT_QOS_SCP_IDX, ZT_QOS_WEIGHT_SIZE }; + +/** + * Multipath bonding policy + */ +enum ZT_BondBondingPolicy { + /** + * Normal operation. No fault tolerance, no load balancing + */ + ZT_BOND_POLICY_NONE = 0, + + /** + * Sends traffic out on only one path at a time. Configurable immediate + * fail-over. + */ + ZT_BOND_POLICY_ACTIVE_BACKUP = 1, + + /** + * Sends traffic out on all paths + */ + ZT_BOND_POLICY_BROADCAST = 2, + + /** + * Stripes packets across all paths + */ + ZT_BOND_POLICY_BALANCE_RR = 3, + + /** + * Packets destined for specific peers will always be sent over the same + * path. + */ + ZT_BOND_POLICY_BALANCE_XOR = 4, + + /** + * Balances flows among all paths according to path performance + */ + ZT_BOND_POLICY_BALANCE_AWARE = 5 +}; + +/** + * Multipath active re-selection policy (linkSelectMethod) + */ +enum ZT_BondLinkSelectMethod { + /** + * Primary link regains status as active link whenever it comes back up + * (default when links are explicitly specified) + */ + ZT_BOND_RESELECTION_POLICY_ALWAYS = 0, + + /** + * Primary link regains status as active link when it comes back up and + * (if) it is better than the currently-active link. + */ + ZT_BOND_RESELECTION_POLICY_BETTER = 1, + + /** + * Primary link regains status as active link only if the currently-active + * link fails. + */ + ZT_BOND_RESELECTION_POLICY_FAILURE = 2, + + /** + * The primary link can change if a superior path is detected. 
+ * (default if user provides no fail-over guidance) + */ + ZT_BOND_RESELECTION_POLICY_OPTIMIZE = 3 +}; + +/** + * Mode of multipath link interface + */ +enum ZT_BondLinkMode { ZT_BOND_SLAVE_MODE_PRIMARY = 0, ZT_BOND_SLAVE_MODE_SPARE = 1 }; + +#include "../node/AtomicCounter.hpp" +#include "../node/SharedPtr.hpp" + +#include namespace ZeroTier { -class RuntimeEnvironment; -class Link; +class Link { + friend class SharedPtr; + + public: + /** + * + * @param ifnameStr + * @param ipvPref + * @param speed + * @param enabled + * @param mode + * @param failoverToLinkStr + * @param userSpecifiedAlloc + */ + Link(std::string ifnameStr, uint8_t ipvPref, uint32_t speed, bool enabled, uint8_t mode, std::string failoverToLinkStr, float userSpecifiedAlloc) + : _ifnameStr(ifnameStr) + , _ipvPref(ipvPref) + , _speed(speed) + , _relativeSpeed(0) + , _enabled(enabled) + , _mode(mode) + , _failoverToLinkStr(failoverToLinkStr) + , _userSpecifiedAlloc(userSpecifiedAlloc) + , _isUserSpecified(false) + { + } + + /** + * @return The string representation of this link's underlying interface's system name. + */ + inline std::string ifname() + { + return _ifnameStr; + } + + /** + * @return Whether this link is designated as a primary. + */ + inline bool primary() + { + return _mode == ZT_BOND_SLAVE_MODE_PRIMARY; + } + + /** + * @return Whether this link is designated as a spare. + */ + inline bool spare() + { + return _mode == ZT_BOND_SLAVE_MODE_SPARE; + } + + /** + * @return The name of the link interface that should be used in the event of a failure. + */ + inline std::string failoverToLink() + { + return _failoverToLinkStr; + } + + /** + * @return Whether this link interface was specified by the user or auto-detected. + */ + inline bool isUserSpecified() + { + return _isUserSpecified; + } + + /** + * Signify that this link was specified by the user and not the result of auto-detection. 
+ * + * @param isUserSpecified + */ + inline void setAsUserSpecified(bool isUserSpecified) + { + _isUserSpecified = isUserSpecified; + } + + /** + * @return Whether or not the user has specified failover instructions. + */ + inline bool userHasSpecifiedFailoverInstructions() + { + return _failoverToLinkStr.length(); + } + + /** + * @return The speed of the link relative to others in the bond. + */ + inline uint8_t relativeSpeed() + { + return _relativeSpeed; + } + + /** + * Sets the speed of the link relative to others in the bond. + * + * @param relativeSpeed The speed relative to the rest of the link. + */ + inline void setRelativeSpeed(uint8_t relativeSpeed) + { + _relativeSpeed = relativeSpeed; + } + + /** + * @return The absolute speed of the link (as specified by the user.) + */ + inline uint32_t speed() + { + return _speed; + } + + /** + * @return The address preference for this link (as specified by the user.) + */ + inline uint8_t ipvPref() + { + return _ipvPref; + } + + /** + * @return The mode (e.g. primary/spare) for this link (as specified by the user.) + */ + inline uint8_t mode() + { + return _mode; + } + + /** + * @return Whether this link is enabled or disabled + */ + inline uint8_t enabled() + { + return _enabled; + } + + private: + /** + * String representation of underlying interface's system name + */ + std::string _ifnameStr; + + /** + * What preference (if any) a user has for IP protocol version used in + * path aggregations. 
Preference is expressed in the order of the digits: + * + * 0: no preference + * 4: IPv4 only + * 6: IPv6 only + * 46: IPv4 over IPv6 + * 64: IPv6 over IPv4 + */ + uint8_t _ipvPref; + + /** + * User-specified speed of this link + */ + uint32_t _speed; + + /** + * Speed relative to other specified links (computed by Bond) + */ + uint8_t _relativeSpeed; + + /** + * Whether this link is enabled, or (disabled (possibly bad config)) + */ + uint8_t _enabled; + + /** + * Whether this link is designated as a primary, a spare, or no preference. + */ + uint8_t _mode; + + /** + * The specific name of the link to be used in the event that this + * link fails. + */ + std::string _failoverToLinkStr; + + /** + * User-specified allocation + */ + float _userSpecifiedAlloc; + + /** + * Whether or not this link was created as a result of manual user specification. This is + * important to know because certain policy decisions are dependent on whether the user + * intents to use a specific set of interfaces. + */ + bool _isUserSpecified; + + AtomicCounter __refCount; +}; + +class Link; +class Peer; + +class Bond { + public: + /** + * @return Whether this link is permitted to become a member of a bond. + */ + static bool linkAllowed(std::string& policyAlias, SharedPtr link); + + /** + * @return The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements. + */ + static int minReqMonitorInterval() + { + return _minReqMonitorInterval; + } + + /** + * @return Whether the bonding layer is currently set up to be used. + */ + static bool inUse() + { + return ! 
_bondPolicyTemplates.empty() || _defaultPolicy; + } + + /** + * @param basePolicyName Bonding policy name (See ZeroTierOne.h) + * @return The bonding policy code for a given human-readable bonding policy name + */ + static int getPolicyCodeByStr(const std::string& basePolicyName) + { + if (basePolicyName == "active-backup") { + return 1; + } + if (basePolicyName == "broadcast") { + return 2; + } + if (basePolicyName == "balance-rr") { + return 3; + } + if (basePolicyName == "balance-xor") { + return 4; + } + if (basePolicyName == "balance-aware") { + return 5; + } + return 0; // "none" + } + + /** + * @param policy Bonding policy code (See ZeroTierOne.h) + * @return The human-readable name for the given bonding policy code + */ + static std::string getPolicyStrByCode(int policy) + { + if (policy == 1) { + return "active-backup"; + } + if (policy == 2) { + return "broadcast"; + } + if (policy == 3) { + return "balance-rr"; + } + if (policy == 4) { + return "balance-xor"; + } + if (policy == 5) { + return "balance-aware"; + } + return "none"; + } + + /** + * Sets the default bonding policy for new or undefined bonds. + * + * @param bp Bonding policy + */ + static void setBondingLayerDefaultPolicy(uint8_t bp) + { + _defaultPolicy = bp; + } + + /** + * Sets the default (custom) bonding policy for new or undefined bonds. + * + * @param alias Human-readable string alias for bonding policy + */ + static void setBondingLayerDefaultPolicyStr(std::string alias) + { + _defaultPolicyStr = alias; + } + + /** + * Add a user-defined link to a given bonding policy. + * + * @param policyAlias User-defined custom name for variant of bonding policy + * @param link Pointer to new link definition + */ + static void addCustomLink(std::string& policyAlias, SharedPtr link); + + /** + * Add a user-defined bonding policy that is based on one of the standard types. 
+ * + * @param newBond Pointer to custom Bond object + * @return Whether a uniquely-named custom policy was successfully added + */ + static bool addCustomPolicy(const SharedPtr& newBond); + + /** + * Assigns a specific bonding policy + * + * @param identity + * @param policyAlias + * @return + */ + static bool assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias); + + /** + * Get pointer to bond by a given peer ID + * + * @param peer Remote peer ID + * @return A pointer to the Bond + */ + static SharedPtr getBondByPeerId(int64_t identity); + + /** + * Add a new bond to the bond controller. + * + * @param renv Runtime environment + * @param peer Remote peer that this bond services + * @return A pointer to the newly created Bond + */ + static SharedPtr createTransportTriggeredBond(const RuntimeEnvironment* renv, const SharedPtr& peer); + + /** + * Periodically perform maintenance tasks for the bonding layer. + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param now Current time + */ + static void processBackgroundTasks(void* tPtr, int64_t now); + + /** + * Gets a reference to a physical link definition given a policy alias and a local socket. + * + * @param policyAlias Policy in use + * @param localSocket Local source socket + * @return Physical link definition + */ + static SharedPtr getLinkBySocket(const std::string& policyAlias, uint64_t localSocket); + + /** + * Gets a reference to a physical link definition given its human-readable system name. 
+ * + * @param policyAlias Policy in use + * @param ifname Alphanumeric human-readable name + * @return Physical link definition + */ + static SharedPtr getLinkByName(const std::string& policyAlias, const std::string& ifname); + + private: + static Phy* _phy; + + static Mutex _bonds_m; + static Mutex _links_m; + + /** + * The minimum required monitoring interval among all bonds + */ + static int _minReqMonitorInterval; + + /** + * The default bonding policy used for new bonds unless otherwise specified. + */ + static uint8_t _defaultPolicy; + + /** + * The default bonding policy used for new bonds unless otherwise specified. + */ + static std::string _defaultPolicyStr; + + /** + * All currently active bonds. + */ + static std::map > _bonds; + + /** + * Map of peers to custom bonding policies + */ + static std::map _policyTemplateAssignments; + + /** + * User-defined bonding policies (can be assigned to a peer) + */ + static std::map > _bondPolicyTemplates; + + /** + * Set of links defined for a given bonding policy + */ + static std::map > > _linkDefinitions; + + /** + * Set of link objects mapped to their physical interfaces + */ + static std::map > > _interfaceToLinkMap; + + struct NominatedPath; + struct Flow; -class Bond -{ friend class SharedPtr; friend class Peer; - friend class BondController; - struct PathQualityComparator - { - bool operator ()(const SharedPtr & a, const SharedPtr & b) - { - if(a->_failoverScore == b->_failoverScore) { - return a < b; - } - return a->_failoverScore > b->_failoverScore; - } - }; - -public: - - // TODO: Remove - bool _header; - int64_t _lastLogTS; - int64_t _lastPrintTS; - void dumpInfo(const int64_t now); - bool relevant(); + public: + void dumpInfo(int64_t now, bool force); + void dumpPathStatus(int64_t now, int pathIdx); SharedPtr getLink(const SharedPtr& path); + /** + * Constructor + * + * + */ + Bond(const RuntimeEnvironment* renv); + /** * Constructor. 
Creates a bond based off of ZT defaults * @@ -61,7 +536,7 @@ public: * @param policy Bonding policy * @param peer */ - Bond(const RuntimeEnvironment *renv, int policy, const SharedPtr& peer); + Bond(const RuntimeEnvironment* renv, int policy, const SharedPtr& peer); /** * Constructor. For use when user intends to manually specify parameters @@ -70,7 +545,7 @@ public: * @param policyAlias * @param peer */ - Bond(const RuntimeEnvironment *renv, std::string& basePolicy, std::string& policyAlias, const SharedPtr& peer); + Bond(const RuntimeEnvironment* renv, std::string& basePolicy, std::string& policyAlias, const SharedPtr& peer); /** * Constructor. Creates a bond based off of a user-defined bond template @@ -79,12 +554,15 @@ public: * @param original * @param peer */ - Bond(const RuntimeEnvironment *renv, SharedPtr originalBond, const SharedPtr& peer); + Bond(const RuntimeEnvironment* renv, SharedPtr originalBond, const SharedPtr& peer); /** * @return The human-readable name of the bonding policy */ - std::string policyAlias() { return _policyAlias; } + std::string policyAlias() + { + return _policyAlias; + } /** * Inform the bond about the path that its peer (owning object) just learned about. @@ -94,12 +572,16 @@ public: * @param path Newly-learned Path which should now be handled by the Bond * @param now Current time */ - void nominatePath(const SharedPtr& path, int64_t now); + void nominatePathToBond(const SharedPtr& path, int64_t now); /** - * Propagate and memoize often-used bonding preferences for each path + * Add a nominated path to the bond. This merely maps the index from the nominated set + * to a smaller set and sets the path's bonded flag to true. + * + * @param nominatedIdx The index in the nominated set + * @param bondedIdx The index in the bonded set (subset of nominated) */ - void applyUserPrefs(); + void addPathToBond(int nominatedIdx, int bondedIdx); /** * Check path states and perform bond rebuilds if needed. 
@@ -107,7 +589,7 @@ public: * @param now Current time * @param rebuild Whether or not the bond should be reconstructed. */ - void curateBond(const int64_t now, bool rebuild); + void curateBond(int64_t now, bool rebuild); /** * Periodically perform statistical summaries of quality metrics for all paths. @@ -135,8 +617,7 @@ public: * @param flowId Flow ID * @param now Current time */ - void recordOutgoingPacket(const SharedPtr &path, uint64_t packetId, - uint16_t payloadLength, Packet::Verb verb, int32_t flowId, int64_t now); + void recordOutgoingPacket(const SharedPtr& path, uint64_t packetId, uint16_t payloadLength, Packet::Verb verb, int32_t flowId, int64_t now); /** * Process the contents of an inbound VERB_QOS_MEASUREMENT to gather path quality observations. @@ -146,16 +627,7 @@ public: * @param rx_id table of packet IDs * @param rx_ts table of holding times */ - void receivedQoS(const SharedPtr& path, int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts); - - /** - * Process the contents of an inbound VERB_ACK to gather path quality observations. - * - * @param path Path over which packet was received - * @param now Current time - * @param ackedBytes Number of bytes ACKed by this VERB_ACK - */ - void receivedAck(const SharedPtr& path, int64_t now, int32_t ackedBytes); + void receivedQoS(const SharedPtr& path, int64_t now, int count, uint64_t* rx_id, uint16_t* rx_ts); /** * Generate the contents of a VERB_QOS_MEASUREMENT packet. @@ -164,7 +636,7 @@ public: * @param qosBuffer destination buffer * @return Size of payload */ - int32_t generateQoSPacket(const SharedPtr& path, int64_t now, char *qosBuffer); + int32_t generateQoSPacket(int pathIdx, int64_t now, char* qosBuffer); /** * Record statistics for an inbound packet. 
@@ -176,8 +648,7 @@ public: * @param flowId Flow ID * @param now Current time */ - void recordIncomingPacket(const SharedPtr& path, uint64_t packetId, uint16_t payloadLength, - Packet::Verb verb, int32_t flowId, int64_t now); + void recordIncomingPacket(const SharedPtr& path, uint64_t packetId, uint16_t payloadLength, Packet::Verb verb, int32_t flowId, int64_t now); /** * Determines the most appropriate path for packet and flow egress. This decision is made by @@ -192,13 +663,13 @@ public: /** * Creates a new flow record * - * @param path Path over which flow shall be handled + * @param np Path over which flow shall be handled * @param flowId Flow ID * @param entropy A byte of entropy to be used by the bonding algorithm * @param now Current time * @return Pointer to newly-created Flow */ - SharedPtr createFlow(const SharedPtr &path, int32_t flowId, unsigned char entropy, int64_t now); + SharedPtr createFlow(int pathIdx, int32_t flowId, unsigned char entropy, int64_t now); /** * Removes flow records that are past a certain age limit. 
@@ -215,7 +686,7 @@ public: * @param flow Flow to be assigned * @param now Current time */ - bool assignFlowToBondedPath(SharedPtr &flow, int64_t now); + bool assignFlowToBondedPath(SharedPtr& flow, int64_t now); /** * Determine whether a path change should occur given the remote peer's reported utility and our @@ -226,7 +697,7 @@ public: * @param path Path over which the negotiation request was received * @param remoteUtility How much utility the remote peer claims to gain by using the declared path */ - void processIncomingPathNegotiationRequest(uint64_t now, SharedPtr &path, int16_t remoteUtility); + void processIncomingPathNegotiationRequest(uint64_t now, SharedPtr& path, int16_t remoteUtility); /** * Determine state of path synchronization and whether a negotiation request @@ -235,7 +706,7 @@ public: * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call * @param now Current time */ - void pathNegotiationCheck(void *tPtr, const int64_t now); + void pathNegotiationCheck(void* tPtr, int64_t now); /** * Sends a VERB_ACK to the remote peer. @@ -246,8 +717,7 @@ public: * @param atAddress * @param now Current time */ - void sendACK(void *tPtr, const SharedPtr &path,int64_t localSocket, - const InetAddress &atAddress,int64_t now); + void sendACK(void* tPtr, int pathIdx, int64_t localSocket, const InetAddress& atAddress, int64_t now); /** * Sends a VERB_QOS_MEASUREMENT to the remote peer. @@ -258,8 +728,7 @@ public: * @param atAddress * @param now Current time */ - void sendQOS_MEASUREMENT(void *tPtr,const SharedPtr &path,int64_t localSocket, - const InetAddress &atAddress,int64_t now); + void sendQOS_MEASUREMENT(void* tPtr, int pathIdx, int64_t localSocket, const InetAddress& atAddress, int64_t now); /** * Sends a VERB_PATH_NEGOTIATION_REQUEST to the remote peer. 
@@ -267,7 +736,7 @@ public: * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call * @param path Path over which packet should be sent */ - void sendPATH_NEGOTIATION_REQUEST(void *tPtr, const SharedPtr &path); + void sendPATH_NEGOTIATION_REQUEST(void* tPtr, int pathIdx); /** * @@ -281,7 +750,7 @@ public: * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call * @param now Current time */ - void processActiveBackupTasks(void *tPtr, int64_t now); + void processActiveBackupTasks(void* tPtr, int64_t now); /** * Switches the active link in an active-backup scenario to the next best during @@ -298,7 +767,7 @@ public: * @param policy Bonding policy * @param templateBond */ - void setReasonableDefaults(int policy, SharedPtr templateBond, bool useTemplate); + void setBondParameters(int policy, SharedPtr templateBond, bool useTemplate); /** * Check and assign user-specified quality weights to this bond. @@ -311,64 +780,82 @@ public: /** * @param latencyInMilliseconds Maximum acceptable latency. */ - void setMaxAcceptableLatency(int16_t latencyInMilliseconds) { + void setMaxAcceptableLatency(int16_t latencyInMilliseconds) + { _maxAcceptableLatency = latencyInMilliseconds; } /** * @param latencyInMilliseconds Maximum acceptable (mean) latency. */ - void setMaxAcceptableMeanLatency(int16_t latencyInMilliseconds) { + void setMaxAcceptableMeanLatency(int16_t latencyInMilliseconds) + { _maxAcceptableMeanLatency = latencyInMilliseconds; } /** * @param latencyVarianceInMilliseconds Maximum acceptable packet delay variance (jitter). */ - void setMaxAcceptablePacketDelayVariance(int16_t latencyVarianceInMilliseconds) { + void setMaxAcceptablePacketDelayVariance(int16_t latencyVarianceInMilliseconds) + { _maxAcceptablePacketDelayVariance = latencyVarianceInMilliseconds; } /** * @param lossRatio Maximum acceptable packet loss ratio (PLR). 
*/ - void setMaxAcceptablePacketLossRatio(float lossRatio) { + void setMaxAcceptablePacketLossRatio(float lossRatio) + { _maxAcceptablePacketLossRatio = lossRatio; } /** * @param errorRatio Maximum acceptable packet error ratio (PER). */ - void setMaxAcceptablePacketErrorRatio(float errorRatio) { + void setMaxAcceptablePacketErrorRatio(float errorRatio) + { _maxAcceptablePacketErrorRatio = errorRatio; } /** * @param errorRatio Maximum acceptable packet error ratio (PER). */ - void setMinAcceptableAllocation(float minAlloc) { - _minAcceptableAllocation = minAlloc * 255; + void setMinAcceptableAllocation(float minAlloc) + { + _minAcceptableAllocation = (uint8_t)(minAlloc * 255); } /** * @return Whether the user has defined links for use on this bond */ - inline bool userHasSpecifiedLinks() { return _userHasSpecifiedLinks; } + inline bool userHasSpecifiedLinks() + { + return _userHasSpecifiedLinks; + } /** * @return Whether the user has defined a set of failover link(s) for this bond */ - inline bool userHasSpecifiedFailoverInstructions() { return _userHasSpecifiedFailoverInstructions; }; + inline bool userHasSpecifiedFailoverInstructions() + { + return _userHasSpecifiedFailoverInstructions; + }; /** * @return Whether the user has specified a primary link */ - inline bool userHasSpecifiedPrimaryLink() { return _userHasSpecifiedPrimaryLink; } + inline bool userHasSpecifiedPrimaryLink() + { + return _userHasSpecifiedPrimaryLink; + } /** * @return Whether the user has specified link speeds */ - inline bool userHasSpecifiedLinkSpeeds() { return _userHasSpecifiedLinkSpeeds; } + inline bool userHasSpecifiedLinkSpeeds() + { + return _userHasSpecifiedLinkSpeeds; + } /** * Periodically perform maintenance tasks for each active bond. 
@@ -376,26 +863,7 @@ public: * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call * @param now Current time */ - void processBackgroundTasks(void *tPtr, int64_t now); - - /** - * Rate limit gate for VERB_ACK - * - * @param now Current time - * @return Whether the incoming packet should be rate-gated - */ - inline bool rateGateACK(const int64_t now) - { - _ackCutoffCount++; - int numToDrain = _lastAckRateCheck ? (now - _lastAckRateCheck) / ZT_ACK_DRAINAGE_DIVISOR : _ackCutoffCount; - _lastAckRateCheck = now; - if (_ackCutoffCount > numToDrain) { - _ackCutoffCount-=numToDrain; - } else { - _ackCutoffCount = 0; - } - return (_ackCutoffCount < ZT_ACK_CUTOFF_LIMIT); - } + void processBackgroundBondTasks(void* tPtr, int64_t now); /** * Rate limit gate for VERB_QOS_MEASUREMENT @@ -403,17 +871,21 @@ public: * @param now Current time * @return Whether the incoming packet should be rate-gated */ - inline bool rateGateQoS(const int64_t now) + inline bool rateGateQoS(int64_t now, SharedPtr& path) { - _qosCutoffCount++; - int numToDrain = (now - _lastQoSRateCheck) / ZT_QOS_DRAINAGE_DIVISOR; - _lastQoSRateCheck = now; - if (_qosCutoffCount > numToDrain) { - _qosCutoffCount-=numToDrain; - } else { + // TODO: Verify before production + char pathStr[64] = { 0 }; + path->address().toString(pathStr); + int diff = now - _lastQoSRateCheck; + if ((diff) <= (_qosSendInterval / ZT_MAX_PEER_NETWORK_PATHS)) { + ++_qosCutoffCount; + } + else { _qosCutoffCount = 0; } - return (_qosCutoffCount < ZT_QOS_CUTOFF_LIMIT); + _lastQoSRateCheck = now; + // fprintf(stderr, "rateGateQoS (count=%d, send_interval=%d, diff=%d, path=%s)\n", _qosCutoffCount, _qosSendInterval, diff, pathStr); + return (_qosCutoffCount < (ZT_MAX_PEER_NETWORK_PATHS * 2)); } /** @@ -422,142 +894,219 @@ public: * @param now Current time * @return Whether the incoming packet should be rate-gated */ - inline bool rateGatePathNegotiation(const int64_t now) + inline bool 
rateGatePathNegotiation(int64_t now, SharedPtr& path) { - if ((now - _lastPathNegotiationReceived) <= ZT_PATH_NEGOTIATION_CUTOFF_TIME) + // TODO: Verify before production + char pathStr[64] = { 0 }; + path->address().toString(pathStr); + int diff = now - _lastPathNegotiationReceived; + if ((diff) <= (ZT_PATH_NEGOTIATION_CUTOFF_TIME / ZT_MAX_PEER_NETWORK_PATHS)) { ++_pathNegotiationCutoffCount; - else _pathNegotiationCutoffCount = 0; + } + else { + _pathNegotiationCutoffCount = 0; + } _lastPathNegotiationReceived = now; - return (_pathNegotiationCutoffCount < ZT_PATH_NEGOTIATION_CUTOFF_LIMIT); + // fprintf(stderr, "rateGateNeg (count=%d, send_interval=%d, diff=%d, path=%s)\n", _pathNegotiationCutoffCount, (ZT_PATH_NEGOTIATION_CUTOFF_TIME / ZT_MAX_PEER_NETWORK_PATHS), diff, pathStr); + return (_pathNegotiationCutoffCount < (ZT_MAX_PEER_NETWORK_PATHS * 2)); } /** * @param interval Maximum amount of time user expects a failover to take on this bond. */ - inline void setFailoverInterval(uint32_t interval) { _failoverInterval = interval; } + inline void setFailoverInterval(uint32_t interval) + { + _failoverInterval = interval; + } /** * @param interval Maximum amount of time user expects a failover to take on this bond. */ - inline uint32_t getFailoverInterval() { return _failoverInterval; } + inline uint32_t getFailoverInterval() + { + return _failoverInterval; + } /** * @param strategy Strategy that the bond uses to re-assign protocol flows. 
*/ - inline void setFlowRebalanceStrategy(uint32_t strategy) { _flowRebalanceStrategy = strategy; } + inline void setFlowRebalanceStrategy(uint32_t strategy) + { + _flowRebalanceStrategy = strategy; + } /** * @param strategy Strategy that the bond uses to prob for path aliveness and quality */ - inline void setLinkMonitorStrategy(uint8_t strategy) { _linkMonitorStrategy = strategy; } - - /** - * @param abOverflowEnabled Whether "overflow" mode is enabled for this active-backup bond - */ - inline void setOverflowMode(bool abOverflowEnabled) { _abOverflowEnabled = abOverflowEnabled; } + inline void setLinkMonitorStrategy(uint8_t strategy) + { + _linkMonitorStrategy = strategy; + } /** * @return the current up delay parameter */ - inline uint16_t getUpDelay() { return _upDelay; } + inline uint16_t getUpDelay() + { + return _upDelay; + } /** * @param upDelay Length of time before a newly-discovered path is admitted to the bond */ - inline void setUpDelay(int upDelay) { if (upDelay >= 0) { _upDelay = upDelay; } } + inline void setUpDelay(int upDelay) + { + if (upDelay >= 0) { + _upDelay = upDelay; + } + } /** * @return Length of time before a newly-failed path is removed from the bond */ - inline uint16_t getDownDelay() { return _downDelay; } + inline uint16_t getDownDelay() + { + return _downDelay; + } /** * @param downDelay Length of time before a newly-failed path is removed from the bond */ - inline void setDownDelay(int downDelay) { if (downDelay >= 0) { _downDelay = downDelay; } } + inline void setDownDelay(int downDelay) + { + if (downDelay >= 0) { + _downDelay = downDelay; + } + } /** - * @return the current monitoring interval for the bond (can be overridden with intervals specific to certain links.) 
+ * @return The current monitoring interval for the bond */ - inline uint16_t getBondMonitorInterval() { return _bondMonitorInterval; } + inline int monitorInterval() + { + return _monitorInterval; + } /** * Set the current monitoring interval for the bond (can be overridden with intervals specific to certain links.) * * @param monitorInterval How often gratuitous VERB_HELLO(s) are sent to remote peer. */ - inline void setBondMonitorInterval(uint16_t interval) { _bondMonitorInterval = interval; } + inline void setBondMonitorInterval(uint16_t interval) + { + _monitorInterval = interval; + } /** * @param policy Bonding policy for this bond */ - inline void setPolicy(uint8_t policy) { _bondingPolicy = policy; } - + /* + inline void setPolicy(uint8_t policy) + { + _policy = policy; + } +*/ /** * @return the current bonding policy */ - inline uint8_t getPolicy() { return _bondingPolicy; } + inline uint8_t policy() + { + return _policy; + } /** * @return the health status of the bond */ - inline bool isHealthy() { return _isHealthy; } + inline bool isHealthy() + { + return _isHealthy; + } /** * @return the number of links comprising this bond which are considered alive */ - inline uint8_t getNumAliveLinks() { return _numAliveLinks; }; + inline uint8_t getNumAliveLinks() + { + return _numAliveLinks; + }; /** * @return the number of links comprising this bond */ - inline uint8_t getNumTotalLinks() { return _numTotalLinks; } + inline uint8_t getNumTotalLinks() + { + return _numTotalLinks; + } /** * * @param allowFlowHashing */ - inline void setFlowHashing(bool allowFlowHashing) { _allowFlowHashing = allowFlowHashing; } + inline void setFlowHashing(bool allowFlowHashing) + { + _allowFlowHashing = allowFlowHashing; + } /** * @return Whether flow-hashing is currently enabled for this bond. 
*/ - bool flowHashingEnabled() { return _allowFlowHashing; } + bool flowHashingEnabled() + { + return _allowFlowHashing; + } /** * * @param packetsPerLink */ - inline void setPacketsPerLink(int packetsPerLink) { _packetsPerLink = packetsPerLink; } + inline void setPacketsPerLink(int packetsPerLink) + { + _packetsPerLink = packetsPerLink; + } /** * @return Number of packets to be sent on each interface in a balance-rr bond */ - inline int getPacketsPerLink() { return _packetsPerLink; } + inline int getPacketsPerLink() + { + return _packetsPerLink; + } /** * * @param linkSelectMethod */ - inline void setLinkSelectMethod(uint8_t method) { _abLinkSelectMethod = method; } + inline void setLinkSelectMethod(uint8_t method) + { + _abLinkSelectMethod = method; + } /** * * @return */ - inline uint8_t getLinkSelectMethod() { return _abLinkSelectMethod; } + inline uint8_t getLinkSelectMethod() + { + return _abLinkSelectMethod; + } /** * * @param allowPathNegotiation */ - inline void setAllowPathNegotiation(bool allowPathNegotiation) { _allowPathNegotiation = allowPathNegotiation; } + inline void setAllowPathNegotiation(bool allowPathNegotiation) + { + _allowPathNegotiation = allowPathNegotiation; + } /** * * @return */ - inline bool allowPathNegotiation() { return _allowPathNegotiation; } + inline bool allowPathNegotiation() + { + return _allowPathNegotiation; + } /** * Forcibly rotates the currently active link used in an active-backup bond to the next link in the failover queue @@ -566,56 +1115,303 @@ public: */ bool abForciblyRotateLink(); - SharedPtr getPeer() { return _peer; } + /** + * @param now Current time + * @return All known paths to this peer + */ + inline std::vector > paths(const int64_t now) const + { + std::vector > pp; + Mutex::Lock _l(_paths_m); + for (unsigned int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (! 
_paths[i].p) + break; + pp.push_back(_paths[i].p); + } + return pp; + } -private: + /** + * Emit message to tracing system but with added timestamp and subsystem info + * + * TODO: Will be replaced when better logging facilities exist in Trace.hpp + */ + void log(const char* fmt, ...) + { + time_t rawtime; + struct tm* timeinfo; + char timestamp[80]; + time(&rawtime); + timeinfo = localtime(&rawtime); + strftime(timestamp, 80, "%F %T", timeinfo); +#define MAX_BOND_MSG_LEN 1024 + char traceMsg[MAX_BOND_MSG_LEN]; + char userMsg[MAX_BOND_MSG_LEN]; + va_list args; + va_start(args, fmt); + if (vsnprintf(userMsg, sizeof(userMsg), fmt, args) < 0) { + fprintf(stderr, "Encountered format encoding error while writing to trace log\n"); + return; + } + snprintf(traceMsg, MAX_BOND_MSG_LEN, "%s (%llx/%s) %s", timestamp, _peerId, _policyAlias.c_str(), userMsg); + va_end(args); + RR->t->bondStateMessage(NULL, traceMsg); +#undef MAX_MSG_LEN + } - const RuntimeEnvironment *RR; + private: + struct NominatedPath { + NominatedPath() + : lastQoSMeasurement(0) + , lastThroughputEstimation(0) + , lastRefractoryUpdate(0) + , lastAliveToggle(0) + , alive(false) + , eligible(true) + , whenNominated(0) + , refractoryPeriod(0) + , ipvPref(0) + , mode(0) + , onlyPathOnLink(false) + , bonded(false) + , negotiated(false) + , shouldReallocateFlows(false) + , assignedFlowCount(0) + , latencyMean(0) + , latencyVariance(0) + , packetLossRatio(0) + , packetErrorRatio(0) + , throughputMean(0) + , throughputMax(0) + , throughputVariance(0) + , allocation(0) + , byteLoad(0) + , relativeByteLoad(0) + , affinity(0) + , failoverScore(0) + , packetsReceivedSinceLastQoS(0) + , packetsIn(0) + , packetsOut(0) + { + } + + /** + * Set or update a refractory period for the path. 
+ * + * @param punishment How much a path should be punished + * @param pathFailure Whether this call is the result of a recent path failure + */ + inline void adjustRefractoryPeriod(int64_t now, uint32_t punishment, bool pathFailure) + { + if (pathFailure) { + unsigned int suggestedRefractoryPeriod = refractoryPeriod ? punishment + (refractoryPeriod * 2) : punishment; + refractoryPeriod = std::min(suggestedRefractoryPeriod, (unsigned int)ZT_BOND_MAX_REFRACTORY_PERIOD); + lastRefractoryUpdate = 0; + } + else { + uint32_t drainRefractory = 0; + if (lastRefractoryUpdate) { + drainRefractory = (now - lastRefractoryUpdate); + } + else { + drainRefractory = (now - lastAliveToggle); + } + lastRefractoryUpdate = now; + if (refractoryPeriod > drainRefractory) { + refractoryPeriod -= drainRefractory; + } + else { + refractoryPeriod = 0; + lastRefractoryUpdate = 0; + } + } + } + + /** + * @return True if a path is permitted to be used in a bond (according to user pref.) + */ + inline bool allowed() + { + return (! ipvPref || ((p->_addr.isV4() && (ipvPref == 4 || ipvPref == 46 || ipvPref == 64)) || ((p->_addr.isV6() && (ipvPref == 6 || ipvPref == 46 || ipvPref == 64))))); + } + + /** + * @return True if a path is preferred over another on the same physical link (according to user pref.) 
+ */ + inline bool preferred() + { + return onlyPathOnLink || (p->_addr.isV4() && (ipvPref == 4 || ipvPref == 46)) || (p->_addr.isV6() && (ipvPref == 6 || ipvPref == 64)); + } + + /** + * @param now Current time + * @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time + */ + inline bool needsToSendQoS(int64_t now, int qosSendInterval) + { + // fprintf(stderr, "QOS table (%d / %d)\n", packetsReceivedSinceLastQoS, ZT_QOS_TABLE_SIZE); + return ((packetsReceivedSinceLastQoS >= ZT_QOS_TABLE_SIZE) || ((now - lastQoSMeasurement) > qosSendInterval)) && packetsReceivedSinceLastQoS; + } + + /** + * Reset packet counters + */ + inline void resetPacketCounts() + { + packetsIn = 0; + packetsOut = 0; + } + + std::map qosStatsOut; // id:egress_time + std::map qosStatsIn; // id:now + + RingBuffer qosRecordSize; + RingBuffer qosRecordLossSamples; + RingBuffer throughputSamples; + RingBuffer packetValiditySamples; + RingBuffer throughputVarianceSamples; + RingBuffer latencySamples; + + uint64_t lastQoSMeasurement; // Last time that a VERB_QOS_MEASUREMENT was sent out on this path. + uint64_t lastThroughputEstimation; // Last time that the path's throughput was estimated. + uint64_t lastRefractoryUpdate; // The last time that the refractory period was updated. + uint64_t lastAliveToggle; // The last time that the path was marked as "alive". + bool alive; + bool eligible; // State of eligibility at last check. Used for determining state changes. + uint64_t whenNominated; // Timestamp indicating when this path's trial period began. + uint32_t refractoryPeriod; // Amount of time that this path will be prevented from becoming a member of a bond. + uint8_t ipvPref; // IP version preference inherited from the physical link. + uint8_t mode; // Mode inherited from the physical link. + bool onlyPathOnLink; // IP version preference inherited from the physical link. + bool enabled; // Enabled state inherited from the physical link. 
+ bool bonded; // Whether this path is currently part of a bond. + bool negotiated; // Whether this path was intentionally negotiated by either peer. + bool shouldReallocateFlows; // Whether flows should be moved from this path. Current traffic flows will be re-allocated immediately. + uint16_t assignedFlowCount; // The number of flows currently assigned to this path. + float latencyMean; // The mean latency (computed from a sliding window.) + float latencyVariance; // Packet delay variance (computed from a sliding window.) + float packetLossRatio; // The ratio of lost packets to received packets. + float packetErrorRatio; // The ratio of packets that failed their MAC/CRC checks to those that did not. + uint64_t throughputMean; // The estimated mean throughput of this path. + uint64_t throughputMax; // The maximum observed throughput of this path. + float throughputVariance; // The variance in the estimated throughput of this path. + uint8_t allocation; // The relative quality of this path to all others in the bond, [0-255]. + uint64_t byteLoad; // How much load this path is under. + uint8_t relativeByteLoad; // How much load this path is under (relative to other paths in the bond.) + uint8_t affinity; // Relative value expressing how "deserving" this path is of new traffic. + uint32_t failoverScore; // Score that indicates to what degree this path is preferred over others that are available to the bonding policy. (specifically for active-backup) + int32_t packetsReceivedSinceLastQoS; // Number of packets received since the last VERB_QOS_MEASUREMENT was sent to the remote peer. + + /** + * Counters used for tracking path load. 
+ */ + int packetsIn; + int packetsOut; + + AtomicCounter __refCount; + + SharedPtr p; + void set(uint64_t now, const SharedPtr& path) + { + p = path; + whenNominated = now; + p->_bondingMetricPtr = (void*)this; + } + }; + + /** + * Paths nominated to the bond (may or may not actually be bonded) + */ + NominatedPath _paths[ZT_MAX_PEER_NETWORK_PATHS]; + + inline int getNominatedPathIdx(const SharedPtr& path) + { + for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { + if (_paths[i].p == path) { + return i; + } + } + return ZT_MAX_PEER_NETWORK_PATHS; + } + + /** + * A protocol flow that is identified by the origin and destination port. + */ + struct Flow { + /** + * @param flowId Given flow ID + * @param now Current time + */ + Flow(int32_t flowId, int64_t now) : id(flowId), bytesIn(0), bytesOut(0), lastActivity(now), lastPathReassignment(0), assignedPath(ZT_MAX_PEER_NETWORK_PATHS) + { + } + + /** + * Reset flow statistics + */ + inline void resetByteCounts() + { + bytesIn = 0; + bytesOut = 0; + } + + /** + * How long since a packet was sent or received in this flow + * + * @param now Current time + * @return The age of the flow in terms of last recorded activity + */ + int64_t age(int64_t now) + { + return now - lastActivity; + } + + /** + * @param path Assigned path over which this flow should be handled + */ + inline void assignPath(int pathIdx, int64_t now) + { + assignedPath = pathIdx; + lastPathReassignment = now; + } + + AtomicCounter __refCount; + + int32_t id; // Flow ID used for hashing and path selection + uint64_t bytesIn; // Used for tracking flow size + uint64_t bytesOut; // Used for tracking flow size + int64_t lastActivity; // The last time that this flow handled traffic + int64_t lastPathReassignment; // Time of last path assignment. Used for anti-flapping + int assignedPath; // Index of path to which this flow is assigned + }; + + const RuntimeEnvironment* RR; AtomicCounter __refCount; - /** - * Custom name given by the user to this bond type. 
- */ - std::string _policyAlias; - - /** - * Paths that this bond has been made aware of but that are not necessarily - * part of the bond proper. - */ - SharedPtr _paths[ZT_MAX_PEER_NETWORK_PATHS]; + std::string _policyAlias; // Custom name given by the user to this bond type. /** * Set of indices corresponding to paths currently included in the bond proper. This * may only be updated during a call to curateBond(). The reason for this is so that * we can simplify the high frequency packet egress logic. */ - int _bondedIdx[ZT_MAX_PEER_NETWORK_PATHS]; + int _bondIdxMap[ZT_MAX_PEER_NETWORK_PATHS]; + int _numBondedPaths; // Number of paths currently included in the _bondIdxMap set. + std::map > _flows; // Flows hashed according to port and protocol + float _qw[ZT_QOS_WEIGHT_SIZE]; // How much each factor contributes to the "quality" score of a path. - /** - * Number of paths currently included in the _bondedIdx set. - */ - int _numBondedPaths; - - /** - * Flows hashed according to port and protocol - */ - std::map > _flows; - - float _qualityWeights[ZT_QOS_WEIGHT_SIZE]; // How much each factor contributes to the "quality" score of a path. - - uint8_t _bondingPolicy; + uint8_t _policy; uint32_t _upDelay; uint32_t _downDelay; // active-backup - SharedPtr _abPath; // current active path - std::list > _abFailoverQueue; - uint8_t _abLinkSelectMethod; // link re-selection policy for the primary link in active-backup - bool _abOverflowEnabled; + int _abPathIdx; // current active path + std::deque _abFailoverQueue; + uint8_t _abLinkSelectMethod; // link re-selection policy for the primary link in active-backup // balance-rr - uint8_t _rrIdx; // index to path currently in use during Round Robin operation - uint16_t _rrPacketsSentOnCurrLink; // number of packets sent on this link since the most recent path switch. 
+ uint8_t _rrIdx; // index to path currently in use during Round Robin operation + uint16_t _rrPacketsSentOnCurrLink; // number of packets sent on this link since the most recent path switch. /** * How many packets will be sent on a path before moving to the next path * in the round-robin sequence. A value of zero will cause a random path @@ -629,13 +1425,11 @@ private: // dynamic link monitoring uint8_t _linkMonitorStrategy; - uint32_t _dynamicPathMonitorInterval; // path negotiation int16_t _localUtility; - SharedPtr negotiatedPath; + int negotiatedPathIdx; uint8_t _numSentPathNegotiationRequests; - unsigned int _pathNegotiationCutoffCount; bool _allowPathNegotiation; /** @@ -669,82 +1463,52 @@ private: * drains linearly. For each eligibility change the remaining punishment is doubled. */ uint32_t _defaultPathRefractoryPeriod; + unsigned char _freeRandomByte; // Free byte of entropy that is updated on every packet egress event. + SharedPtr _peer; // Remote peer that this bond services + unsigned long long _peerId; // ID of the peer that this bond services /** - * Whether the current bonding policy requires computation of path statistics - */ - bool _shouldCollectPathStatistics; - - /** - * Free byte of entropy that is updated on every packet egress event. 
- */ - unsigned char _freeRandomByte; - - /** - * Remote peer that this bond services - */ - SharedPtr _peer; - - /** - * Rate-limit cutoffs + * Rate-limiting */ uint16_t _qosCutoffCount; - uint16_t _ackCutoffCount; + uint64_t _lastQoSRateCheck; + uint16_t _pathNegotiationCutoffCount; + uint64_t _lastPathNegotiationReceived; /** * Recent event timestamps */ - uint64_t _lastAckRateCheck; - uint64_t _lastQoSRateCheck; + uint64_t _lastSummaryDump; + uint64_t _lastQualityEstimation; - uint64_t _lastCheckUserPreferences; uint64_t _lastBackgroundTaskCheck; uint64_t _lastBondStatusLog; - uint64_t _lastPathNegotiationReceived; uint64_t _lastPathNegotiationCheck; uint64_t _lastSentPathNegotiationRequest; - uint64_t _lastFlowStatReset; uint64_t _lastFlowExpirationCheck; uint64_t _lastFlowRebalance; uint64_t _lastFrame; uint64_t _lastActiveBackupPathChange; Mutex _paths_m; + Mutex _flows_m; - /** - * Whether the user has specified links for this bond. - */ - bool _userHasSpecifiedLinks; - - /** - * Whether the user has specified a primary link for this bond. - */ - bool _userHasSpecifiedPrimaryLink; - - /** - * Whether the user has specified failover instructions for this bond. - */ - bool _userHasSpecifiedFailoverInstructions; - - /** - * Whether the user has specified links speeds for this bond. - */ - bool _userHasSpecifiedLinkSpeeds; - + bool _userHasSpecifiedLinks; // Whether the user has specified links for this bond. + bool _userHasSpecifiedPrimaryLink; // Whether the user has specified a primary link for this bond. + bool _userHasSpecifiedFailoverInstructions; // Whether the user has specified failover instructions for this bond. + bool _userHasSpecifiedLinkSpeeds; // Whether the user has specified links speeds for this bond. /** * How frequently (in ms) a VERB_ECHO is sent to a peer to verify that a * path is still active. A value of zero (0) will disable active path * monitoring; as result, all monitoring will be a function of traffic. 
*/ - uint16_t _bondMonitorInterval; + int _monitorInterval; + bool _allowFlowHashing; // Whether or not flow hashing is allowed. - /** - * Whether or not flow hashing is allowed. - */ - bool _allowFlowHashing; + uint64_t _overheadBytes; }; -} // namespace ZeroTier +} // namespace ZeroTier -#endif \ No newline at end of file +#endif diff --git a/node/BondController.cpp b/node/BondController.cpp deleted file mode 100644 index 5692390c1..000000000 --- a/node/BondController.cpp +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (c)2013-2020 ZeroTier, Inc. - * - * Use of this software is governed by the Business Source License included - * in the LICENSE.TXT file in the project's root directory. - * - * Change Date: 2025-01-01 - * - * On the date above, in accordance with the Business Source License, use - * of this software will be governed by version 2.0 of the Apache License. - */ -/****/ - -#include "../osdep/OSUtils.hpp" - -#include "Constants.hpp" -#include "BondController.hpp" -#include "Peer.hpp" - -namespace ZeroTier { - -int BondController::_minReqPathMonitorInterval; -uint8_t BondController::_defaultBondingPolicy; - -BondController::BondController(const RuntimeEnvironment *renv) : - RR(renv) -{ - bondStartTime = RR->node->now(); - _defaultBondingPolicy = ZT_BONDING_POLICY_NONE; -} - -bool BondController::linkAllowed(std::string &policyAlias, SharedPtr link) -{ - bool foundInDefinitions = false; - if (_linkDefinitions.count(policyAlias)) { - auto it = _linkDefinitions[policyAlias].begin(); - while (it != _linkDefinitions[policyAlias].end()) { - if (link->ifname() == (*it)->ifname()) { - foundInDefinitions = true; - break; - } - ++it; - } - } - return _linkDefinitions[policyAlias].empty() || foundInDefinitions; -} - -void BondController::addCustomLink(std::string& policyAlias, SharedPtr link) -{ - Mutex::Lock _l(_links_m); - _linkDefinitions[policyAlias].push_back(link); - auto search = _interfaceToLinkMap[policyAlias].find(link->ifname()); - if (search == 
_interfaceToLinkMap[policyAlias].end()) { - link->setAsUserSpecified(true); - _interfaceToLinkMap[policyAlias].insert(std::pair>(link->ifname(), link)); - } -} - -bool BondController::addCustomPolicy(const SharedPtr& newBond) -{ - Mutex::Lock _l(_bonds_m); - if (!_bondPolicyTemplates.count(newBond->policyAlias())) { - _bondPolicyTemplates[newBond->policyAlias()] = newBond; - return true; - } - return false; -} - -bool BondController::assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias) -{ - Mutex::Lock _l(_bonds_m); - if (!_policyTemplateAssignments.count(identity)) { - _policyTemplateAssignments[identity] = policyAlias; - return true; - } - return false; -} - -SharedPtr BondController::getBondByPeerId(int64_t identity) -{ - Mutex::Lock _l(_bonds_m); - return _bonds.count(identity) ? _bonds[identity] : SharedPtr(); -} - -SharedPtr BondController::createTransportTriggeredBond(const RuntimeEnvironment *renv, const SharedPtr& peer) -{ - Mutex::Lock _l(_bonds_m); - int64_t identity = peer->identity().address().toInt(); - Bond *bond = nullptr; - char traceMsg[128]; - if (!_bonds.count(identity)) { - std::string policyAlias; - if (!_policyTemplateAssignments.count(identity)) { - if (_defaultBondingPolicy) { - sprintf(traceMsg, "%s (bond) Creating new default %s bond to peer %llx", - OSUtils::humanReadableTimestamp().c_str(), getPolicyStrByCode(_defaultBondingPolicy).c_str(), identity); RR->t->bondStateMessage(NULL, traceMsg); - bond = new Bond(renv, _defaultBondingPolicy, peer); - } - if (!_defaultBondingPolicy && _defaultBondingPolicyStr.length()) { - sprintf(traceMsg, "%s (bond) Creating new default custom %s bond to peer %llx", - OSUtils::humanReadableTimestamp().c_str(), _defaultBondingPolicyStr.c_str(), identity); - RR->t->bondStateMessage(NULL, traceMsg); - bond = new Bond(renv, _bondPolicyTemplates[_defaultBondingPolicyStr].ptr(), peer); - } - } - else { - if (!_bondPolicyTemplates[_policyTemplateAssignments[identity]]) { - sprintf(traceMsg, 
"%s (bond) Creating new bond. Assignment for peer %llx was specified as %s but the bond definition was not found. Using default %s", - OSUtils::humanReadableTimestamp().c_str(), identity, _policyTemplateAssignments[identity].c_str(), getPolicyStrByCode(_defaultBondingPolicy).c_str()); - RR->t->bondStateMessage(NULL, traceMsg); - bond = new Bond(renv, _defaultBondingPolicy, peer); - } - else { - sprintf(traceMsg, "%s (bond) Creating new default bond %s to peer %llx", - OSUtils::humanReadableTimestamp().c_str(), _defaultBondingPolicyStr.c_str(), identity); - RR->t->bondStateMessage(NULL, traceMsg); - bond = new Bond(renv, _bondPolicyTemplates[_policyTemplateAssignments[identity]].ptr(), peer); - } - } - } - if (bond) { - _bonds[identity] = bond; - /** - * Determine if user has specified anything that could affect the bonding policy's decisions - */ - if (_interfaceToLinkMap.count(bond->policyAlias())) { - std::map >::iterator it = _interfaceToLinkMap[bond->policyAlias()].begin(); - while (it != _interfaceToLinkMap[bond->policyAlias()].end()) { - if (it->second->isUserSpecified()) { - bond->_userHasSpecifiedLinks = true; - } - if (it->second->isUserSpecified() && it->second->primary()) { - bond->_userHasSpecifiedPrimaryLink = true; - } - if (it->second->isUserSpecified() && it->second->userHasSpecifiedFailoverInstructions()) { - bond->_userHasSpecifiedFailoverInstructions = true; - } - if (it->second->isUserSpecified() && (it->second->speed() > 0)) { - bond->_userHasSpecifiedLinkSpeeds = true; - } - ++it; - } - } - return bond; - } - return SharedPtr(); -} - -SharedPtr BondController::getLinkBySocket(const std::string& policyAlias, uint64_t localSocket) -{ - Mutex::Lock _l(_links_m); - char ifname[16]; - _phy->getIfName((PhySocket *) ((uintptr_t)localSocket), ifname, 16); - std::string ifnameStr(ifname); - auto search = _interfaceToLinkMap[policyAlias].find(ifnameStr); - if (search == _interfaceToLinkMap[policyAlias].end()) { - SharedPtr s = new Link(ifnameStr, 0, 0, 
0, 0, 0, true, ZT_MULTIPATH_SLAVE_MODE_SPARE, "", 0.0); - _interfaceToLinkMap[policyAlias].insert(std::pair >(ifnameStr, s)); - return s; - } - else { - return search->second; - } -} - -SharedPtr BondController::getLinkByName(const std::string& policyAlias, const std::string& ifname) -{ - Mutex::Lock _l(_links_m); - auto search = _interfaceToLinkMap[policyAlias].find(ifname); - if (search != _interfaceToLinkMap[policyAlias].end()) { - return search->second; - } - return SharedPtr(); -} - -bool BondController::allowedToBind(const std::string& ifname) -{ - return true; - /* - if (!_defaultBondingPolicy) { - return true; // no restrictions - } - Mutex::Lock _l(_links_m); - if (_interfaceToLinkMap.empty()) { - return true; // no restrictions - } - std::map > >::iterator policyItr = _interfaceToLinkMap.begin(); - while (policyItr != _interfaceToLinkMap.end()) { - std::map >::iterator linkItr = policyItr->second.begin(); - while (linkItr != policyItr->second.end()) { - if (linkItr->first == ifname) { - return true; - } - ++linkItr; - } - ++policyItr; - } - return false; - */ -} - -void BondController::processBackgroundTasks(void *tPtr, const int64_t now) -{ - Mutex::Lock _l(_bonds_m); - std::map >::iterator bondItr = _bonds.begin(); - while (bondItr != _bonds.end()) { - bondItr->second->processBackgroundTasks(tPtr, now); - ++bondItr; - } -} - -} // namespace ZeroTier \ No newline at end of file diff --git a/node/BondController.hpp b/node/BondController.hpp deleted file mode 100644 index 7d8e8e69b..000000000 --- a/node/BondController.hpp +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (c)2013-2020 ZeroTier, Inc. - * - * Use of this software is governed by the Business Source License included - * in the LICENSE.TXT file in the project's root directory. - * - * Change Date: 2025-01-01 - * - * On the date above, in accordance with the Business Source License, use - * of this software will be governed by version 2.0 of the Apache License. 
- */ -/****/ - -#ifndef ZT_BONDCONTROLLER_HPP -#define ZT_BONDCONTROLLER_HPP - -#include -#include - -#include "SharedPtr.hpp" -#include "../osdep/Phy.hpp" -#include "../osdep/Link.hpp" - -namespace ZeroTier { - -class RuntimeEnvironment; -class Bond; -class Peer; - -class BondController -{ - friend class Bond; - -public: - - BondController(const RuntimeEnvironment *renv); - - /** - * @return Whether this link is permitted to become a member of a bond. - */ - bool linkAllowed(std::string &policyAlias, SharedPtr link); - - /** - * @return The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements. - */ - int minReqPathMonitorInterval() { return _minReqPathMonitorInterval; } - - /** - * @param minReqPathMonitorInterval The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements. - */ - static void setMinReqPathMonitorInterval(int minReqPathMonitorInterval) { _minReqPathMonitorInterval = minReqPathMonitorInterval; } - - /** - * @return Whether the bonding layer is currently set up to be used. 
- */ - bool inUse() { return !_bondPolicyTemplates.empty() || _defaultBondingPolicy; } - - /** - * @param basePolicyName Bonding policy name (See ZeroTierOne.h) - * @return The bonding policy code for a given human-readable bonding policy name - */ - static int getPolicyCodeByStr(const std::string& basePolicyName) - { - if (basePolicyName == "active-backup") { return 1; } - if (basePolicyName == "broadcast") { return 2; } - if (basePolicyName == "balance-rr") { return 3; } - if (basePolicyName == "balance-xor") { return 4; } - if (basePolicyName == "balance-aware") { return 5; } - return 0; // "none" - } - - /** - * @param policy Bonding policy code (See ZeroTierOne.h) - * @return The human-readable name for the given bonding policy code - */ - static std::string getPolicyStrByCode(int policy) - { - if (policy == 1) { return "active-backup"; } - if (policy == 2) { return "broadcast"; } - if (policy == 3) { return "balance-rr"; } - if (policy == 4) { return "balance-xor"; } - if (policy == 5) { return "balance-aware"; } - return "none"; - } - - /** - * Sets the default bonding policy for new or undefined bonds. - * - * @param bp Bonding policy - */ - void setBondingLayerDefaultPolicy(uint8_t bp) { _defaultBondingPolicy = bp; } - - /** - * Sets the default (custom) bonding policy for new or undefined bonds. - * - * @param alias Human-readable string alias for bonding policy - */ - void setBondingLayerDefaultPolicyStr(std::string alias) { _defaultBondingPolicyStr = alias; } - - /** - * @return The default bonding policy - */ - static int defaultBondingPolicy() { return _defaultBondingPolicy; } - - /** - * Add a user-defined link to a given bonding policy. - * - * @param policyAlias User-defined custom name for variant of bonding policy - * @param link Pointer to new link definition - */ - void addCustomLink(std::string& policyAlias, SharedPtr link); - - /** - * Add a user-defined bonding policy that is based on one of the standard types. 
- * - * @param newBond Pointer to custom Bond object - * @return Whether a uniquely-named custom policy was successfully added - */ - bool addCustomPolicy(const SharedPtr& newBond); - - /** - * Assigns a specific bonding policy - * - * @param identity - * @param policyAlias - * @return - */ - bool assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias); - - /** - * Get pointer to bond by a given peer ID - * - * @param peer Remote peer ID - * @return A pointer to the Bond - */ - SharedPtr getBondByPeerId(int64_t identity); - - /** - * Add a new bond to the bond controller. - * - * @param renv Runtime environment - * @param peer Remote peer that this bond services - * @return A pointer to the newly created Bond - */ - SharedPtr createTransportTriggeredBond(const RuntimeEnvironment *renv, const SharedPtr& peer); - - /** - * Periodically perform maintenance tasks for the bonding layer. - * - * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call - * @param now Current time - */ - void processBackgroundTasks(void *tPtr, int64_t now); - - /** - * Gets a reference to a physical link definition given a policy alias and a local socket. - * - * @param policyAlias Policy in use - * @param localSocket Local source socket - * @return Physical link definition - */ - SharedPtr getLinkBySocket(const std::string& policyAlias, uint64_t localSocket); - - /** - * Gets a reference to a physical link definition given its human-readable system name. 
- * - * @param policyAlias Policy in use - * @param ifname Alphanumeric human-readable name - * @return Physical link definition - */ - SharedPtr getLinkByName(const std::string& policyAlias, const std::string& ifname); - - /** - * @param ifname Name of interface that we want to know if we can bind to - */ - bool allowedToBind(const std::string& ifname); - - uint64_t getBondStartTime() { return bondStartTime; } - -private: - - Phy *_phy; - const RuntimeEnvironment *RR; - - Mutex _bonds_m; - Mutex _links_m; - - /** - * The last time that the bond controller updated the set of bonds. - */ - uint64_t _lastBackgroundBondControlTaskCheck; - - /** - * The minimum monitoring interval among all paths in this bond. - */ - static int _minReqPathMonitorInterval; - - /** - * The default bonding policy used for new bonds unless otherwise specified. - */ - static uint8_t _defaultBondingPolicy; - - /** - * The default bonding policy used for new bonds unless otherwise specified. - */ - std::string _defaultBondingPolicyStr; - - /** - * All currently active bonds. 
- */ - std::map > _bonds; - - /** - * Map of peers to custom bonding policies - */ - std::map _policyTemplateAssignments; - - /** - * User-defined bonding policies (can be assigned to a peer) - */ - std::map > _bondPolicyTemplates; - - /** - * Set of links defined for a given bonding policy - */ - std::map > > _linkDefinitions; - - /** - * Set of link objects mapped to their physical interfaces - */ - std::map > > _interfaceToLinkMap; - - // TODO: Remove - uint64_t bondStartTime; -}; - -} // namespace ZeroTier - -#endif \ No newline at end of file diff --git a/node/CertificateOfMembership.cpp b/node/CertificateOfMembership.cpp index 10cb0863a..dbda9939f 100644 --- a/node/CertificateOfMembership.cpp +++ b/node/CertificateOfMembership.cpp @@ -20,165 +20,67 @@ namespace ZeroTier { -void CertificateOfMembership::setQualifier(uint64_t id,uint64_t value,uint64_t maxDelta) +CertificateOfMembership::CertificateOfMembership(uint64_t timestamp,uint64_t timestampMaxDelta,uint64_t nwid,const Identity &issuedTo) { - _signedBy.zero(); + _qualifiers[0].id = COM_RESERVED_ID_TIMESTAMP; + _qualifiers[0].value = timestamp; + _qualifiers[0].maxDelta = timestampMaxDelta; + _qualifiers[1].id = COM_RESERVED_ID_NETWORK_ID; + _qualifiers[1].value = nwid; + _qualifiers[1].maxDelta = 0; + _qualifiers[2].id = COM_RESERVED_ID_ISSUED_TO; + _qualifiers[2].value = issuedTo.address().toInt(); + _qualifiers[2].maxDelta = 0xffffffffffffffffULL; - for(unsigned int i=0;i<_qualifierCount;++i) { - if (_qualifiers[i].id == id) { - _qualifiers[i].value = value; - _qualifiers[i].maxDelta = maxDelta; - return; - } + // Include hash of full identity public key in COM for hardening purposes. Pack it in + // using the original COM format. Format may be revised in the future to make this cleaner. 
+ uint64_t idHash[6]; + issuedTo.publicKeyHash(idHash); + for(unsigned long i=0;i<4;++i) { + _qualifiers[i + 3].id = (uint64_t)(i + 3); + _qualifiers[i + 3].value = Utils::ntoh(idHash[i]); + _qualifiers[i + 3].maxDelta = 0xffffffffffffffffULL; } - if (_qualifierCount < ZT_NETWORK_COM_MAX_QUALIFIERS) { - _qualifiers[_qualifierCount].id = id; - _qualifiers[_qualifierCount].value = value; - _qualifiers[_qualifierCount].maxDelta = maxDelta; - ++_qualifierCount; - std::sort(&(_qualifiers[0]),&(_qualifiers[_qualifierCount])); - } -} - -#ifdef ZT_SUPPORT_OLD_STYLE_NETCONF - -std::string CertificateOfMembership::toString() const -{ - char tmp[ZT_NETWORK_COM_MAX_QUALIFIERS * 32]; - std::string s; - - s.append("1:"); // COM_UINT64_ED25519 - - uint64_t *const buf = new uint64_t[_qualifierCount * 3]; - try { - unsigned int ptr = 0; - for(unsigned int i=0;i<_qualifierCount;++i) { - buf[ptr++] = Utils::hton(_qualifiers[i].id); - buf[ptr++] = Utils::hton(_qualifiers[i].value); - buf[ptr++] = Utils::hton(_qualifiers[i].maxDelta); - } - s.append(Utils::hex(buf,ptr * sizeof(uint64_t),tmp)); - delete [] buf; - } catch ( ... ) { - delete [] buf; - throw; - } - - s.push_back(':'); - - s.append(_signedBy.toString(tmp)); - - if (_signedBy) { - s.push_back(':'); - s.append(Utils::hex(_signature.data,ZT_C25519_SIGNATURE_LEN,tmp)); - } - - return s; -} - -void CertificateOfMembership::fromString(const char *s) -{ - _qualifierCount = 0; - _signedBy.zero(); + _qualifierCount = 7; memset(_signature.data,0,ZT_C25519_SIGNATURE_LEN); - - if (!*s) - return; - - unsigned int colonAt = 0; - while ((s[colonAt])&&(s[colonAt] != ':')) ++colonAt; - - if (!((colonAt == 1)&&(s[0] == '1'))) // COM_UINT64_ED25519? 
- return; - - s += colonAt + 1; - colonAt = 0; - while ((s[colonAt])&&(s[colonAt] != ':')) ++colonAt; - - if (colonAt) { - const unsigned int buflen = colonAt / 2; - char *const buf = new char[buflen]; - unsigned int bufactual = Utils::unhex(s,colonAt,buf,buflen); - char *bufptr = buf; - try { - while (bufactual >= 24) { - if (_qualifierCount < ZT_NETWORK_COM_MAX_QUALIFIERS) { - _qualifiers[_qualifierCount].id = Utils::ntoh(*((uint64_t *)bufptr)); bufptr += 8; - _qualifiers[_qualifierCount].value = Utils::ntoh(*((uint64_t *)bufptr)); bufptr += 8; - _qualifiers[_qualifierCount].maxDelta = Utils::ntoh(*((uint64_t *)bufptr)); bufptr += 8; - ++_qualifierCount; - } else { - bufptr += 24; - } - bufactual -= 24; - } - } catch ( ... ) {} - delete [] buf; - } - - if (s[colonAt]) { - s += colonAt + 1; - colonAt = 0; - while ((s[colonAt])&&(s[colonAt] != ':')) ++colonAt; - - if (colonAt) { - char addrbuf[ZT_ADDRESS_LENGTH]; - if (Utils::unhex(s,colonAt,addrbuf,sizeof(addrbuf)) == ZT_ADDRESS_LENGTH) - _signedBy.setTo(addrbuf,ZT_ADDRESS_LENGTH); - - if ((_signedBy)&&(s[colonAt])) { - s += colonAt + 1; - colonAt = 0; - while ((s[colonAt])&&(s[colonAt] != ':')) ++colonAt; - if (colonAt) { - if (Utils::unhex(s,colonAt,_signature.data,ZT_C25519_SIGNATURE_LEN) != ZT_C25519_SIGNATURE_LEN) - _signedBy.zero(); - } else { - _signedBy.zero(); - } - } else { - _signedBy.zero(); - } - } - } - - std::sort(&(_qualifiers[0]),&(_qualifiers[_qualifierCount])); } -#endif // ZT_SUPPORT_OLD_STYLE_NETCONF - -bool CertificateOfMembership::agreesWith(const CertificateOfMembership &other) const +bool CertificateOfMembership::agreesWith(const CertificateOfMembership &other, const Identity &otherIdentity) const { - unsigned int myidx = 0; - unsigned int otheridx = 0; - if ((_qualifierCount == 0)||(other._qualifierCount == 0)) return false; - while (myidx < _qualifierCount) { - // Fail if we're at the end of other, since this means the field is - // missing. 
- if (otheridx >= other._qualifierCount) - return false; + std::map< uint64_t, uint64_t > otherFields; + for(unsigned int i=0;i= other._qualifierCount) + bool fullIdentityVerification = false; + for(unsigned int i=0;i<_qualifierCount;++i) { + const uint64_t qid = _qualifiers[i].id; + if ((qid >= 3)&&(qid <= 6)) + fullIdentityVerification = true; + std::map< uint64_t, uint64_t >::iterator otherQ(otherFields.find(qid)); + if (otherQ == otherFields.end()) + return false; + const uint64_t a = _qualifiers[i].value; + const uint64_t b = otherQ->second; + if (((a >= b) ? (a - b) : (b - a)) > _qualifiers[i].maxDelta) + return false; + } + + // If this COM has a full hash of its identity, assume the other must have this as well. + // Otherwise we are on a controller that does not incorporate these. + if (fullIdentityVerification) { + uint64_t idHash[6]; + otherIdentity.publicKeyHash(idHash); + for(unsigned long i=0;i<4;++i) { + std::map< uint64_t, uint64_t >::iterator otherQ(otherFields.find((uint64_t)(i + 3))); + if (otherQ == otherFields.end()) + return false; + if (otherQ->second != Utils::ntoh(idHash[i])) return false; } - - // Compare to determine if the absolute value of the difference - // between these two parameters is within our maxDelta. - const uint64_t a = _qualifiers[myidx].value; - const uint64_t b = other._qualifiers[myidx].value; - if (((a >= b) ? (a - b) : (b - a)) > _qualifiers[myidx].maxDelta) - return false; - - ++myidx; } return true; diff --git a/node/CertificateOfMembership.hpp b/node/CertificateOfMembership.hpp index f8500628d..1948dd7b7 100644 --- a/node/CertificateOfMembership.hpp +++ b/node/CertificateOfMembership.hpp @@ -94,6 +94,8 @@ public: * ZeroTier address to whom certificate was issued */ COM_RESERVED_ID_ISSUED_TO = 2 + + // IDs 3-6 reserved for full hash of identity to which this COM was issued. 
}; /** @@ -110,20 +112,7 @@ public: * @param nwid Network ID * @param issuedTo Certificate recipient */ - CertificateOfMembership(uint64_t timestamp,uint64_t timestampMaxDelta,uint64_t nwid,const Address &issuedTo) - { - _qualifiers[0].id = COM_RESERVED_ID_TIMESTAMP; - _qualifiers[0].value = timestamp; - _qualifiers[0].maxDelta = timestampMaxDelta; - _qualifiers[1].id = COM_RESERVED_ID_NETWORK_ID; - _qualifiers[1].value = nwid; - _qualifiers[1].maxDelta = 0; - _qualifiers[2].id = COM_RESERVED_ID_ISSUED_TO; - _qualifiers[2].value = issuedTo.toInt(); - _qualifiers[2].maxDelta = 0xffffffffffffffffULL; - _qualifierCount = 3; - memset(_signature.data,0,ZT_C25519_SIGNATURE_LEN); - } + CertificateOfMembership(uint64_t timestamp,uint64_t timestampMaxDelta,uint64_t nwid,const Identity &issuedTo); /** * Create from binary-serialized COM in buffer @@ -183,36 +172,6 @@ public: return 0ULL; } - /** - * Add or update a qualifier in this certificate - * - * Any signature is invalidated and signedBy is set to null. - * - * @param id Qualifier ID - * @param value Qualifier value - * @param maxDelta Qualifier maximum allowed difference (absolute value of difference) - */ - void setQualifier(uint64_t id,uint64_t value,uint64_t maxDelta); - inline void setQualifier(ReservedId id,uint64_t value,uint64_t maxDelta) { setQualifier((uint64_t)id,value,maxDelta); } - -#ifdef ZT_SUPPORT_OLD_STYLE_NETCONF - /** - * @return String-serialized representation of this certificate - */ - std::string toString() const; - - /** - * Set this certificate equal to the hex-serialized string - * - * Invalid strings will result in invalid or undefined certificate - * contents. These will subsequently fail validation and comparison. - * Empty strings will result in an empty certificate. 
- * - * @param s String to deserialize - */ - void fromString(const char *s); -#endif // ZT_SUPPORT_OLD_STYLE_NETCONF - /** * Compare two certificates for parameter agreement * @@ -224,9 +183,10 @@ public: * tuples present in this cert but not in other result in 'false'. * * @param other Cert to compare with + * @param otherIdentity Identity of other node * @return True if certs agree and 'other' may be communicated with */ - bool agreesWith(const CertificateOfMembership &other) const; + bool agreesWith(const CertificateOfMembership &other, const Identity &otherIdentity) const; /** * Sign this certificate diff --git a/node/Constants.hpp b/node/Constants.hpp index 400976c13..21d0754ec 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -361,7 +361,7 @@ /** * Maximum number of outgoing packets we monitor for QoS information */ -#define ZT_QOS_MAX_OUTSTANDING_RECORDS (1024*16) +#define ZT_QOS_MAX_OUTSTANDING_RECORDS (1024 * 16) /** * Interval used for rate-limiting the computation of path quality estimates. @@ -403,117 +403,11 @@ /** * All unspecified traffic is put in this bucket. Anything in a bucket with a - * smaller value is deprioritized. Anything in a bucket with a higher value is + * smaller value is de-prioritized. Anything in a bucket with a higher value is prioritized over other traffic. */ #define ZT_AQM_DEFAULT_BUCKET 0 -/** - * How often we emit a one-liner bond summary for each peer - */ -#define ZT_MULTIPATH_BOND_STATUS_INTERVAL 60000 - -/** - * How long before we consider a path to be dead in the general sense. This is - * used while searching for default or alternative paths to try in the absence - * of direct guidance from the user or a selection policy. - */ -#define ZT_MULTIPATH_DEFAULT_FAILOVER_INTERVAL 10000 - -/** - * How often flows are evaluated - */ -#define ZT_MULTIPATH_FLOW_CHECK_INTERVAL 10000 - -/** - * How long before we consider a flow to be dead and remove it from the - * policy's list. 
- */ -#define ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL (60000 * 5) - -/** - * How often a flow's statistical counters are reset - */ -#define ZT_FLOW_STATS_RESET_INTERVAL ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL - -/** - * Maximum number of flows allowed before we start forcibly forgetting old ones - */ -#define ZT_FLOW_MAX_COUNT (1024*64) - -/** - * How often flows are rebalanced across link (if at all) - */ -#define ZT_FLOW_MIN_REBALANCE_INTERVAL 5000 - -/** - * How often flows are rebalanced across link (if at all) - */ -#define ZT_FLOW_REBALANCE_INTERVAL 5000 - -/** - * A defensive timer to prevent path quality metrics from being - * processed too often. - */ -#define ZT_BOND_BACKGROUND_TASK_MIN_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY - -/** - * How often a bonding policy's background tasks are processed, - * some need more frequent attention than others. - */ -#define ZT_MULTIPATH_ACTIVE_BACKUP_CHECK_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY - -/** - * Minimum amount of time (since a previous transition) before the active-backup bonding - * policy is allowed to transition to a different link. Only valid for active-backup. - */ -#define ZT_MULTIPATH_MIN_ACTIVE_BACKUP_AUTOFLOP_INTERVAL 10000 - -/** - * How often a peer checks that incoming (and outgoing) traffic on a bonded link is - * appropriately paired. - */ -#define ZT_PATH_NEGOTIATION_CHECK_INTERVAL 15000 - -/** - * Time horizon for path negotiation paths cutoff - */ -#define ZT_PATH_NEGOTIATION_CUTOFF_TIME 60000 - -/** - * Maximum number of path negotiations within cutoff time - * - * This limits response to PATH_NEGOTIATION to CUTOFF_LIMIT responses - * per CUTOFF_TIME milliseconds per peer to prevent this from being - * useful for DOS amplification attacks. - */ -#define ZT_PATH_NEGOTIATION_CUTOFF_LIMIT 8 - -/** - * How many times a peer will attempt to petition another peer to synchronize its - * traffic to the same path before giving up and surrendering to the other peer's preference. 
- */ -#define ZT_PATH_NEGOTIATION_TRY_COUNT 3 - -/** - * How much greater the quality of a path should be before an - * optimization procedure triggers a switch. - */ -#define ZT_MULTIPATH_ACTIVE_BACKUP_OPTIMIZE_MIN_THRESHOLD 0.10 - -/** - * Artificially inflates the failover score for paths which meet - * certain non-performance-related policy ranking criteria. - */ -#define ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED 500 -#define ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY 1000 -#define ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED 5000 - -/** - * An indicator that no flow is to be associated with the given packet - */ -#define ZT_QOS_NO_FLOW -1 - /** * Timeout for overall peer activity (measured from last receive) */ @@ -604,8 +498,8 @@ #define ZT_ACK_CUTOFF_LIMIT 128 #define ZT_ACK_DRAINAGE_DIVISOR (1000 / ZT_ACK_CUTOFF_LIMIT) -#define ZT_MULTIPATH_DEFAULT_REFRCTORY_PERIOD 8000 -#define ZT_MULTIPATH_MAX_REFRACTORY_PERIOD 600000 +#define ZT_BOND_DEFAULT_REFRCTORY_PERIOD 8000 +#define ZT_BOND_MAX_REFRACTORY_PERIOD 600000 /** * Maximum number of direct path pushes within cutoff time @@ -641,6 +535,92 @@ */ #define ZT_PEER_GENERAL_RATE_LIMIT 1000 + +/** + * Minimum allowed amount of time between flow/path optimizations (anti-flapping) + */ +#define ZT_BOND_OPTIMIZE_INTERVAL 15000 + +/** + * Maximum number of flows allowed before we start forcibly forgetting old ones + */ +#define ZT_FLOW_MAX_COUNT (1024 * 64) + +/** + * How often we emit a bond summary for each bond + */ +#define ZT_BOND_STATUS_INTERVAL 30000 + +/** + * How long before we consider a path to be dead in the general sense. This is + * used while searching for default or alternative paths to try in the absence + * of direct guidance from the user or a selection policy. + */ +#define ZT_BOND_FAILOVER_DEFAULT_INTERVAL 5000 + +/** + * Anything below this value gets into thrashing territory since we divide + * this value by ZT_BOND_ECHOS_PER_FAILOVER_INTERVAL to send ECHOs often. 
+ */ +#define ZT_BOND_FAILOVER_MIN_INTERVAL 250 + +/** + * How many times per failover interval that an ECHO is sent. This should be + * at least 2. Anything more than 4 starts to increase overhead significantly. + */ +#define ZT_BOND_ECHOS_PER_FAILOVER_INTERVAL 4 + +/** + * A defensive timer to prevent path quality metrics from being + * processed too often. + */ +#define ZT_BOND_BACKGROUND_TASK_MIN_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY + +/** + * How often a bonding policy's background tasks are processed, + * some need more frequent attention than others. + */ +#define ZT_BOND_ACTIVE_BACKUP_CHECK_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY + +/** + * Time horizon for path negotiation paths cutoff + */ +#define ZT_PATH_NEGOTIATION_CUTOFF_TIME 60000 + +/** + * Maximum number of path negotiations within cutoff time + * + * This limits response to PATH_NEGOTIATION to CUTOFF_LIMIT responses + * per CUTOFF_TIME milliseconds per peer to prevent this from being + * useful for DOS amplification attacks. + */ +#define ZT_PATH_NEGOTIATION_CUTOFF_LIMIT 8 + +/** + * How many times a peer will attempt to petition another peer to synchronize its + * traffic to the same path before giving up and surrendering to the other peer's preference. + */ +#define ZT_PATH_NEGOTIATION_TRY_COUNT 3 + +/** + * How much greater the quality of a path should be before an + * optimization procedure triggers a switch. + */ +#define ZT_BOND_ACTIVE_BACKUP_OPTIMIZE_MIN_THRESHOLD 0.10 + +/** + * Artificially inflates the failover score for paths which meet + * certain non-performance-related policy ranking criteria. 
+ */ +#define ZT_BOND_FAILOVER_HANDICAP_PREFERRED 500 +#define ZT_BOND_FAILOVER_HANDICAP_PRIMARY 1000 +#define ZT_BOND_FAILOVER_HANDICAP_NEGOTIATED 5000 + +/** + * An indicator that no flow is to be associated with the given packet + */ +#define ZT_QOS_NO_FLOW -1 + /** * Don't do expensive identity validation more often than this * @@ -666,11 +646,6 @@ */ #define ZT_TRUST_EXPIRATION 600000 -/** - * Enable support for older network configurations from older (pre-1.1.6) controllers - */ -#define ZT_SUPPORT_OLD_STYLE_NETCONF 1 - /** * Desired buffer size for UDP sockets (used in service and osdep but defined here) */ diff --git a/node/Flow.hpp b/node/Flow.hpp deleted file mode 100644 index 77a4b207f..000000000 --- a/node/Flow.hpp +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c)2013-2020 ZeroTier, Inc. - * - * Use of this software is governed by the Business Source License included - * in the LICENSE.TXT file in the project's root directory. - * - * Change Date: 2025-01-01 - * - * On the date above, in accordance with the Business Source License, use - * of this software will be governed by version 2.0 of the Apache License. - */ -/****/ - -#ifndef ZT_FLOW_HPP -#define ZT_FLOW_HPP - -#include "Path.hpp" -#include "SharedPtr.hpp" - -namespace ZeroTier { - -/** - * A protocol flow that is identified by the origin and destination port. 
- */ -struct Flow -{ - /** - * @param flowId Given flow ID - * @param now Current time - */ - Flow(int32_t flowId, int64_t now) : - _flowId(flowId), - _bytesInPerUnitTime(0), - _bytesOutPerUnitTime(0), - _lastActivity(now), - _lastPathReassignment(0), - _assignedPath(SharedPtr()) - {} - - /** - * Reset flow statistics - */ - void resetByteCounts() - { - _bytesInPerUnitTime = 0; - _bytesOutPerUnitTime = 0; - } - - /** - * @return The Flow's ID - */ - int32_t id() { return _flowId; } - - /** - * @return Number of incoming bytes processed on this flow per unit time - */ - int64_t bytesInPerUnitTime() { return _bytesInPerUnitTime; } - - /** - * Record number of incoming bytes on this flow - * - * @param bytes Number of incoming bytes - */ - void recordIncomingBytes(uint64_t bytes) { _bytesInPerUnitTime += bytes; } - - /** - * @return Number of outgoing bytes processed on this flow per unit time - */ - int64_t bytesOutPerUnitTime() { return _bytesOutPerUnitTime; } - - /** - * Record number of outgoing bytes on this flow - * - * @param bytes - */ - void recordOutgoingBytes(uint64_t bytes) { _bytesOutPerUnitTime += bytes; } - - /** - * @return The total number of bytes processed on this flow - */ - uint64_t totalBytes() { return _bytesInPerUnitTime + _bytesOutPerUnitTime; } - - /** - * How long since a packet was sent or received in this flow - * - * @param now Current time - * @return The age of the flow in terms of last recorded activity - */ - int64_t age(int64_t now) { return now - _lastActivity; } - - /** - * Record that traffic was processed on this flow at the given time. 
- * - * @param now Current time - */ - void updateActivity(int64_t now) { _lastActivity = now; } - - /** - * @return Path assigned to this flow - */ - SharedPtr assignedPath() { return _assignedPath; } - - /** - * @param path Assigned path over which this flow should be handled - */ - void assignPath(const SharedPtr &path, int64_t now) { - _assignedPath = path; - _lastPathReassignment = now; - } - - AtomicCounter __refCount; - - int32_t _flowId; - uint64_t _bytesInPerUnitTime; - uint64_t _bytesOutPerUnitTime; - int64_t _lastActivity; - int64_t _lastPathReassignment; - SharedPtr _assignedPath; - SharedPtr _previouslyAssignedPath; -}; - -} // namespace ZeroTier - -#endif \ No newline at end of file diff --git a/node/Identity.hpp b/node/Identity.hpp index e6f658dc3..cc8de5126 100644 --- a/node/Identity.hpp +++ b/node/Identity.hpp @@ -109,6 +109,18 @@ public: */ inline bool hasPrivate() const { return (_privateKey != (C25519::Private *)0); } + /** + * Compute a SHA384 hash of this identity's address and public key(s). 
+ * + * @param sha384buf Buffer with 48 bytes of space to receive hash + */ + inline void publicKeyHash(void *sha384buf) const + { + uint8_t address[ZT_ADDRESS_LENGTH]; + _address.copyTo(address, ZT_ADDRESS_LENGTH); + SHA384(sha384buf, address, ZT_ADDRESS_LENGTH, _publicKey.data, ZT_C25519_PUBLIC_KEY_LEN); + } + /** * Compute the SHA512 hash of our private key (if we have one) * diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index d1f0f51dc..5a2a94642 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -88,7 +88,6 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr,int32_t f peer->received(tPtr,_path,hops(),packetId(),payloadLength(),v,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); break; case Packet::VERB_HELLO: r = _doHELLO(RR,tPtr,true); break; - case Packet::VERB_ACK: r = _doACK(RR,tPtr,peer); break; case Packet::VERB_QOS_MEASUREMENT: r = _doQOS_MEASUREMENT(RR,tPtr,peer); break; case Packet::VERB_ERROR: r = _doERROR(RR,tPtr,peer); break; case Packet::VERB_OK: r = _doOK(RR,tPtr,peer); break; @@ -191,6 +190,29 @@ bool IncomingPacket::_doERROR(const RuntimeEnvironment *RR,void *tPtr,const Shar } } break; + case Packet::ERROR_NETWORK_AUTHENTICATION_REQUIRED: { + const SharedPtr network(RR->node->network(at(ZT_PROTO_VERB_ERROR_IDX_PAYLOAD))); + if ((network)&&(network->controller() == peer->address())) { + bool noUrl = true; + int s = (int)size() - (ZT_PROTO_VERB_ERROR_IDX_PAYLOAD + 8); + if (s > 2) { + const uint16_t errorDataSize = at(ZT_PROTO_VERB_ERROR_IDX_PAYLOAD + 8); + s -= 2; + if (s >= (int)errorDataSize) { + Dictionary<3072> authInfo(((const char *)this->data()) + (ZT_PROTO_VERB_ERROR_IDX_PAYLOAD + 10), errorDataSize); + char authenticationURL[2048]; + if (authInfo.get("aU", authenticationURL, sizeof(authenticationURL)) > 0) { + authenticationURL[sizeof(authenticationURL) - 1] = 0; // ensure always zero terminated + network->setAuthenticationRequired(authenticationURL); + noUrl = false; + } + } + } + if 
(noUrl) + network->setAuthenticationRequired(""); + } + } break; + default: break; } @@ -199,35 +221,12 @@ bool IncomingPacket::_doERROR(const RuntimeEnvironment *RR,void *tPtr,const Shar return true; } -bool IncomingPacket::_doACK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) -{ - SharedPtr bond = peer->bond(); - if (!bond || !bond->rateGateACK(RR->node->now())) { - return true; - } - /* Dissect incoming ACK packet. From this we can estimate current throughput of the path, establish known - * maximums and detect packet loss. */ - int32_t ackedBytes; - if (payloadLength() != sizeof(ackedBytes)) { - return true; // ignore - } - memcpy(&ackedBytes, payload(), sizeof(ackedBytes)); - if (bond) { - bond->receivedAck(_path, RR->node->now(), Utils::ntoh(ackedBytes)); - } - return true; -} - bool IncomingPacket::_doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) { SharedPtr bond = peer->bond(); - /* TODO: Fix rate gate issue - if (!bond || !bond->rateGateQoS(RR->node->now())) { + if (!bond || !bond->rateGateQoS(RR->node->now(), _path)) { return true; } - */ - /* Dissect incoming QoS packet. From this we can compute latency values and their variance. - * The latency variance is used as a measure of "jitter". */ if (payloadLength() > ZT_QOS_MAX_PACKET_SIZE || payloadLength() < ZT_QOS_MIN_PACKET_SIZE) { return true; // ignore } @@ -1306,7 +1305,7 @@ bool IncomingPacket::_doPATH_NEGOTIATION_REQUEST(const RuntimeEnvironment *RR,vo { uint64_t now = RR->node->now(); SharedPtr bond = peer->bond(); - if (!bond || !bond->rateGatePathNegotiation(now)) { + if (!bond || !bond->rateGatePathNegotiation(now, _path)) { return true; } if (payloadLength() != sizeof(int16_t)) { diff --git a/node/IncomingPacket.hpp b/node/IncomingPacket.hpp index 134b5b3d0..95785795a 100644 --- a/node/IncomingPacket.hpp +++ b/node/IncomingPacket.hpp @@ -112,7 +112,6 @@ private: // been authenticated, decrypted, decompressed, and classified. 
bool _doERROR(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doHELLO(const RuntimeEnvironment *RR,void *tPtr,const bool alreadyAuthenticated); - bool _doACK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); diff --git a/node/Membership.hpp b/node/Membership.hpp index 476987714..63a7c10f5 100644 --- a/node/Membership.hpp +++ b/node/Membership.hpp @@ -91,13 +91,14 @@ public: * Check whether the peer represented by this Membership should be allowed on this network at all * * @param nconf Our network config + * @param otherNodeIdentity Identity of remote node * @return True if this peer is allowed on this network at all */ - inline bool isAllowedOnNetwork(const NetworkConfig &nconf) const + inline bool isAllowedOnNetwork(const NetworkConfig &thisNodeNetworkConfig, const Identity &otherNodeIdentity) const { - if (nconf.isPublic()) return true; + if (thisNodeNetworkConfig.isPublic()) return true; if (_com.timestamp() <= _comRevocationThreshold) return false; - return nconf.com.agreesWith(_com); + return thisNodeNetworkConfig.com.agreesWith(_com, otherNodeIdentity); } inline bool recentlyAssociated(const int64_t now) const diff --git a/node/Network.cpp b/node/Network.cpp index 914c96bc6..f3138f3ac 100644 --- a/node/Network.cpp +++ b/node/Network.cpp @@ -1022,6 +1022,7 @@ int Network::setConfiguration(void *tPtr,const NetworkConfig &nconf,bool saveToD } _portError = RR->node->configureVirtualNetworkPort(tPtr,_id,&_uPtr,(oldPortInitialized) ? 
ZT_VIRTUAL_NETWORK_CONFIG_OPERATION_CONFIG_UPDATE : ZT_VIRTUAL_NETWORK_CONFIG_OPERATION_UP,&ctmp); + _authenticationURL = nconf.authenticationURL; if (saveToDisk) { Dictionary *const d = new Dictionary(); @@ -1226,7 +1227,7 @@ bool Network::gate(void *tPtr,const SharedPtr &peer) try { if (_config) { Membership *m = _memberships.get(peer->address()); - if ( (_config.isPublic()) || ((m)&&(m->isAllowedOnNetwork(_config))) ) { + if ( (_config.isPublic()) || ((m)&&(m->isAllowedOnNetwork(_config, peer->identity()))) ) { if (!m) m = &(_membership(peer->address())); if (m->multicastLikeGate(now)) { @@ -1379,6 +1380,8 @@ ZT_VirtualNetworkStatus Network::_status() const return ZT_NETWORK_STATUS_NOT_FOUND; case NETCONF_FAILURE_NONE: return ((_config) ? ZT_NETWORK_STATUS_OK : ZT_NETWORK_STATUS_REQUESTING_CONFIGURATION); + case NETCONF_FAILURE_AUTHENTICATION_REQUIRED: + return ZT_NETWORK_STATUS_AUTHENTICATION_REQUIRED; default: return ZT_NETWORK_STATUS_PORT_ERROR; } @@ -1429,6 +1432,10 @@ void Network::_externalConfig(ZT_VirtualNetworkConfig *ec) const } memcpy(&ec->dns, &_config.dns, sizeof(ZT_VirtualNetworkDNS)); + + Utils::scopy(ec->authenticationURL, sizeof(ec->authenticationURL), _authenticationURL.c_str()); + ec->authenticationExpiryTime = _config.authenticationExpiryTime; + ec->ssoEnabled = _config.ssoEnabled; } void Network::_sendUpdatesToMembers(void *tPtr,const MulticastGroup *const newMulticastGroup) @@ -1480,8 +1487,11 @@ void Network::_sendUpdatesToMembers(void *tPtr,const MulticastGroup *const newMu Membership *m = (Membership *)0; Hashtable::Iterator i(_memberships); while (i.next(a,m)) { - if ( ( m->multicastLikeGate(now) || (newMulticastGroup) ) && (m->isAllowedOnNetwork(_config)) && (!std::binary_search(alwaysAnnounceTo.begin(),alwaysAnnounceTo.end(),*a)) ) - _announceMulticastGroupsTo(tPtr,*a,groups); + const Identity remoteIdentity(RR->topology->getIdentity(tPtr, *a)); + if (remoteIdentity) { + if ( ( m->multicastLikeGate(now) || (newMulticastGroup) ) && 
(m->isAllowedOnNetwork(_config, remoteIdentity)) && (!std::binary_search(alwaysAnnounceTo.begin(),alwaysAnnounceTo.end(),*a)) ) + _announceMulticastGroupsTo(tPtr,*a,groups); + } } } } diff --git a/node/Network.hpp b/node/Network.hpp index b20d8b66b..33de16911 100644 --- a/node/Network.hpp +++ b/node/Network.hpp @@ -220,6 +220,17 @@ public: _netconfFailure = NETCONF_FAILURE_NOT_FOUND; } + /** + * Set netconf failure to 'authentication required' possibly with an authorization URL + */ + inline void setAuthenticationRequired(const char *url) + { + Mutex::Lock _l(_lock); + _netconfFailure = NETCONF_FAILURE_AUTHENTICATION_REQUIRED; + _authenticationURL = (url) ? url : ""; + _config.ssoEnabled = true; + } + /** * Causes this network to request an updated configuration from its master node now * @@ -435,9 +446,11 @@ private: NETCONF_FAILURE_NONE, NETCONF_FAILURE_ACCESS_DENIED, NETCONF_FAILURE_NOT_FOUND, - NETCONF_FAILURE_INIT_FAILED + NETCONF_FAILURE_INIT_FAILED, + NETCONF_FAILURE_AUTHENTICATION_REQUIRED } _netconfFailure; int _portError; // return value from port config callback + std::string _authenticationURL; Hashtable _memberships; diff --git a/node/NetworkConfig.cpp b/node/NetworkConfig.cpp index 5259a3e32..6d148cc45 100644 --- a/node/NetworkConfig.cpp +++ b/node/NetworkConfig.cpp @@ -182,6 +182,14 @@ bool NetworkConfig::toDictionary(Dictionary &d,b if (!d.add(ZT_NETWORKCONFIG_DICT_KEY_DNS,*tmp)) return false; } + if (!d.add(ZT_NETWORKCONFIG_DICT_KEY_SSO_ENABLED, this->ssoEnabled)) return false; + if (this->ssoEnabled) { + if (this->authenticationURL[0]) { + if (!d.add(ZT_NETWORKCONFIG_DICT_KEY_AUTHENTICATION_URL, this->authenticationURL)) return false; + } + if (!d.add(ZT_NETWORKCONFIG_DICT_KEY_AUTHENTICATION_EXPIRY_TIME, this->authenticationExpiryTime)) return false; + } + delete tmp; } catch ( ... 
) { delete tmp; @@ -365,6 +373,20 @@ bool NetworkConfig::fromDictionary(const DictionaryssoEnabled = d.getB(ZT_NETWORKCONFIG_DICT_KEY_SSO_ENABLED, false); + if (this->ssoEnabled) { + if (d.get(ZT_NETWORKCONFIG_DICT_KEY_AUTHENTICATION_URL, this->authenticationURL, (unsigned int)sizeof(this->authenticationURL)) > 0) { + this->authenticationURL[sizeof(this->authenticationURL) - 1] = 0; // ensure null terminated + } else { + this->authenticationURL[0] = 0; + } + this->authenticationExpiryTime = d.getI(ZT_NETWORKCONFIG_DICT_KEY_AUTHENTICATION_EXPIRY_TIME, 0); + } else { + this->authenticationURL[0] = 0; + this->authenticationExpiryTime = 0; + } } //printf("~~~\n%s\n~~~\n",d.data()); diff --git a/node/NetworkConfig.hpp b/node/NetworkConfig.hpp index 06e0127fe..301852adf 100644 --- a/node/NetworkConfig.hpp +++ b/node/NetworkConfig.hpp @@ -94,7 +94,7 @@ namespace ZeroTier { // Dictionary capacity needed for max size network config -#define ZT_NETWORKCONFIG_DICT_CAPACITY (1024 + (sizeof(ZT_VirtualNetworkRule) * ZT_MAX_NETWORK_RULES) + (sizeof(Capability) * ZT_MAX_NETWORK_CAPABILITIES) + (sizeof(Tag) * ZT_MAX_NETWORK_TAGS) + (sizeof(CertificateOfOwnership) * ZT_MAX_CERTIFICATES_OF_OWNERSHIP)) +#define ZT_NETWORKCONFIG_DICT_CAPACITY (4096 + (sizeof(ZT_VirtualNetworkRule) * ZT_MAX_NETWORK_RULES) + (sizeof(Capability) * ZT_MAX_NETWORK_CAPABILITIES) + (sizeof(Tag) * ZT_MAX_NETWORK_TAGS) + (sizeof(CertificateOfOwnership) * ZT_MAX_CERTIFICATES_OF_OWNERSHIP)) // Dictionary capacity needed for max size network meta-data #define ZT_NETWORKCONFIG_METADATA_DICT_CAPACITY 1024 @@ -178,6 +178,12 @@ namespace ZeroTier { #define ZT_NETWORKCONFIG_DICT_KEY_CERTIFICATES_OF_OWNERSHIP "COO" // dns (binary blobs) #define ZT_NETWORKCONFIG_DICT_KEY_DNS "DNS" +// sso enabled +#define ZT_NETWORKCONFIG_DICT_KEY_SSO_ENABLED "ssoe" +// authentication URL +#define ZT_NETWORKCONFIG_DICT_KEY_AUTHENTICATION_URL "aurl" +// authentication expiry +#define ZT_NETWORKCONFIG_DICT_KEY_AUTHENTICATION_EXPIRY_TIME
"aexpt" // Legacy fields -- these are obsoleted but are included when older clients query @@ -233,7 +239,10 @@ public: tags(), certificatesOfOwnership(), type(ZT_NETWORK_TYPE_PRIVATE), - dnsCount(0) + dnsCount(0), + ssoEnabled(false), + authenticationURL(), + authenticationExpiryTime(0) { name[0] = 0; memset(specialists, 0, sizeof(uint64_t)*ZT_MAX_NETWORK_SPECIALISTS); @@ -604,6 +613,21 @@ public: * ZT pushed DNS configuration */ ZT_VirtualNetworkDNS dns; + + /** + * SSO enabled flag. + */ + bool ssoEnabled; + + /** + * Authentication URL if authentication is required + */ + char authenticationURL[2048]; + + /** + * Time current authentication expires or 0 if external authentication is disabled + */ + uint64_t authenticationExpiryTime; }; } // namespace ZeroTier diff --git a/node/NetworkController.hpp b/node/NetworkController.hpp index 29a2d8f17..3bf570c97 100644 --- a/node/NetworkController.hpp +++ b/node/NetworkController.hpp @@ -38,7 +38,8 @@ public: NC_ERROR_NONE = 0, NC_ERROR_OBJECT_NOT_FOUND = 1, NC_ERROR_ACCESS_DENIED = 2, - NC_ERROR_INTERNAL_SERVER_ERROR = 3 + NC_ERROR_INTERNAL_SERVER_ERROR = 3, + NC_ERROR_AUTHENTICATION_REQUIRED = 4 }; /** @@ -69,12 +70,17 @@ public: /** * Send a network configuration request error * + * If errorData/errorDataSize are provided they must point to a valid serialized + * Dictionary containing error data. They can be null/zero if not specified. 
+ * * @param nwid Network ID * @param requestPacketId Request packet ID or 0 if none * @param destination Destination peer Address * @param errorCode Error code + * @param errorData Data associated with error or NULL if none + * @param errorDataSize Size of errorData in bytes */ - virtual void ncSendError(uint64_t nwid,uint64_t requestPacketId,const Address &destination,NetworkController::ErrorCode errorCode) = 0; + virtual void ncSendError(uint64_t nwid,uint64_t requestPacketId,const Address &destination,NetworkController::ErrorCode errorCode, const void *errorData, unsigned int errorDataSize) = 0; }; NetworkController() {} diff --git a/node/Node.cpp b/node/Node.cpp index 05f5a247b..5b0fa8cc5 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -103,7 +103,7 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64 const unsigned long mcs = sizeof(Multicaster) + (((sizeof(Multicaster) & 0xf) != 0) ? (16 - (sizeof(Multicaster) & 0xf)) : 0); const unsigned long topologys = sizeof(Topology) + (((sizeof(Topology) & 0xf) != 0) ? (16 - (sizeof(Topology) & 0xf)) : 0); const unsigned long sas = sizeof(SelfAwareness) + (((sizeof(SelfAwareness) & 0xf) != 0) ? (16 - (sizeof(SelfAwareness) & 0xf)) : 0); - const unsigned long bc = sizeof(BondController) + (((sizeof(BondController) & 0xf) != 0) ? (16 - (sizeof(BondController) & 0xf)) : 0); + const unsigned long bc = sizeof(Bond) + (((sizeof(Bond) & 0xf) != 0) ? (16 - (sizeof(Bond) & 0xf)) : 0); m = reinterpret_cast(::malloc(16 + ts + sws + mcs + topologys + sas + bc)); if (!m) @@ -121,14 +121,14 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64 m += topologys; RR->sa = new (m) SelfAwareness(RR); m += sas; - RR->bc = new (m) BondController(RR); + RR->bc = new (m) Bond(RR); } catch ( ... 
) { if (RR->sa) RR->sa->~SelfAwareness(); if (RR->topology) RR->topology->~Topology(); if (RR->mc) RR->mc->~Multicaster(); if (RR->sw) RR->sw->~Switch(); if (RR->t) RR->t->~Trace(); - if (RR->bc) RR->bc->~BondController(); + if (RR->bc) RR->bc->~Bond(); ::free(m); throw; } @@ -147,7 +147,7 @@ Node::~Node() if (RR->mc) RR->mc->~Multicaster(); if (RR->sw) RR->sw->~Switch(); if (RR->t) RR->t->~Trace(); - if (RR->bc) RR->bc->~BondController(); + if (RR->bc) RR->bc->~Bond(); ::free(RR->rtmem); } @@ -252,18 +252,14 @@ ZT_ResultCode Node::processBackgroundTasks(void *tptr,int64_t now,volatile int64 _now = now; Mutex::Lock bl(_backgroundTasksLock); - - unsigned long bondCheckInterval = ZT_CORE_TIMER_TASK_GRANULARITY; + // Process background bond tasks + unsigned long bondCheckInterval = ZT_PING_CHECK_INVERVAL; if (RR->bc->inUse()) { - // Gratuitously ping active peers so that QoS metrics have enough data to work with (if active path monitoring is enabled) - bondCheckInterval = std::min(std::max(RR->bc->minReqPathMonitorInterval(), ZT_CORE_TIMER_TASK_GRANULARITY), ZT_PING_CHECK_INVERVAL); - if ((now - _lastGratuitousPingCheck) >= bondCheckInterval) { - Hashtable< Address,std::vector > alwaysContact; - _PingPeersThatNeedPing pfunc(RR,tptr,alwaysContact,now); - RR->topology->eachPeer<_PingPeersThatNeedPing &>(pfunc); + bondCheckInterval = std::max(RR->bc->minReqMonitorInterval(), ZT_CORE_TIMER_TASK_GRANULARITY); + if ((now - _lastGratuitousPingCheck) >= ZT_CORE_TIMER_TASK_GRANULARITY) { _lastGratuitousPingCheck = now; + RR->bc->processBackgroundTasks(tptr, now); } - RR->bc->processBackgroundTasks(tptr, now); } unsigned long timeUntilNextPingCheck = ZT_PING_CHECK_INVERVAL; @@ -512,7 +508,7 @@ ZT_PeerList *Node::peers() const } if (pi->second->bond()) { p->isBonded = pi->second->bond(); - p->bondingPolicy = pi->second->bond()->getPolicy(); + p->bondingPolicy = pi->second->bond()->policy(); p->isHealthy = pi->second->bond()->isHealthy(); p->numAliveLinks = 
pi->second->bond()->getNumAliveLinks(); p->numTotalLinks = pi->second->bond()->getNumTotalLinks(); @@ -731,7 +727,7 @@ void Node::ncSendRevocation(const Address &destination,const Revocation &rev) } } -void Node::ncSendError(uint64_t nwid,uint64_t requestPacketId,const Address &destination,NetworkController::ErrorCode errorCode) +void Node::ncSendError(uint64_t nwid,uint64_t requestPacketId,const Address &destination,NetworkController::ErrorCode errorCode, const void *errorData, unsigned int errorDataSize) { if (destination == RR->identity.address()) { SharedPtr n(network(nwid)); @@ -744,6 +740,9 @@ void Node::ncSendError(uint64_t nwid,uint64_t requestPacketId,const Address &des case NetworkController::NC_ERROR_ACCESS_DENIED: n->setAccessDenied(); break; + case NetworkController::NC_ERROR_AUTHENTICATION_REQUIRED: { + } + break; default: break; } @@ -760,8 +759,18 @@ void Node::ncSendError(uint64_t nwid,uint64_t requestPacketId,const Address &des case NetworkController::NC_ERROR_ACCESS_DENIED: outp.append((unsigned char)Packet::ERROR_NETWORK_ACCESS_DENIED_); break; + case NetworkController::NC_ERROR_AUTHENTICATION_REQUIRED: + outp.append((unsigned char)Packet::ERROR_NETWORK_AUTHENTICATION_REQUIRED); + break; } + outp.append(nwid); + + if ((errorData)&&(errorDataSize > 0)&&(errorDataSize <= 0xffff)) { + outp.append((uint16_t)errorDataSize); + outp.append(errorData, errorDataSize); + } + RR->sw->send((void *)0,outp,true); } // else we can't send an ERROR() in response to nothing, so discard } diff --git a/node/Node.hpp b/node/Node.hpp index 2bbd3b47f..52506ed9e 100644 --- a/node/Node.hpp +++ b/node/Node.hpp @@ -34,7 +34,7 @@ #include "Salsa20.hpp" #include "NetworkController.hpp" #include "Hashtable.hpp" -#include "BondController.hpp" +#include "Bond.hpp" // Bit mask for "expecting reply" hash #define ZT_EXPECTING_REPLIES_BUCKET_MASK1 255 @@ -187,7 +187,7 @@ public: inline const Identity &identity() const { return _RR.identity; } - inline BondController 
*bondController() const { return _RR.bc; } + inline Bond *bondController() const { return _RR.bc; } /** * Register that we are expecting a reply to a packet ID @@ -245,7 +245,7 @@ public: virtual void ncSendConfig(uint64_t nwid,uint64_t requestPacketId,const Address &destination,const NetworkConfig &nc,bool sendLegacyFormatConfig); virtual void ncSendRevocation(const Address &destination,const Revocation &rev); - virtual void ncSendError(uint64_t nwid,uint64_t requestPacketId,const Address &destination,NetworkController::ErrorCode errorCode); + virtual void ncSendError(uint64_t nwid,uint64_t requestPacketId,const Address &destination,NetworkController::ErrorCode errorCode, const void *errorData, unsigned int errorDataSize); inline const Address &remoteTraceTarget() const { return _remoteTraceTarget; } inline Trace::Level remoteTraceLevel() const { return _remoteTraceLevel; } diff --git a/node/Packet.hpp b/node/Packet.hpp index 78846ecdd..7219a3310 100644 --- a/node/Packet.hpp +++ b/node/Packet.hpp @@ -792,6 +792,12 @@ public: * * ERROR response payload: * <[8] 64-bit network ID> + * <[2] 16-bit length of error-related data (optional)> + * <[...] error-related data (optional)> + * + * Error related data is a Dictionary containing things like a URL + * for authentication or a human-readable error message, and is + * optional and may be absent or empty. */ VERB_NETWORK_CONFIG_REQUEST = 0x0b, @@ -1076,7 +1082,10 @@ public: ERROR_NETWORK_ACCESS_DENIED_ = 0x07, /* extra _ at end to avoid Windows name conflict */ /* Multicasts to this group are not wanted */ - ERROR_UNWANTED_MULTICAST = 0x08 + ERROR_UNWANTED_MULTICAST = 0x08, + + /* Network requires external or 2FA authentication (e.g. SSO). 
*/ + ERROR_NETWORK_AUTHENTICATION_REQUIRED = 0x09 }; template diff --git a/node/Path.hpp b/node/Path.hpp index 0839158af..753bf0ab2 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -29,8 +29,6 @@ #include "Packet.hpp" #include "RingBuffer.hpp" -#include "../osdep/Link.hpp" - /** * Maximum return value of preferenceRank() */ @@ -88,46 +86,7 @@ public: _localSocket(-1), _latency(0xffff), _addr(), - _ipScope(InetAddress::IP_SCOPE_NONE), - _lastAckReceived(0), - _lastAckSent(0), - _lastQoSMeasurement(0), - _lastThroughputEstimation(0), - _lastRefractoryUpdate(0), - _lastAliveToggle(0), - _lastEligibilityState(false), - _lastTrialBegin(0), - _refractoryPeriod(0), - _monitorInterval(0), - _upDelay(0), - _downDelay(0), - _ipvPref(0), - _mode(0), - _onlyPathOnLink(false), - _enabled(false), - _bonded(false), - _negotiated(false), - _deprecated(false), - _shouldReallocateFlows(false), - _assignedFlowCount(0), - _latencyMean(0), - _latencyVariance(0), - _packetLossRatio(0), - _packetErrorRatio(0), - _throughputMean(0), - _throughputMax(0), - _throughputVariance(0), - _allocation(0), - _byteLoad(0), - _relativeByteLoad(0), - _affinity(0), - _failoverScore(0), - _unackedBytes(0), - _packetsReceivedSinceLastAck(0), - _packetsReceivedSinceLastQoS(0), - _bytesAckedSinceLastThroughputEstimation(0), - _packetsIn(0), - _packetsOut(0) + _ipScope(InetAddress::IP_SCOPE_NONE) {} Path(const int64_t localSocket,const InetAddress &addr) : @@ -137,46 +96,7 @@ public: _localSocket(localSocket), _latency(0xffff), _addr(addr), - _ipScope(addr.ipScope()), - _lastAckReceived(0), - _lastAckSent(0), - _lastQoSMeasurement(0), - _lastThroughputEstimation(0), - _lastRefractoryUpdate(0), - _lastAliveToggle(0), - _lastEligibilityState(false), - _lastTrialBegin(0), - _refractoryPeriod(0), - _monitorInterval(0), - _upDelay(0), - _downDelay(0), - _ipvPref(0), - _mode(0), - _onlyPathOnLink(false), - _enabled(false), - _bonded(false), - _negotiated(false), - _deprecated(false), - 
_shouldReallocateFlows(false), - _assignedFlowCount(0), - _latencyMean(0), - _latencyVariance(0), - _packetLossRatio(0), - _packetErrorRatio(0), - _throughputMean(0), - _throughputMax(0), - _throughputVariance(0), - _allocation(0), - _byteLoad(0), - _relativeByteLoad(0), - _affinity(0), - _failoverScore(0), - _unackedBytes(0), - _packetsReceivedSinceLastAck(0), - _packetsReceivedSinceLastQoS(0), - _bytesAckedSinceLastThroughputEstimation(0), - _packetsIn(0), - _packetsOut(0) + _ipScope(addr.ipScope()) {} /** @@ -186,9 +106,6 @@ public: */ inline void received(const uint64_t t) { - if (!alive(t,_bonded)) { - _lastAliveToggle = _lastIn; - } _lastIn = t; } @@ -317,21 +234,11 @@ public: return (((age < (ZT_PATH_HEARTBEAT_PERIOD + 5000)) ? l : (l + 0xffff + age)) * (long)((ZT_INETADDRESS_MAX_SCOPE - _ipScope) + 1)); } - /** - * @param bonded Whether this path is part of a bond. - */ - inline void setBonded(bool bonded) { _bonded = bonded; } - - /** - * @return True if this path is currently part of a bond. - */ - inline bool bonded() { return _bonded; } - /** * @return True if this path is alive (receiving heartbeats) */ - inline bool alive(const int64_t now, bool bondingEnabled = false) const { - return (bondingEnabled && _monitorInterval) ? 
((now - _lastIn) < (_monitorInterval * 3)) : ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); + inline bool alive(const int64_t now) const { + return (now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000); } /** @@ -339,11 +246,6 @@ public: */ inline bool needsHeartbeat(const int64_t now) const { return ((now - _lastOut) >= ZT_PATH_HEARTBEAT_PERIOD); } - /** - * @return True if this path needs a heartbeat in accordance to the user-specified path monitor frequency - */ - inline bool needsGratuitousHeartbeat(const int64_t now) { return allowed() && (_monitorInterval > 0) && ((now - _lastOut) >= _monitorInterval); } - /** * @return Last time we sent something */ @@ -364,134 +266,7 @@ public: */ inline int64_t lastTrustEstablishedPacketReceived() const { return _lastTrustEstablishedPacketReceived; } - /** - * @return Time since last VERB_ACK was received - */ - inline int64_t ackAge(int64_t now) { return _lastAckReceived ? now - _lastAckReceived : 0; } - - /** - * Set or update a refractory period for the path. - * - * @param punishment How much a path should be punished - * @param pathFailure Whether this call is the result of a recent path failure - */ - inline void adjustRefractoryPeriod(int64_t now, uint32_t punishment, bool pathFailure) { - if (pathFailure) { - unsigned int suggestedRefractoryPeriod = _refractoryPeriod ? punishment + (_refractoryPeriod * 2) : punishment; - _refractoryPeriod = std::min(suggestedRefractoryPeriod, (unsigned int)ZT_MULTIPATH_MAX_REFRACTORY_PERIOD); - _lastRefractoryUpdate = 0; - } else { - uint32_t drainRefractory = 0; - if (_lastRefractoryUpdate) { - drainRefractory = (now - _lastRefractoryUpdate); - } else { - drainRefractory = (now - _lastAliveToggle); - } - _lastRefractoryUpdate = now; - if (_refractoryPeriod > drainRefractory) { - _refractoryPeriod -= drainRefractory; - } else { - _refractoryPeriod = 0; - _lastRefractoryUpdate = 0; - } - } - } - - /** - * Determine the current state of eligibility of the path. 
- * - * @param includeRefractoryPeriod Whether current punishment should be taken into consideration - * @return True if this path can be used in a bond at the current time - */ - inline bool eligible(uint64_t now, int ackSendInterval, bool includeRefractoryPeriod = false) { - if (includeRefractoryPeriod && _refractoryPeriod) { - return false; - } - bool acceptableAge = age(now) < ((_monitorInterval * 4) + _downDelay); // Simple RX age (driven by packets of any type and gratuitous VERB_HELLOs) - bool acceptableAckAge = ackAge(now) < (ackSendInterval); // Whether the remote peer is actually responding to our outgoing traffic or simply sending stuff to us - bool notTooEarly = (now - _lastAliveToggle) >= _upDelay; // Whether we've waited long enough since the link last came online - bool inTrial = (now - _lastTrialBegin) < _upDelay; // Whether this path is still in its trial period - bool currEligibility = allowed() && (((acceptableAge || acceptableAckAge) && notTooEarly) || inTrial); - return currEligibility; - } - - /** - * Record when this path first entered the bond. Each path is given a trial period where it is admitted - * to the bond without requiring observations to prove its performance or reliability. - */ - inline void startTrial(uint64_t now) { _lastTrialBegin = now; } - - /** - * @return True if a path is permitted to be used in a bond (according to user pref.) - */ - inline bool allowed() { - return _enabled - && (!_ipvPref - || ((_addr.isV4() && (_ipvPref == 4 || _ipvPref == 46 || _ipvPref == 64)) - || ((_addr.isV6() && (_ipvPref == 6 || _ipvPref == 46 || _ipvPref == 64))))); - } - - /** - * @return True if a path is preferred over another on the same physical link (according to user pref.) 
- */ - inline bool preferred() { - return _onlyPathOnLink - || (_addr.isV4() && (_ipvPref == 4 || _ipvPref == 46)) - || (_addr.isV6() && (_ipvPref == 6 || _ipvPref == 64)); - } - - /** - * @param now Current time - * @return Whether an ACK (VERB_ACK) packet needs to be emitted at this time - */ - inline bool needsToSendAck(int64_t now, int ackSendInterval) { - return ((now - _lastAckSent) >= ackSendInterval || - (_packetsReceivedSinceLastAck == ZT_QOS_TABLE_SIZE)) && _packetsReceivedSinceLastAck; - } - - /** - * @param now Current time - * @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time - */ - inline bool needsToSendQoS(int64_t now, int qosSendInterval) { - return ((_packetsReceivedSinceLastQoS >= ZT_QOS_TABLE_SIZE) || - ((now - _lastQoSMeasurement) > qosSendInterval)) && _packetsReceivedSinceLastQoS; - } - - /** - * Reset packet counters - */ - inline void resetPacketCounts() - { - _packetsIn = 0; - _packetsOut = 0; - } - - - /** - * The mean latency (computed from a sliding window.) - */ - float latencyMean() { return _latencyMean; } - - /** - * Packet delay variance (computed from a sliding window.) - */ - float latencyVariance() { return _latencyVariance; } - - /** - * The ratio of lost packets to received packets. - */ - float packetLossRatio() { return _packetLossRatio; } - - /** - * The ratio of packets that failed their MAC/CRC checks to those that did not. 
- */ - float packetErrorRatio() { return _packetErrorRatio; } - - /** - * - */ - uint8_t allocation() { return _allocation; } + void *_bondingMetricPtr; private: @@ -503,212 +278,6 @@ private: InetAddress _addr; InetAddress::IpScope _ipScope; // memoize this since it's a computed value checked often AtomicCounter __refCount; - - std::map qosStatsOut; // id:egress_time - std::map qosStatsIn; // id:now - std::map ackStatsIn; // id:len - - RingBuffer qosRecordSize; - RingBuffer qosRecordLossSamples; - RingBuffer throughputSamples; - RingBuffer packetValiditySamples; - RingBuffer _throughputVarianceSamples; - RingBuffer latencySamples; - - /** - * Last time that a VERB_ACK was received on this path. - */ - uint64_t _lastAckReceived; - - /** - * Last time that a VERB_ACK was sent out on this path. - */ - uint64_t _lastAckSent; - - /** - * Last time that a VERB_QOS_MEASUREMENT was sent out on this path. - */ - uint64_t _lastQoSMeasurement; - - /** - * Last time that the path's throughput was estimated. - */ - uint64_t _lastThroughputEstimation; - - /** - * The last time that the refractory period was updated. - */ - uint64_t _lastRefractoryUpdate; - - /** - * The last time that the path was marked as "alive". - */ - uint64_t _lastAliveToggle; - - /** - * State of eligibility at last check. Used for determining state changes. - */ - bool _lastEligibilityState; - - /** - * Timestamp indicating when this path's trial period began. - */ - uint64_t _lastTrialBegin; - - /** - * Amount of time that this path will be prevented from becoming a member of a bond. - */ - uint32_t _refractoryPeriod; - - /** - * Monitor interval specific to this path or that was inherited from the bond controller. - */ - int32_t _monitorInterval; - - /** - * Up delay interval specific to this path or that was inherited from the bond controller. - */ - uint32_t _upDelay; - - /** - * Down delay interval specific to this path or that was inherited from the bond controller. 
- */ - uint32_t _downDelay; - - /** - * IP version preference inherited from the physical link. - */ - uint8_t _ipvPref; - - /** - * Mode inherited from the physical link. - */ - uint8_t _mode; - - /** - * IP version preference inherited from the physical link. - */ - bool _onlyPathOnLink; - - /** - * Enabled state inherited from the physical link. - */ - bool _enabled; - - /** - * Whether this path is currently part of a bond. - */ - bool _bonded; - - /** - * Whether this path was intentionally negotiated by either peer. - */ - bool _negotiated; - - /** - * Whether this path has been deprecated due to performance issues. Current traffic flows - * will be re-allocated to other paths in the most non-disruptive manner (if possible), - * and new traffic will not be allocated to this path. - */ - bool _deprecated; - - /** - * Whether flows should be moved from this path. Current traffic flows will be re-allocated - * immediately. - */ - bool _shouldReallocateFlows; - - /** - * The number of flows currently assigned to this path. - */ - uint16_t _assignedFlowCount; - - /** - * The mean latency (computed from a sliding window.) - */ - float _latencyMean; - - /** - * Packet delay variance (computed from a sliding window.) - */ - float _latencyVariance; - - /** - * The ratio of lost packets to received packets. - */ - float _packetLossRatio; - - /** - * The ratio of packets that failed their MAC/CRC checks to those that did not. - */ - float _packetErrorRatio; - - /** - * The estimated mean throughput of this path. - */ - uint64_t _throughputMean; - - /** - * The maximum observed throughput of this path. - */ - uint64_t _throughputMax; - - /** - * The variance in the estimated throughput of this path. - */ - float _throughputVariance; - - /** - * The relative quality of this path to all others in the bond, [0-255]. - */ - uint8_t _allocation; - - /** - * How much load this path is under. 
- */ - uint64_t _byteLoad; - - /** - * How much load this path is under (relative to other paths in the bond.) - */ - uint8_t _relativeByteLoad; - - /** - * Relative value expressing how "deserving" this path is of new traffic. - */ - uint8_t _affinity; - - /** - * Score that indicates to what degree this path is preferred over others that - * are available to the bonding policy. (specifically for active-backup) - */ - uint32_t _failoverScore; - - /** - * Number of bytes thus far sent that have not been acknowledged by the remote peer. - */ - int64_t _unackedBytes; - - /** - * Number of packets received since the last VERB_ACK was sent to the remote peer. - */ - int32_t _packetsReceivedSinceLastAck; - - /** - * Number of packets received since the last VERB_QOS_MEASUREMENT was sent to the remote peer. - */ - int32_t _packetsReceivedSinceLastQoS; - - /** - * Bytes acknowledged via incoming VERB_ACK since the last estimation of throughput. - */ - uint64_t _bytesAckedSinceLastThroughputEstimation; - - /** - * Counters used for tracking path load. 
- */ - int _packetsIn; - int _packetsOut; }; } // namespace ZeroTier diff --git a/node/Peer.cpp b/node/Peer.cpp index fb405ad92..941bd6c48 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -50,12 +50,7 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _directPathPushCutoffCount(0), _credentialsCutoffCount(0), _echoRequestCutoffCount(0), - _uniqueAlivePathCount(0), _localMultipathSupported(false), - _remoteMultipathSupported(false), - _canUseMultipath(false), - _shouldCollectPathStatistics(0), - _bondingPolicy(0), _lastComputedAggregateMeanLatency(0) { if (!myIdentity.agree(peerIdentity,_key)) @@ -151,6 +146,10 @@ void Peer::received( _paths[replacePath].lr = now; _paths[replacePath].p = path; _paths[replacePath].priority = 1; + Mutex::Lock _l(_bond_m); + if(_bond) { + _bond->nominatePathToBond(_paths[replacePath].p, now); + } } } else { Mutex::Lock ltl(_lastTriedPath_m); @@ -229,7 +228,8 @@ void Peer::received( SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired, int32_t flowId) { - if (!_bondToPeer) { + Mutex::Lock _l(_bond_m); + if (!_bond) { Mutex::Lock _l(_paths_m); unsigned int bestPath = ZT_MAX_PEER_NETWORK_PATHS; /** @@ -253,7 +253,7 @@ SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired, int32 } return SharedPtr(); } - return _bondToPeer->getAppropriatePath(now, flowId); + return _bond->getAppropriatePath(now, flowId); } void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr &other) const @@ -444,39 +444,32 @@ void Peer::tryMemorizedPath(void *tPtr,int64_t now) void Peer::performMultipathStateCheck(void *tPtr, int64_t now) { + Mutex::Lock _l(_bond_m); + if (_bond) { + // Once enabled the Bond object persists, no need to update state + return; + } /** * Check for conditions required for multipath bonding and create a bond * if allowed. 
*/ - _localMultipathSupported = ((RR->bc->inUse()) && (ZT_PROTO_VERSION > 9)); - if (_localMultipathSupported) { - int currAlivePathCount = 0; - int duplicatePathsFound = 0; - for (unsigned int i=0;iaddress().ipsEqual2(_paths[j].p->address()) && i != j) { - duplicatePathsFound+=1; - break; - } - } - } + int numAlivePaths = 0; + for(unsigned int i=0;ialive(now)) { + numAlivePaths++; } - _uniqueAlivePathCount = (currAlivePathCount - (duplicatePathsFound / 2)); - _remoteMultipathSupported = _vProto > 9; - _canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1); } - if (_canUseMultipath && !_bondToPeer) { + _localMultipathSupported = ((numAlivePaths >= 1) && (RR->bc->inUse()) && (ZT_PROTO_VERSION > 9)); + if (_localMultipathSupported && !_bond) { if (RR->bc) { - _bondToPeer = RR->bc->createTransportTriggeredBond(RR, this); + _bond = RR->bc->createTransportTriggeredBond(RR, this); /** * Allow new bond to retroactively learn all paths known to this peer */ - if (_bondToPeer) { + if (_bond) { for (unsigned int i=0;inominatePath(_paths[i].p, now); + _bond->nominatePathToBond(_paths[i].p, now); } } } @@ -510,8 +503,7 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) if (_paths[i].p) { // Clean expired and reduced priority paths if ( ((now - _paths[i].lr) < ZT_PEER_PATH_EXPIRATION) && (_paths[i].priority == maxPriority) ) { - if ((sendFullHello)||(_paths[i].p->needsHeartbeat(now)) - || (_canUseMultipath && _paths[i].p->needsGratuitousHeartbeat(now))) { + if ((sendFullHello)||(_paths[i].p->needsHeartbeat(now))) { attemptToContactAt(tPtr,_paths[i].p->localSocket(),_paths[i].p->address(),now,sendFullHello); _paths[i].p->sent(now); sent |= (_paths[i].p->address().ss_family == AF_INET) ? 
0x1 : 0x2; @@ -591,27 +583,24 @@ void Peer::resetWithinScope(void *tPtr,InetAddress::IpScope scope,int inetAddres void Peer::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) { - if (!_shouldCollectPathStatistics || !_bondToPeer) { - return; + if (_localMultipathSupported && _bond) { + _bond->recordOutgoingPacket(path, packetId, payloadLength, verb, flowId, now); } - _bondToPeer->recordOutgoingPacket(path, packetId, payloadLength, verb, flowId, now); } void Peer::recordIncomingInvalidPacket(const SharedPtr& path) { - if (!_shouldCollectPathStatistics || !_bondToPeer) { - return; + if (_localMultipathSupported && _bond) { + _bond->recordIncomingInvalidPacket(path); } - _bondToPeer->recordIncomingInvalidPacket(path); } void Peer::recordIncomingPacket(const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) { - if (!_shouldCollectPathStatistics || !_bondToPeer) { - return; + if (_localMultipathSupported && _bond) { + _bond->recordIncomingPacket(path, packetId, payloadLength, verb, flowId, now); } - _bondToPeer->recordIncomingPacket(path, packetId, payloadLength, verb, flowId, now); } } // namespace ZeroTier diff --git a/node/Peer.hpp b/node/Peer.hpp index a064bf697..cceae3ed4 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -33,7 +33,6 @@ #include "Hashtable.hpp" #include "Mutex.hpp" #include "Bond.hpp" -#include "BondController.hpp" #include "AES.hpp" #define ZT_PEER_MAX_SERIALIZED_STATE_SIZE (sizeof(Peer) + 32 + (sizeof(Path) * 2)) @@ -305,12 +304,13 @@ public: */ inline unsigned int latency(const int64_t now) { - if (_canUseMultipath) { + if (_localMultipathSupported) { return (int)_lastComputedAggregateMeanLatency; } else { SharedPtr bp(getAppropriatePath(now,false)); - if (bp) + if (bp) { return bp->latency(); + } return 0xffff; } } @@ -419,35 +419,15 @@ public: } /** - * Rate limit gate 
for inbound ECHO requests. This rate limiter works - * by draining a certain number of requests per unit time. Each peer may - * theoretically receive up to ZT_ECHO_CUTOFF_LIMIT requests per second. + * Rate limit gate for inbound ECHO requests */ inline bool rateGateEchoRequest(const int64_t now) { - /* - // TODO: Rethink this - if (_canUseMultipath) { - _echoRequestCutoffCount++; - int numToDrain = (now - _lastEchoCheck) / ZT_ECHO_DRAINAGE_DIVISOR; - _lastEchoCheck = now; - fprintf(stderr, "ZT_ECHO_CUTOFF_LIMIT=%d, (now - _lastEchoCheck)=%d, numToDrain=%d, ZT_ECHO_DRAINAGE_DIVISOR=%d\n", ZT_ECHO_CUTOFF_LIMIT, (now - _lastEchoCheck), numToDrain, ZT_ECHO_DRAINAGE_DIVISOR); - if (_echoRequestCutoffCount > numToDrain) { - _echoRequestCutoffCount-=numToDrain; - } - else { - _echoRequestCutoffCount = 0; - } - return (_echoRequestCutoffCount < ZT_ECHO_CUTOFF_LIMIT); - } else { - if ((now - _lastEchoRequestReceived) >= (ZT_PEER_GENERAL_RATE_LIMIT)) { - _lastEchoRequestReceived = now; - return true; - } - return false; + if ((now - _lastEchoRequestReceived) >= ZT_PEER_GENERAL_RATE_LIMIT) { + _lastEchoRequestReceived = now; + return true; } - */ - return true; + return false; } /** @@ -523,16 +503,20 @@ public: } /** - * - * @return + * @return The bonding policy used to reach this peer */ - SharedPtr bond() { return _bondToPeer; } + SharedPtr bond() { return _bond; } /** - * - * @return + * @return The bonding policy used to reach this peer */ - inline int8_t bondingPolicy() { return _bondingPolicy; } + inline int8_t bondingPolicy() { + Mutex::Lock _l(_paths_m); + if (_bond) { + return _bond->policy(); + } + return ZT_BOND_POLICY_NONE; + } //inline const AES *aesKeysIfSupported() const //{ return (const AES *)0; } @@ -582,6 +566,7 @@ private: _PeerPath _paths[ZT_MAX_PEER_NETWORK_PATHS]; Mutex _paths_m; + Mutex _bond_m; Identity _id; @@ -591,18 +576,13 @@ private: AtomicCounter __refCount; - bool _remotePeerMultipathEnabled; - int _uniqueAlivePathCount; bool 
_localMultipathSupported; - bool _remoteMultipathSupported; - bool _canUseMultipath; volatile bool _shouldCollectPathStatistics; - volatile int8_t _bondingPolicy; int32_t _lastComputedAggregateMeanLatency; - SharedPtr _bondToPeer; + SharedPtr _bond; }; } // namespace ZeroTier diff --git a/node/RuntimeEnvironment.hpp b/node/RuntimeEnvironment.hpp index 4603afa0f..019645513 100644 --- a/node/RuntimeEnvironment.hpp +++ b/node/RuntimeEnvironment.hpp @@ -30,7 +30,7 @@ class Multicaster; class NetworkController; class SelfAwareness; class Trace; -class BondController; +class Bond; /** * Holds global state for an instance of ZeroTier::Node @@ -76,7 +76,7 @@ public: Multicaster *mc; Topology *topology; SelfAwareness *sa; - BondController *bc; + Bond *bc; // This node's identity and string representations thereof Identity identity; diff --git a/node/Switch.cpp b/node/Switch.cpp index b2040455b..2721cf92f 100644 --- a/node/Switch.cpp +++ b/node/Switch.cpp @@ -1003,14 +1003,12 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId) const SharedPtr peer(RR->topology->getPeer(tPtr,destination)); if (peer) { - if ((peer->bondingPolicy() == ZT_BONDING_POLICY_BROADCAST) + if ((peer->bondingPolicy() == ZT_BOND_POLICY_BROADCAST) && (packet.verb() == Packet::VERB_FRAME || packet.verb() == Packet::VERB_EXT_FRAME)) { const SharedPtr relay(RR->topology->getUpstreamPeer()); Mutex::Lock _l(peer->_paths_m); for(int i=0;i_paths[i].p && peer->_paths[i].p->alive(now)) { - char pathStr[128]; - peer->_paths[i].p->address().toString(pathStr); _sendViaSpecificPath(tPtr,peer,peer->_paths[i].p,now,packet,encrypt,flowId); } } @@ -1047,7 +1045,6 @@ void Switch::_sendViaSpecificPath(void *tPtr,SharedPtr peer,SharedPtrkey(),encrypt,peer->aesKeysIfSupported()); RR->node->expectReplyTo(packet.packetId()); } diff --git a/node/Trace.cpp b/node/Trace.cpp index 05022e95b..8443a1217 100644 --- a/node/Trace.cpp +++ b/node/Trace.cpp @@ -69,7 +69,7 @@ void 
Trace::peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId, char tmp[128]; if (!path) return; // sanity check - ZT_LOCAL_TRACE(tPtr,RR,"trying unknown path %s to %.10llx (packet %.16llx verb %d local socket %lld network %.16llx)",path->address().toString(tmp),peer.address().toInt(),packetId,(double)verb,path->localSocket(),networkId); + ZT_LOCAL_TRACE(tPtr,RR,"trying unknown path %s to %.10llx (packet %.16llx verb %d local socket %lld network %.16llx)",path->address().toString(tmp),peer.address().toInt(),packetId,verb,path->localSocket(),networkId); std::pair byn; if (networkId) { Mutex::Lock l(_byNet_m); _byNet.get(networkId,byn); } diff --git a/node/Utils.cpp b/node/Utils.cpp index 345326984..a0bf07077 100644 --- a/node/Utils.cpp +++ b/node/Utils.cpp @@ -28,10 +28,12 @@ #include #include #include +#ifdef ZT_ARCH_ARM_HAS_NEON #ifdef __LINUX__ #include #endif #endif +#endif #ifdef __WINDOWS__ #include diff --git a/objects.mk b/objects.mk index cc6f96ee2..61df844bc 100644 --- a/objects.mk +++ b/objects.mk @@ -28,8 +28,7 @@ CORE_OBJS=\ node/Topology.o \ node/Trace.o \ node/Utils.o \ - node/Bond.o \ - node/BondController.o + node/Bond.o ONE_OBJS=\ controller/EmbeddedNetworkController.o \ diff --git a/one.cpp b/one.cpp index 062781745..26bcb8cdf 100644 --- a/one.cpp +++ b/one.cpp @@ -84,7 +84,7 @@ #include "osdep/Http.hpp" #include "osdep/Thread.hpp" -#include "node/BondController.hpp" +#include "node/Bond.hpp" #include "service/OneService.hpp" @@ -393,7 +393,9 @@ static int cli(int argc,char **argv) char tmp[256]; std::string addr = path["address"]; const int64_t now = OSUtils::now(); - OSUtils::ztsnprintf(tmp,sizeof(tmp),"%s;%lld;%lld",addr.c_str(),now - (int64_t)path["lastSend"],now - (int64_t)path["lastReceive"]); + int64_t lastSendDiff = (uint64_t)path["lastSend"] ? now - (uint64_t)path["lastSend"] : -1; + int64_t lastReceiveDiff = (uint64_t)path["lastReceive"] ? 
now - (uint64_t)path["lastReceive"] : -1; + OSUtils::ztsnprintf(tmp,sizeof(tmp),"%s;%lld;%lld",addr.c_str(),lastSendDiff,lastReceiveDiff); bestPath = tmp; break; } @@ -460,7 +462,9 @@ static int cli(int argc,char **argv) char tmp[256]; std::string addr = path["address"]; const int64_t now = OSUtils::now(); - OSUtils::ztsnprintf(tmp,sizeof(tmp),"%-8lld %-8lld %s",now - (int64_t)path["lastSend"],now - (int64_t)path["lastReceive"],addr.c_str()); + int64_t lastSendDiff = (uint64_t)path["lastSend"] ? now - (uint64_t)path["lastSend"] : -1; + int64_t lastReceiveDiff = (uint64_t)path["lastReceive"] ? now - (uint64_t)path["lastReceive"] : -1; + OSUtils::ztsnprintf(tmp,sizeof(tmp),"%-8lld %-8lld %s",lastSendDiff,lastReceiveDiff,addr.c_str()); bestPath = std::string("DIRECT ") + tmp; break; } @@ -492,14 +496,13 @@ static int cli(int argc,char **argv) return 1; } } else if (command == "bond") { - /* zerotier-cli bond */ + /* zerotier-cli bond */ if (arg1.empty()) { - printf("(bond) command is missing required arugments" ZT_EOL_S); + printf("(bond) command is missing required arguments" ZT_EOL_S); return 2; } /* zerotier-cli bond list */ if (arg1 == "list") { - fprintf(stderr, "zerotier-cli bond list\n"); const unsigned int scode = Http::GET(1024 * 1024 * 16,60000,(const struct sockaddr *)&addr,"/bonds",requestHeaders,responseHeaders,responseBody); if (scode == 0) { printf("Error connecting to the ZeroTier service: %s\n\nPlease check that the service is running and that TCP port 9993 can be contacted via 127.0.0.1." 
ZT_EOL_S, responseBody.c_str()); @@ -525,11 +528,11 @@ static int cli(int argc,char **argv) for(unsigned long k=0;k= ZT_BONDING_POLICY_NONE && bondingPolicy <= ZT_BONDING_POLICY_BALANCE_AWARE) { - policyStr = BondController::getPolicyStrByCode(bondingPolicy); + if (bondingPolicy >= ZT_BOND_POLICY_NONE && bondingPolicy <= ZT_BOND_POLICY_BALANCE_AWARE) { + policyStr = Bond::getPolicyStrByCode(bondingPolicy); } printf("%10s %32s %8s %d/%d" ZT_EOL_S, OSUtils::jsonString(p ["address"],"-").c_str(), @@ -560,11 +563,7 @@ static int cli(int argc,char **argv) return 1; } } - else if (arg1.length() == 10) { /* zerotier-cli bond enable */ - if (arg2 == "enable") { - fprintf(stderr, "zerotier-cli bond enable\n"); - return 0; - } + else if (arg1.length() == 10) { if (arg2 == "rotate") { /* zerotier-cli bond rotate */ fprintf(stderr, "zerotier-cli bond rotate\n"); requestHeaders["Content-Type"] = "application/json"; @@ -618,7 +617,6 @@ static int cli(int argc,char **argv) if (json) { printf("%s" ZT_EOL_S,OSUtils::jsonDump(j).c_str()); } else { - bool bFoundBond = false; std::string healthStr; if (OSUtils::jsonInt(j["isHealthy"],0)) { healthStr = "Healthy"; @@ -629,15 +627,15 @@ static int cli(int argc,char **argv) int numTotalLinks = OSUtils::jsonInt(j["numTotalLinks"],0); printf("Peer : %s\n", arg1.c_str()); printf("Bond : %s\n", OSUtils::jsonString(j["bondingPolicy"],"-").c_str()); - //if (bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { - printf("Link Select Method : %d\n", OSUtils::jsonInt(j["linkSelectMethod"],0)); + //if (bondingPolicy == ZT_BOND_POLICY_ACTIVE_BACKUP) { + printf("Link Select Method : %d\n", (int)OSUtils::jsonInt(j["linkSelectMethod"],0)); //} printf("Status : %s\n", healthStr.c_str()); printf("Links : %d/%d\n", numAliveLinks, numTotalLinks); - printf("Failover Interval : %d (ms)\n", OSUtils::jsonInt(j["failoverInterval"],0)); - printf("Up Delay : %d (ms)\n", OSUtils::jsonInt(j["upDelay"],0)); - printf("Down Delay : %d (ms)\n", 
OSUtils::jsonInt(j["downDelay"],0)); - printf("Packets Per Link : %d (ms)\n", OSUtils::jsonInt(j["packetsPerLink"],0)); + printf("Failover Interval : %d (ms)\n", (int)OSUtils::jsonInt(j["failoverInterval"],0)); + printf("Up Delay : %d (ms)\n", (int)OSUtils::jsonInt(j["upDelay"],0)); + printf("Down Delay : %d (ms)\n", (int)OSUtils::jsonInt(j["downDelay"],0)); + printf("Packets Per Link : %d (ms)\n", (int)OSUtils::jsonInt(j["packetsPerLink"],0)); nlohmann::json &p = j["links"]; if (p.is_array()) { printf("\n Interface Name\t\t\t\t\t Path\t Alive\n"); @@ -649,7 +647,7 @@ static int cli(int argc,char **argv) i, OSUtils::jsonString(p[i]["ifname"],"-").c_str(), OSUtils::jsonString(p[i]["path"],"-").c_str(), - OSUtils::jsonInt(p[i]["alive"],0)); + (int)OSUtils::jsonInt(p[i]["alive"],0)); } printf("\n Latency Jitter Loss Error Speed Alloc\n"); for(int i=0; i<80; i++) { printf("-"); } @@ -662,8 +660,8 @@ static int cli(int argc,char **argv) OSUtils::jsonDouble(p[i]["latencyVariance"], 0), OSUtils::jsonDouble(p[i]["packetLossRatio"], 0), OSUtils::jsonDouble(p[i]["packetErrorRatio"], 0), - OSUtils::jsonInt(p[i]["givenLinkSpeed"], 0), - OSUtils::jsonInt(p[i]["allocation"], 0)); + (int)OSUtils::jsonInt(p[i]["givenLinkSpeed"], 0), + (int)OSUtils::jsonInt(p[i]["allocation"], 0)); } } } @@ -676,7 +674,7 @@ static int cli(int argc,char **argv) } } /* zerotier-cli bond command was malformed in some way */ - printf("(bond) command is missing required arugments" ZT_EOL_S); + printf("(bond) command is missing required arguments" ZT_EOL_S); return 2; const unsigned int scode = Http::GET(1024 * 1024 * 16,60000,(const struct sockaddr *)&addr,"/bonds",requestHeaders,responseHeaders,responseBody); if (scode == 0) { @@ -711,14 +709,13 @@ static int cli(int argc,char **argv) if (j.is_array()) { for(unsigned long k=0;k= ZT_BONDING_POLICY_NONE && bondingPolicy <= ZT_BONDING_POLICY_BALANCE_AWARE) { - policyStr = BondController::getPolicyStrByCode(bondingPolicy); + if (bondingPolicy >= 
ZT_BOND_POLICY_NONE && bondingPolicy <= ZT_BOND_POLICY_BALANCE_AWARE) { + policyStr = Bond::getPolicyStrByCode(bondingPolicy); } printf("%10s %32s %8s %d/%d" ZT_EOL_S, @@ -789,14 +786,23 @@ static int cli(int argc,char **argv) } } if (aa.length() == 0) aa = "-"; + const std::string status = OSUtils::jsonString(n["status"],"-"); printf("200 listnetworks %s %s %s %s %s %s %s" ZT_EOL_S, OSUtils::jsonString(n["nwid"],"-").c_str(), OSUtils::jsonString(n["name"],"-").c_str(), OSUtils::jsonString(n["mac"],"-").c_str(), - OSUtils::jsonString(n["status"],"-").c_str(), + status.c_str(), OSUtils::jsonString(n["type"],"-").c_str(), OSUtils::jsonString(n["portDeviceName"],"-").c_str(), aa.c_str()); + if (OSUtils::jsonBool(n["ssoEnabled"], false)) { + uint64_t authenticationExpiryTime = n["authenticationExpiryTime"]; + if (status == "AUTHENTICATION_REQUIRED") { + printf(" AUTH EXPIRED, URL: %s" ZT_EOL_S, OSUtils::jsonString(n["authenticationURL"], "(null)").c_str()); + } else if (status == "OK") { + printf(" AUTH OK, expires in: %lld seconds" ZT_EOL_S, ((int64_t)authenticationExpiryTime - OSUtils::now()) / 1000LL); + } + } } } } @@ -1310,7 +1316,6 @@ static int cli(int argc,char **argv) struct ifconf ifc; char buf[1024]; char stringBuffer[128]; - int success = 0; int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP); diff --git a/selftest.cpp b/selftest.cpp index 357e9a026..42e9bc232 100644 --- a/selftest.cpp +++ b/selftest.cpp @@ -561,8 +561,8 @@ static int testCertificate() std::cout << idA.address().toString(buf) << ", " << idB.address().toString(buf) << std::endl; std::cout << "[certificate] Generating certificates A and B..."; - CertificateOfMembership cA(10000,100,1,idA.address()); - CertificateOfMembership cB(10099,100,1,idB.address()); + CertificateOfMembership cA(10000,100,1,idA); + CertificateOfMembership cB(10099,100,1,idB); std::cout << std::endl; std::cout << "[certificate] Signing certificates A and B with authority..."; @@ -574,13 +574,13 @@ static int 
testCertificate() //std::cout << "[certificate] B: " << cB.toString() << std::endl; std::cout << "[certificate] A agrees with B and B with A... "; - if (cA.agreesWith(cB)) + if (cA.agreesWith(cB, idB)) std::cout << "yes, "; else { std::cout << "FAIL" << std::endl; return -1; } - if (cB.agreesWith(cA)) + if (cB.agreesWith(cA, idA)) std::cout << "yes." << std::endl; else { std::cout << "FAIL" << std::endl; @@ -588,18 +588,18 @@ static int testCertificate() } std::cout << "[certificate] Generating two certificates that should not agree..."; - cA = CertificateOfMembership(10000,100,1,idA.address()); - cB = CertificateOfMembership(10101,100,1,idB.address()); + cA = CertificateOfMembership(10000,100,1,idA); + cB = CertificateOfMembership(10101,100,1,idB); std::cout << std::endl; std::cout << "[certificate] A agrees with B and B with A... "; - if (!cA.agreesWith(cB)) + if (!cA.agreesWith(cB, idB)) std::cout << "no, "; else { std::cout << "FAIL" << std::endl; return -1; } - if (!cB.agreesWith(cA)) + if (!cB.agreesWith(cA, idA)) std::cout << "no." 
<< std::endl; else { std::cout << "FAIL" << std::endl; diff --git a/service/OneService.cpp b/service/OneService.cpp index 6f75dbdff..dbbac3560 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -49,7 +49,6 @@ #include "../osdep/Binder.hpp" #include "../osdep/ManagedRoute.hpp" #include "../osdep/BlockingQueue.hpp" -#include "../osdep/Link.hpp" #include "OneService.hpp" #include "SoftwareUpdater.hpp" @@ -184,6 +183,7 @@ static void _networkToJson(nlohmann::json &nj,const ZT_VirtualNetworkConfig *nc, case ZT_NETWORK_STATUS_NOT_FOUND: nstatus = "NOT_FOUND"; break; case ZT_NETWORK_STATUS_PORT_ERROR: nstatus = "PORT_ERROR"; break; case ZT_NETWORK_STATUS_CLIENT_TOO_OLD: nstatus = "CLIENT_TOO_OLD"; break; + case ZT_NETWORK_STATUS_AUTHENTICATION_REQUIRED: nstatus = "AUTHENTICATION_REQUIRED"; break; } switch(nc->type) { case ZT_NETWORK_TYPE_PRIVATE: ntype = "PRIVATE"; break; @@ -251,6 +251,9 @@ static void _networkToJson(nlohmann::json &nj,const ZT_VirtualNetworkConfig *nc, } nj["dns"] = m; + nj["authenticationURL"] = nc->authenticationURL; + nj["authenticationExpiryTime"] = nc->authenticationExpiryTime; + nj["ssoEnabled"] = nc->ssoEnabled; } static void _peerToJson(nlohmann::json &pj,const ZT_Peer *peer) @@ -300,12 +303,11 @@ static void _peerToJson(nlohmann::json &pj,const ZT_Peer *peer) static void _bondToJson(nlohmann::json &pj, SharedPtr &bond) { - char tmp[256]; uint64_t now = OSUtils::now(); - int bondingPolicy = bond->getPolicy(); - pj["bondingPolicy"] = BondController::getPolicyStrByCode(bondingPolicy); - if (bondingPolicy == ZT_BONDING_POLICY_NONE) { + int bondingPolicy = bond->policy(); + pj["bondingPolicy"] = Bond::getPolicyStrByCode(bondingPolicy); + if (bondingPolicy == ZT_BOND_POLICY_NONE) { return; } @@ -315,15 +317,15 @@ static void _bondToJson(nlohmann::json &pj, SharedPtr &bond) pj["failoverInterval"] = bond->getFailoverInterval(); pj["downDelay"] = bond->getDownDelay(); pj["upDelay"] = bond->getUpDelay(); - if (bondingPolicy == 
ZT_BONDING_POLICY_BALANCE_RR) { + if (bondingPolicy == ZT_BOND_POLICY_BALANCE_RR) { pj["packetsPerLink"] = bond->getPacketsPerLink(); } - if (bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) { + if (bondingPolicy == ZT_BOND_POLICY_ACTIVE_BACKUP) { pj["linkSelectMethod"] = bond->getLinkSelectMethod(); } nlohmann::json pa = nlohmann::json::array(); - std::vector< SharedPtr > paths = bond->getPeer()->paths(now); + std::vector< SharedPtr > paths = bond->paths(now); for(unsigned int i=0;i &bond) nlohmann::json j; j["ifname"] = bond->getLink(paths[i])->ifname(); j["path"] = pathStr; + /* j["alive"] = paths[i]->alive(now,true); j["bonded"] = paths[i]->bonded(); j["latencyMean"] = paths[i]->latencyMean(); @@ -340,6 +343,7 @@ static void _bondToJson(nlohmann::json &pj, SharedPtr &bond) j["packetErrorRatio"] = paths[i]->packetErrorRatio(); j["givenLinkSpeed"] = 1000; j["allocation"] = paths[i]->allocation(); + */ pa.push_back(j); } pj["links"] = pa; @@ -535,6 +539,12 @@ public: memset(&config, 0, sizeof(ZT_VirtualNetworkConfig)); } + ~NetworkState() + { + this->managedRoutes.clear(); + this->tap.reset(); + } + std::shared_ptr tap; ZT_VirtualNetworkConfig config; // memcpy() of raw config from core std::vector managedIps; @@ -573,6 +583,7 @@ public: Mutex _run_m; RedisConfig *_rc; + std::string _ssoRedirectURL; // end member variables ---------------------------------------------------- @@ -610,6 +621,7 @@ public: #endif ,_run(true) ,_rc(NULL) + ,_ssoRedirectURL() { _ports[0] = 0; _ports[1] = 0; @@ -723,25 +735,22 @@ public: OSUtils::ztsnprintf(portstr,sizeof(portstr),"%u",_ports[0]); OSUtils::writeFile((_homePath + ZT_PATH_SEPARATOR_S "zerotier-one.port").c_str(),std::string(portstr)); - // Attempt to bind to a secondary port chosen from our ZeroTier address. + // Attempt to bind to a secondary port. // This exists because there are buggy NATs out there that fail if more // than one device behind the same NAT tries to use the same internal // private address port number. 
Buggy NATs are a running theme. + // + // This used to pick the secondary port based on the node ID until we + // discovered another problem: buggy routers and malicious traffic + // "detection". A lot of routers have such things built in these days + // and mis-detect ZeroTier traffic as malicious and block it resulting + // in a node that appears to be in a coma. Secondary ports are now + // randomized on startup. if (_allowSecondaryPort) { if (_secondaryPort) { _ports[1] = _secondaryPort; } else { - _ports[1] = 20000 + ((unsigned int)_node->address() % 45500); - for(int i=0;;++i) { - if (i > 1000) { - _ports[1] = 0; - break; - } else if (++_ports[1] >= 65536) { - _ports[1] = 20000; - } - if (_trialBind(_ports[1])) - break; - } + _ports[1] = _getRandomPort(); } } #ifdef ZT_USE_MINIUPNPC @@ -753,7 +762,7 @@ public: if (_tertiaryPort) { _ports[2] = _tertiaryPort; } else { - _ports[2] = _ports[1]; + _ports[2] = 20000 + (_ports[0] % 40000); for(int i=0;;++i) { if (i > 1000) { _ports[2] = 0; @@ -779,6 +788,9 @@ public: // Network controller is now enabled by default for desktop and server _controller = new EmbeddedNetworkController(_node,_homePath.c_str(),_controllerDbPath.c_str(),_ports[0], _rc); + if (!_ssoRedirectURL.empty()) { + _controller->setSSORedirectURL(_ssoRedirectURL); + } _node->setNetconfMaster((void *)_controller); // Join existing networks in networks.d @@ -811,6 +823,7 @@ public: int64_t lastCleanedPeersDb = 0; int64_t lastLocalInterfaceAddressCheck = (clockShouldBe - ZT_LOCAL_INTERFACE_CHECK_INTERVAL) + 15000; // do this in 15s to give portmapper time to configure and other things time to settle int64_t lastLocalConfFileCheck = OSUtils::now(); + int64_t lastOnline = lastLocalConfFileCheck; for(;;) { _run_m.lock(); if (!_run) { @@ -852,6 +865,16 @@ public: } } + // If secondary port is not configured to a constant value and we've been offline for a while, + // bind a new secondary port. 
This is a workaround for a "coma" issue caused by buggy NATs that stop + // working on one port after a while. + if (_node->online()) { + lastOnline = now; + } else if ((_secondaryPort == 0)&&((now - lastOnline) > ZT_PATH_HEARTBEAT_PERIOD)) { + _secondaryPort = _getRandomPort(); + lastBindRefresh = 0; + } + // Refresh bindings in case device's interfaces have changed, and also sync routes to update any shadow routes (e.g. shadow default) if (((now - lastBindRefresh) >= (_node->bondController()->inUse() ? ZT_BINDER_REFRESH_PERIOD / 4 : ZT_BINDER_REFRESH_PERIOD))||(restarted)) { lastBindRefresh = now; @@ -1012,8 +1035,11 @@ public: } } + // Make a copy so lookups don't modify in place; + json lc(_localConfig); + // Get any trusted paths in local.conf (we'll parse the rest of physical[] elsewhere) - json &physical = _localConfig["physical"]; + json &physical = lc["physical"]; if (physical.is_object()) { for(json::iterator phy(physical.begin());phy!=physical.end();++phy) { InetAddress net(OSUtils::jsonString(phy.key(),"").c_str()); @@ -1030,20 +1056,22 @@ public: } } - json &settings = _localConfig["settings"]; + json &settings = lc["settings"]; if (settings.is_object()) { // Allow controller DB path to be put somewhere else const std::string cdbp(OSUtils::jsonString(settings["controllerDbPath"],"")); if (cdbp.length() > 0) _controllerDbPath = cdbp; + _ssoRedirectURL = OSUtils::jsonString(settings["ssoRedirectURL"], ""); + #ifdef ZT_CONTROLLER_USE_LIBPQ // TODO: Redis config json &redis = settings["redis"]; if (redis.is_object() && _rc == NULL) { _rc = new RedisConfig; _rc->hostname = OSUtils::jsonString(redis["hostname"],""); - _rc->port = redis["port"]; + _rc->port = OSUtils::jsonInt(redis["port"],0); _rc->password = OSUtils::jsonString(redis["password"],""); _rc->clusterMode = OSUtils::jsonBool(redis["clusterMode"], false); } @@ -1259,7 +1287,7 @@ public: _bondToJson(res,bond); scode = 200; } else { - fprintf(stderr, "unable to find bond to peer %llx\n", id); + 
fprintf(stderr, "unable to find bond to peer %llx\n", (unsigned long long)id); scode = 400; } } @@ -1271,8 +1299,11 @@ public: } else { scode = 400; /* bond controller is not enabled */ } - } - if (ps[0] == "status") { + } else if (ps[0] == "config") { + Mutex::Lock lc(_localConfig_m); + res = _localConfig; + scode = 200; + } else if (ps[0] == "status") { ZT_NodeStatus status; _node->status(&status); @@ -1466,7 +1497,7 @@ public: if (bond) { scode = bond->abForciblyRotateLink() ? 200 : 400; } else { - fprintf(stderr, "unable to find bond to peer %llx\n", id); + fprintf(stderr, "unable to find bond to peer %llx\n", (unsigned long long)id); scode = 400; } } @@ -1478,8 +1509,35 @@ public: } else { scode = 400; /* bond controller is not enabled */ } - } - if (ps[0] == "moon") { + } else if (ps[0] == "config") { + // Right now we only support writing the things the UI supports changing. + if (ps.size() == 2) { + if (ps[1] == "settings") { + try { + json j(OSUtils::jsonParse(body)); + if (j.is_object()) { + Mutex::Lock lcl(_localConfig_m); + json lc(_localConfig); + for(json::const_iterator s(j.begin());s!=j.end();++s) { + lc["settings"][s.key()] = s.value(); + } + std::string lcStr = OSUtils::jsonDump(lc, 4); + if (OSUtils::writeFile((_homePath + ZT_PATH_SEPARATOR_S "local.conf").c_str(), lcStr)) { + _localConfig = lc; + } + } else { + scode = 400; + } + } catch ( ... 
) { + scode = 400; + } + } else { + scode = 404; + } + } else { + scode = 404; + } + } else if (ps[0] == "moon") { if (ps.size() == 2) { uint64_t seed = 0; @@ -1711,11 +1769,11 @@ public: if (basePolicyStr.empty()) { fprintf(stderr, "error: no base policy was specified for custom policy (%s)\n", customPolicyStr.c_str()); } - if (_node->bondController()->getPolicyCodeByStr(basePolicyStr) == ZT_BONDING_POLICY_NONE) { + if (_node->bondController()->getPolicyCodeByStr(basePolicyStr) == ZT_BOND_POLICY_NONE) { fprintf(stderr, "error: custom policy (%s) is invalid, unknown base policy (%s).\n", customPolicyStr.c_str(), basePolicyStr.c_str()); continue; - } if (_node->bondController()->getPolicyCodeByStr(customPolicyStr) != ZT_BONDING_POLICY_NONE) { + } if (_node->bondController()->getPolicyCodeByStr(customPolicyStr) != ZT_BOND_POLICY_NONE) { fprintf(stderr, "error: custom policy (%s) will be ignored, cannot use standard policy names for custom policies.\n", customPolicyStr.c_str()); continue; @@ -1744,20 +1802,12 @@ public: newTemplateBond->setUserQualityWeights(weights,ZT_QOS_WEIGHT_SIZE); } // Bond-specific properties - newTemplateBond->setOverflowMode(OSUtils::jsonInt(customPolicy["overflow"],false)); newTemplateBond->setUpDelay(OSUtils::jsonInt(customPolicy["upDelay"],-1)); newTemplateBond->setDownDelay(OSUtils::jsonInt(customPolicy["downDelay"],-1)); newTemplateBond->setFlowRebalanceStrategy(OSUtils::jsonInt(customPolicy["flowRebalanceStrategy"],(uint64_t)0)); newTemplateBond->setFailoverInterval(OSUtils::jsonInt(customPolicy["failoverInterval"],(uint64_t)0)); newTemplateBond->setPacketsPerLink(OSUtils::jsonInt(customPolicy["packetsPerLink"],-1)); - std::string linkMonitorStrategyStr(OSUtils::jsonString(customPolicy["linkMonitorStrategy"],"")); - uint8_t linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DEFAULT; - if (linkMonitorStrategyStr == "passive") { linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE; } - if (linkMonitorStrategyStr == 
"active") { linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_ACTIVE; } - if (linkMonitorStrategyStr == "dynamic") { linkMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; } - newTemplateBond->setLinkMonitorStrategy(linkMonitorStrategy); - // Policy-Specific link set json &links = customPolicy["links"]; for (json::iterator linkItr = links.begin(); linkItr != links.end();++linkItr) { @@ -1773,40 +1823,40 @@ public: speed, alloc, linkNameStr.c_str()); enabled = false; } - uint32_t upDelay = OSUtils::jsonInt(link["upDelay"],-1); - uint32_t downDelay = OSUtils::jsonInt(link["downDelay"],-1); + //uint32_t upDelay = OSUtils::jsonInt(link["upDelay"],-1); + //uint32_t downDelay = OSUtils::jsonInt(link["downDelay"],-1); uint8_t ipvPref = OSUtils::jsonInt(link["ipvPref"],0); - uint32_t linkMonitorInterval = OSUtils::jsonInt(link["monitorInterval"],(uint64_t)0); + //uint32_t linkMonitorInterval = OSUtils::jsonInt(link["monitorInterval"],(uint64_t)0); std::string failoverToStr(OSUtils::jsonString(link["failoverTo"],"")); // Mode std::string linkModeStr(OSUtils::jsonString(link["mode"],"spare")); - uint8_t linkMode = ZT_MULTIPATH_SLAVE_MODE_SPARE; - if (linkModeStr == "primary") { linkMode = ZT_MULTIPATH_SLAVE_MODE_PRIMARY; } - if (linkModeStr == "spare") { linkMode = ZT_MULTIPATH_SLAVE_MODE_SPARE; } + uint8_t linkMode = ZT_BOND_SLAVE_MODE_SPARE; + if (linkModeStr == "primary") { linkMode = ZT_BOND_SLAVE_MODE_PRIMARY; } + if (linkModeStr == "spare") { linkMode = ZT_BOND_SLAVE_MODE_SPARE; } // ipvPref if ((ipvPref != 0) && (ipvPref != 4) && (ipvPref != 6) && (ipvPref != 46) && (ipvPref != 64)) { fprintf(stderr, "error: invalid ipvPref value (%d), link disabled.\n", ipvPref); enabled = false; } - if (linkMode == ZT_MULTIPATH_SLAVE_MODE_SPARE && failoverToStr.length()) { + if (linkMode == ZT_BOND_SLAVE_MODE_SPARE && failoverToStr.length()) { fprintf(stderr, "error: cannot specify failover links for spares, link disabled.\n"); failoverToStr = ""; enabled = 
false; } - _node->bondController()->addCustomLink(customPolicyStr, new Link(linkNameStr,ipvPref,speed,linkMonitorInterval,upDelay,downDelay,enabled,linkMode,failoverToStr,alloc)); + _node->bondController()->addCustomLink(customPolicyStr, new Link(linkNameStr,ipvPref,speed,enabled,linkMode,failoverToStr,alloc)); } std::string linkSelectMethodStr(OSUtils::jsonString(customPolicy["activeReselect"],"optimize")); if (linkSelectMethodStr == "always") { - newTemplateBond->setLinkSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS); + newTemplateBond->setLinkSelectMethod(ZT_BOND_RESELECTION_POLICY_ALWAYS); } if (linkSelectMethodStr == "better") { - newTemplateBond->setLinkSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_BETTER); + newTemplateBond->setLinkSelectMethod(ZT_BOND_RESELECTION_POLICY_BETTER); } if (linkSelectMethodStr == "failure") { - newTemplateBond->setLinkSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_FAILURE); + newTemplateBond->setLinkSelectMethod(ZT_BOND_RESELECTION_POLICY_FAILURE); } if (linkSelectMethodStr == "optimize") { - newTemplateBond->setLinkSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE); + newTemplateBond->setLinkSelectMethod(ZT_BOND_RESELECTION_POLICY_OPTIMIZE); } if (newTemplateBond->getLinkSelectMethod() < 0 || newTemplateBond->getLinkSelectMethod() > 3) { fprintf(stderr, "warning: invalid value (%s) for linkSelectMethod, assuming mode: always\n", linkSelectMethodStr.c_str()); @@ -1839,7 +1889,7 @@ public: _secondaryPort = (unsigned int)OSUtils::jsonInt(settings["secondaryPort"],0); _tertiaryPort = (unsigned int)OSUtils::jsonInt(settings["tertiaryPort"],0); if (_secondaryPort != 0 || _tertiaryPort != 0) { - fprintf(stderr,"WARNING: using manually-specified ports. This can cause NAT issues." ZT_EOL_S); + fprintf(stderr,"WARNING: using manually-specified secondary and/or tertiary ports. This can cause NAT issues." 
ZT_EOL_S); } _portMappingEnabled = OSUtils::jsonBool(settings["portMappingEnabled"],true); @@ -2028,7 +2078,7 @@ public: unsigned int mostMatchingPrefixBits = 0; for(std::set::const_iterator i(myIps.begin());i!=myIps.end();++i) { const unsigned int matchingPrefixBits = i->matchingPrefixBits(*target); - if (matchingPrefixBits >= mostMatchingPrefixBits) { + if (matchingPrefixBits >= mostMatchingPrefixBits && ((target->isV4() && i->isV4()) || (target->isV6() && i->isV6()))) { mostMatchingPrefixBits = matchingPrefixBits; src = &(*i); } @@ -2391,7 +2441,7 @@ public: Dictionary<4096> nc; nc.load(nlcbuf.c_str()); Buffer<1024> allowManaged; - if (nc.get("allowManaged", allowManaged) && !allowManaged.size() == 0) { + if (nc.get("allowManaged", allowManaged) && allowManaged.size() > 0) { std::string addresses (allowManaged.begin(), allowManaged.size()); if (allowManaged.size() <= 5) { // untidy parsing for backward compatibility if (allowManaged[0] == '1' || allowManaged[0] == 't' || allowManaged[0] == 'T') { @@ -2625,7 +2675,6 @@ public: case ZT_STATE_OBJECT_NETWORK_CONFIG: OSUtils::ztsnprintf(dirname,sizeof(dirname),"%s" ZT_PATH_SEPARATOR_S "networks.d",_homePath.c_str()); OSUtils::ztsnprintf(p,sizeof(p),"%s" ZT_PATH_SEPARATOR_S "%.16llx.conf",dirname,(unsigned long long)id[0]); - secure = true; break; case ZT_STATE_OBJECT_PEER: OSUtils::ztsnprintf(dirname,sizeof(dirname),"%s" ZT_PATH_SEPARATOR_S "peers.d",_homePath.c_str()); @@ -3044,9 +3093,6 @@ public: if (!strncmp(p->c_str(),ifname,p->length())) return false; } - if (!_node->bondController()->allowedToBind(std::string(ifname))) { - return false; - } } { // Check global blacklists @@ -3084,6 +3130,23 @@ public: return true; } + unsigned int _getRandomPort() + { + unsigned int randp = 0; + Utils::getSecureRandom(&randp,sizeof(randp)); + randp = 20000 + (randp % 45500); + for(int i=0;;++i) { + if (i > 1000) { + return 0; + } else if (++randp >= 65536) { + randp = 20000; + } + if (_trialBind(randp)) + break; + } + return 
randp; + } + bool _trialBind(unsigned int port) { struct sockaddr_in in4;